author    Paul Mundt <lethal@linux-sh.org>    2011-01-14 16:06:31 +0900
committer Paul Mundt <lethal@linux-sh.org>    2011-01-14 16:06:31 +0900
commit    c488a4731abb53aa1bab9fccd8a7472083159bfd (patch)
tree      db6d4a664a1e4b7685c1d2d79da63263f40adf7b /include/linux
parent    6d2ae89c36e2adab5cfa69fecb11290082817ac6 (diff)
parent    bba958783b1b4cb0a9420f4e11082467132a334c (diff)
Merge branch 'common/mmcif' into rmobile-latest
Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/compaction.h      |  25
-rw-r--r--  include/linux/device-mapper.h   |  12
-rw-r--r--  include/linux/dm-ioctl.h        |  14
-rw-r--r--  include/linux/dm-log-userspace.h |  13
-rw-r--r--  include/linux/gfp.h             |  15
-rw-r--r--  include/linux/gpio.h            |   6
-rw-r--r--  include/linux/huge_mm.h         | 179
-rw-r--r--  include/linux/irqdesc.h         |   2
-rw-r--r--  include/linux/kernel.h          |   7
-rw-r--r--  include/linux/kernel_stat.h     |  19
-rw-r--r--  include/linux/khugepaged.h      |  67
-rw-r--r--  include/linux/memcontrol.h      |  36
-rw-r--r--  include/linux/memory_hotplug.h  |  14
-rw-r--r--  include/linux/migrate.h         |  12
-rw-r--r--  include/linux/mm.h              | 140
-rw-r--r--  include/linux/mm_inline.h       |  19
-rw-r--r--  include/linux/mm_types.h        |   3
-rw-r--r--  include/linux/mmc/sh_mmcif.h    |   4
-rw-r--r--  include/linux/mmu_notifier.h    |  66
-rw-r--r--  include/linux/mmzone.h          |  14
-rw-r--r--  include/linux/page-flags.h      |  71
-rw-r--r--  include/linux/page_cgroup.h     |  54
-rw-r--r--  include/linux/pagemap.h         |   2
-rw-r--r--  include/linux/radix-tree.h      |  16
-rw-r--r--  include/linux/rmap.h            |   2
-rw-r--r--  include/linux/sched.h           |   6
-rw-r--r--  include/linux/swap.h            |   2
-rw-r--r--  include/linux/vmalloc.h         |  10
-rw-r--r--  include/linux/vmstat.h          |   7
29 files changed, 754 insertions(+), 83 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 5ac51552d908..dfa2ed4c0d26 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -11,6 +11,9 @@
/* The full zone was compacted */
#define COMPACT_COMPLETE 3
+#define COMPACT_MODE_DIRECT_RECLAIM 0
+#define COMPACT_MODE_KSWAPD 1
+
#ifdef CONFIG_COMPACTION
extern int sysctl_compact_memory;
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -21,7 +24,12 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
extern int fragmentation_index(struct zone *zone, unsigned int order);
extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
- int order, gfp_t gfp_mask, nodemask_t *mask);
+ int order, gfp_t gfp_mask, nodemask_t *mask,
+ bool sync);
+extern unsigned long compaction_suitable(struct zone *zone, int order);
+extern unsigned long compact_zone_order(struct zone *zone, int order,
+ gfp_t gfp_mask, bool sync,
+ int compact_mode);
/* Do not skip compaction more than 64 times */
#define COMPACT_MAX_DEFER_SHIFT 6
@@ -54,7 +62,20 @@ static inline bool compaction_deferred(struct zone *zone)
#else
static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
- int order, gfp_t gfp_mask, nodemask_t *nodemask)
+ int order, gfp_t gfp_mask, nodemask_t *nodemask,
+ bool sync)
+{
+ return COMPACT_CONTINUE;
+}
+
+static inline unsigned long compaction_suitable(struct zone *zone, int order)
+{
+ return COMPACT_SKIPPED;
+}
+
+static inline unsigned long compact_zone_order(struct zone *zone, int order,
+ gfp_t gfp_mask, bool sync,
+ int compact_mode)
{
return COMPACT_CONTINUE;
}
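
The compaction.h changes above add a 'sync' migration mode to try_to_compact_pages() and export compaction_suitable() and compact_zone_order() to other callers. A minimal sketch of how an allocation path might consult them, using illustrative example_* names that are not part of the patch:

#include <linux/compaction.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>

/* Skip compaction when the zone lacks enough free memory to be worth it. */
static bool example_zone_worth_compacting(struct zone *zone, int order)
{
	return compaction_suitable(zone, order) != COMPACT_SKIPPED;
}

/* The new 'sync' flag selects synchronous page migration. */
static unsigned long example_compact(struct zonelist *zonelist, int order,
				     gfp_t gfp_mask, nodemask_t *nodemask,
				     bool sync)
{
	return try_to_compact_pages(zonelist, order, gfp_mask, nodemask, sync);
}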
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 2970022faa63..272496d1fae4 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -193,6 +193,13 @@ struct dm_target {
char *error;
};
+/* Each target can link one of these into the table */
+struct dm_target_callbacks {
+ struct list_head list;
+ int (*congested_fn) (struct dm_target_callbacks *, int);
+ void (*unplug_fn)(struct dm_target_callbacks *);
+};
+
int dm_register_target(struct target_type *t);
void dm_unregister_target(struct target_type *t);
@@ -269,6 +276,11 @@ int dm_table_add_target(struct dm_table *t, const char *type,
sector_t start, sector_t len, char *params);
/*
+ * Target_ctr should call this if it needs to add any callbacks.
+ */
+void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb);
+
+/*
* Finally call this to make the table ready for use.
*/
int dm_table_complete(struct dm_table *t);
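
struct dm_target_callbacks lets a target hook congestion and unplug handling into its table. A sketch of a hypothetical target constructor wiring one in; the target and all example_* names are invented for illustration, and a real target would normally embed the callbacks in its per-device context:

#include <linux/device-mapper.h>
#include <linux/slab.h>

static int example_congested(struct dm_target_callbacks *cb, int bdi_bits)
{
	return 0;	/* this sketch never reports congestion */
}

static int example_target_ctr(struct dm_target *ti, unsigned int argc,
			      char **argv)
{
	struct dm_target_callbacks *cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
		return -ENOMEM;
	cb->congested_fn = example_congested;
	dm_table_add_target_callbacks(ti->table, cb);
	ti->private = cb;
	return 0;
}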
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index 49eab360d5d4..78bbf47bbb96 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -44,7 +44,7 @@
* Remove a device, destroy any tables.
*
* DM_DEV_RENAME:
- * Rename a device.
+ * Rename a device or set its uuid if none was previously supplied.
*
* DM_SUSPEND:
* This performs both suspend and resume, depending which flag is
@@ -267,9 +267,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
-#define DM_VERSION_MINOR 18
-#define DM_VERSION_PATCHLEVEL 0
-#define DM_VERSION_EXTRA "-ioctl (2010-06-29)"
+#define DM_VERSION_MINOR 19
+#define DM_VERSION_PATCHLEVEL 1
+#define DM_VERSION_EXTRA "-ioctl (2011-01-07)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
@@ -322,4 +322,10 @@ enum {
*/
#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */
+/*
+ * If set, rename changes the uuid not the name. Only permitted
+ * if no uuid was previously supplied: an existing uuid cannot be changed.
+ */
+#define DM_UUID_FLAG (1 << 14) /* In */
+
#endif /* _LINUX_DM_IOCTL_H */
diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h
index 0c3c3a2110c4..eeace7d3ff15 100644
--- a/include/linux/dm-log-userspace.h
+++ b/include/linux/dm-log-userspace.h
@@ -370,6 +370,16 @@
#define DM_ULOG_REQUEST_TYPE(request_type) \
(DM_ULOG_REQUEST_MASK & (request_type))
+/*
+ * DM_ULOG_REQUEST_VERSION is incremented when there is a
+ * change to the way information is passed between kernel
+ * and userspace. This could be a structure change of
+ * dm_ulog_request or a change in the way requests are
+ * issued/handled. Changes are outlined here:
+ * version 1: Initial implementation
+ */
+#define DM_ULOG_REQUEST_VERSION 1
+
struct dm_ulog_request {
/*
* The local unique identifier (luid) and the universally unique
@@ -383,8 +393,9 @@ struct dm_ulog_request {
*/
uint64_t luid;
char uuid[DM_UUID_LEN];
- char padding[7]; /* Padding because DM_UUID_LEN = 129 */
+ char padding[3]; /* Padding because DM_UUID_LEN = 129 */
+ uint32_t version; /* See DM_ULOG_REQUEST_VERSION */
int32_t error; /* Used to report back processing errors */
uint32_t seq; /* Sequence number for request */
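
With the version field in place, a userspace log daemon can reject requests it does not understand before touching the rest of the packet. A short sketch; the function name and error-code choice are illustrative:

static int example_check_request(struct dm_ulog_request *rq)
{
	if (rq->version != DM_ULOG_REQUEST_VERSION) {
		rq->error = -EINVAL;	/* illustrative error code */
		return -1;
	}
	return 0;
}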
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f54adfcbec9c..a3b148a91874 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -34,6 +34,7 @@ struct vm_area_struct;
#else
#define ___GFP_NOTRACK 0
#endif
+#define ___GFP_NO_KSWAPD 0x400000u
/*
* GFP bitmasks..
@@ -81,13 +82,15 @@ struct vm_area_struct;
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */
+#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD)
+
/*
* This may seem redundant, but it's a way of annotating false positives vs.
* allocations that simply cannot be supported (e.g. page tables).
*/
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
-#define __GFP_BITS_SHIFT 22 /* Room for 22 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 23 /* Room for 23 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/* This equals 0, but use constants in case they ever change */
@@ -106,6 +109,9 @@ struct vm_area_struct;
__GFP_HARDWALL | __GFP_HIGHMEM | \
__GFP_MOVABLE)
#define GFP_IOFS (__GFP_IO | __GFP_FS)
+#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
+ __GFP_NO_KSWAPD)
#ifdef CONFIG_NUMA
#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
@@ -325,14 +331,17 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
{
return alloc_pages_current(gfp_mask, order);
}
-extern struct page *alloc_page_vma(gfp_t gfp_mask,
+extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
struct vm_area_struct *vma, unsigned long addr);
#else
#define alloc_pages(gfp_mask, order) \
alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
+#define alloc_pages_vma(gfp_mask, order, vma, addr) \
+ alloc_pages(gfp_mask, order)
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
+#define alloc_page_vma(gfp_mask, vma, addr) \
+ alloc_pages_vma(gfp_mask, 0, vma, addr)
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
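
GFP_TRANSHUGE and the order-aware alloc_pages_vma() are what the transparent-hugepage code uses to allocate PMD-sized pages without waking kswapd. A sketch of that pattern, assuming HPAGE_PMD_ORDER from the new huge_mm.h; the wrapper name is illustrative:

#include <linux/gfp.h>
#include <linux/mm.h>	/* pulls in huge_mm.h for HPAGE_PMD_ORDER */

static struct page *example_alloc_thp(struct vm_area_struct *vma,
				      unsigned long haddr)
{
	/* Large, optional allocation: no retry, no warning, no kswapd wakeup. */
	return alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma, haddr);
}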
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index f79d67f413e4..4b47ed96f131 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -30,7 +30,7 @@ static inline int gpio_is_valid(int number)
return 0;
}
-static inline int __must_check gpio_request(unsigned gpio, const char *label)
+static inline int gpio_request(unsigned gpio, const char *label)
{
return -ENOSYS;
}
@@ -62,12 +62,12 @@ static inline void gpio_free_array(struct gpio *array, size_t num)
WARN_ON(1);
}
-static inline int __must_check gpio_direction_input(unsigned gpio)
+static inline int gpio_direction_input(unsigned gpio)
{
return -ENOSYS;
}
-static inline int __must_check gpio_direction_output(unsigned gpio, int value)
+static inline int gpio_direction_output(unsigned gpio, int value)
{
return -ENOSYS;
}
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
new file mode 100644
index 000000000000..8e6c8c42bc3c
--- /dev/null
+++ b/include/linux/huge_mm.h
@@ -0,0 +1,179 @@
+#ifndef _LINUX_HUGE_MM_H
+#define _LINUX_HUGE_MM_H
+
+extern int do_huge_pmd_anonymous_page(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmd,
+ unsigned int flags);
+extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
+ struct vm_area_struct *vma);
+extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmd,
+ pmd_t orig_pmd);
+extern pgtable_t get_pmd_huge_pte(struct mm_struct *mm);
+extern struct page *follow_trans_huge_pmd(struct mm_struct *mm,
+ unsigned long addr,
+ pmd_t *pmd,
+ unsigned int flags);
+extern int zap_huge_pmd(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
+ pmd_t *pmd);
+extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ unsigned char *vec);
+extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr, pgprot_t newprot);
+
+enum transparent_hugepage_flag {
+ TRANSPARENT_HUGEPAGE_FLAG,
+ TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
+ TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
+ TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
+ TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
+#ifdef CONFIG_DEBUG_VM
+ TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
+#endif
+};
+
+enum page_check_address_pmd_flag {
+ PAGE_CHECK_ADDRESS_PMD_FLAG,
+ PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG,
+ PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG,
+};
+extern pmd_t *page_check_address_pmd(struct page *page,
+ struct mm_struct *mm,
+ unsigned long address,
+ enum page_check_address_pmd_flag flag);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define HPAGE_PMD_SHIFT HPAGE_SHIFT
+#define HPAGE_PMD_MASK HPAGE_MASK
+#define HPAGE_PMD_SIZE HPAGE_SIZE
+
+#define transparent_hugepage_enabled(__vma) \
+ ((transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_FLAG) || \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \
+ ((__vma)->vm_flags & VM_HUGEPAGE))) && \
+ !((__vma)->vm_flags & VM_NOHUGEPAGE))
+#define transparent_hugepage_defrag(__vma) \
+ ((transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) && \
+ (__vma)->vm_flags & VM_HUGEPAGE))
+#ifdef CONFIG_DEBUG_VM
+#define transparent_hugepage_debug_cow() \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG))
+#else /* CONFIG_DEBUG_VM */
+#define transparent_hugepage_debug_cow() 0
+#endif /* CONFIG_DEBUG_VM */
+
+extern unsigned long transparent_hugepage_flags;
+extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pmd_t *dst_pmd, pmd_t *src_pmd,
+ struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end);
+extern int handle_pte_fault(struct mm_struct *mm,
+ struct vm_area_struct *vma, unsigned long address,
+ pte_t *pte, pmd_t *pmd, unsigned int flags);
+extern int split_huge_page(struct page *page);
+extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
+#define split_huge_page_pmd(__mm, __pmd) \
+ do { \
+ pmd_t *____pmd = (__pmd); \
+ if (unlikely(pmd_trans_huge(*____pmd))) \
+ __split_huge_page_pmd(__mm, ____pmd); \
+ } while (0)
+#define wait_split_huge_page(__anon_vma, __pmd) \
+ do { \
+ pmd_t *____pmd = (__pmd); \
+ spin_unlock_wait(&(__anon_vma)->root->lock); \
+ /* \
+ * spin_unlock_wait() is just a loop in C and so the \
+ * CPU can reorder anything around it. \
+ */ \
+ smp_mb(); \
+ BUG_ON(pmd_trans_splitting(*____pmd) || \
+ pmd_trans_huge(*____pmd)); \
+ } while (0)
+#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
+#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
+#if HPAGE_PMD_ORDER > MAX_ORDER
+#error "hugepages can't be allocated by the buddy allocator"
+#endif
+extern int hugepage_madvise(struct vm_area_struct *vma,
+ unsigned long *vm_flags, int advice);
+extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ long adjust_next);
+static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ long adjust_next)
+{
+ if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+ return;
+ __vma_adjust_trans_huge(vma, start, end, adjust_next);
+}
+static inline int hpage_nr_pages(struct page *page)
+{
+ if (unlikely(PageTransHuge(page)))
+ return HPAGE_PMD_NR;
+ return 1;
+}
+static inline struct page *compound_trans_head(struct page *page)
+{
+ if (PageTail(page)) {
+ struct page *head;
+ head = page->first_page;
+ smp_rmb();
+ /*
+ * head may be a dangling pointer.
+ * __split_huge_page_refcount clears PageTail before
+ * overwriting first_page, so if PageTail is still
+ * there it means the head pointer isn't dangling.
+ */
+ if (PageTail(page))
+ return head;
+ }
+ return page;
+}
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+#define HPAGE_PMD_SHIFT ({ BUG(); 0; })
+#define HPAGE_PMD_MASK ({ BUG(); 0; })
+#define HPAGE_PMD_SIZE ({ BUG(); 0; })
+
+#define hpage_nr_pages(x) 1
+
+#define transparent_hugepage_enabled(__vma) 0
+
+#define transparent_hugepage_flags 0UL
+static inline int split_huge_page(struct page *page)
+{
+ return 0;
+}
+#define split_huge_page_pmd(__mm, __pmd) \
+ do { } while (0)
+#define wait_split_huge_page(__anon_vma, __pmd) \
+ do { } while (0)
+#define compound_trans_head(page) compound_head(page)
+static inline int hugepage_madvise(struct vm_area_struct *vma,
+ unsigned long *vm_flags, int advice)
+{
+ BUG();
+ return 0;
+}
+static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ long adjust_next)
+{
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#endif /* _LINUX_HUGE_MM_H */
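
split_huge_page_pmd() is the guard a page-table walker uses before mapping ptes under a pmd that might be transparent-huge. A minimal sketch of that pattern, with an illustrative walker name:

#include <linux/mm.h>
#include <linux/huge_mm.h>
#include <asm/pgtable.h>

static int example_pte_young(struct mm_struct *mm, pmd_t *pmd,
			     unsigned long addr)
{
	pte_t *pte;
	int young;

	split_huge_page_pmd(mm, pmd);	/* no-op unless pmd_trans_huge() */
	if (pmd_none_or_clear_bad(pmd))
		return 0;
	pte = pte_offset_map(pmd, addr);
	young = pte_young(*pte);
	pte_unmap(pte);
	return young;
}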
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 979c68cc7458..6a64c6fa81af 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -57,7 +57,7 @@ struct irq_desc {
#endif
struct timer_rand_state *timer_rand_state;
- unsigned int *kstat_irqs;
+ unsigned int __percpu *kstat_irqs;
irq_flow_handler_t handle_irq;
struct irqaction *action; /* IRQ action list */
unsigned int status; /* IRQ status */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 57dac7022b63..5a9d9059520b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -600,6 +600,13 @@ struct sysinfo {
#define NUMA_BUILD 0
#endif
+/* This helps us avoid #ifdef CONFIG_COMPACTION */
+#ifdef CONFIG_COMPACTION
+#define COMPACTION_BUILD 1
+#else
+#define COMPACTION_BUILD 0
+#endif
+
/* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
# define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 44e83ba12b5b..0cce2db580c3 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -46,16 +46,14 @@ DECLARE_PER_CPU(struct kernel_stat, kstat);
extern unsigned long long nr_context_switches(void);
#ifndef CONFIG_GENERIC_HARDIRQS
-#define kstat_irqs_this_cpu(irq) \
- (this_cpu_read(kstat.irqs[irq])
struct irq_desc;
static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
struct irq_desc *desc)
{
- kstat_this_cpu.irqs[irq]++;
- kstat_this_cpu.irqs_sum++;
+ __this_cpu_inc(kstat.irqs[irq]);
+ __this_cpu_inc(kstat.irqs_sum);
}
static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
@@ -65,17 +63,18 @@ static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
#else
#include <linux/irq.h>
extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
-#define kstat_irqs_this_cpu(DESC) \
- ((DESC)->kstat_irqs[smp_processor_id()])
-#define kstat_incr_irqs_this_cpu(irqno, DESC) do {\
- ((DESC)->kstat_irqs[smp_processor_id()]++);\
- kstat_this_cpu.irqs_sum++; } while (0)
+
+#define kstat_incr_irqs_this_cpu(irqno, DESC) \
+do { \
+ __this_cpu_inc(*(DESC)->kstat_irqs); \
+ __this_cpu_inc(kstat.irqs_sum); \
+} while (0)
#endif
static inline void kstat_incr_softirqs_this_cpu(unsigned int irq)
{
- kstat_this_cpu.softirqs[irq]++;
+ __this_cpu_inc(kstat.softirqs[irq]);
}
static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
new file mode 100644
index 000000000000..6b394f0b5148
--- /dev/null
+++ b/include/linux/khugepaged.h
@@ -0,0 +1,67 @@
+#ifndef _LINUX_KHUGEPAGED_H
+#define _LINUX_KHUGEPAGED_H
+
+#include <linux/sched.h> /* MMF_VM_HUGEPAGE */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern int __khugepaged_enter(struct mm_struct *mm);
+extern void __khugepaged_exit(struct mm_struct *mm);
+extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma);
+
+#define khugepaged_enabled() \
+ (transparent_hugepage_flags & \
+ ((1<<TRANSPARENT_HUGEPAGE_FLAG) | \
+ (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)))
+#define khugepaged_always() \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_FLAG))
+#define khugepaged_req_madv() \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
+#define khugepaged_defrag() \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG))
+
+static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+ if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags))
+ return __khugepaged_enter(mm);
+ return 0;
+}
+
+static inline void khugepaged_exit(struct mm_struct *mm)
+{
+ if (test_bit(MMF_VM_HUGEPAGE, &mm->flags))
+ __khugepaged_exit(mm);
+}
+
+static inline int khugepaged_enter(struct vm_area_struct *vma)
+{
+ if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
+ if ((khugepaged_always() ||
+ (khugepaged_req_madv() &&
+ vma->vm_flags & VM_HUGEPAGE)) &&
+ !(vma->vm_flags & VM_NOHUGEPAGE))
+ if (__khugepaged_enter(vma->vm_mm))
+ return -ENOMEM;
+ return 0;
+}
+#else /* CONFIG_TRANSPARENT_HUGEPAGE */
+static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+ return 0;
+}
+static inline void khugepaged_exit(struct mm_struct *mm)
+{
+}
+static inline int khugepaged_enter(struct vm_area_struct *vma)
+{
+ return 0;
+}
+static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+{
+ return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#endif /* _LINUX_KHUGEPAGED_H */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 159a0762aeaf..6a576f989437 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -25,6 +25,11 @@ struct page_cgroup;
struct page;
struct mm_struct;
+/* Stats that can be updated by kernel. */
+enum mem_cgroup_page_stat_item {
+ MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
+};
+
extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct list_head *dst,
unsigned long *scanned, int order,
@@ -93,7 +98,7 @@ extern int
mem_cgroup_prepare_migration(struct page *page,
struct page *newpage, struct mem_cgroup **ptr);
extern void mem_cgroup_end_migration(struct mem_cgroup *mem,
- struct page *oldpage, struct page *newpage);
+ struct page *oldpage, struct page *newpage, bool migration_ok);
/*
* For memory reclaim.
@@ -121,7 +126,22 @@ static inline bool mem_cgroup_disabled(void)
return false;
}
-void mem_cgroup_update_file_mapped(struct page *page, int val);
+void mem_cgroup_update_page_stat(struct page *page,
+ enum mem_cgroup_page_stat_item idx,
+ int val);
+
+static inline void mem_cgroup_inc_page_stat(struct page *page,
+ enum mem_cgroup_page_stat_item idx)
+{
+ mem_cgroup_update_page_stat(page, idx, 1);
+}
+
+static inline void mem_cgroup_dec_page_stat(struct page *page,
+ enum mem_cgroup_page_stat_item idx)
+{
+ mem_cgroup_update_page_stat(page, idx, -1);
+}
+
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
gfp_t gfp_mask);
u64 mem_cgroup_get_limit(struct mem_cgroup *mem);
@@ -231,8 +251,7 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
}
static inline void mem_cgroup_end_migration(struct mem_cgroup *mem,
- struct page *oldpage,
- struct page *newpage)
+ struct page *oldpage, struct page *newpage, bool migration_ok)
{
}
@@ -293,8 +312,13 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}
-static inline void mem_cgroup_update_file_mapped(struct page *page,
- int val)
+static inline void mem_cgroup_inc_page_stat(struct page *page,
+ enum mem_cgroup_page_stat_item idx)
+{
+}
+
+static inline void mem_cgroup_dec_page_stat(struct page *page,
+ enum mem_cgroup_page_stat_item idx)
{
}
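
The FILE_MAPPED-specific helper is replaced by generic inc/dec page-stat wrappers keyed by mem_cgroup_page_stat_item. A sketch of the rmap-side accounting they enable; the calling function is illustrative:

#include <linux/memcontrol.h>

static void example_account_file_mapped(struct page *page, bool mapped)
{
	if (mapped)
		mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
	else
		mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
}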
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 31c237a00c48..24376fe7ee68 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,12 +13,16 @@ struct mem_section;
#ifdef CONFIG_MEMORY_HOTPLUG
/*
- * Types for free bootmem.
- * The normal smallest mapcount is -1. Here is smaller value than it.
+ * Types for free bootmem stored in page->lru.next. These have to be in
+ * some random range in unsigned long space for debugging purposes.
*/
-#define SECTION_INFO (-1 - 1)
-#define MIX_SECTION_INFO (-1 - 2)
-#define NODE_INFO (-1 - 3)
+enum {
+ MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
+ SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
+ MIX_SECTION_INFO,
+ NODE_INFO,
+ MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
+};
/*
* pgdat resizing functions
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 085527fb8261..e39aeecfe9a2 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -13,9 +13,11 @@ extern void putback_lru_pages(struct list_head *l);
extern int migrate_page(struct address_space *,
struct page *, struct page *);
extern int migrate_pages(struct list_head *l, new_page_t x,
- unsigned long private, int offlining);
+ unsigned long private, bool offlining,
+ bool sync);
extern int migrate_huge_pages(struct list_head *l, new_page_t x,
- unsigned long private, int offlining);
+ unsigned long private, bool offlining,
+ bool sync);
extern int fail_migrate_page(struct address_space *,
struct page *, struct page *);
@@ -33,9 +35,11 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
static inline void putback_lru_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t x,
- unsigned long private, int offlining) { return -ENOSYS; }
+ unsigned long private, bool offlining,
+ bool sync) { return -ENOSYS; }
static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
- unsigned long private, int offlining) { return -ENOSYS; }
+ unsigned long private, bool offlining,
+ bool sync) { return -ENOSYS; }
static inline int migrate_prep(void) { return -ENOSYS; }
static inline int migrate_prep_local(void) { return -ENOSYS; }
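
migrate_pages() and migrate_huge_pages() now take bool offlining and sync arguments. A sketch of draining an isolated-page list with the widened signature; new_page is a caller-supplied allocator callback and the wrapper name is illustrative:

#include <linux/migrate.h>
#include <linux/mm.h>

static int example_migrate_list(struct list_head *pagelist,
				new_page_t new_page, unsigned long private)
{
	int ret = migrate_pages(pagelist, new_page, private,
				false /* offlining */, true /* sync */);
	if (ret)
		putback_lru_pages(pagelist);
	return ret;
}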
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 721f451c3029..956a35532f47 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -14,6 +14,7 @@
#include <linux/mm_types.h>
#include <linux/range.h>
#include <linux/pfn.h>
+#include <linux/bit_spinlock.h>
struct mempolicy;
struct anon_vma;
@@ -82,6 +83,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_GROWSUP 0x00000200
#else
#define VM_GROWSUP 0x00000000
+#define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */
#endif
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
@@ -101,7 +103,11 @@ extern unsigned int kobjsize(const void *objp);
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
+#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
+#else
+#define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */
+#endif
#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */
@@ -242,6 +248,7 @@ struct inode;
* files which need it (119 of them)
*/
#include <linux/page-flags.h>
+#include <linux/huge_mm.h>
/*
* Methods to modify the page usage count.
@@ -305,6 +312,39 @@ static inline int is_vmalloc_or_module_addr(const void *x)
}
#endif
+static inline void compound_lock(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ bit_spin_lock(PG_compound_lock, &page->flags);
+#endif
+}
+
+static inline void compound_unlock(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ bit_spin_unlock(PG_compound_lock, &page->flags);
+#endif
+}
+
+static inline unsigned long compound_lock_irqsave(struct page *page)
+{
+ unsigned long uninitialized_var(flags);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ local_irq_save(flags);
+ compound_lock(page);
+#endif
+ return flags;
+}
+
+static inline void compound_unlock_irqrestore(struct page *page,
+ unsigned long flags)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ compound_unlock(page);
+ local_irq_restore(flags);
+#endif
+}
+
static inline struct page *compound_head(struct page *page)
{
if (unlikely(PageTail(page)))
@@ -319,9 +359,29 @@ static inline int page_count(struct page *page)
static inline void get_page(struct page *page)
{
- page = compound_head(page);
- VM_BUG_ON(atomic_read(&page->_count) == 0);
+ /*
+ * Getting a normal page or the head of a compound page
+ * requires to already have an elevated page->_count. Only if
+ * we're getting a tail page, the elevated page->_count is
+ * required only in the head page, so for tail pages the
+ * bugcheck only verifies that the page->_count isn't
+ * negative.
+ */
+ VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page));
atomic_inc(&page->_count);
+ /*
+ * Getting a tail page will elevate both the head and tail
+ * page->_count(s).
+ */
+ if (unlikely(PageTail(page))) {
+ /*
+ * This is safe only because
+ * __split_huge_page_refcount can't run under
+ * get_page().
+ */
+ VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+ atomic_inc(&page->first_page->_count);
+ }
}
static inline struct page *virt_to_head_page(const void *x)
@@ -339,6 +399,27 @@ static inline void init_page_count(struct page *page)
atomic_set(&page->_count, 1);
}
+/*
+ * PageBuddy() indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ */
+static inline int PageBuddy(struct page *page)
+{
+ return atomic_read(&page->_mapcount) == -2;
+}
+
+static inline void __SetPageBuddy(struct page *page)
+{
+ VM_BUG_ON(atomic_read(&page->_mapcount) != -1);
+ atomic_set(&page->_mapcount, -2);
+}
+
+static inline void __ClearPageBuddy(struct page *page)
+{
+ VM_BUG_ON(!PageBuddy(page));
+ atomic_set(&page->_mapcount, -1);
+}
+
void put_page(struct page *page);
void put_pages_list(struct list_head *pages);
@@ -370,12 +451,39 @@ static inline int compound_order(struct page *page)
return (unsigned long)page[1].lru.prev;
}
+static inline int compound_trans_order(struct page *page)
+{
+ int order;
+ unsigned long flags;
+
+ if (!PageHead(page))
+ return 0;
+
+ flags = compound_lock_irqsave(page);
+ order = compound_order(page);
+ compound_unlock_irqrestore(page, flags);
+ return order;
+}
+
static inline void set_compound_order(struct page *page, unsigned long order)
{
page[1].lru.prev = (void *)order;
}
/*
+ * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when
+ * servicing faults for write access. In the normal case, we always want
+ * pte_mkwrite. But get_user_pages can cause write faults for mappings
+ * that do not have writing enabled, when used by access_process_vm.
+ */
+static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_flags & VM_WRITE))
+ pte = pte_mkwrite(pte);
+ return pte;
+}
+
+/*
* Multiple processes may "see" the same page. E.g. for untouched
* mappings of /dev/null, all processes see the same page full of
* zeroes, and text pages of executables and shared libraries have
@@ -657,7 +765,7 @@ static inline struct address_space *page_mapping(struct page *page)
VM_BUG_ON(PageSlab(page));
if (unlikely(PageSwapCache(page)))
mapping = &swapper_space;
- else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
+ else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
mapping = NULL;
return mapping;
}
@@ -1064,7 +1172,8 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
#endif
-int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long address);
int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
/*
@@ -1133,16 +1242,18 @@ static inline void pgtable_page_dtor(struct page *page)
pte_unmap(pte); \
} while (0)
-#define pte_alloc_map(mm, pmd, address) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
- NULL: pte_offset_map(pmd, address))
+#define pte_alloc_map(mm, vma, pmd, address) \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma, \
+ pmd, address))? \
+ NULL: pte_offset_map(pmd, address))
#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL, \
+ pmd, address))? \
NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
#define pte_alloc_kernel(pmd, address) \
- ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+ ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
NULL: pte_offset_kernel(pmd, address))
extern void free_area_init(unsigned long * zones_size);
@@ -1415,6 +1526,8 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
#define FOLL_GET 0x04 /* do get_page on page */
#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */
#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */
+#define FOLL_MLOCK 0x40 /* mark page as mlocked */
+#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
@@ -1518,5 +1631,14 @@ static inline int is_hwpoison_address(unsigned long addr)
extern void dump_page(struct page *page);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
+extern void clear_huge_page(struct page *page,
+ unsigned long addr,
+ unsigned int pages_per_huge_page);
+extern void copy_user_huge_page(struct page *dst, struct page *src,
+ unsigned long addr, struct vm_area_struct *vma,
+ unsigned int pages_per_huge_page);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
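
maybe_mkwrite() is now exposed in mm.h so the huge-page fault paths can share it. The usual pattern when installing a freshly faulted anonymous page looks roughly like this; the function name is illustrative:

#include <linux/mm.h>
#include <asm/pgtable.h>

static pte_t example_mk_anon_pte(struct page *page,
				 struct vm_area_struct *vma)
{
	pte_t entry = mk_pte(page, vma->vm_page_prot);

	/* Only make the pte writable when the vma itself allows writes. */
	return maybe_mkwrite(pte_mkdirty(entry), vma);
}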
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 8835b877b8db..8f7d24712dc1 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -1,6 +1,8 @@
#ifndef LINUX_MM_INLINE_H
#define LINUX_MM_INLINE_H
+#include <linux/huge_mm.h>
+
/**
* page_is_file_cache - should the page be on a file LRU or anon LRU?
* @page: the page to test
@@ -20,18 +22,25 @@ static inline int page_is_file_cache(struct page *page)
}
static inline void
-add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+__add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
+ struct list_head *head)
{
- list_add(&page->lru, &zone->lru[l].list);
- __inc_zone_state(zone, NR_LRU_BASE + l);
+ list_add(&page->lru, head);
+ __mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
mem_cgroup_add_lru_list(page, l);
}
static inline void
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+{
+ __add_page_to_lru_list(zone, page, l, &zone->lru[l].list);
+}
+
+static inline void
del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
{
list_del(&page->lru);
- __dec_zone_state(zone, NR_LRU_BASE + l);
+ __mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
mem_cgroup_del_lru_list(page, l);
}
@@ -66,7 +75,7 @@ del_page_from_lru(struct zone *zone, struct page *page)
l += LRU_ACTIVE;
}
}
- __dec_zone_state(zone, NR_LRU_BASE + l);
+ __mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
mem_cgroup_del_lru_list(page, l);
}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bb7288a782fd..26bc4e2cd275 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -310,6 +310,9 @@ struct mm_struct {
#ifdef CONFIG_MMU_NOTIFIER
struct mmu_notifier_mm *mmu_notifier_mm;
#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ pgtable_t pmd_huge_pte; /* protected by page_table_lock */
+#endif
/* How many tasks sharing this mm are OOM_DISABLE */
atomic_t oom_disable_count;
};
diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index bf173502d744..38d393092812 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -94,12 +94,12 @@ struct sh_mmcif_plat_data {
static inline u32 sh_mmcif_readl(void __iomem *addr, int reg)
{
- return readl(addr + reg);
+ return __raw_readl(addr + reg);
}
static inline void sh_mmcif_writel(void __iomem *addr, int reg, u32 val)
{
- writel(val, addr + reg);
+ __raw_writel(val, addr + reg);
}
#define SH_MMCIF_BBS 512 /* boot block size */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 43dcfbdc39de..cc2e7dfea9d7 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -62,6 +62,16 @@ struct mmu_notifier_ops {
unsigned long address);
/*
+ * test_young is called to check the young/accessed bitflag in
+ * the secondary pte. This is used to know if the page is
+ * frequently used without actually clearing the flag or tearing
+ * down the secondary mapping on the page.
+ */
+ int (*test_young)(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long address);
+
+ /*
* change_pte is called in cases that pte mapping to page is changed:
* for example, when ksm remaps pte to point to a new shared page.
*/
@@ -163,6 +173,8 @@ extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
unsigned long address);
+extern int __mmu_notifier_test_young(struct mm_struct *mm,
+ unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
@@ -186,6 +198,14 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
return 0;
}
+static inline int mmu_notifier_test_young(struct mm_struct *mm,
+ unsigned long address)
+{
+ if (mm_has_notifiers(mm))
+ return __mmu_notifier_test_young(mm, address);
+ return 0;
+}
+
static inline void mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte)
{
@@ -243,6 +263,32 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
__pte; \
})
+#define pmdp_clear_flush_notify(__vma, __address, __pmdp) \
+({ \
+ pmd_t __pmd; \
+ struct vm_area_struct *___vma = __vma; \
+ unsigned long ___address = __address; \
+ VM_BUG_ON(__address & ~HPAGE_PMD_MASK); \
+ mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address, \
+ (__address)+HPAGE_PMD_SIZE);\
+ __pmd = pmdp_clear_flush(___vma, ___address, __pmdp); \
+ mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address, \
+ (__address)+HPAGE_PMD_SIZE); \
+ __pmd; \
+})
+
+#define pmdp_splitting_flush_notify(__vma, __address, __pmdp) \
+({ \
+ struct vm_area_struct *___vma = __vma; \
+ unsigned long ___address = __address; \
+ VM_BUG_ON(__address & ~HPAGE_PMD_MASK); \
+ mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address, \
+ (__address)+HPAGE_PMD_SIZE);\
+ pmdp_splitting_flush(___vma, ___address, __pmdp); \
+ mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address, \
+ (__address)+HPAGE_PMD_SIZE); \
+})
+
#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \
({ \
int __young; \
@@ -254,6 +300,17 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
__young; \
})
+#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp) \
+({ \
+ int __young; \
+ struct vm_area_struct *___vma = __vma; \
+ unsigned long ___address = __address; \
+ __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \
+ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
+ ___address); \
+ __young; \
+})
+
#define set_pte_at_notify(__mm, __address, __ptep, __pte) \
({ \
struct mm_struct *___mm = __mm; \
@@ -276,6 +333,12 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
return 0;
}
+static inline int mmu_notifier_test_young(struct mm_struct *mm,
+ unsigned long address)
+{
+ return 0;
+}
+
static inline void mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte)
{
@@ -305,7 +368,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
}
#define ptep_clear_flush_young_notify ptep_clear_flush_young
+#define pmdp_clear_flush_young_notify pmdp_clear_flush_young
#define ptep_clear_flush_notify ptep_clear_flush
+#define pmdp_clear_flush_notify pmdp_clear_flush
+#define pmdp_splitting_flush_notify pmdp_splitting_flush
#define set_pte_at_notify set_pte_at
#endif /* CONFIG_MMU_NOTIFIER */
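
The new pmd-level notify wrappers mirror the existing pte ones, invalidating secondary MMUs over a full HPAGE_PMD_SIZE range. A sketch of clearing a huge pmd with notification; the caller name is illustrative and haddr is assumed to be HPAGE_PMD_SIZE aligned, as the macro asserts:

#include <linux/mm.h>
#include <linux/mmu_notifier.h>

static pmd_t example_clear_huge_pmd(struct vm_area_struct *vma,
				    unsigned long haddr, pmd_t *pmd)
{
	/* Invalidates [haddr, haddr + HPAGE_PMD_SIZE) on secondary MMUs. */
	return pmdp_clear_flush_notify(vma, haddr, pmd);
}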
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 39c24ebe9cfd..02ecb0189b1d 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -114,6 +114,7 @@ enum zone_stat_item {
NUMA_LOCAL, /* allocation from local node */
NUMA_OTHER, /* allocation from other node */
#endif
+ NR_ANON_TRANSPARENT_HUGEPAGES,
NR_VM_ZONE_STAT_ITEMS };
/*
@@ -458,12 +459,6 @@ static inline int zone_is_oom_locked(const struct zone *zone)
return test_bit(ZONE_OOM_LOCKED, &zone->flags);
}
-#ifdef CONFIG_SMP
-unsigned long zone_nr_free_pages(struct zone *zone);
-#else
-#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES)
-#endif /* CONFIG_SMP */
-
/*
* The "priority" of VM scanning is how much of the queues we will scan in one
* go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -645,6 +640,7 @@ typedef struct pglist_data {
wait_queue_head_t kswapd_wait;
struct task_struct *kswapd;
int kswapd_max_order;
+ enum zone_type classzone_idx;
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
@@ -660,8 +656,10 @@ typedef struct pglist_data {
extern struct mutex zonelists_mutex;
void build_all_zonelists(void *data);
-void wakeup_kswapd(struct zone *zone, int order);
-int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
+bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ int classzone_idx, int alloc_flags);
+bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags);
enum memmap_context {
MEMMAP_EARLY,
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5f38c460367e..0db8037e2725 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -48,9 +48,6 @@
* struct page (these bits with information) are always mapped into kernel
* address space...
*
- * PG_buddy is set to indicate that the page is free and in the buddy system
- * (see mm/page_alloc.c).
- *
* PG_hwpoison indicates that a page got corrupted in hardware and contains
* data with incorrect ECC bits that triggered a machine check. Accessing is
* not safe since it may cause another machine check. Don't touch!
@@ -96,7 +93,6 @@ enum pageflags {
PG_swapcache, /* Swap page: swp_entry_t in private */
PG_mappedtodisk, /* Has blocks allocated on-disk */
PG_reclaim, /* To be reclaimed asap */
- PG_buddy, /* Page is free, on buddy lists */
PG_swapbacked, /* Page is backed by RAM/swap */
PG_unevictable, /* Page is "unevictable" */
#ifdef CONFIG_MMU
@@ -108,6 +104,9 @@ enum pageflags {
#ifdef CONFIG_MEMORY_FAILURE
PG_hwpoison, /* hardware poisoned page. Don't touch */
#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ PG_compound_lock,
+#endif
__NR_PAGEFLAGS,
/* Filesystems */
@@ -198,7 +197,7 @@ static inline int __TestClearPage##uname(struct page *page) { return 0; }
struct page; /* forward declaration */
TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked)
-PAGEFLAG(Error, error)
+PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error)
PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
@@ -230,7 +229,6 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
* risky: they bypass page accounting.
*/
TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
-__PAGEFLAG(Buddy, buddy)
PAGEFLAG(MappedToDisk, mappedtodisk)
/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
@@ -344,7 +342,7 @@ static inline void set_page_writeback(struct page *page)
* tests can be used in performance sensitive paths. PageCompound is
* generally not used in hot code paths.
*/
-__PAGEFLAG(Head, head)
+__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head)
__PAGEFLAG(Tail, tail)
static inline int PageCompound(struct page *page)
@@ -352,6 +350,13 @@ static inline int PageCompound(struct page *page)
return page->flags & ((1L << PG_head) | (1L << PG_tail));
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline void ClearPageCompound(struct page *page)
+{
+ BUG_ON(!PageHead(page));
+ ClearPageHead(page);
+}
+#endif
#else
/*
* Reduce page flag use as much as possible by overlapping
@@ -389,14 +394,61 @@ static inline void __ClearPageTail(struct page *page)
page->flags &= ~PG_head_tail_mask;
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline void ClearPageCompound(struct page *page)
+{
+ BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound));
+ clear_bit(PG_compound, &page->flags);
+}
+#endif
+
#endif /* !PAGEFLAGS_EXTENDED */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * PageHuge() only returns true for hugetlbfs pages, but not for
+ * normal or transparent huge pages.
+ *
+ * PageTransHuge() returns true for both transparent huge and
+ * hugetlbfs pages, but not normal pages. PageTransHuge() can only be
+ * called in the core VM paths where hugetlbfs pages can't exist.
+ */
+static inline int PageTransHuge(struct page *page)
+{
+ VM_BUG_ON(PageTail(page));
+ return PageHead(page);
+}
+
+static inline int PageTransCompound(struct page *page)
+{
+ return PageCompound(page);
+}
+
+#else
+
+static inline int PageTransHuge(struct page *page)
+{
+ return 0;
+}
+
+static inline int PageTransCompound(struct page *page)
+{
+ return 0;
+}
+#endif
+
#ifdef CONFIG_MMU
#define __PG_MLOCKED (1 << PG_mlocked)
#else
#define __PG_MLOCKED 0
#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define __PG_COMPOUND_LOCK (1 << PG_compound_lock)
+#else
+#define __PG_COMPOUND_LOCK 0
+#endif
+
/*
* Flags checked when a page is freed. Pages being freed should not have
* these flags set. It they are, there is a problem.
@@ -404,9 +456,10 @@ static inline void __ClearPageTail(struct page *page)
#define PAGE_FLAGS_CHECK_AT_FREE \
(1 << PG_lru | 1 << PG_locked | \
1 << PG_private | 1 << PG_private_2 | \
- 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
+ 1 << PG_writeback | 1 << PG_reserved | \
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
- 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)
+ 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
+ __PG_COMPOUND_LOCK)
/*
* Flags checked when a page is prepped for return by the page allocator.
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index b02195dfc1b0..5b0c971d7cae 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -35,12 +35,18 @@ struct page_cgroup *lookup_page_cgroup(struct page *page);
enum {
/* flags for mem_cgroup */
- PCG_LOCK, /* page cgroup is locked */
+ PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */
PCG_CACHE, /* charged as cache */
PCG_USED, /* this object is in use. */
- PCG_ACCT_LRU, /* page has been accounted for */
- PCG_FILE_MAPPED, /* page is accounted as "mapped" */
PCG_MIGRATION, /* under page migration */
+ /* flags for mem_cgroup and file and I/O status */
+ PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
+ PCG_FILE_MAPPED, /* page is accounted as "mapped" */
+ PCG_FILE_DIRTY, /* page is dirty */
+ PCG_FILE_WRITEBACK, /* page is under writeback */
+ PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */
+ /* No lock in page_cgroup */
+ PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
};
#define TESTPCGFLAG(uname, lname) \
@@ -59,6 +65,10 @@ static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \
static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \
{ return test_and_clear_bit(PCG_##lname, &pc->flags); }
+#define TESTSETPCGFLAG(uname, lname) \
+static inline int TestSetPageCgroup##uname(struct page_cgroup *pc) \
+ { return test_and_set_bit(PCG_##lname, &pc->flags); }
+
/* Cache flag is set only once (at allocation) */
TESTPCGFLAG(Cache, CACHE)
CLEARPCGFLAG(Cache, CACHE)
@@ -78,6 +88,22 @@ SETPCGFLAG(FileMapped, FILE_MAPPED)
CLEARPCGFLAG(FileMapped, FILE_MAPPED)
TESTPCGFLAG(FileMapped, FILE_MAPPED)
+SETPCGFLAG(FileDirty, FILE_DIRTY)
+CLEARPCGFLAG(FileDirty, FILE_DIRTY)
+TESTPCGFLAG(FileDirty, FILE_DIRTY)
+TESTCLEARPCGFLAG(FileDirty, FILE_DIRTY)
+TESTSETPCGFLAG(FileDirty, FILE_DIRTY)
+
+SETPCGFLAG(FileWriteback, FILE_WRITEBACK)
+CLEARPCGFLAG(FileWriteback, FILE_WRITEBACK)
+TESTPCGFLAG(FileWriteback, FILE_WRITEBACK)
+
+SETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
+CLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
+TESTPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
+TESTCLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
+TESTSETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS)
+
SETPCGFLAG(Migration, MIGRATION)
CLEARPCGFLAG(Migration, MIGRATION)
TESTPCGFLAG(Migration, MIGRATION)
@@ -94,6 +120,10 @@ static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
static inline void lock_page_cgroup(struct page_cgroup *pc)
{
+ /*
+ * Don't take this lock in IRQ context.
+ * This lock is for pc->mem_cgroup, USED, CACHE, MIGRATION
+ */
bit_spin_lock(PCG_LOCK, &pc->flags);
}
@@ -107,6 +137,24 @@ static inline int page_is_cgroup_locked(struct page_cgroup *pc)
return bit_spin_is_locked(PCG_LOCK, &pc->flags);
}
+static inline void move_lock_page_cgroup(struct page_cgroup *pc,
+ unsigned long *flags)
+{
+ /*
+ * We know updates to pc->flags of page cache's stats are from both of
+ * usual context or IRQ context. Disable IRQ to avoid deadlock.
+ */
+ local_irq_save(*flags);
+ bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
+}
+
+static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
+ unsigned long *flags)
+{
+ bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
+ local_irq_restore(*flags);
+}
+
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct page_cgroup;
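
PCG_MOVE_LOCK and the new file-stat bits are meant to be used together: the IRQ-safe move lock protects the per-page stat flags against mem_cgroup move_account. A sketch of dirty accounting with the generated helpers; the counter update itself is elided:

#include <linux/page_cgroup.h>

static void example_set_page_cgroup_dirty(struct page_cgroup *pc)
{
	unsigned long flags;

	move_lock_page_cgroup(pc, &flags);
	if (!TestSetPageCgroupFileDirty(pc)) {
		/* ... bump the memcg dirty counter here ... */
	}
	move_unlock_page_cgroup(pc, &flags);
}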
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2d1ffe3cf1ee..9c66e994540f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -48,7 +48,7 @@ static inline void mapping_clear_unevictable(struct address_space *mapping)
static inline int mapping_unevictable(struct address_space *mapping)
{
- if (likely(mapping))
+ if (mapping)
return test_bit(AS_UNEVICTABLE, &mapping->flags);
return !!mapping;
}
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index ab2baa5c4884..23241c2fecce 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -146,6 +146,22 @@ static inline void *radix_tree_deref_slot(void **pslot)
}
/**
+ * radix_tree_deref_slot_protected - dereference a slot without RCU lock but with tree lock held
+ * @pslot: pointer to slot, returned by radix_tree_lookup_slot
+ * Returns: item that was stored in that slot with any direct pointer flag
+ * removed.
+ *
+ * Similar to radix_tree_deref_slot but only used during migration when a page's
+ * mapping is being moved. The caller does not hold the RCU read lock but it
+ * must hold the tree lock to prevent parallel updates.
+ */
+static inline void *radix_tree_deref_slot_protected(void **pslot,
+ spinlock_t *treelock)
+{
+ return rcu_dereference_protected(*pslot, lockdep_is_held(treelock));
+}
+
+/**
* radix_tree_deref_retry - check radix_tree_deref_slot
* @arg: pointer returned by radix_tree_deref_slot
* Returns: 0 if retry is not required, otherwise retry is required
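
radix_tree_deref_slot_protected() is aimed at page migration, where the mapping's tree_lock is held instead of the RCU read lock. A sketch of that use; the wrapper is illustrative:

#include <linux/fs.h>
#include <linux/radix-tree.h>

static struct page *example_slot_page(struct address_space *mapping,
				      void **pslot)
{
	/* Caller holds mapping->tree_lock, not rcu_read_lock(). */
	return radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
}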
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bb83c0da2071..e9fd04ca1e51 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -198,6 +198,8 @@ enum ttu_flags {
};
#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
+bool is_vma_temporary_stack(struct vm_area_struct *vma);
+
int try_to_unmap(struct page *, enum ttu_flags flags);
int try_to_unmap_one(struct page *, struct vm_area_struct *,
unsigned long address, enum ttu_flags flags);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 96e23215e276..d747f948b34e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -21,7 +21,8 @@
#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
-#define CLONE_STOPPED 0x02000000 /* Start in stopped state */
+/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
+ and is now available for re-use. */
#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
#define CLONE_NEWIPC 0x08000000 /* New ipcs */
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
@@ -433,6 +434,7 @@ extern int get_dumpable(struct mm_struct *mm);
#endif
/* leave room for more dump flags */
#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
+#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
@@ -633,6 +635,8 @@ struct signal_struct {
int oom_adj; /* OOM kill score adjustment (bit shift) */
int oom_score_adj; /* OOM kill score adjustment */
+ int oom_score_adj_min; /* OOM kill score adjustment minimum value.
+ * Only settable by CAP_SYS_RESOURCE. */
struct mutex cred_guard_mutex; /* guard against foreign influences on
* credential calculations
diff --git a/include/linux/swap.h b/include/linux/swap.h
index eba53e71d2cc..4d559325d919 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -208,6 +208,8 @@ extern unsigned int nr_free_pagecache_pages(void);
/* linux/mm/swap.c */
extern void __lru_cache_add(struct page *, enum lru_list lru);
extern void lru_cache_add_lru(struct page *, enum lru_list lru);
+extern void lru_add_page_tail(struct zone* zone,
+ struct page *page, struct page *page_tail);
extern void activate_page(struct page *);
extern void mark_page_accessed(struct page *);
extern void lru_add_drain(void);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 44b54f619ac6..4ed6fcd6b726 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -59,8 +59,9 @@ extern void *vmalloc_exec(unsigned long size);
extern void *vmalloc_32(unsigned long size);
extern void *vmalloc_32_user(unsigned long size);
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
-extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
- pgprot_t prot);
+extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
+ unsigned long start, unsigned long end, gfp_t gfp_mask,
+ pgprot_t prot, int node, void *caller);
extern void vfree(const void *addr);
extern void *vmap(struct page **pages, unsigned int count,
@@ -90,9 +91,6 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
unsigned long flags,
unsigned long start, unsigned long end,
void *caller);
-extern struct vm_struct *get_vm_area_node(unsigned long size,
- unsigned long flags, int node,
- gfp_t gfp_mask);
extern struct vm_struct *remove_vm_area(const void *addr);
extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
@@ -120,7 +118,7 @@ extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
#ifdef CONFIG_SMP
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
const size_t *sizes, int nr_vms,
- size_t align, gfp_t gfp_mask);
+ size_t align);
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms);
#endif
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index eaaea37b3b75..833e676d6d92 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -254,6 +254,11 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
void refresh_cpu_vm_stats(int);
+
+int calculate_pressure_threshold(struct zone *zone);
+int calculate_normal_threshold(struct zone *zone);
+void set_pgdat_percpu_threshold(pg_data_t *pgdat,
+ int (*calculate_pressure)(struct zone *));
#else /* CONFIG_SMP */
/*
@@ -298,6 +303,8 @@ static inline void __dec_zone_page_state(struct page *page,
#define dec_zone_page_state __dec_zone_page_state
#define mod_zone_page_state __mod_zone_page_state
+#define set_pgdat_percpu_threshold(pgdat, callback) { }
+
static inline void refresh_cpu_vm_stats(int cpu) { }
#endif
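
set_pgdat_percpu_threshold() lets kswapd temporarily tighten per-cpu counter drift while it is making watermark decisions, then restore the cheaper default. A sketch of that bracketing; the surrounding reclaim work is elided and the function name is illustrative:

#include <linux/vmstat.h>
#include <linux/mmzone.h>

static void example_kswapd_iteration(pg_data_t *pgdat)
{
	/* Tighten drift so zone_watermark_ok_safe() sees fresher counters. */
	set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold);

	/* ... balance the node's zones here ... */

	/* Restore the larger, cheaper thresholds afterwards. */
	set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold);
}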