diff options
| author | Paul Mundt <lethal@linux-sh.org> | 2011-01-14 16:06:31 +0900 |
|---|---|---|
| committer | Paul Mundt <lethal@linux-sh.org> | 2011-01-14 16:06:31 +0900 |
| commit | c488a4731abb53aa1bab9fccd8a7472083159bfd (patch) | |
| tree | db6d4a664a1e4b7685c1d2d79da63263f40adf7b /include/linux | |
| parent | 6d2ae89c36e2adab5cfa69fecb11290082817ac6 (diff) | |
| parent | bba958783b1b4cb0a9420f4e11082467132a334c (diff) | |
Merge branch 'common/mmcif' into rmobile-latest
Diffstat (limited to 'include/linux')
29 files changed, 754 insertions, 83 deletions
diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 5ac51552d908..dfa2ed4c0d26 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -11,6 +11,9 @@ /* The full zone was compacted */ #define COMPACT_COMPLETE 3 +#define COMPACT_MODE_DIRECT_RECLAIM 0 +#define COMPACT_MODE_KSWAPD 1 + #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, @@ -21,7 +24,12 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, - int order, gfp_t gfp_mask, nodemask_t *mask); + int order, gfp_t gfp_mask, nodemask_t *mask, + bool sync); +extern unsigned long compaction_suitable(struct zone *zone, int order); +extern unsigned long compact_zone_order(struct zone *zone, int order, + gfp_t gfp_mask, bool sync, + int compact_mode); /* Do not skip compaction more than 64 times */ #define COMPACT_MAX_DEFER_SHIFT 6 @@ -54,7 +62,20 @@ static inline bool compaction_deferred(struct zone *zone) #else static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, - int order, gfp_t gfp_mask, nodemask_t *nodemask) + int order, gfp_t gfp_mask, nodemask_t *nodemask, + bool sync) +{ + return COMPACT_CONTINUE; +} + +static inline unsigned long compaction_suitable(struct zone *zone, int order) +{ + return COMPACT_SKIPPED; +} + +static inline unsigned long compact_zone_order(struct zone *zone, int order, + gfp_t gfp_mask, bool sync, + int compact_mode) { return COMPACT_CONTINUE; } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 2970022faa63..272496d1fae4 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -193,6 +193,13 @@ struct dm_target { char *error; }; +/* Each target can link one of these into the table */ +struct dm_target_callbacks { + struct list_head list; + int (*congested_fn) (struct dm_target_callbacks *, int); + void (*unplug_fn)(struct dm_target_callbacks *); +}; + int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); @@ -269,6 +276,11 @@ int dm_table_add_target(struct dm_table *t, const char *type, sector_t start, sector_t len, char *params); /* + * Target_ctr should call this if it needs to add any callbacks. + */ +void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb); + +/* * Finally call this to make the table ready for use. */ int dm_table_complete(struct dm_table *t); diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index 49eab360d5d4..78bbf47bbb96 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -44,7 +44,7 @@ * Remove a device, destroy any tables. * * DM_DEV_RENAME: - * Rename a device. + * Rename a device or set its uuid if none was previously supplied. * * DM_SUSPEND: * This performs both suspend and resume, depending which flag is @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 18 -#define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2010-06-29)" +#define DM_VERSION_MINOR 19 +#define DM_VERSION_PATCHLEVEL 1 +#define DM_VERSION_EXTRA "-ioctl (2011-01-07)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ @@ -322,4 +322,10 @@ enum { */ #define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */ +/* + * If set, rename changes the uuid not the name. Only permitted + * if no uuid was previously supplied: an existing uuid cannot be changed. + */ +#define DM_UUID_FLAG (1 << 14) /* In */ + #endif /* _LINUX_DM_IOCTL_H */ diff --git a/include/linux/dm-log-userspace.h b/include/linux/dm-log-userspace.h index 0c3c3a2110c4..eeace7d3ff15 100644 --- a/include/linux/dm-log-userspace.h +++ b/include/linux/dm-log-userspace.h @@ -370,6 +370,16 @@ #define DM_ULOG_REQUEST_TYPE(request_type) \ (DM_ULOG_REQUEST_MASK & (request_type)) +/* + * DM_ULOG_REQUEST_VERSION is incremented when there is a + * change to the way information is passed between kernel + * and userspace. This could be a structure change of + * dm_ulog_request or a change in the way requests are + * issued/handled. Changes are outlined here: + * version 1: Initial implementation + */ +#define DM_ULOG_REQUEST_VERSION 1 + struct dm_ulog_request { /* * The local unique identifier (luid) and the universally unique @@ -383,8 +393,9 @@ struct dm_ulog_request { */ uint64_t luid; char uuid[DM_UUID_LEN]; - char padding[7]; /* Padding because DM_UUID_LEN = 129 */ + char padding[3]; /* Padding because DM_UUID_LEN = 129 */ + uint32_t version; /* See DM_ULOG_REQUEST_VERSION */ int32_t error; /* Used to report back processing errors */ uint32_t seq; /* Sequence number for request */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f54adfcbec9c..a3b148a91874 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -34,6 +34,7 @@ struct vm_area_struct; #else #define ___GFP_NOTRACK 0 #endif +#define ___GFP_NO_KSWAPD 0x400000u /* * GFP bitmasks.. @@ -81,13 +82,15 @@ struct vm_area_struct; #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ +#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) + /* * This may seem redundant, but it's a way of annotating false positives vs. * allocations that simply cannot be supported (e.g. page tables). */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 22 /* Room for 22 __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 23 /* Room for 23 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* This equals 0, but use constants in case they ever change */ @@ -106,6 +109,9 @@ struct vm_area_struct; __GFP_HARDWALL | __GFP_HIGHMEM | \ __GFP_MOVABLE) #define GFP_IOFS (__GFP_IO | __GFP_FS) +#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ + __GFP_NO_KSWAPD) #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) @@ -325,14 +331,17 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) { return alloc_pages_current(gfp_mask, order); } -extern struct page *alloc_page_vma(gfp_t gfp_mask, +extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr); #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0) +#define alloc_pages_vma(gfp_mask, order, vma, addr) \ + alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) +#define alloc_page_vma(gfp_mask, vma, addr) \ + alloc_pages_vma(gfp_mask, 0, vma, addr) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); diff --git a/include/linux/gpio.h b/include/linux/gpio.h index f79d67f413e4..4b47ed96f131 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -30,7 +30,7 @@ static inline int gpio_is_valid(int number) return 0; } -static inline int __must_check gpio_request(unsigned gpio, const char *label) +static inline int gpio_request(unsigned gpio, const char *label) { return -ENOSYS; } @@ -62,12 +62,12 @@ static inline void gpio_free_array(struct gpio *array, size_t num) WARN_ON(1); } -static inline int __must_check gpio_direction_input(unsigned gpio) +static inline int gpio_direction_input(unsigned gpio) { return -ENOSYS; } -static inline int __must_check gpio_direction_output(unsigned gpio, int value) +static inline int gpio_direction_output(unsigned gpio, int value) { return -ENOSYS; } diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h new file mode 100644 index 000000000000..8e6c8c42bc3c --- /dev/null +++ b/include/linux/huge_mm.h @@ -0,0 +1,179 @@ +#ifndef _LINUX_HUGE_MM_H +#define _LINUX_HUGE_MM_H + +extern int do_huge_pmd_anonymous_page(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long address, pmd_t *pmd, + unsigned int flags); +extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, + struct vm_area_struct *vma); +extern int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pmd_t *pmd, + pmd_t orig_pmd); +extern pgtable_t get_pmd_huge_pte(struct mm_struct *mm); +extern struct page *follow_trans_huge_pmd(struct mm_struct *mm, + unsigned long addr, + pmd_t *pmd, + unsigned int flags); +extern int zap_huge_pmd(struct mmu_gather *tlb, + struct vm_area_struct *vma, + pmd_t *pmd); +extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, unsigned long end, + unsigned char *vec); +extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, pgprot_t newprot); + +enum transparent_hugepage_flag { + TRANSPARENT_HUGEPAGE_FLAG, + TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, + TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, +#ifdef CONFIG_DEBUG_VM + TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG, +#endif +}; + +enum page_check_address_pmd_flag { + PAGE_CHECK_ADDRESS_PMD_FLAG, + PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG, + PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG, +}; +extern pmd_t *page_check_address_pmd(struct page *page, + struct mm_struct *mm, + unsigned long address, + enum page_check_address_pmd_flag flag); + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define HPAGE_PMD_SHIFT HPAGE_SHIFT +#define HPAGE_PMD_MASK HPAGE_MASK +#define HPAGE_PMD_SIZE HPAGE_SIZE + +#define transparent_hugepage_enabled(__vma) \ + ((transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_FLAG) || \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \ + ((__vma)->vm_flags & VM_HUGEPAGE))) && \ + !((__vma)->vm_flags & VM_NOHUGEPAGE)) +#define transparent_hugepage_defrag(__vma) \ + ((transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) && \ + (__vma)->vm_flags & VM_HUGEPAGE)) +#ifdef CONFIG_DEBUG_VM +#define transparent_hugepage_debug_cow() \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG)) +#else /* CONFIG_DEBUG_VM */ +#define transparent_hugepage_debug_cow() 0 +#endif /* CONFIG_DEBUG_VM */ + +extern unsigned long transparent_hugepage_flags; +extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pmd_t *dst_pmd, pmd_t *src_pmd, + struct vm_area_struct *vma, + unsigned long addr, unsigned long end); +extern int handle_pte_fault(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, + pte_t *pte, pmd_t *pmd, unsigned int flags); +extern int split_huge_page(struct page *page); +extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); +#define split_huge_page_pmd(__mm, __pmd) \ + do { \ + pmd_t *____pmd = (__pmd); \ + if (unlikely(pmd_trans_huge(*____pmd))) \ + __split_huge_page_pmd(__mm, ____pmd); \ + } while (0) +#define wait_split_huge_page(__anon_vma, __pmd) \ + do { \ + pmd_t *____pmd = (__pmd); \ + spin_unlock_wait(&(__anon_vma)->root->lock); \ + /* \ + * spin_unlock_wait() is just a loop in C and so the \ + * CPU can reorder anything around it. \ + */ \ + smp_mb(); \ + BUG_ON(pmd_trans_splitting(*____pmd) || \ + pmd_trans_huge(*____pmd)); \ + } while (0) +#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) +#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) +#if HPAGE_PMD_ORDER > MAX_ORDER +#error "hugepages can't be allocated by the buddy allocator" +#endif +extern int hugepage_madvise(struct vm_area_struct *vma, + unsigned long *vm_flags, int advice); +extern void __vma_adjust_trans_huge(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + long adjust_next); +static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + long adjust_next) +{ + if (!vma->anon_vma || vma->vm_ops || vma->vm_file) + return; + __vma_adjust_trans_huge(vma, start, end, adjust_next); +} +static inline int hpage_nr_pages(struct page *page) +{ + if (unlikely(PageTransHuge(page))) + return HPAGE_PMD_NR; + return 1; +} +static inline struct page *compound_trans_head(struct page *page) +{ + if (PageTail(page)) { + struct page *head; + head = page->first_page; + smp_rmb(); + /* + * head may be a dangling pointer. + * __split_huge_page_refcount clears PageTail before + * overwriting first_page, so if PageTail is still + * there it means the head pointer isn't dangling. + */ + if (PageTail(page)) + return head; + } + return page; +} +#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +#define HPAGE_PMD_SHIFT ({ BUG(); 0; }) +#define HPAGE_PMD_MASK ({ BUG(); 0; }) +#define HPAGE_PMD_SIZE ({ BUG(); 0; }) + +#define hpage_nr_pages(x) 1 + +#define transparent_hugepage_enabled(__vma) 0 + +#define transparent_hugepage_flags 0UL +static inline int split_huge_page(struct page *page) +{ + return 0; +} +#define split_huge_page_pmd(__mm, __pmd) \ + do { } while (0) +#define wait_split_huge_page(__anon_vma, __pmd) \ + do { } while (0) +#define compound_trans_head(page) compound_head(page) +static inline int hugepage_madvise(struct vm_area_struct *vma, + unsigned long *vm_flags, int advice) +{ + BUG(); + return 0; +} +static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + long adjust_next) +{ +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 979c68cc7458..6a64c6fa81af 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -57,7 +57,7 @@ struct irq_desc { #endif struct timer_rand_state *timer_rand_state; - unsigned int *kstat_irqs; + unsigned int __percpu *kstat_irqs; irq_flow_handler_t handle_irq; struct irqaction *action; /* IRQ action list */ unsigned int status; /* IRQ status */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 57dac7022b63..5a9d9059520b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -600,6 +600,13 @@ struct sysinfo { #define NUMA_BUILD 0 #endif +/* This helps us avoid #ifdef CONFIG_COMPACTION */ +#ifdef CONFIG_COMPACTION +#define COMPACTION_BUILD 1 +#else +#define COMPACTION_BUILD 0 +#endif + /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */ #ifdef CONFIG_FTRACE_MCOUNT_RECORD # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 44e83ba12b5b..0cce2db580c3 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -46,16 +46,14 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); extern unsigned long long nr_context_switches(void); #ifndef CONFIG_GENERIC_HARDIRQS -#define kstat_irqs_this_cpu(irq) \ - (this_cpu_read(kstat.irqs[irq]) struct irq_desc; static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc) { - kstat_this_cpu.irqs[irq]++; - kstat_this_cpu.irqs_sum++; + __this_cpu_inc(kstat.irqs[irq]); + __this_cpu_inc(kstat.irqs_sum); } static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) @@ -65,17 +63,18 @@ static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) #else #include <linux/irq.h> extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); -#define kstat_irqs_this_cpu(DESC) \ - ((DESC)->kstat_irqs[smp_processor_id()]) -#define kstat_incr_irqs_this_cpu(irqno, DESC) do {\ - ((DESC)->kstat_irqs[smp_processor_id()]++);\ - kstat_this_cpu.irqs_sum++; } while (0) + +#define kstat_incr_irqs_this_cpu(irqno, DESC) \ +do { \ + __this_cpu_inc(*(DESC)->kstat_irqs); \ + __this_cpu_inc(kstat.irqs_sum); \ +} while (0) #endif static inline void kstat_incr_softirqs_this_cpu(unsigned int irq) { - kstat_this_cpu.softirqs[irq]++; + __this_cpu_inc(kstat.softirqs[irq]); } static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu) diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h new file mode 100644 index 000000000000..6b394f0b5148 --- /dev/null +++ b/include/linux/khugepaged.h @@ -0,0 +1,67 @@ +#ifndef _LINUX_KHUGEPAGED_H +#define _LINUX_KHUGEPAGED_H + +#include <linux/sched.h> /* MMF_VM_HUGEPAGE */ + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern int __khugepaged_enter(struct mm_struct *mm); +extern void __khugepaged_exit(struct mm_struct *mm); +extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma); + +#define khugepaged_enabled() \ + (transparent_hugepage_flags & \ + ((1<<TRANSPARENT_HUGEPAGE_FLAG) | \ + (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))) +#define khugepaged_always() \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_FLAG)) +#define khugepaged_req_madv() \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)) +#define khugepaged_defrag() \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)) + +static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) +{ + if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags)) + return __khugepaged_enter(mm); + return 0; +} + +static inline void khugepaged_exit(struct mm_struct *mm) +{ + if (test_bit(MMF_VM_HUGEPAGE, &mm->flags)) + __khugepaged_exit(mm); +} + +static inline int khugepaged_enter(struct vm_area_struct *vma) +{ + if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) + if ((khugepaged_always() || + (khugepaged_req_madv() && + vma->vm_flags & VM_HUGEPAGE)) && + !(vma->vm_flags & VM_NOHUGEPAGE)) + if (__khugepaged_enter(vma->vm_mm)) + return -ENOMEM; + return 0; +} +#else /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm) +{ + return 0; +} +static inline void khugepaged_exit(struct mm_struct *mm) +{ +} +static inline int khugepaged_enter(struct vm_area_struct *vma) +{ + return 0; +} +static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma) +{ + return 0; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#endif /* _LINUX_KHUGEPAGED_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 159a0762aeaf..6a576f989437 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -25,6 +25,11 @@ struct page_cgroup; struct page; struct mm_struct; +/* Stats that can be updated by kernel. */ +enum mem_cgroup_page_stat_item { + MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */ +}; + extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, @@ -93,7 +98,7 @@ extern int mem_cgroup_prepare_migration(struct page *page, struct page *newpage, struct mem_cgroup **ptr); extern void mem_cgroup_end_migration(struct mem_cgroup *mem, - struct page *oldpage, struct page *newpage); + struct page *oldpage, struct page *newpage, bool migration_ok); /* * For memory reclaim. @@ -121,7 +126,22 @@ static inline bool mem_cgroup_disabled(void) return false; } -void mem_cgroup_update_file_mapped(struct page *page, int val); +void mem_cgroup_update_page_stat(struct page *page, + enum mem_cgroup_page_stat_item idx, + int val); + +static inline void mem_cgroup_inc_page_stat(struct page *page, + enum mem_cgroup_page_stat_item idx) +{ + mem_cgroup_update_page_stat(page, idx, 1); +} + +static inline void mem_cgroup_dec_page_stat(struct page *page, + enum mem_cgroup_page_stat_item idx) +{ + mem_cgroup_update_page_stat(page, idx, -1); +} + unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, gfp_t gfp_mask); u64 mem_cgroup_get_limit(struct mem_cgroup *mem); @@ -231,8 +251,7 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage, } static inline void mem_cgroup_end_migration(struct mem_cgroup *mem, - struct page *oldpage, - struct page *newpage) + struct page *oldpage, struct page *newpage, bool migration_ok) { } @@ -293,8 +312,13 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void mem_cgroup_update_file_mapped(struct page *page, - int val) +static inline void mem_cgroup_inc_page_stat(struct page *page, + enum mem_cgroup_page_stat_item idx) +{ +} + +static inline void mem_cgroup_dec_page_stat(struct page *page, + enum mem_cgroup_page_stat_item idx) { } diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 31c237a00c48..24376fe7ee68 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -13,12 +13,16 @@ struct mem_section; #ifdef CONFIG_MEMORY_HOTPLUG /* - * Types for free bootmem. - * The normal smallest mapcount is -1. Here is smaller value than it. + * Types for free bootmem stored in page->lru.next. These have to be in + * some random range in unsigned long space for debugging purposes. */ -#define SECTION_INFO (-1 - 1) -#define MIX_SECTION_INFO (-1 - 2) -#define NODE_INFO (-1 - 3) +enum { + MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12, + SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE, + MIX_SECTION_INFO, + NODE_INFO, + MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, +}; /* * pgdat resizing functions diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 085527fb8261..e39aeecfe9a2 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -13,9 +13,11 @@ extern void putback_lru_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *); extern int migrate_pages(struct list_head *l, new_page_t x, - unsigned long private, int offlining); + unsigned long private, bool offlining, + bool sync); extern int migrate_huge_pages(struct list_head *l, new_page_t x, - unsigned long private, int offlining); + unsigned long private, bool offlining, + bool sync); extern int fail_migrate_page(struct address_space *, struct page *, struct page *); @@ -33,9 +35,11 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, static inline void putback_lru_pages(struct list_head *l) {} static inline int migrate_pages(struct list_head *l, new_page_t x, - unsigned long private, int offlining) { return -ENOSYS; } + unsigned long private, bool offlining, + bool sync) { return -ENOSYS; } static inline int migrate_huge_pages(struct list_head *l, new_page_t x, - unsigned long private, int offlining) { return -ENOSYS; } + unsigned long private, bool offlining, + bool sync) { return -ENOSYS; } static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 721f451c3029..956a35532f47 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -14,6 +14,7 @@ #include <linux/mm_types.h> #include <linux/range.h> #include <linux/pfn.h> +#include <linux/bit_spinlock.h> struct mempolicy; struct anon_vma; @@ -82,6 +83,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_GROWSUP 0x00000200 #else #define VM_GROWSUP 0x00000000 +#define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */ #endif #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ @@ -101,7 +103,11 @@ extern unsigned int kobjsize(const void *objp); #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ +#ifndef CONFIG_TRANSPARENT_HUGEPAGE #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ +#else +#define VM_HUGEPAGE 0x01000000 /* MADV_HUGEPAGE marked this vma */ +#endif #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ @@ -242,6 +248,7 @@ struct inode; * files which need it (119 of them) */ #include <linux/page-flags.h> +#include <linux/huge_mm.h> /* * Methods to modify the page usage count. @@ -305,6 +312,39 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif +static inline void compound_lock(struct page *page) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + bit_spin_lock(PG_compound_lock, &page->flags); +#endif +} + +static inline void compound_unlock(struct page *page) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + bit_spin_unlock(PG_compound_lock, &page->flags); +#endif +} + +static inline unsigned long compound_lock_irqsave(struct page *page) +{ + unsigned long uninitialized_var(flags); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + local_irq_save(flags); + compound_lock(page); +#endif + return flags; +} + +static inline void compound_unlock_irqrestore(struct page *page, + unsigned long flags) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + compound_unlock(page); + local_irq_restore(flags); +#endif +} + static inline struct page *compound_head(struct page *page) { if (unlikely(PageTail(page))) @@ -319,9 +359,29 @@ static inline int page_count(struct page *page) static inline void get_page(struct page *page) { - page = compound_head(page); - VM_BUG_ON(atomic_read(&page->_count) == 0); + /* + * Getting a normal page or the head of a compound page + * requires to already have an elevated page->_count. Only if + * we're getting a tail page, the elevated page->_count is + * required only in the head page, so for tail pages the + * bugcheck only verifies that the page->_count isn't + * negative. + */ + VM_BUG_ON(atomic_read(&page->_count) < !PageTail(page)); atomic_inc(&page->_count); + /* + * Getting a tail page will elevate both the head and tail + * page->_count(s). + */ + if (unlikely(PageTail(page))) { + /* + * This is safe only because + * __split_huge_page_refcount can't run under + * get_page(). + */ + VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); + atomic_inc(&page->first_page->_count); + } } static inline struct page *virt_to_head_page(const void *x) @@ -339,6 +399,27 @@ static inline void init_page_count(struct page *page) atomic_set(&page->_count, 1); } +/* + * PageBuddy() indicate that the page is free and in the buddy system + * (see mm/page_alloc.c). + */ +static inline int PageBuddy(struct page *page) +{ + return atomic_read(&page->_mapcount) == -2; +} + +static inline void __SetPageBuddy(struct page *page) +{ + VM_BUG_ON(atomic_read(&page->_mapcount) != -1); + atomic_set(&page->_mapcount, -2); +} + +static inline void __ClearPageBuddy(struct page *page) +{ + VM_BUG_ON(!PageBuddy(page)); + atomic_set(&page->_mapcount, -1); +} + void put_page(struct page *page); void put_pages_list(struct list_head *pages); @@ -370,12 +451,39 @@ static inline int compound_order(struct page *page) return (unsigned long)page[1].lru.prev; } +static inline int compound_trans_order(struct page *page) +{ + int order; + unsigned long flags; + + if (!PageHead(page)) + return 0; + + flags = compound_lock_irqsave(page); + order = compound_order(page); + compound_unlock_irqrestore(page, flags); + return order; +} + static inline void set_compound_order(struct page *page, unsigned long order) { page[1].lru.prev = (void *)order; } /* + * Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when + * servicing faults for write access. In the normal case, do always want + * pte_mkwrite. But get_user_pages can cause write faults for mappings + * that do not have writing enabled, when used by access_process_vm. + */ +static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) +{ + if (likely(vma->vm_flags & VM_WRITE)) + pte = pte_mkwrite(pte); + return pte; +} + +/* * Multiple processes may "see" the same page. E.g. for untouched * mappings of /dev/null, all processes see the same page full of * zeroes, and text pages of executables and shared libraries have @@ -657,7 +765,7 @@ static inline struct address_space *page_mapping(struct page *page) VM_BUG_ON(PageSlab(page)); if (unlikely(PageSwapCache(page))) mapping = &swapper_space; - else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON)) + else if ((unsigned long)mapping & PAGE_MAPPING_ANON) mapping = NULL; return mapping; } @@ -1064,7 +1172,8 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); #endif -int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); +int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + pmd_t *pmd, unsigned long address); int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); /* @@ -1133,16 +1242,18 @@ static inline void pgtable_page_dtor(struct page *page) pte_unmap(pte); \ } while (0) -#define pte_alloc_map(mm, pmd, address) \ - ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \ - NULL: pte_offset_map(pmd, address)) +#define pte_alloc_map(mm, vma, pmd, address) \ + ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma, \ + pmd, address))? \ + NULL: pte_offset_map(pmd, address)) #define pte_alloc_map_lock(mm, pmd, address, ptlp) \ - ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \ + ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL, \ + pmd, address))? \ NULL: pte_offset_map_lock(mm, pmd, address, ptlp)) #define pte_alloc_kernel(pmd, address) \ - ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ + ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ NULL: pte_offset_kernel(pmd, address)) extern void free_area_init(unsigned long * zones_size); @@ -1415,6 +1526,8 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ +#define FOLL_MLOCK 0x40 /* mark page as mlocked */ +#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); @@ -1518,5 +1631,14 @@ static inline int is_hwpoison_address(unsigned long addr) extern void dump_page(struct page *page); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) +extern void clear_huge_page(struct page *page, + unsigned long addr, + unsigned int pages_per_huge_page); +extern void copy_user_huge_page(struct page *dst, struct page *src, + unsigned long addr, struct vm_area_struct *vma, + unsigned int pages_per_huge_page); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 8835b877b8db..8f7d24712dc1 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -1,6 +1,8 @@ #ifndef LINUX_MM_INLINE_H #define LINUX_MM_INLINE_H +#include <linux/huge_mm.h> + /** * page_is_file_cache - should the page be on a file LRU or anon LRU? * @page: the page to test @@ -20,18 +22,25 @@ static inline int page_is_file_cache(struct page *page) } static inline void -add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) +__add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l, + struct list_head *head) { - list_add(&page->lru, &zone->lru[l].list); - __inc_zone_state(zone, NR_LRU_BASE + l); + list_add(&page->lru, head); + __mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page)); mem_cgroup_add_lru_list(page, l); } static inline void +add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l) +{ + __add_page_to_lru_list(zone, page, l, &zone->lru[l].list); +} + +static inline void del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l) { list_del(&page->lru); - __dec_zone_state(zone, NR_LRU_BASE + l); + __mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page)); mem_cgroup_del_lru_list(page, l); } @@ -66,7 +75,7 @@ del_page_from_lru(struct zone *zone, struct page *page) l += LRU_ACTIVE; } } - __dec_zone_state(zone, NR_LRU_BASE + l); + __mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page)); mem_cgroup_del_lru_list(page, l); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bb7288a782fd..26bc4e2cd275 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -310,6 +310,9 @@ struct mm_struct { #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_mm *mmu_notifier_mm; #endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + pgtable_t pmd_huge_pte; /* protected by page_table_lock */ +#endif /* How many tasks sharing this mm are OOM_DISABLE */ atomic_t oom_disable_count; }; diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h index bf173502d744..38d393092812 100644 --- a/include/linux/mmc/sh_mmcif.h +++ b/include/linux/mmc/sh_mmcif.h @@ -94,12 +94,12 @@ struct sh_mmcif_plat_data { static inline u32 sh_mmcif_readl(void __iomem *addr, int reg) { - return readl(addr + reg); + return __raw_readl(addr + reg); } static inline void sh_mmcif_writel(void __iomem *addr, int reg, u32 val) { - writel(val, addr + reg); + __raw_writel(val, addr + reg); } #define SH_MMCIF_BBS 512 /* boot block size */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 43dcfbdc39de..cc2e7dfea9d7 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -62,6 +62,16 @@ struct mmu_notifier_ops { unsigned long address); /* + * test_young is called to check the young/accessed bitflag in + * the secondary pte. This is used to know if the page is + * frequently used without actually clearing the flag or tearing + * down the secondary mapping on the page. + */ + int (*test_young)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + + /* * change_pte is called in cases that pte mapping to page is changed: * for example, when ksm remaps pte to point to a new shared page. */ @@ -163,6 +173,8 @@ extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long address); +extern int __mmu_notifier_test_young(struct mm_struct *mm, + unsigned long address); extern void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte); extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, @@ -186,6 +198,14 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, return 0; } +static inline int mmu_notifier_test_young(struct mm_struct *mm, + unsigned long address) +{ + if (mm_has_notifiers(mm)) + return __mmu_notifier_test_young(mm, address); + return 0; +} + static inline void mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte) { @@ -243,6 +263,32 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) __pte; \ }) +#define pmdp_clear_flush_notify(__vma, __address, __pmdp) \ +({ \ + pmd_t __pmd; \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + VM_BUG_ON(__address & ~HPAGE_PMD_MASK); \ + mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address, \ + (__address)+HPAGE_PMD_SIZE);\ + __pmd = pmdp_clear_flush(___vma, ___address, __pmdp); \ + mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address, \ + (__address)+HPAGE_PMD_SIZE); \ + __pmd; \ +}) + +#define pmdp_splitting_flush_notify(__vma, __address, __pmdp) \ +({ \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + VM_BUG_ON(__address & ~HPAGE_PMD_MASK); \ + mmu_notifier_invalidate_range_start(___vma->vm_mm, ___address, \ + (__address)+HPAGE_PMD_SIZE);\ + pmdp_splitting_flush(___vma, ___address, __pmdp); \ + mmu_notifier_invalidate_range_end(___vma->vm_mm, ___address, \ + (__address)+HPAGE_PMD_SIZE); \ +}) + #define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ ({ \ int __young; \ @@ -254,6 +300,17 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) __young; \ }) +#define pmdp_clear_flush_young_notify(__vma, __address, __pmdp) \ +({ \ + int __young; \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ + __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ + ___address); \ + __young; \ +}) + #define set_pte_at_notify(__mm, __address, __ptep, __pte) \ ({ \ struct mm_struct *___mm = __mm; \ @@ -276,6 +333,12 @@ static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, return 0; } +static inline int mmu_notifier_test_young(struct mm_struct *mm, + unsigned long address) +{ + return 0; +} + static inline void mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, pte_t pte) { @@ -305,7 +368,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) } #define ptep_clear_flush_young_notify ptep_clear_flush_young +#define pmdp_clear_flush_young_notify pmdp_clear_flush_young #define ptep_clear_flush_notify ptep_clear_flush +#define pmdp_clear_flush_notify pmdp_clear_flush +#define pmdp_splitting_flush_notify pmdp_splitting_flush #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 39c24ebe9cfd..02ecb0189b1d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -114,6 +114,7 @@ enum zone_stat_item { NUMA_LOCAL, /* allocation from local node */ NUMA_OTHER, /* allocation from other node */ #endif + NR_ANON_TRANSPARENT_HUGEPAGES, NR_VM_ZONE_STAT_ITEMS }; /* @@ -458,12 +459,6 @@ static inline int zone_is_oom_locked(const struct zone *zone) return test_bit(ZONE_OOM_LOCKED, &zone->flags); } -#ifdef CONFIG_SMP -unsigned long zone_nr_free_pages(struct zone *zone); -#else -#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES) -#endif /* CONFIG_SMP */ - /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the @@ -645,6 +640,7 @@ typedef struct pglist_data { wait_queue_head_t kswapd_wait; struct task_struct *kswapd; int kswapd_max_order; + enum zone_type classzone_idx; } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) @@ -660,8 +656,10 @@ typedef struct pglist_data { extern struct mutex zonelists_mutex; void build_all_zonelists(void *data); -void wakeup_kswapd(struct zone *zone, int order); -int zone_watermark_ok(struct zone *z, int order, unsigned long mark, +void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); +bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, + int classzone_idx, int alloc_flags); +bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, int classzone_idx, int alloc_flags); enum memmap_context { MEMMAP_EARLY, diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5f38c460367e..0db8037e2725 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -48,9 +48,6 @@ * struct page (these bits with information) are always mapped into kernel * address space... * - * PG_buddy is set to indicate that the page is free and in the buddy system - * (see mm/page_alloc.c). - * * PG_hwpoison indicates that a page got corrupted in hardware and contains * data with incorrect ECC bits that triggered a machine check. Accessing is * not safe since it may cause another machine check. Don't touch! @@ -96,7 +93,6 @@ enum pageflags { PG_swapcache, /* Swap page: swp_entry_t in private */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ - PG_buddy, /* Page is free, on buddy lists */ PG_swapbacked, /* Page is backed by RAM/swap */ PG_unevictable, /* Page is "unevictable" */ #ifdef CONFIG_MMU @@ -108,6 +104,9 @@ enum pageflags { #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + PG_compound_lock, +#endif __NR_PAGEFLAGS, /* Filesystems */ @@ -198,7 +197,7 @@ static inline int __TestClearPage##uname(struct page *page) { return 0; } struct page; /* forward declaration */ TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked) -PAGEFLAG(Error, error) +PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error) PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru) @@ -230,7 +229,6 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) * risky: they bypass page accounting. */ TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) -__PAGEFLAG(Buddy, buddy) PAGEFLAG(MappedToDisk, mappedtodisk) /* PG_readahead is only used for file reads; PG_reclaim is only for writes */ @@ -344,7 +342,7 @@ static inline void set_page_writeback(struct page *page) * tests can be used in performance sensitive paths. PageCompound is * generally not used in hot code paths. */ -__PAGEFLAG(Head, head) +__PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head) __PAGEFLAG(Tail, tail) static inline int PageCompound(struct page *page) @@ -352,6 +350,13 @@ static inline int PageCompound(struct page *page) return page->flags & ((1L << PG_head) | (1L << PG_tail)); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void ClearPageCompound(struct page *page) +{ + BUG_ON(!PageHead(page)); + ClearPageHead(page); +} +#endif #else /* * Reduce page flag use as much as possible by overlapping @@ -389,14 +394,61 @@ static inline void __ClearPageTail(struct page *page) page->flags &= ~PG_head_tail_mask; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void ClearPageCompound(struct page *page) +{ + BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound)); + clear_bit(PG_compound, &page->flags); +} +#endif + #endif /* !PAGEFLAGS_EXTENDED */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* + * PageHuge() only returns true for hugetlbfs pages, but not for + * normal or transparent huge pages. + * + * PageTransHuge() returns true for both transparent huge and + * hugetlbfs pages, but not normal pages. PageTransHuge() can only be + * called only in the core VM paths where hugetlbfs pages can't exist. + */ +static inline int PageTransHuge(struct page *page) +{ + VM_BUG_ON(PageTail(page)); + return PageHead(page); +} + +static inline int PageTransCompound(struct page *page) +{ + return PageCompound(page); +} + +#else + +static inline int PageTransHuge(struct page *page) +{ + return 0; +} + +static inline int PageTransCompound(struct page *page) +{ + return 0; +} +#endif + #ifdef CONFIG_MMU #define __PG_MLOCKED (1 << PG_mlocked) #else #define __PG_MLOCKED 0 #endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __PG_COMPOUND_LOCK (1 << PG_compound_lock) +#else +#define __PG_COMPOUND_LOCK 0 +#endif + /* * Flags checked when a page is freed. Pages being freed should not have * these flags set. It they are, there is a problem. @@ -404,9 +456,10 @@ static inline void __ClearPageTail(struct page *page) #define PAGE_FLAGS_CHECK_AT_FREE \ (1 << PG_lru | 1 << PG_locked | \ 1 << PG_private | 1 << PG_private_2 | \ - 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ + 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON) + 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \ + __PG_COMPOUND_LOCK) /* * Flags checked when a page is prepped for return by the page allocator. diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index b02195dfc1b0..5b0c971d7cae 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -35,12 +35,18 @@ struct page_cgroup *lookup_page_cgroup(struct page *page); enum { /* flags for mem_cgroup */ - PCG_LOCK, /* page cgroup is locked */ + PCG_LOCK, /* Lock for pc->mem_cgroup and following bits. */ PCG_CACHE, /* charged as cache */ PCG_USED, /* this object is in use. */ - PCG_ACCT_LRU, /* page has been accounted for */ - PCG_FILE_MAPPED, /* page is accounted as "mapped" */ PCG_MIGRATION, /* under page migration */ + /* flags for mem_cgroup and file and I/O status */ + PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */ + PCG_FILE_MAPPED, /* page is accounted as "mapped" */ + PCG_FILE_DIRTY, /* page is dirty */ + PCG_FILE_WRITEBACK, /* page is under writeback */ + PCG_FILE_UNSTABLE_NFS, /* page is NFS unstable */ + /* No lock in page_cgroup */ + PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */ }; #define TESTPCGFLAG(uname, lname) \ @@ -59,6 +65,10 @@ static inline void ClearPageCgroup##uname(struct page_cgroup *pc) \ static inline int TestClearPageCgroup##uname(struct page_cgroup *pc) \ { return test_and_clear_bit(PCG_##lname, &pc->flags); } +#define TESTSETPCGFLAG(uname, lname) \ +static inline int TestSetPageCgroup##uname(struct page_cgroup *pc) \ + { return test_and_set_bit(PCG_##lname, &pc->flags); } + /* Cache flag is set only once (at allocation) */ TESTPCGFLAG(Cache, CACHE) CLEARPCGFLAG(Cache, CACHE) @@ -78,6 +88,22 @@ SETPCGFLAG(FileMapped, FILE_MAPPED) CLEARPCGFLAG(FileMapped, FILE_MAPPED) TESTPCGFLAG(FileMapped, FILE_MAPPED) +SETPCGFLAG(FileDirty, FILE_DIRTY) +CLEARPCGFLAG(FileDirty, FILE_DIRTY) +TESTPCGFLAG(FileDirty, FILE_DIRTY) +TESTCLEARPCGFLAG(FileDirty, FILE_DIRTY) +TESTSETPCGFLAG(FileDirty, FILE_DIRTY) + +SETPCGFLAG(FileWriteback, FILE_WRITEBACK) +CLEARPCGFLAG(FileWriteback, FILE_WRITEBACK) +TESTPCGFLAG(FileWriteback, FILE_WRITEBACK) + +SETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS) +CLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS) +TESTPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS) +TESTCLEARPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS) +TESTSETPCGFLAG(FileUnstableNFS, FILE_UNSTABLE_NFS) + SETPCGFLAG(Migration, MIGRATION) CLEARPCGFLAG(Migration, MIGRATION) TESTPCGFLAG(Migration, MIGRATION) @@ -94,6 +120,10 @@ static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc) static inline void lock_page_cgroup(struct page_cgroup *pc) { + /* + * Don't take this lock in IRQ context. + * This lock is for pc->mem_cgroup, USED, CACHE, MIGRATION + */ bit_spin_lock(PCG_LOCK, &pc->flags); } @@ -107,6 +137,24 @@ static inline int page_is_cgroup_locked(struct page_cgroup *pc) return bit_spin_is_locked(PCG_LOCK, &pc->flags); } +static inline void move_lock_page_cgroup(struct page_cgroup *pc, + unsigned long *flags) +{ + /* + * We know updates to pc->flags of page cache's stats are from both of + * usual context or IRQ context. Disable IRQ to avoid deadlock. + */ + local_irq_save(*flags); + bit_spin_lock(PCG_MOVE_LOCK, &pc->flags); +} + +static inline void move_unlock_page_cgroup(struct page_cgroup *pc, + unsigned long *flags) +{ + bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags); + local_irq_restore(*flags); +} + #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct page_cgroup; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 2d1ffe3cf1ee..9c66e994540f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -48,7 +48,7 @@ static inline void mapping_clear_unevictable(struct address_space *mapping) static inline int mapping_unevictable(struct address_space *mapping) { - if (likely(mapping)) + if (mapping) return test_bit(AS_UNEVICTABLE, &mapping->flags); return !!mapping; } diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index ab2baa5c4884..23241c2fecce 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -146,6 +146,22 @@ static inline void *radix_tree_deref_slot(void **pslot) } /** + * radix_tree_deref_slot_protected - dereference a slot without RCU lock but with tree lock held + * @pslot: pointer to slot, returned by radix_tree_lookup_slot + * Returns: item that was stored in that slot with any direct pointer flag + * removed. + * + * Similar to radix_tree_deref_slot but only used during migration when a pages + * mapping is being moved. The caller does not hold the RCU read lock but it + * must hold the tree lock to prevent parallel updates. + */ +static inline void *radix_tree_deref_slot_protected(void **pslot, + spinlock_t *treelock) +{ + return rcu_dereference_protected(*pslot, lockdep_is_held(treelock)); +} + +/** * radix_tree_deref_retry - check radix_tree_deref_slot * @arg: pointer returned by radix_tree_deref_slot * Returns: 0 if retry is not required, otherwise retry is required diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bb83c0da2071..e9fd04ca1e51 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -198,6 +198,8 @@ enum ttu_flags { }; #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) +bool is_vma_temporary_stack(struct vm_area_struct *vma); + int try_to_unmap(struct page *, enum ttu_flags flags); int try_to_unmap_one(struct page *, struct vm_area_struct *, unsigned long address, enum ttu_flags flags); diff --git a/include/linux/sched.h b/include/linux/sched.h index 96e23215e276..d747f948b34e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -21,7 +21,8 @@ #define CLONE_DETACHED 0x00400000 /* Unused, ignored */ #define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */ #define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */ -#define CLONE_STOPPED 0x02000000 /* Start in stopped state */ +/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state) + and is now available for re-use. */ #define CLONE_NEWUTS 0x04000000 /* New utsname group? */ #define CLONE_NEWIPC 0x08000000 /* New ipcs */ #define CLONE_NEWUSER 0x10000000 /* New user namespace */ @@ -433,6 +434,7 @@ extern int get_dumpable(struct mm_struct *mm); #endif /* leave room for more dump flags */ #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ +#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) @@ -633,6 +635,8 @@ struct signal_struct { int oom_adj; /* OOM kill score adjustment (bit shift) */ int oom_score_adj; /* OOM kill score adjustment */ + int oom_score_adj_min; /* OOM kill score adjustment minimum value. + * Only settable by CAP_SYS_RESOURCE. */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations diff --git a/include/linux/swap.h b/include/linux/swap.h index eba53e71d2cc..4d559325d919 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -208,6 +208,8 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ extern void __lru_cache_add(struct page *, enum lru_list lru); extern void lru_cache_add_lru(struct page *, enum lru_list lru); +extern void lru_add_page_tail(struct zone* zone, + struct page *page, struct page *page_tail); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 44b54f619ac6..4ed6fcd6b726 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -59,8 +59,9 @@ extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); -extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, - pgprot_t prot); +extern void *__vmalloc_node_range(unsigned long size, unsigned long align, + unsigned long start, unsigned long end, gfp_t gfp_mask, + pgprot_t prot, int node, void *caller); extern void vfree(const void *addr); extern void *vmap(struct page **pages, unsigned int count, @@ -90,9 +91,6 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, void *caller); -extern struct vm_struct *get_vm_area_node(unsigned long size, - unsigned long flags, int node, - gfp_t gfp_mask); extern struct vm_struct *remove_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, @@ -120,7 +118,7 @@ extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); #ifdef CONFIG_SMP struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, - size_t align, gfp_t gfp_mask); + size_t align); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); #endif diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index eaaea37b3b75..833e676d6d92 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -254,6 +254,11 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item); extern void __dec_zone_state(struct zone *, enum zone_stat_item); void refresh_cpu_vm_stats(int); + +int calculate_pressure_threshold(struct zone *zone); +int calculate_normal_threshold(struct zone *zone); +void set_pgdat_percpu_threshold(pg_data_t *pgdat, + int (*calculate_pressure)(struct zone *)); #else /* CONFIG_SMP */ /* @@ -298,6 +303,8 @@ static inline void __dec_zone_page_state(struct page *page, #define dec_zone_page_state __dec_zone_page_state #define mod_zone_page_state __mod_zone_page_state +#define set_pgdat_percpu_threshold(pgdat, callback) { } + static inline void refresh_cpu_vm_stats(int cpu) { } #endif |
