Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 281
1 file changed, 185 insertions(+), 96 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2db537e1da49..1dcb8ff0e681 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -114,13 +114,6 @@ static DEFINE_SPINLOCK(managed_page_count_lock);
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 unsigned long totalcma_pages __read_mostly;
-/*
- * When calculating the number of globally allowed dirty pages, there
- * is a certain number of per-zone reserves that should not be
- * considered dirtyable memory. This is the sum of those reserves
- * over all existing zones that contribute dirtyable memory.
- */
-unsigned long dirty_balance_reserve __read_mostly;
 
 int percpu_pagelist_fraction;
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
@@ -230,6 +223,20 @@ static char * const zone_names[MAX_NR_ZONES] = {
 };
 
 static void free_compound_page(struct page *page);
+
+char * const migratetype_names[MIGRATE_TYPES] = {
+	"Unmovable",
+	"Movable",
+	"Reclaimable",
+#ifdef CONFIG_CMA
+	"CMA",
+#endif
+	"HighAtomic",
+#ifdef CONFIG_MEMORY_ISOLATION
+	"Isolate",
+#endif
+};
+
 compound_page_dtor * const compound_page_dtors[] = {
 	NULL,
 	free_compound_page,
@@ -475,6 +482,7 @@ static void bad_page(struct page *page, const char *reason,
 	printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n",
 		current->comm, page_to_pfn(page));
 	dump_page_badflags(page, reason, bad_flags);
+	dump_page_owner(page);
 
 	print_modules();
 	dump_stack();
@@ -521,7 +529,8 @@ void prep_compound_page(struct page *page, unsigned int order)
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 unsigned int _debug_guardpage_minorder;
-bool _debug_pagealloc_enabled __read_mostly;
+bool _debug_pagealloc_enabled __read_mostly
+			= IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
 bool _debug_guardpage_enabled __read_mostly;
 
 static int __init early_debug_pagealloc(char *buf)
@@ -532,6 +541,9 @@ static int __init early_debug_pagealloc(char *buf)
 	if (strcmp(buf, "on") == 0)
 		_debug_pagealloc_enabled = true;
 
+	if (strcmp(buf, "off") == 0)
+		_debug_pagealloc_enabled = false;
+
 	return 0;
 }
 early_param("debug_pagealloc", early_debug_pagealloc);
@@ -1032,9 +1044,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 
 	trace_mm_page_free(page, order);
 	kmemcheck_free_shadow(page, order);
-	kasan_free_pages(page, order);
 
-	if (PageAnon(page))
+	if (PageMappingFlags(page))
 		page->mapping = NULL;
 	bad += free_pages_check(page);
 	for (i = 1; i < (1 << order); i++) {
@@ -1054,7 +1065,9 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
 					   PAGE_SIZE << order);
 	}
 	arch_free_page(page, order);
+	kernel_poison_pages(page, 1 << order, 0);
 	kernel_map_pages(page, 1 << order, 0);
+	kasan_free_pages(page, order);
 
 	return true;
 }
@@ -1075,8 +1088,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
-static void __init __free_pages_boot_core(struct page *page,
-					unsigned long pfn, unsigned int order)
+static void __init __free_pages_boot_core(struct page *page, unsigned long pfn, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
 	struct page *p = page;
@@ -1148,7 +1160,7 @@ static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
 #endif
 
 
-void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
+void __free_pages_bootmem(struct page *page, unsigned long pfn,
 							unsigned int order)
 {
 	if (early_page_uninitialised(pfn))
@@ -1326,6 +1338,11 @@ void __init page_alloc_init_late(void)
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_CMA
+bool is_cma_pageblock(struct page *page)
+{
+	return get_pageblock_migratetype(page) == MIGRATE_CMA;
+}
+
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
 {
@@ -1433,8 +1450,27 @@ static inline int check_new_page(struct page *page)
 	return 0;
 }
 
+static inline bool free_pages_prezeroed(void)
+{
+	return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
+		page_poisoning_enabled();
+}
+
+inline void post_alloc_hook(struct page *page, unsigned int order,
+				gfp_t gfp_flags)
+{
+	set_page_private(page, 0);
+	set_page_refcounted(page);
+
+	kasan_alloc_pages(page, order);
+	arch_alloc_page(page, order);
+	kernel_map_pages(page, 1 << order, 1);
+	kernel_poison_pages(page, 1 << order, 1);
+	set_page_owner(page, order, gfp_flags);
+}
+
 static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
-							int alloc_flags)
+								int alloc_flags)
 {
 	int i;
 
@@ -1444,22 +1480,15 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
 			return 1;
 	}
 
-	set_page_private(page, 0);
-	set_page_refcounted(page);
+	post_alloc_hook(page, order, gfp_flags);
 
-	arch_alloc_page(page, order);
-	kernel_map_pages(page, 1 << order, 1);
-	kasan_alloc_pages(page, order);
-
-	if (gfp_flags & __GFP_ZERO)
+	if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO))
 		for (i = 0; i < (1 << order); i++)
 			clear_highpage(page + i);
 
 	if (order && (gfp_flags & __GFP_COMP))
 		prep_compound_page(page, order);
 
-	set_page_owner(page, order, gfp_flags);
-
 	/*
 	 * page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
 	 * allocate the page. The expectation is that the caller is taking
@@ -1522,6 +1551,11 @@ static int fallbacks[MIGRATE_TYPES][4] = {
 #endif
 };
 
+int *get_migratetype_fallbacks(int mtype)
+{
+	return fallbacks[mtype];
+}
+
 #ifdef CONFIG_CMA
 static struct page *__rmqueue_cma_fallback(struct zone *zone,
 					unsigned int order)
@@ -1625,7 +1659,8 @@ static void change_pageblock_range(struct page *pageblock_page,
  * is worse than movable allocations stealing from unmovable and reclaimable
  * pageblocks.
  */
-static bool can_steal_fallback(unsigned int order, int start_mt)
+static bool can_steal_fallback(unsigned int current_order, unsigned int start_order,
+			       int start_mt, int fallback_mt)
 {
 	/*
 	 * Leaving this order check is intended, although there is
@@ -1634,12 +1669,17 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
 	 * but, below check doesn't guarantee it and that is just heuristic
 	 * so could be changed anytime.
 	 */
-	if (order >= pageblock_order)
+	if (current_order >= pageblock_order)
 		return true;
 
-	if (order >= pageblock_order / 2 ||
+	/* don't let unmovable allocations cause migrations simply because of free pages */
+	if ((start_mt != MIGRATE_UNMOVABLE && current_order >= pageblock_order / 2) ||
+	    /* only steal reclaimable page blocks for unmovable allocations */
+	    (start_mt == MIGRATE_UNMOVABLE && fallback_mt != MIGRATE_MOVABLE && current_order >= pageblock_order / 2) ||
+	    /* reclaimable can steal aggressively */
 	    start_mt == MIGRATE_RECLAIMABLE ||
-	    start_mt == MIGRATE_UNMOVABLE ||
+	    /* allow unmovable allocs up to 64K without migrating blocks */
+	    (start_mt == MIGRATE_UNMOVABLE && start_order >= 5) ||
 	    page_group_by_mobility_disabled)
 		return true;
 
@@ -1679,8 +1719,9 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
  * we can steal other freepages all together. This would help to reduce
  * fragmentation due to mixed migratetype pages in one pageblock.
  */
-int find_suitable_fallback(struct free_area *area, unsigned int order,
-			int migratetype, bool only_stealable, bool *can_steal)
+int find_suitable_fallback(struct free_area *area, unsigned int current_order,
+			int migratetype, bool only_stealable,
+			int start_order, bool *can_steal)
 {
 	int i;
 	int fallback_mt;
@@ -1697,7 +1738,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
 		if (list_empty(&area->free_list[fallback_mt]))
 			continue;
 
-		if (can_steal_fallback(order, migratetype))
+		if (can_steal_fallback(current_order, start_order, migratetype, fallback_mt))
 			*can_steal = true;
 
 		if (!only_stealable)
@@ -1833,13 +1874,14 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 				--current_order) {
 		area = &(zone->free_area[current_order]);
 		fallback_mt = find_suitable_fallback(area, current_order,
-				start_migratetype, false, &can_steal);
+				start_migratetype, false, order, &can_steal);
 		if (fallback_mt == -1)
 			continue;
 
 		page = list_entry(area->free_list[fallback_mt].next,
 						struct page, lru);
-		if (can_steal)
+		if (can_steal &&
+			get_pageblock_migratetype(page) != MIGRATE_HIGHATOMIC)
 			steal_suitable_fallback(zone, page, start_migratetype);
 
 		/* Remove the page from the freelists */
@@ -1878,17 +1920,30 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
 
 	page = __rmqueue_smallest(zone, order, migratetype);
 	if (unlikely(!page)) {
-		if (migratetype == MIGRATE_MOVABLE)
-			page = __rmqueue_cma_fallback(zone, order);
-
-		if (!page)
-			page = __rmqueue_fallback(zone, order, migratetype);
+		page = __rmqueue_fallback(zone, order, migratetype);
 	}
 
 	trace_mm_page_alloc_zone_locked(page, order, migratetype);
 	return page;
 }
 
+#ifdef CONFIG_CMA
+static struct page *__rmqueue_cma(struct zone *zone, unsigned int order)
+{
+	struct page *page = 0;
+	if (IS_ENABLED(CONFIG_CMA))
+		if (!zone->cma_alloc)
+			page = __rmqueue_cma_fallback(zone, order);
+	trace_mm_page_alloc_zone_locked(page, order, MIGRATE_CMA);
+	return page;
+}
+#else
+static inline struct page *__rmqueue_cma(struct zone *zone, unsigned int order)
+{
+	return NULL;
+}
+#endif
+
 /*
  * Obtain a specified number of elements from the buddy allocator, all under
  * a single hold of the lock, for efficiency. Add them to the supplied list.
@@ -1902,7 +1957,17 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
-		struct page *page = __rmqueue(zone, order, migratetype, 0);
+		struct page *page;
+
+		/*
+		 * If migrate type CMA is being requested only try to
+		 * satisfy the request with CMA pages to try and increase
+		 * CMA utlization.
+		 */
+		if (is_migrate_cma(migratetype))
+			page = __rmqueue_cma(zone, order);
+		else
+			page = __rmqueue(zone, order, migratetype, 0);
 
 		if (unlikely(page == NULL))
 			break;
@@ -1929,6 +1994,28 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 	return i;
 }
 
+/*
+ * Return the pcp list that corresponds to the migrate type if that list isn't
+ * empty.
+ * If the list is empty return NULL.
+ */
+static struct list_head *get_populated_pcp_list(struct zone *zone,
+			unsigned int order, struct per_cpu_pages *pcp,
+			int migratetype, int cold)
+{
+	struct list_head *list = &pcp->lists[migratetype];
+
+	if (list_empty(list)) {
+		pcp->count += rmqueue_bulk(zone, order,
+				pcp->batch, list,
+				migratetype, cold);
+
+		if (list_empty(list))
+			list = NULL;
+	}
+	return list;
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Called from the vmstat counter updater to drain pagesets of this
@@ -2176,7 +2263,6 @@ void free_hot_cold_page_list(struct list_head *list, bool cold)
 void split_page(struct page *page, unsigned int order)
 {
 	int i;
-	gfp_t gfp_mask;
 
 	VM_BUG_ON_PAGE(PageCompound(page), page);
 	VM_BUG_ON_PAGE(!page_count(page), page);
@@ -2190,12 +2276,9 @@ void split_page(struct page *page, unsigned int order)
 		split_page(virt_to_page(page[0].shadow), order);
 #endif
 
-	gfp_mask = get_page_owner_gfp(page);
-	set_page_owner(page, 0, gfp_mask);
-	for (i = 1; i < (1 << order); i++) {
+	for (i = 1; i < (1 << order); i++)
 		set_page_refcounted(page + i);
-		set_page_owner(page + i, 0, gfp_mask);
-	}
+	split_page_owner(page, order);
 }
 EXPORT_SYMBOL_GPL(split_page);
 
@@ -2213,7 +2296,8 @@ int __isolate_free_page(struct page *page, unsigned int order)
 	if (!is_migrate_isolate(mt)) {
 		/* Obey watermarks as if the page was being allocated */
 		watermark = low_wmark_pages(zone) + (1 << order);
-		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+		if (!is_migrate_cma(mt) &&
+		    !zone_watermark_ok(zone, 0, watermark, 0, 0))
 			return 0;
 
 		__mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -2224,14 +2308,13 @@ int __isolate_free_page(struct page *page, unsigned int order)
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
 
-	set_page_owner(page, order, __GFP_MOVABLE);
-
 	/* Set the pageblock if the isolated page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
 			int mt = get_pageblock_migratetype(page);
-			if (!is_migrate_isolate(mt) && !is_migrate_cma(mt))
+			if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)
+				&& mt != MIGRATE_HIGHATOMIC)
 				set_pageblock_migratetype(page,
 							  MIGRATE_MOVABLE);
 		}
@@ -2242,33 +2325,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
 }
 
 /*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
- */
-int split_free_page(struct page *page)
-{
-	unsigned int order;
-	int nr_pages;
-
-	order = page_order(page);
-
-	nr_pages = __isolate_free_page(page, order);
-	if (!nr_pages)
-		return 0;
-
-	/* Split into individual pages */
-	set_page_refcounted(page);
-	split_page(page, order);
-	return nr_pages;
-}
-
-/*
  * Allocate a page from the given zone. Use pcplists for order-0 allocations.
  */
 static inline
@@ -2277,21 +2333,32 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 			gfp_t gfp_flags, int alloc_flags, int migratetype)
 {
 	unsigned long flags;
-	struct page *page;
+	struct page *page = NULL;
 	bool cold = ((gfp_flags & __GFP_COLD) != 0);
 
 	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
-		struct list_head *list;
+		struct list_head *list = NULL;
 
 		local_irq_save(flags);
 		pcp = &this_cpu_ptr(zone->pageset)->pcp;
-		list = &pcp->lists[migratetype];
-		if (list_empty(list)) {
-			pcp->count += rmqueue_bulk(zone, 0,
-					pcp->batch, list,
-					migratetype, cold);
-			if (unlikely(list_empty(list)))
+
+		/* First try to get CMA pages */
+		if (migratetype == MIGRATE_MOVABLE &&
+			gfp_flags & __GFP_CMA) {
+			list = get_populated_pcp_list(zone, 0, pcp,
+					get_cma_migrate_type(), cold);
+		}
+
+		if (list == NULL) {
+			/*
+			 * Either CMA is not suitable or there are no free CMA
+			 * pages.
+			 */
+			list = get_populated_pcp_list(zone, 0, pcp,
+					migratetype, cold);
+			if (unlikely(list == NULL) ||
+				unlikely(list_empty(list)))
 				goto failed;
 		}
 
@@ -2324,8 +2391,13 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 			if (page)
 				trace_mm_page_alloc_zone_locked(page, order, migratetype);
 		}
+		if (!page && migratetype == MIGRATE_MOVABLE &&
+			gfp_flags & __GFP_CMA)
+			page = __rmqueue_cma(zone, order);
+
 		if (!page)
 			page = __rmqueue(zone, order, migratetype, gfp_flags);
+
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
@@ -2485,6 +2557,14 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 			continue;
 
 		for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
+#ifdef CONFIG_CMA
+			/*
+			 * Note that this check is needed only
+			 * when MIGRATE_CMA < MIGRATE_PCPTYPES.
+			 */
+			if (mt == MIGRATE_CMA)
+				continue;
+#endif
 			if (!list_empty(&area->free_list[mt]))
 				return true;
 		}
@@ -3703,6 +3783,13 @@ long si_mem_available(void)
 	available += global_page_state(NR_SLAB_RECLAIMABLE) -
		     min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
 
+	/*
+	 * Part of the kernel memory, which can be released under memory
+	 * pressure.
+	 */
+	available += global_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
+		PAGE_SHIFT;
+
 	if (available < 0)
 		available = 0;
 	return available;
@@ -5311,6 +5398,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 #endif
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
+#ifdef CONFIG_COMPACTION
+	init_waitqueue_head(&pgdat->kcompactd_wait);
+#endif
 	pgdat_page_ext_init(pgdat);
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6081,20 +6171,12 @@ static void calculate_totalreserve_pages(void)
 
 			if (max > zone->managed_pages)
 				max = zone->managed_pages;
+
+			zone->totalreserve_pages = max;
+
 			reserve_pages += max;
-			/*
-			 * Lowmem reserves are not available to
-			 * GFP_HIGHUSER page cache allocations and
-			 * kswapd tries to balance zones to their high
-			 * watermark. As a result, neither should be
-			 * regarded as dirtyable memory, to prevent a
-			 * situation where reclaim has to clean pages
-			 * in order to balance the zones.
-			 */
-			zone->dirty_balance_reserve = max;
 		}
 	}
-	dirty_balance_reserve = reserve_pages;
 	totalreserve_pages = reserve_pages;
 }
 
@@ -6629,8 +6711,9 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
  * If @count is not zero, it is okay to include less @count unmovable pages
 *
 * PageLRU check without isolation or lru_lock could race so that
- * MIGRATE_MOVABLE block might include unmovable pages. It means you can't
- * expect this function should be exact.
+ * MIGRATE_MOVABLE block might include unmovable pages. And __PageMovable
+ * check without lock_page also may miss some movable non-lru pages at
+ * race condition. So you can't expect this function should be exact.
  */
 bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 			 bool skip_hwpoisoned_pages)
@@ -6686,6 +6769,9 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 		if (skip_hwpoisoned_pages && PageHWPoison(page))
 			continue;
 
+		if (__PageMovable(page))
+			continue;
+
 		if (!PageLRU(page))
 			found++;
 		/*
@@ -6855,6 +6941,8 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	if (ret)
 		return ret;
 
+	cc.zone->cma_alloc = 1;
+
 	ret = __alloc_contig_migrate_range(&cc, start, end);
 	if (ret)
 		goto done;
@@ -6913,6 +7001,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 
 done:
 	undo_isolate_page_range(pfn_max_align_down(start),
 				pfn_max_align_up(end), migratetype);
+	cc.zone->cma_alloc = 0;
 	return ret;
 }
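For readers skimming the hunks above, the behavioural core of the free_pages_prezeroed()/post_alloc_hook() change is small: prep_new_page() now skips the explicit __GFP_ZERO clear loop whenever zero page poisoning already leaves freed pages zero-filled. The standalone C sketch below only illustrates that decision; it is not kernel code, and the names prep_page_sketch, pages_prezeroed and the 4096-byte PAGE_SIZE are hypothetical stand-ins chosen for the example.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096
#define GFP_ZERO  0x1u		/* stand-in for the kernel's __GFP_ZERO */

/*
 * Stand-in for free_pages_prezeroed(): true when zero page poisoning
 * (CONFIG_PAGE_POISONING_ZERO-style) already zero-fills pages on free.
 */
static bool pages_prezeroed(bool poison_zero_enabled)
{
	return poison_zero_enabled;
}

/* Mirrors only the __GFP_ZERO handling prep_new_page() gains in the diff. */
static void prep_page_sketch(unsigned char *page, unsigned int gfp_flags,
			     bool poison_zero_enabled)
{
	if (!pages_prezeroed(poison_zero_enabled) && (gfp_flags & GFP_ZERO))
		memset(page, 0, PAGE_SIZE);	/* the clear_highpage() loop */
}

int main(void)
{
	static unsigned char page[PAGE_SIZE];

	/* Poisoning off: the allocator must clear the page itself. */
	memset(page, 0xaa, PAGE_SIZE);
	prep_page_sketch(page, GFP_ZERO, false);
	printf("cleared by allocator: %s\n", page[0] == 0 ? "yes" : "no");

	/* Zero-poisoning on: the page is already zero, so clearing is skipped. */
	memset(page, 0x00, PAGE_SIZE);
	prep_page_sketch(page, GFP_ZERO, true);
	printf("already zero, clear skipped: %s\n", page[0] == 0 ? "yes" : "no");
	return 0;
}

Running the sketch prints "yes" in both cases, which is the invariant the patch relies on: a __GFP_ZERO page ends up zeroed either way, and the only difference is whether the allocator or the poisoning path pays for the memset.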