summaryrefslogtreecommitdiff
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c273
1 files changed, 181 insertions, 92 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a4f6564b70c8..bd4fb66c11c5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -114,13 +114,6 @@ static DEFINE_SPINLOCK(managed_page_count_lock);
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
unsigned long totalcma_pages __read_mostly;
-/*
- * When calculating the number of globally allowed dirty pages, there
- * is a certain number of per-zone reserves that should not be
- * considered dirtyable memory. This is the sum of those reserves
- * over all existing zones that contribute dirtyable memory.
- */
-unsigned long dirty_balance_reserve __read_mostly;
int percpu_pagelist_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
@@ -230,6 +223,20 @@ static char * const zone_names[MAX_NR_ZONES] = {
};
static void free_compound_page(struct page *page);
+
+char * const migratetype_names[MIGRATE_TYPES] = {
+ "Unmovable",
+ "Movable",
+ "Reclaimable",
+#ifdef CONFIG_CMA
+ "CMA",
+#endif
+ "HighAtomic",
+#ifdef CONFIG_MEMORY_ISOLATION
+ "Isolate",
+#endif
+};
+
compound_page_dtor * const compound_page_dtors[] = {
NULL,
free_compound_page,
@@ -466,6 +473,7 @@ static void bad_page(struct page *page, const char *reason,
printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n",
current->comm, page_to_pfn(page));
dump_page_badflags(page, reason, bad_flags);
+ dump_page_owner(page);
print_modules();
dump_stack();
@@ -512,7 +520,8 @@ void prep_compound_page(struct page *page, unsigned int order)
#ifdef CONFIG_DEBUG_PAGEALLOC
unsigned int _debug_guardpage_minorder;
-bool _debug_pagealloc_enabled __read_mostly;
+bool _debug_pagealloc_enabled __read_mostly
+ = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
bool _debug_guardpage_enabled __read_mostly;
static int __init early_debug_pagealloc(char *buf)
@@ -523,6 +532,9 @@ static int __init early_debug_pagealloc(char *buf)
if (strcmp(buf, "on") == 0)
_debug_pagealloc_enabled = true;
+ if (strcmp(buf, "off") == 0)
+ _debug_pagealloc_enabled = false;
+
return 0;
}
early_param("debug_pagealloc", early_debug_pagealloc);
@@ -572,6 +584,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page,
return;
page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ return;
+
__set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru);
@@ -589,6 +604,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
return;
page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ return;
+
__clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0);
@@ -1013,9 +1031,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
trace_mm_page_free(page, order);
kmemcheck_free_shadow(page, order);
- kasan_free_pages(page, order);
- if (PageAnon(page))
+ if (PageMappingFlags(page))
page->mapping = NULL;
bad += free_pages_check(page);
for (i = 1; i < (1 << order); i++) {
@@ -1035,7 +1052,9 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
PAGE_SIZE << order);
}
arch_free_page(page, order);
+ kernel_poison_pages(page, 1 << order, 0);
kernel_map_pages(page, 1 << order, 0);
+ kasan_free_pages(page, order);
return true;
}
@@ -1056,8 +1075,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
local_irq_restore(flags);
}
-static void __init __free_pages_boot_core(struct page *page,
- unsigned long pfn, unsigned int order)
+static void __init __free_pages_boot_core(struct page *page, unsigned long pfn, unsigned int order)
{
unsigned int nr_pages = 1 << order;
struct page *p = page;
@@ -1129,7 +1147,7 @@ static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
#endif
-void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
+void __free_pages_bootmem(struct page *page, unsigned long pfn,
unsigned int order)
{
if (early_page_uninitialised(pfn))
@@ -1307,6 +1325,11 @@ void __init page_alloc_init_late(void)
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
#ifdef CONFIG_CMA
+bool is_cma_pageblock(struct page *page)
+{
+ return get_pageblock_migratetype(page) == MIGRATE_CMA;
+}
+
/* Free whole pageblock and set its migration type to MIGRATE_CMA. */
void __init init_cma_reserved_pageblock(struct page *page)
{
@@ -1414,8 +1437,27 @@ static inline int check_new_page(struct page *page)
return 0;
}
+static inline bool free_pages_prezeroed(void)
+{
+ return IS_ENABLED(CONFIG_PAGE_POISONING_ZERO) &&
+ page_poisoning_enabled();
+}
+
+inline void post_alloc_hook(struct page *page, unsigned int order,
+ gfp_t gfp_flags)
+{
+ set_page_private(page, 0);
+ set_page_refcounted(page);
+
+ kasan_alloc_pages(page, order);
+ arch_alloc_page(page, order);
+ kernel_map_pages(page, 1 << order, 1);
+ kernel_poison_pages(page, 1 << order, 1);
+ set_page_owner(page, order, gfp_flags);
+}
+
static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
- int alloc_flags)
+ int alloc_flags)
{
int i;
@@ -1425,22 +1467,15 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
return 1;
}
- set_page_private(page, 0);
- set_page_refcounted(page);
-
- arch_alloc_page(page, order);
- kernel_map_pages(page, 1 << order, 1);
- kasan_alloc_pages(page, order);
+ post_alloc_hook(page, order, gfp_flags);
- if (gfp_flags & __GFP_ZERO)
+ if (!free_pages_prezeroed() && (gfp_flags & __GFP_ZERO))
for (i = 0; i < (1 << order); i++)
clear_highpage(page + i);
if (order && (gfp_flags & __GFP_COMP))
prep_compound_page(page, order);
- set_page_owner(page, order, gfp_flags);
-
/*
* page is set pfmemalloc when ALLOC_NO_WATERMARKS was necessary to
* allocate the page. The expectation is that the caller is taking
@@ -1503,6 +1538,11 @@ static int fallbacks[MIGRATE_TYPES][4] = {
#endif
};
+int *get_migratetype_fallbacks(int mtype)
+{
+ return fallbacks[mtype];
+}
+
#ifdef CONFIG_CMA
static struct page *__rmqueue_cma_fallback(struct zone *zone,
unsigned int order)
@@ -1760,13 +1800,25 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
struct page, lru);
/*
- * It should never happen but changes to locking could
- * inadvertently allow a per-cpu drain to add pages
- * to MIGRATE_HIGHATOMIC while unreserving so be safe
- * and watch for underflows.
+ * In page freeing path, migratetype change is racy so
+ * we can encounter several free pages in a pageblock
+ * in this loop although we changed the pageblock type
+ * from highatomic to ac->migratetype. So we should
+ * adjust the count once.
*/
- zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
- zone->nr_reserved_highatomic);
+ if (get_pageblock_migratetype(page) ==
+ MIGRATE_HIGHATOMIC) {
+ /*
+ * It should never happen but changes to
+ * locking could inadvertently allow a per-cpu
+ * drain to add pages to MIGRATE_HIGHATOMIC
+ * while unreserving so be safe and watch for
+ * underflows.
+ */
+ zone->nr_reserved_highatomic -= min(
+ pageblock_nr_pages,
+ zone->nr_reserved_highatomic);
+ }
/*
* Convert to ac->migratetype and avoid the normal
@@ -1808,7 +1860,8 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
page = list_entry(area->free_list[fallback_mt].next,
struct page, lru);
- if (can_steal)
+ if (can_steal &&
+ get_pageblock_migratetype(page) != MIGRATE_HIGHATOMIC)
steal_suitable_fallback(zone, page, start_migratetype);
/* Remove the page from the freelists */
@@ -1847,17 +1900,30 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order,
page = __rmqueue_smallest(zone, order, migratetype);
if (unlikely(!page)) {
- if (migratetype == MIGRATE_MOVABLE)
- page = __rmqueue_cma_fallback(zone, order);
-
- if (!page)
- page = __rmqueue_fallback(zone, order, migratetype);
+ page = __rmqueue_fallback(zone, order, migratetype);
}
trace_mm_page_alloc_zone_locked(page, order, migratetype);
return page;
}
+#ifdef CONFIG_CMA
+/* Try __rmqueue_cma_fallback() unless zone->cma_alloc is set; may return NULL. */
+static struct page *__rmqueue_cma(struct zone *zone, unsigned int order)
+{
+ struct page *page = NULL;
+ if (!zone->cma_alloc)
+ page = __rmqueue_cma_fallback(zone, order);
+ trace_mm_page_alloc_zone_locked(page, order, MIGRATE_CMA);
+ return page;
+}
+#else
+static inline struct page *__rmqueue_cma(struct zone *zone, unsigned int order)
+{
+ return NULL;
+}
+#endif
+
/*
* Obtain a specified number of elements from the buddy allocator, all under
* a single hold of the lock, for efficiency. Add them to the supplied list.
@@ -1871,7 +1937,17 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
- struct page *page = __rmqueue(zone, order, migratetype, 0);
+ struct page *page;
+
+ /*
+ * If migrate type CMA is being requested only try to
+ * satisfy the request with CMA pages to try and increase
+ * CMA utilization.
+ */
+ if (is_migrate_cma(migratetype))
+ page = __rmqueue_cma(zone, order);
+ else
+ page = __rmqueue(zone, order, migratetype, 0);
if (unlikely(page == NULL))
break;
@@ -1898,6 +1974,28 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
return i;
}
+/*
+ * Return the pcp list that corresponds to the migrate type if that list isn't
+ * empty.
+ * If the list is empty return NULL.
+ */
+static struct list_head *get_populated_pcp_list(struct zone *zone,
+ unsigned int order, struct per_cpu_pages *pcp,
+ int migratetype, int cold)
+{
+ struct list_head *list = &pcp->lists[migratetype];
+
+ if (list_empty(list)) {
+ pcp->count += rmqueue_bulk(zone, order,
+ pcp->batch, list,
+ migratetype, cold);
+
+ if (list_empty(list))
+ list = NULL;
+ }
+ return list;
+}
+
#ifdef CONFIG_NUMA
/*
* Called from the vmstat counter updater to drain pagesets of this
@@ -2145,7 +2243,6 @@ void free_hot_cold_page_list(struct list_head *list, bool cold)
void split_page(struct page *page, unsigned int order)
{
int i;
- gfp_t gfp_mask;
VM_BUG_ON_PAGE(PageCompound(page), page);
VM_BUG_ON_PAGE(!page_count(page), page);
@@ -2159,12 +2256,9 @@ void split_page(struct page *page, unsigned int order)
split_page(virt_to_page(page[0].shadow), order);
#endif
- gfp_mask = get_page_owner_gfp(page);
- set_page_owner(page, 0, gfp_mask);
- for (i = 1; i < (1 << order); i++) {
+ for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
- set_page_owner(page + i, 0, gfp_mask);
- }
+ split_page_owner(page, order);
}
EXPORT_SYMBOL_GPL(split_page);
@@ -2182,7 +2276,8 @@ int __isolate_free_page(struct page *page, unsigned int order)
if (!is_migrate_isolate(mt)) {
/* Obey watermarks as if the page was being allocated */
watermark = low_wmark_pages(zone) + (1 << order);
- if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+ if (!is_migrate_cma(mt) &&
+ !zone_watermark_ok(zone, 0, watermark, 0, 0))
return 0;
__mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -2193,14 +2288,13 @@ int __isolate_free_page(struct page *page, unsigned int order)
zone->free_area[order].nr_free--;
rmv_page_order(page);
- set_page_owner(page, order, __GFP_MOVABLE);
-
/* Set the pageblock if the isolated page is at least a pageblock */
if (order >= pageblock_order - 1) {
struct page *endpage = page + (1 << order) - 1;
for (; page < endpage; page += pageblock_nr_pages) {
int mt = get_pageblock_migratetype(page);
- if (!is_migrate_isolate(mt) && !is_migrate_cma(mt))
+ if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)
+ && mt != MIGRATE_HIGHATOMIC)
set_pageblock_migratetype(page,
MIGRATE_MOVABLE);
}
@@ -2211,33 +2305,6 @@ int __isolate_free_page(struct page *page, unsigned int order)
}
/*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
- */
-int split_free_page(struct page *page)
-{
- unsigned int order;
- int nr_pages;
-
- order = page_order(page);
-
- nr_pages = __isolate_free_page(page, order);
- if (!nr_pages)
- return 0;
-
- /* Split into individual pages */
- set_page_refcounted(page);
- split_page(page, order);
- return nr_pages;
-}
-
-/*
* Allocate a page from the given zone. Use pcplists for order-0 allocations.
*/
static inline
@@ -2246,21 +2313,32 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
gfp_t gfp_flags, int alloc_flags, int migratetype)
{
unsigned long flags;
- struct page *page;
+ struct page *page = NULL;
bool cold = ((gfp_flags & __GFP_COLD) != 0);
if (likely(order == 0)) {
struct per_cpu_pages *pcp;
- struct list_head *list;
+ struct list_head *list = NULL;
local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
- list = &pcp->lists[migratetype];
- if (list_empty(list)) {
- pcp->count += rmqueue_bulk(zone, 0,
- pcp->batch, list,
- migratetype, cold);
- if (unlikely(list_empty(list)))
+
+ /* First try to get CMA pages */
+ if (migratetype == MIGRATE_MOVABLE &&
+ gfp_flags & __GFP_CMA) {
+ list = get_populated_pcp_list(zone, 0, pcp,
+ get_cma_migrate_type(), cold);
+ }
+
+ if (list == NULL) {
+ /*
+ * Either CMA is not suitable or there are no free CMA
+ * pages.
+ */
+ list = get_populated_pcp_list(zone, 0, pcp,
+ migratetype, cold);
+ if (unlikely(list == NULL) ||
+ unlikely(list_empty(list)))
goto failed;
}
@@ -2293,8 +2371,13 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
if (page)
trace_mm_page_alloc_zone_locked(page, order, migratetype);
}
+ if (!page && migratetype == MIGRATE_MOVABLE &&
+ gfp_flags & __GFP_CMA)
+ page = __rmqueue_cma(zone, order);
+
if (!page)
page = __rmqueue(zone, order, migratetype, gfp_flags);
+
spin_unlock(&zone->lock);
if (!page)
goto failed;
@@ -2457,6 +2540,14 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
return true;
for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
+#ifdef CONFIG_CMA
+ /*
+ * Note that this check is needed only
+ * when MIGRATE_CMA < MIGRATE_PCPTYPES.
+ */
+ if (mt == MIGRATE_CMA)
+ continue;
+#endif
if (!list_empty(&area->free_list[mt]))
return true;
}
@@ -5241,6 +5332,9 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
#endif
init_waitqueue_head(&pgdat->kswapd_wait);
init_waitqueue_head(&pgdat->pfmemalloc_wait);
+#ifdef CONFIG_COMPACTION
+ init_waitqueue_head(&pgdat->kcompactd_wait);
+#endif
pgdat_page_ext_init(pgdat);
for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6012,20 +6106,12 @@ static void calculate_totalreserve_pages(void)
if (max > zone->managed_pages)
max = zone->managed_pages;
+
+ zone->totalreserve_pages = max;
+
reserve_pages += max;
- /*
- * Lowmem reserves are not available to
- * GFP_HIGHUSER page cache allocations and
- * kswapd tries to balance zones to their high
- * watermark. As a result, neither should be
- * regarded as dirtyable memory, to prevent a
- * situation where reclaim has to clean pages
- * in order to balance the zones.
- */
- zone->dirty_balance_reserve = max;
}
}
- dirty_balance_reserve = reserve_pages;
totalreserve_pages = reserve_pages;
}
@@ -6786,6 +6872,8 @@ int alloc_contig_range(unsigned long start, unsigned long end,
if (ret)
return ret;
+ cc.zone->cma_alloc = 1;
+
ret = __alloc_contig_migrate_range(&cc, start, end);
if (ret)
goto done;
@@ -6844,6 +6932,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
done:
undo_isolate_page_range(pfn_max_align_down(start),
pfn_max_align_up(end), migratetype);
+ cc.zone->cma_alloc = 0;
return ret;
}