summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/cma.c7
-rw-r--r--mm/filemap.c95
-rw-r--r--mm/hugetlb.c13
-rw-r--r--mm/kasan/kasan.h3
-rw-r--r--mm/ksm.c3
-rw-r--r--mm/list_lru.c2
-rw-r--r--mm/memblock.c28
-rw-r--r--mm/memcontrol.c19
-rw-r--r--mm/page-writeback.c14
-rw-r--r--mm/page_alloc.c21
-rw-r--r--mm/swapfile.c2
-rw-r--r--mm/usercopy.c1
-rw-r--r--mm/vmscan.c21
-rw-r--r--mm/workingset.c10
14 files changed, 130 insertions, 109 deletions
diff --git a/mm/cma.c b/mm/cma.c
index b55caef2454a..20e661a501a6 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -188,7 +188,8 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
return -EINVAL;
/* ensure minimal alignment required by mm core */
- alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
+ alignment = PAGE_SIZE <<
+ max_t(unsigned long, MAX_ORDER - 1, pageblock_order);
/* alignment should be aligned with order_per_bit */
if (!IS_ALIGNED(alignment >> PAGE_SHIFT, 1 << order_per_bit))
@@ -271,8 +272,8 @@ int __init cma_declare_contiguous(phys_addr_t base,
* migratetype page by page allocator's buddy algorithm. In the case,
* you couldn't get a contiguous memory, which is not what we want.
*/
- alignment = max(alignment,
- (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+ alignment = max(alignment, (phys_addr_t)PAGE_SIZE <<
+ max_t(unsigned long, MAX_ORDER - 1, pageblock_order));
base = ALIGN(base, alignment);
size = ALIGN(size, alignment);
limit &= ~(alignment - 1);
diff --git a/mm/filemap.c b/mm/filemap.c
index a322c035c6b9..c46678d7d041 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -109,6 +109,48 @@
* ->tasklist_lock (memory_failure, collect_procs_ao)
*/
+static int page_cache_tree_insert(struct address_space *mapping,
+ struct page *page, void **shadowp)
+{
+ struct radix_tree_node *node;
+ void **slot;
+ int error;
+
+ error = __radix_tree_create(&mapping->page_tree, page->index,
+ &node, &slot);
+ if (error)
+ return error;
+ if (*slot) {
+ void *p;
+
+ p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+ if (!radix_tree_exceptional_entry(p))
+ return -EEXIST;
+ if (shadowp)
+ *shadowp = p;
+ mapping->nrshadows--;
+ if (node)
+ workingset_node_shadows_dec(node);
+ }
+ radix_tree_replace_slot(slot, page);
+ mapping->nrpages++;
+ if (node) {
+ workingset_node_pages_inc(node);
+ /*
+ * Don't track node that contains actual pages.
+ *
+ * Avoid acquiring the list_lru lock if already
+ * untracked. The list_empty() test is safe as
+ * node->private_list is protected by
+ * mapping->tree_lock.
+ */
+ if (!list_empty(&node->private_list))
+ list_lru_del(&workingset_shadow_nodes,
+ &node->private_list);
+ }
+ return 0;
+}
+
static void page_cache_tree_delete(struct address_space *mapping,
struct page *page, void *shadow)
{
@@ -122,6 +164,14 @@ static void page_cache_tree_delete(struct address_space *mapping,
__radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
+ if (!node) {
+ /*
+ * We need a node to properly account shadow
+ * entries. Don't plant any without. XXX
+ */
+ shadow = NULL;
+ }
+
if (shadow) {
mapping->nrshadows++;
/*
@@ -540,9 +590,8 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
memcg = mem_cgroup_begin_page_stat(old);
spin_lock_irqsave(&mapping->tree_lock, flags);
__delete_from_page_cache(old, NULL, memcg);
- error = radix_tree_insert(&mapping->page_tree, offset, new);
+ error = page_cache_tree_insert(mapping, new, NULL);
BUG_ON(error);
- mapping->nrpages++;
/*
* hugetlb pages do not participate in page cache accounting.
@@ -564,48 +613,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);
-static int page_cache_tree_insert(struct address_space *mapping,
- struct page *page, void **shadowp)
-{
- struct radix_tree_node *node;
- void **slot;
- int error;
-
- error = __radix_tree_create(&mapping->page_tree, page->index,
- &node, &slot);
- if (error)
- return error;
- if (*slot) {
- void *p;
-
- p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
- if (!radix_tree_exceptional_entry(p))
- return -EEXIST;
- if (shadowp)
- *shadowp = p;
- mapping->nrshadows--;
- if (node)
- workingset_node_shadows_dec(node);
- }
- radix_tree_replace_slot(slot, page);
- mapping->nrpages++;
- if (node) {
- workingset_node_pages_inc(node);
- /*
- * Don't track node that contains actual pages.
- *
- * Avoid acquiring the list_lru lock if already
- * untracked. The list_empty() test is safe as
- * node->private_list is protected by
- * mapping->tree_lock.
- */
- if (!list_empty(&node->private_list))
- list_lru_del(&workingset_shadow_nodes,
- &node->private_list);
- }
- return 0;
-}
-
static int __add_to_page_cache_locked(struct page *page,
struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 125c7dd55322..4434cdd4cd9a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1416,12 +1416,13 @@ static void dissolve_free_huge_page(struct page *page)
{
spin_lock(&hugetlb_lock);
if (PageHuge(page) && !page_count(page)) {
- struct hstate *h = page_hstate(page);
- int nid = page_to_nid(page);
- list_del(&page->lru);
+ struct page *head = compound_head(page);
+ struct hstate *h = page_hstate(head);
+ int nid = page_to_nid(head);
+ list_del(&head->lru);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
- update_and_free_page(h, page);
+ update_and_free_page(h, head);
}
spin_unlock(&hugetlb_lock);
}
@@ -1429,7 +1430,8 @@ static void dissolve_free_huge_page(struct page *page)
/*
* Dissolve free hugepages in a given pfn range. Used by memory hotplug to
* make specified memory blocks removable from the system.
- * Note that start_pfn should aligned with (minimum) hugepage size.
+ * Note that this will dissolve a free gigantic hugepage completely, if any
+ * part of it lies within the given range.
*/
void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
{
@@ -1438,7 +1440,6 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
if (!hugepages_supported())
return;
- VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order));
for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order)
dissolve_free_huge_page(pfn_to_page(pfn));
}
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 4f6c62e5c21e..37ff0ab6a8ff 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -52,6 +52,9 @@ struct kasan_global {
#if KASAN_ABI_VERSION >= 4
struct kasan_source_location *location;
#endif
+#if KASAN_ABI_VERSION >= 5
+ char *odr_indicator;
+#endif
};
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
diff --git a/mm/ksm.c b/mm/ksm.c
index 7b59d9723adb..25cc13c1fe20 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -301,7 +301,8 @@ static inline struct rmap_item *alloc_rmap_item(void)
{
struct rmap_item *rmap_item;
- rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+ rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
+ __GFP_NORETRY | __GFP_NOWARN);
if (rmap_item)
ksm_rmap_items++;
return rmap_item;
diff --git a/mm/list_lru.c b/mm/list_lru.c
index afc71ea9a381..5d8dffd5b57c 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -554,6 +554,8 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
err = memcg_init_list_lru(lru, memcg_aware);
if (err) {
kfree(lru->node);
+ /* Do this so a list_lru_destroy() doesn't crash: */
+ lru->node = NULL;
goto out;
}
diff --git a/mm/memblock.c b/mm/memblock.c
index 7f0a860a357e..89e74fa82290 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -827,6 +827,17 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
return memblock_setclr_flag(base, size, 1, MEMBLOCK_MIRROR);
}
+/**
+ * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
+ * @base: the base phys addr of the region
+ * @size: the size of the region
+ *
+ * Return 0 on success, -errno on failure.
+ */
+int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
+{
+ return memblock_setclr_flag(base, size, 1, MEMBLOCK_NOMAP);
+}
/**
* __next_reserved_mem_region - next function for for_each_reserved_region()
@@ -918,6 +929,10 @@ void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
continue;
+ /* skip nomap memory unless we were asked for it explicitly */
+ if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
+ continue;
+
if (!type_b) {
if (out_start)
*out_start = m_start;
@@ -1027,6 +1042,10 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
continue;
+ /* skip nomap memory unless we were asked for it explicitly */
+ if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
+ continue;
+
if (!type_b) {
if (out_start)
*out_start = m_start;
@@ -1538,6 +1557,15 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
return memblock_search(&memblock.memory, addr) != -1;
}
+int __init_memblock memblock_is_map_memory(phys_addr_t addr)
+{
+ int i = memblock_search(&memblock.memory, addr);
+
+ if (i == -1)
+ return false;
+ return !memblock_is_nomap(&memblock.memory.regions[i]);
+}
+
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
unsigned long *start_pfn, unsigned long *end_pfn)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b62b33a3cfec..5d9c8a3136bc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2055,6 +2055,15 @@ retry:
current->flags & PF_EXITING))
goto force;
+ /*
+ * Prevent unbounded recursion when reclaim operations need to
+ * allocate memory. This might exceed the limits temporarily,
+ * but we prefer facilitating memory reclaim and getting back
+ * under the limit over triggering OOM kills in these cases.
+ */
+ if (unlikely(current->flags & PF_MEMALLOC))
+ goto force;
+
if (unlikely(task_in_memcg_oom(current)))
goto nomem;
@@ -4972,11 +4981,6 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
return ret;
}
-static int mem_cgroup_allow_attach(struct cgroup_taskset *tset)
-{
- return subsys_cgroup_allow_attach(tset);
-}
-
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
if (mc.to)
@@ -5131,10 +5135,6 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
return 0;
}
-static int mem_cgroup_allow_attach(struct cgroup_taskset *tset)
-{
- return 0;
-}
static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
{
}
@@ -5352,7 +5352,6 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach,
- .allow_attach = mem_cgroup_allow_attach,
.post_attach = mem_cgroup_move_task,
.bind = mem_cgroup_bind,
.dfl_cftypes = memory_files,
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6b4fa64a5c91..15cb026ef807 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -278,7 +278,12 @@ static unsigned long zone_dirtyable_memory(struct zone *zone)
unsigned long nr_pages;
nr_pages = zone_page_state(zone, NR_FREE_PAGES);
- nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
+ /*
+ * Pages reserved for the kernel should not be considered
+ * dirtyable, to prevent a situation where reclaim has to
+ * clean pages in order to balance the zones.
+ */
+ nr_pages -= min(nr_pages, zone->totalreserve_pages);
nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
@@ -332,7 +337,12 @@ static unsigned long global_dirtyable_memory(void)
unsigned long x;
x = global_page_state(NR_FREE_PAGES);
- x -= min(x, dirty_balance_reserve);
+ /*
+ * Pages reserved for the kernel should not be considered
+ * dirtyable, to prevent a situation where reclaim has to
+ * clean pages in order to balance the zones.
+ */
+ x -= min(x, totalreserve_pages);
x += global_page_state(NR_INACTIVE_FILE);
x += global_page_state(NR_ACTIVE_FILE);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index af5901c65ba8..3048c5dbda16 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -114,13 +114,6 @@ static DEFINE_SPINLOCK(managed_page_count_lock);
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
unsigned long totalcma_pages __read_mostly;
-/*
- * When calculating the number of globally allowed dirty pages, there
- * is a certain number of per-zone reserves that should not be
- * considered dirtyable memory. This is the sum of those reserves
- * over all existing zones that contribute dirtyable memory.
- */
-unsigned long dirty_balance_reserve __read_mostly;
int percpu_pagelist_fraction;
gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
@@ -6090,20 +6083,12 @@ static void calculate_totalreserve_pages(void)
if (max > zone->managed_pages)
max = zone->managed_pages;
+
+ zone->totalreserve_pages = max;
+
reserve_pages += max;
- /*
- * Lowmem reserves are not available to
- * GFP_HIGHUSER page cache allocations and
- * kswapd tries to balance zones to their high
- * watermark. As a result, neither should be
- * regarded as dirtyable memory, to prevent a
- * situation where reclaim has to clean pages
- * in order to balance the zones.
- */
- zone->dirty_balance_reserve = max;
}
}
- dirty_balance_reserve = reserve_pages;
totalreserve_pages = reserve_pages;
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 3ae1225084c9..45656508512b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2268,6 +2268,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
swab32s(&swap_header->info.version);
swab32s(&swap_header->info.last_page);
swab32s(&swap_header->info.nr_badpages);
+ if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
+ return 0;
for (i = 0; i < swap_header->info.nr_badpages; i++)
swab32s(&swap_header->info.badpages[i]);
}
diff --git a/mm/usercopy.c b/mm/usercopy.c
index 089328f2b920..b34996a3860b 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -15,6 +15,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/mm.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <asm/sections.h>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6801adacadb0..d82765ba44f4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2265,23 +2265,6 @@ out:
}
}
-#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-static void init_tlb_ubc(void)
-{
- /*
- * This deliberately does not clear the cpumask as it's expensive
- * and unnecessary. If there happens to be data in there then the
- * first SWAP_CLUSTER_MAX pages will send an unnecessary IPI and
- * then will be cleared.
- */
- current->tlb_ubc.flush_required = false;
-}
-#else
-static inline void init_tlb_ubc(void)
-{
-}
-#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
-
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
@@ -2316,8 +2299,6 @@ static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() &&
sc->priority == DEF_PRIORITY);
- init_tlb_ubc();
-
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
nr[LRU_INACTIVE_FILE]) {
@@ -3043,7 +3024,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
sc.may_writepage,
sc.gfp_mask);
+ current->flags |= PF_MEMALLOC;
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+ current->flags &= ~PF_MEMALLOC;
trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
diff --git a/mm/workingset.c b/mm/workingset.c
index aa017133744b..df66f426fdcf 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -341,21 +341,19 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
* no pages, so we expect to be able to remove them all and
* delete and free the empty node afterwards.
*/
-
- BUG_ON(!node->count);
- BUG_ON(node->count & RADIX_TREE_COUNT_MASK);
+ BUG_ON(!workingset_node_shadows(node));
+ BUG_ON(workingset_node_pages(node));
for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
if (node->slots[i]) {
BUG_ON(!radix_tree_exceptional_entry(node->slots[i]));
node->slots[i] = NULL;
- BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
- node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
+ workingset_node_shadows_dec(node);
BUG_ON(!mapping->nrshadows);
mapping->nrshadows--;
}
}
- BUG_ON(node->count);
+ BUG_ON(workingset_node_shadows(node));
inc_zone_state(page_zone(virt_to_page(node)), WORKINGSET_NODERECLAIM);
if (!__radix_tree_delete_node(&mapping->page_tree, node))
BUG();