Diffstat (limited to 'mm')
 mm/Kconfig          |   3
 mm/allocpercpu.c    |  20
 mm/filemap.c        | 237
 mm/filemap_xip.c    |   2
 mm/hugetlb.c        |   2
 mm/memcontrol.c     | 364
 mm/memory.c         |   3
 mm/migrate.c        |  46
 mm/nommu.c          |   4
 mm/page-writeback.c |  12
 mm/pdflush.c        |   4
 mm/readahead.c      |   6
 mm/rmap.c           |  16
 mm/shmem.c          |  52
 mm/shmem_acl.c      |   2
 mm/slab.c           |  11
 mm/slob.c           |   7
 mm/slub.c           |  13
 mm/sparse.c         |   2
 mm/swap_state.c     |  30
 mm/swapfile.c       |  10
 mm/truncate.c       |   6
 mm/util.c           |  55
 mm/vmalloc.c        |   6
 mm/vmscan.c         |  85
 25 files changed, 634 insertions(+), 364 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index aa799007a11b..efee5d379df4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -77,6 +77,9 @@ config FLAT_NODE_MEM_MAP
def_bool y
depends on !SPARSEMEM
+config HAVE_GET_USER_PAGES_FAST
+ bool
+
#
# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
# to represent different areas of memory. This variable allows
diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c
index 843364594e23..4297bc41bfd2 100644
--- a/mm/allocpercpu.c
+++ b/mm/allocpercpu.c
@@ -18,27 +18,28 @@
* Depopulating per-cpu data for a cpu going offline would be a typical
* use case. You need to register a cpu hotplug handler for that purpose.
*/
-void percpu_depopulate(void *__pdata, int cpu)
+static void percpu_depopulate(void *__pdata, int cpu)
{
struct percpu_data *pdata = __percpu_disguise(__pdata);
kfree(pdata->ptrs[cpu]);
pdata->ptrs[cpu] = NULL;
}
-EXPORT_SYMBOL_GPL(percpu_depopulate);
/**
* percpu_depopulate_mask - depopulate per-cpu data for some cpu's
* @__pdata: per-cpu data to depopulate
* @mask: depopulate per-cpu data for cpu's selected through mask bits
*/
-void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+static void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
{
int cpu;
for_each_cpu_mask_nr(cpu, *mask)
percpu_depopulate(__pdata, cpu);
}
-EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
+
+#define percpu_depopulate_mask(__pdata, mask) \
+ __percpu_depopulate_mask((__pdata), &(mask))
/**
* percpu_populate - populate per-cpu data for given cpu
@@ -51,7 +52,7 @@ EXPORT_SYMBOL_GPL(__percpu_depopulate_mask);
* use case. You need to register a cpu hotplug handler for that purpose.
* Per-cpu object is populated with zeroed buffer.
*/
-void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
+static void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
{
struct percpu_data *pdata = __percpu_disguise(__pdata);
int node = cpu_to_node(cpu);
@@ -68,7 +69,6 @@ void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu)
pdata->ptrs[cpu] = kzalloc(size, gfp);
return pdata->ptrs[cpu];
}
-EXPORT_SYMBOL_GPL(percpu_populate);
/**
* percpu_populate_mask - populate per-cpu data for more cpu's
@@ -79,8 +79,8 @@ EXPORT_SYMBOL_GPL(percpu_populate);
*
* Per-cpu objects are populated with zeroed buffers.
*/
-int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
- cpumask_t *mask)
+static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+ cpumask_t *mask)
{
cpumask_t populated;
int cpu;
@@ -94,7 +94,9 @@ int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
cpu_set(cpu, populated);
return 0;
}
-EXPORT_SYMBOL_GPL(__percpu_populate_mask);
+
+#define percpu_populate_mask(__pdata, size, gfp, mask) \
+ __percpu_populate_mask((__pdata), (size), (gfp), &(mask))
/**
* percpu_alloc_mask - initial setup of per-cpu data
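With the populate/depopulate helpers now static, the by-value wrapper macros keep the call sites inside allocpercpu.c tidy. A hedged sketch of how the allocator's entry point uses them, paraphrasing __percpu_alloc_mask(); not verbatim from this patch:

    /* Sketch of the in-file consumer of the now-static helpers. */
    void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
    {
            void *pdata = kzalloc(sizeof(struct percpu_data), gfp);
            void *__pdata = __percpu_disguise(pdata);

            if (unlikely(!pdata))
                    return NULL;
            /* __percpu_populate_mask() depopulates whatever it managed to
             * allocate if any cpu's allocation fails, so no extra rollback
             * is needed here. */
            if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
                    return __pdata;
            kfree(pdata);
            return NULL;
    }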
diff --git a/mm/filemap.c b/mm/filemap.c
index 7675b91f4f63..5de7633e1dbe 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -109,13 +109,13 @@
/*
* Remove a page from the page cache and free it. Caller has to make
* sure the page is locked and that nobody else uses it - or that usage
- * is safe. The caller must hold a write_lock on the mapping's tree_lock.
+ * is safe. The caller must hold the mapping's tree_lock.
*/
void __remove_from_page_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_uncharge_cache_page(page);
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
mapping->nrpages--;
@@ -141,9 +141,9 @@ void remove_from_page_cache(struct page *page)
BUG_ON(!PageLocked(page));
- write_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
__remove_from_page_cache(page);
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
}
static int sync_page(void *word)
@@ -442,48 +442,52 @@ int filemap_write_and_wait_range(struct address_space *mapping,
}
/**
- * add_to_page_cache - add newly allocated pagecache pages
+ * add_to_page_cache_locked - add a locked page to the pagecache
* @page: page to add
* @mapping: the page's address_space
* @offset: page index
* @gfp_mask: page allocation mode
*
- * This function is used to add newly allocated pagecache pages;
- * the page is new, so we can just run SetPageLocked() against it.
- * The other page state flags were set by rmqueue().
- *
+ * This function is used to add a page to the pagecache. It must be locked.
* This function does not add the page to the LRU. The caller must do that.
*/
-int add_to_page_cache(struct page *page, struct address_space *mapping,
+int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
{
- int error = mem_cgroup_cache_charge(page, current->mm,
+ int error;
+
+ VM_BUG_ON(!PageLocked(page));
+
+ error = mem_cgroup_cache_charge(page, current->mm,
gfp_mask & ~__GFP_HIGHMEM);
if (error)
goto out;
error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
if (error == 0) {
- write_lock_irq(&mapping->tree_lock);
+ page_cache_get(page);
+ page->mapping = mapping;
+ page->index = offset;
+
+ spin_lock_irq(&mapping->tree_lock);
error = radix_tree_insert(&mapping->page_tree, offset, page);
- if (!error) {
- page_cache_get(page);
- SetPageLocked(page);
- page->mapping = mapping;
- page->index = offset;
+ if (likely(!error)) {
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
- } else
- mem_cgroup_uncharge_page(page);
+ } else {
+ page->mapping = NULL;
+ mem_cgroup_uncharge_cache_page(page);
+ page_cache_release(page);
+ }
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
} else
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_uncharge_cache_page(page);
out:
return error;
}
-EXPORT_SYMBOL(add_to_page_cache);
+EXPORT_SYMBOL(add_to_page_cache_locked);
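With the insertion path renamed, add_to_page_cache() itself becomes a thin wrapper in include/linux/pagemap.h that locks the (new, not yet visible) page before calling the function above. A sketch of that companion change, which lies outside this mm/ diff:

    static inline int add_to_page_cache(struct page *page,
                    struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
    {
            int error;

            SetPageLocked(page);    /* page is new; nobody else can see it */
            error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
            if (unlikely(error))
                    ClearPageLocked(page);
            return error;
    }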
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
@@ -633,15 +637,35 @@ void __lock_page_nosync(struct page *page)
* Is there a pagecache struct page at the given (mapping, offset) tuple?
* If yes, increment its refcount and return it; if no, return NULL.
*/
-struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
+struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
{
+ void **pagep;
struct page *page;
- read_lock_irq(&mapping->tree_lock);
- page = radix_tree_lookup(&mapping->page_tree, offset);
- if (page)
- page_cache_get(page);
- read_unlock_irq(&mapping->tree_lock);
+ rcu_read_lock();
+repeat:
+ page = NULL;
+ pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
+ if (pagep) {
+ page = radix_tree_deref_slot(pagep);
+ if (unlikely(!page || page == RADIX_TREE_RETRY))
+ goto repeat;
+
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /*
+ * Has the page moved?
+ * This is part of the lockless pagecache protocol. See
+ * include/linux/pagemap.h for details.
+ */
+ if (unlikely(page != *pagep)) {
+ page_cache_release(page);
+ goto repeat;
+ }
+ }
+ rcu_read_unlock();
+
return page;
}
EXPORT_SYMBOL(find_get_page);
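The protocol the comment refers to works because page_cache_get_speculative() refuses to take a reference on a page whose count has dropped (or been frozen) to zero, and because the caller re-checks the slot afterwards. A condensed sketch of the helper's SMP path; the include/linux/pagemap.h original carries additional debug checks and a non-SMP fast path:

    static inline int page_cache_get_speculative(struct page *page)
    {
            VM_BUG_ON(in_interrupt());

            /* A zero count means the page is being freed and may be
             * reused; back off and retry the radix-tree lookup rather
             * than resurrecting it. */
            if (unlikely(!get_page_unless_zero(page)))
                    return 0;

            /* The caller must still verify that the slot points at the
             * same page: we may have pinned a page that was concurrently
             * removed from, or moved within, the tree. */
            return 1;
    }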
@@ -656,32 +680,22 @@ EXPORT_SYMBOL(find_get_page);
*
* Returns zero if the page was not present. find_lock_page() may sleep.
*/
-struct page *find_lock_page(struct address_space *mapping,
- pgoff_t offset)
+struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
{
struct page *page;
repeat:
- read_lock_irq(&mapping->tree_lock);
- page = radix_tree_lookup(&mapping->page_tree, offset);
+ page = find_get_page(mapping, offset);
if (page) {
- page_cache_get(page);
- if (TestSetPageLocked(page)) {
- read_unlock_irq(&mapping->tree_lock);
- __lock_page(page);
-
- /* Has the page been truncated while we slept? */
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- page_cache_release(page);
- goto repeat;
- }
- VM_BUG_ON(page->index != offset);
- goto out;
+ lock_page(page);
+ /* Has the page been truncated? */
+ if (unlikely(page->mapping != mapping)) {
+ unlock_page(page);
+ page_cache_release(page);
+ goto repeat;
}
+ VM_BUG_ON(page->index != offset);
}
- read_unlock_irq(&mapping->tree_lock);
-out:
return page;
}
EXPORT_SYMBOL(find_lock_page);
@@ -747,13 +761,39 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
{
unsigned int i;
unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+ (void ***)pages, start, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ /*
+ * this can only trigger if nr_found == 1, making livelock
+ * a non issue.
+ */
+ if (unlikely(page == RADIX_TREE_RETRY))
+ goto restart;
- read_lock_irq(&mapping->tree_lock);
- ret = radix_tree_gang_lookup(&mapping->page_tree,
- (void **)pages, start, nr_pages);
- for (i = 0; i < ret; i++)
- page_cache_get(pages[i]);
- read_unlock_irq(&mapping->tree_lock);
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
+ }
+ rcu_read_unlock();
return ret;
}
@@ -774,19 +814,44 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
{
unsigned int i;
unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+ (void ***)pages, index, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ /*
+ * this can only trigger if nr_found == 1, making livelock
+ * a non issue.
+ */
+ if (unlikely(page == RADIX_TREE_RETRY))
+ goto restart;
- read_lock_irq(&mapping->tree_lock);
- ret = radix_tree_gang_lookup(&mapping->page_tree,
- (void **)pages, index, nr_pages);
- for (i = 0; i < ret; i++) {
- if (pages[i]->mapping == NULL || pages[i]->index != index)
+ if (page->mapping == NULL || page->index != index)
break;
- page_cache_get(pages[i]);
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
index++;
}
- read_unlock_irq(&mapping->tree_lock);
- return i;
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(find_get_pages_contig);
@@ -806,15 +871,43 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
{
unsigned int i;
unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree,
+ (void ***)pages, *index, nr_pages, tag);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+ /*
+ * this can only trigger if nr_found == 1, making livelock
+ * a non issue.
+ */
+ if (unlikely(page == RADIX_TREE_RETRY))
+ goto restart;
+
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ page_cache_release(page);
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
+ }
+ rcu_read_unlock();
- read_lock_irq(&mapping->tree_lock);
- ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
- (void **)pages, *index, nr_pages, tag);
- for (i = 0; i < ret; i++)
- page_cache_get(pages[i]);
if (ret)
*index = pages[ret - 1]->index + 1;
- read_unlock_irq(&mapping->tree_lock);
+
return ret;
}
EXPORT_SYMBOL(find_get_pages_tag);
@@ -1665,8 +1758,9 @@ static int __remove_suid(struct dentry *dentry, int kill)
return notify_change(dentry, &newattrs);
}
-int remove_suid(struct dentry *dentry)
+int file_remove_suid(struct file *file)
{
+ struct dentry *dentry = file->f_path.dentry;
int killsuid = should_remove_suid(dentry);
int killpriv = security_inode_need_killpriv(dentry);
int error = 0;
@@ -1680,7 +1774,7 @@ int remove_suid(struct dentry *dentry)
return error;
}
-EXPORT_SYMBOL(remove_suid);
+EXPORT_SYMBOL(file_remove_suid);
static size_t __iovec_copy_from_user_inatomic(char *vaddr,
const struct iovec *iov, size_t base, size_t bytes)
@@ -2436,7 +2530,7 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
if (count == 0)
goto out;
- err = remove_suid(file->f_path.dentry);
+ err = file_remove_suid(file);
if (err)
goto out;
@@ -2563,9 +2657,8 @@ EXPORT_SYMBOL(generic_file_aio_write);
* Otherwise return zero.
*
* The @gfp_mask argument specifies whether I/O may be performed to release
- * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
+ * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
*
- * NOTE: @gfp_mask may go away, and this function may become non-blocking.
*/
int try_to_release_page(struct page *page, gfp_t gfp_mask)
{
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 3e744abcce9d..98a3f31ccd6a 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -380,7 +380,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
if (count == 0)
goto out_backing;
- ret = remove_suid(filp->f_path.dentry);
+ ret = file_remove_suid(filp);
if (ret)
goto out_backing;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 41341c414194..3be79dc18c5c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1026,7 +1026,6 @@ static void __init report_hugepages(void)
}
}
-#ifdef CONFIG_SYSCTL
#ifdef CONFIG_HIGHMEM
static void try_to_free_low(struct hstate *h, unsigned long count)
{
@@ -1386,6 +1385,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
return nr;
}
+#ifdef CONFIG_SYSCTL
int hugetlb_sysctl_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer,
size_t *length, loff_t *ppos)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e46451e1d9b7..fba566c51322 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,9 +35,9 @@
#include <asm/uaccess.h>
-struct cgroup_subsys mem_cgroup_subsys;
-static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
-static struct kmem_cache *page_cgroup_cache;
+struct cgroup_subsys mem_cgroup_subsys __read_mostly;
+static struct kmem_cache *page_cgroup_cache __read_mostly;
+#define MEM_CGROUP_RECLAIM_RETRIES 5
/*
* Statistics for memory cgroup.
@@ -166,7 +166,6 @@ struct page_cgroup {
struct list_head lru; /* per cgroup LRU list */
struct page *page;
struct mem_cgroup *mem_cgroup;
- int ref_cnt; /* cached, mapped, migrating */
int flags;
};
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
@@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc)
enum charge_type {
MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
MEM_CGROUP_CHARGE_TYPE_MAPPED,
+ MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */
};
/*
@@ -296,7 +296,7 @@ static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) -= 1;
mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, false);
- list_del_init(&pc->lru);
+ list_del(&pc->lru);
}
static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
@@ -354,6 +354,9 @@ void mem_cgroup_move_lists(struct page *page, bool active)
struct mem_cgroup_per_zone *mz;
unsigned long flags;
+ if (mem_cgroup_subsys.disabled)
+ return;
+
/*
* We cannot lock_page_cgroup while holding zone's lru_lock,
* because other holders of lock_page_cgroup can be interrupted
@@ -524,7 +527,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
* < 0 if the cgroup is over its limit
*/
static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
- gfp_t gfp_mask, enum charge_type ctype)
+ gfp_t gfp_mask, enum charge_type ctype,
+ struct mem_cgroup *memcg)
{
struct mem_cgroup *mem;
struct page_cgroup *pc;
@@ -532,35 +536,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup_per_zone *mz;
- if (mem_cgroup_subsys.disabled)
- return 0;
-
- /*
- * Should page_cgroup's go to their own slab?
- * One could optimize the performance of the charging routine
- * by saving a bit in the page_flags and using it as a lock
- * to see if the cgroup page already has a page_cgroup associated
- * with it
- */
-retry:
- lock_page_cgroup(page);
- pc = page_get_page_cgroup(page);
- /*
- * The page_cgroup exists and
- * the page has already been accounted.
- */
- if (pc) {
- VM_BUG_ON(pc->page != page);
- VM_BUG_ON(pc->ref_cnt <= 0);
-
- pc->ref_cnt++;
- unlock_page_cgroup(page);
- goto done;
- }
- unlock_page_cgroup(page);
-
- pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
- if (pc == NULL)
+ pc = kmem_cache_alloc(page_cgroup_cache, gfp_mask);
+ if (unlikely(pc == NULL))
goto err;
/*
@@ -569,16 +546,18 @@ retry:
* thread group leader migrates. It's possible that mm is not
* set, if so charge the init_mm (happens for pagecache usage).
*/
- if (!mm)
- mm = &init_mm;
-
- rcu_read_lock();
- mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
- /*
- * For every charge from the cgroup, increment reference count
- */
- css_get(&mem->css);
- rcu_read_unlock();
+ if (likely(!memcg)) {
+ rcu_read_lock();
+ mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ /*
+ * For every charge from the cgroup, increment reference count
+ */
+ css_get(&mem->css);
+ rcu_read_unlock();
+ } else {
+ mem = memcg;
+ css_get(&memcg->css);
+ }
while (res_counter_charge(&mem->res, PAGE_SIZE)) {
if (!(gfp_mask & __GFP_WAIT))
@@ -603,25 +582,24 @@ retry:
}
}
- pc->ref_cnt = 1;
pc->mem_cgroup = mem;
pc->page = page;
- pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
+ /*
+ * If a page is accounted as a page cache, insert to inactive list.
+ * If anon, insert to active list.
+ */
if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
pc->flags = PAGE_CGROUP_FLAG_CACHE;
+ else
+ pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
lock_page_cgroup(page);
- if (page_get_page_cgroup(page)) {
+ if (unlikely(page_get_page_cgroup(page))) {
unlock_page_cgroup(page);
- /*
- * Another charge has been added to this page already.
- * We take lock_page_cgroup(page) again and read
- * page->cgroup, increment refcnt.... just retry is OK.
- */
res_counter_uncharge(&mem->res, PAGE_SIZE);
css_put(&mem->css);
kmem_cache_free(page_cgroup_cache, pc);
- goto retry;
+ goto done;
}
page_assign_page_cgroup(page, pc);
@@ -642,24 +620,65 @@ err:
int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
{
+ if (mem_cgroup_subsys.disabled)
+ return 0;
+
+ /*
+ * If already mapped, we don't have to account.
+ * If page cache, page->mapping has address_space.
+ * But page->mapping may hold a stale anon_vma pointer; detect that
+ * with a PageAnon() check. A newly-mapped anon page's page->mapping
+ * is NULL.
+ */
+ if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+ return 0;
+ if (unlikely(!mm))
+ mm = &init_mm;
return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_MAPPED);
+ MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
}
int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
gfp_t gfp_mask)
{
- if (!mm)
+ if (mem_cgroup_subsys.disabled)
+ return 0;
+
+ /*
+ * Corner case handling. This is called from add_to_page_cache()
+ * in usual. But some FS (shmem) precharges this page before calling it
+ * and call add_to_page_cache() with GFP_NOWAIT.
+ *
+ * For GFP_NOWAIT case, the page may be pre-charged before calling
+ * add_to_page_cache(). (See shmem.c) check it here and avoid to call
+ * charge twice. (It works but has to pay a bit larger cost.)
+ */
+ if (!(gfp_mask & __GFP_WAIT)) {
+ struct page_cgroup *pc;
+
+ lock_page_cgroup(page);
+ pc = page_get_page_cgroup(page);
+ if (pc) {
+ VM_BUG_ON(pc->page != page);
+ VM_BUG_ON(!pc->mem_cgroup);
+ unlock_page_cgroup(page);
+ return 0;
+ }
+ unlock_page_cgroup(page);
+ }
+
+ if (unlikely(!mm))
mm = &init_mm;
+
return mem_cgroup_charge_common(page, mm, gfp_mask,
- MEM_CGROUP_CHARGE_TYPE_CACHE);
+ MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
}
/*
- * Uncharging is always a welcome operation, we never complain, simply
- * uncharge.
+ * uncharge if !page_mapped(page)
*/
-void mem_cgroup_uncharge_page(struct page *page)
+static void
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
{
struct page_cgroup *pc;
struct mem_cgroup *mem;
@@ -674,98 +693,151 @@ void mem_cgroup_uncharge_page(struct page *page)
*/
lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
- if (!pc)
+ if (unlikely(!pc))
goto unlock;
VM_BUG_ON(pc->page != page);
- VM_BUG_ON(pc->ref_cnt <= 0);
- if (--(pc->ref_cnt) == 0) {
- mz = page_cgroup_zoneinfo(pc);
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_remove_list(mz, pc);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
+ if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED)
+ && ((pc->flags & PAGE_CGROUP_FLAG_CACHE)
+ || page_mapped(page)))
+ goto unlock;
- page_assign_page_cgroup(page, NULL);
- unlock_page_cgroup(page);
+ mz = page_cgroup_zoneinfo(pc);
+ spin_lock_irqsave(&mz->lru_lock, flags);
+ __mem_cgroup_remove_list(mz, pc);
+ spin_unlock_irqrestore(&mz->lru_lock, flags);
- mem = pc->mem_cgroup;
- res_counter_uncharge(&mem->res, PAGE_SIZE);
- css_put(&mem->css);
+ page_assign_page_cgroup(page, NULL);
+ unlock_page_cgroup(page);
- kmem_cache_free(page_cgroup_cache, pc);
- return;
- }
+ mem = pc->mem_cgroup;
+ res_counter_uncharge(&mem->res, PAGE_SIZE);
+ css_put(&mem->css);
+ kmem_cache_free(page_cgroup_cache, pc);
+ return;
unlock:
unlock_page_cgroup(page);
}
+void mem_cgroup_uncharge_page(struct page *page)
+{
+ __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_uncharge_cache_page(struct page *page)
+{
+ VM_BUG_ON(page_mapped(page));
+ __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+}
+
/*
- * Returns non-zero if a page (under migration) has valid page_cgroup member.
- * Refcnt of page_cgroup is incremented.
+ * Before starting migration, account against new page.
*/
-int mem_cgroup_prepare_migration(struct page *page)
+int mem_cgroup_prepare_migration(struct page *page, struct page *newpage)
{
struct page_cgroup *pc;
+ struct mem_cgroup *mem = NULL;
+ enum charge_type ctype = MEM_CGROUP_CHARGE_TYPE_MAPPED;
+ int ret = 0;
if (mem_cgroup_subsys.disabled)
return 0;
lock_page_cgroup(page);
pc = page_get_page_cgroup(page);
- if (pc)
- pc->ref_cnt++;
+ if (pc) {
+ mem = pc->mem_cgroup;
+ css_get(&mem->css);
+ if (pc->flags & PAGE_CGROUP_FLAG_CACHE)
+ ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
+ }
unlock_page_cgroup(page);
- return pc != NULL;
+ if (mem) {
+ ret = mem_cgroup_charge_common(newpage, NULL, GFP_KERNEL,
+ ctype, mem);
+ css_put(&mem->css);
+ }
+ return ret;
}
-void mem_cgroup_end_migration(struct page *page)
+/* remove redundant charge if migration failed */
+void mem_cgroup_end_migration(struct page *newpage)
{
- mem_cgroup_uncharge_page(page);
+ /*
+ * On success, page->mapping is not NULL.
+ * Special rollback care is needed when
+ * 1. migration fails (newpage->mapping is cleared in that case), or
+ * 2. the newpage was moved but never remapped again because the task
+ *    exited and the newpage is obsolete. In this case the new page
+ *    may be a swapcache page. So we always call
+ *    mem_cgroup_uncharge_page() to avoid a mess; the page_cgroup is
+ *    removed only when it is unnecessary. File cache pages are still
+ *    on the radix-tree and need no special care.
+ */
+ if (!newpage->mapping)
+ __mem_cgroup_uncharge_common(newpage,
+ MEM_CGROUP_CHARGE_TYPE_FORCE);
+ else if (PageAnon(newpage))
+ mem_cgroup_uncharge_page(newpage);
}
/*
- * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
- * And no race with uncharge() routines because page_cgroup for *page*
- * has extra one reference by mem_cgroup_prepare_migration.
+ * A call to try to shrink memory usage under specified resource controller.
+ * This is typically used for page reclaiming for shmem for reducing side
+ * effect of page allocation from shmem, which is used by some mem_cgroup.
*/
-void mem_cgroup_page_migration(struct page *page, struct page *newpage)
+int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask)
{
- struct page_cgroup *pc;
- struct mem_cgroup_per_zone *mz;
- unsigned long flags;
+ struct mem_cgroup *mem;
+ int progress = 0;
+ int retry = MEM_CGROUP_RECLAIM_RETRIES;
- lock_page_cgroup(page);
- pc = page_get_page_cgroup(page);
- if (!pc) {
- unlock_page_cgroup(page);
- return;
- }
+ if (mem_cgroup_subsys.disabled)
+ return 0;
- mz = page_cgroup_zoneinfo(pc);
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_remove_list(mz, pc);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
+ rcu_read_lock();
+ mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ css_get(&mem->css);
+ rcu_read_unlock();
- page_assign_page_cgroup(page, NULL);
- unlock_page_cgroup(page);
+ do {
+ progress = try_to_free_mem_cgroup_pages(mem, gfp_mask);
+ } while (!progress && --retry);
- pc->page = newpage;
- lock_page_cgroup(newpage);
- page_assign_page_cgroup(newpage, pc);
+ css_put(&mem->css);
+ if (!retry)
+ return -ENOMEM;
+ return 0;
+}
- mz = page_cgroup_zoneinfo(pc);
- spin_lock_irqsave(&mz->lru_lock, flags);
- __mem_cgroup_add_list(mz, pc);
- spin_unlock_irqrestore(&mz->lru_lock, flags);
+int mem_cgroup_resize_limit(struct mem_cgroup *memcg, unsigned long long val)
+{
+
+ int retry_count = MEM_CGROUP_RECLAIM_RETRIES;
+ int progress;
+ int ret = 0;
- unlock_page_cgroup(newpage);
+ while (res_counter_set_limit(&memcg->res, val)) {
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ break;
+ }
+ if (!retry_count) {
+ ret = -EBUSY;
+ break;
+ }
+ progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL);
+ if (!progress)
+ retry_count--;
+ }
+ return ret;
}
+
/*
* This routine traverse page_cgroup in given list and drop them all.
- * This routine ignores page_cgroup->ref_cnt.
* *And* this routine doesn't reclaim page itself, just removes page_cgroup.
*/
#define FORCE_UNCHARGE_BATCH (128)
@@ -790,12 +862,20 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
page = pc->page;
get_page(page);
spin_unlock_irqrestore(&mz->lru_lock, flags);
- mem_cgroup_uncharge_page(page);
- put_page(page);
- if (--count <= 0) {
- count = FORCE_UNCHARGE_BATCH;
+ /*
+ * Check if this page is on LRU. !LRU page can be found
+ * if it's under page migration.
+ */
+ if (PageLRU(page)) {
+ __mem_cgroup_uncharge_common(page,
+ MEM_CGROUP_CHARGE_TYPE_FORCE);
+ put_page(page);
+ if (--count <= 0) {
+ count = FORCE_UNCHARGE_BATCH;
+ cond_resched();
+ }
+ } else
cond_resched();
- }
spin_lock_irqsave(&mz->lru_lock, flags);
}
spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -810,9 +890,6 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem)
int ret = -EBUSY;
int node, zid;
- if (mem_cgroup_subsys.disabled)
- return 0;
-
css_get(&mem->css);
/*
* page reclaim code (kswapd etc..) will move pages between
@@ -838,32 +915,34 @@ out:
return ret;
}
-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
- *tmp = memparse(buf, &buf);
- if (*buf != '\0')
- return -EINVAL;
-
- /*
- * Round up the value to the closest page size
- */
- *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
- return 0;
-}
-
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
cft->private);
}
-
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
- struct file *file, const char __user *userbuf,
- size_t nbytes, loff_t *ppos)
+/*
+ * The user of this function is...
+ * RES_LIMIT.
+ */
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+ const char *buffer)
{
- return res_counter_write(&mem_cgroup_from_cont(cont)->res,
- cft->private, userbuf, nbytes, ppos,
- mem_cgroup_write_strategy);
+ struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ unsigned long long val;
+ int ret;
+
+ switch (cft->private) {
+ case RES_LIMIT:
+ /* This function does all necessary parse...reuse it */
+ ret = res_counter_memparse_write_strategy(buffer, &val);
+ if (!ret)
+ ret = mem_cgroup_resize_limit(memcg, val);
+ break;
+ default:
+ ret = -EINVAL; /* should be BUG() ? */
+ break;
+ }
+ return ret;
}
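Because res_counter_memparse_write_strategy() parses via memparse(), the limit file now accepts the usual K/M/G suffixes: writing a string such as "64M" to memory.limit_in_bytes resolves to 64 << 20 bytes before mem_cgroup_resize_limit() is invoked.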
static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -940,7 +1019,7 @@ static struct cftype mem_cgroup_files[] = {
{
.name = "limit_in_bytes",
.private = RES_LIMIT,
- .write = mem_cgroup_write,
+ .write_string = mem_cgroup_write,
.read_u64 = mem_cgroup_read,
},
{
@@ -1070,8 +1149,6 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
static int mem_cgroup_populate(struct cgroup_subsys *ss,
struct cgroup *cont)
{
- if (mem_cgroup_subsys.disabled)
- return 0;
return cgroup_add_files(cont, ss, mem_cgroup_files,
ARRAY_SIZE(mem_cgroup_files));
}
@@ -1084,9 +1161,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
struct mm_struct *mm;
struct mem_cgroup *mem, *old_mem;
- if (mem_cgroup_subsys.disabled)
- return;
-
mm = get_task_mm(p);
if (mm == NULL)
return;
diff --git a/mm/memory.c b/mm/memory.c
index 262e3eb6601a..a8ca04faaea6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -374,7 +374,8 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
*
* The calling function must still handle the error.
*/
-void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+static void print_bad_pte(struct vm_area_struct *vma, pte_t pte,
+ unsigned long vaddr)
{
printk(KERN_ERR "Bad pte = %08llx, process = %s, "
"vm_flags = %lx, vaddr = %lx\n",
diff --git a/mm/migrate.c b/mm/migrate.c
index 376cceba82f9..153572fb60b8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -285,7 +285,15 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
page = migration_entry_to_page(entry);
- get_page(page);
+ /*
+ * Once radix-tree replacement of page migration started, page_count
+ * *must* be zero. And, we don't want to call wait_on_page_locked()
+ * against a page without get_page().
+ * So we use get_page_unless_zero() here. Even if it fails, the page
+ * fault will simply occur again.
+ */
+ if (!get_page_unless_zero(page))
+ goto out;
pte_unmap_unlock(ptep, ptl);
wait_on_page_locked(page);
put_page(page);
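get_page_unless_zero() is what makes waiting safe against pages whose count migration has frozen to zero; it is a conditional increment. A sketch of the include/linux/mm.h helper:

    static inline int get_page_unless_zero(struct page *page)
    {
            VM_BUG_ON(PageTail(page));
            /* Take a reference only if _count is currently non-zero. */
            return atomic_inc_not_zero(&page->_count);
    }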
@@ -305,6 +313,7 @@ out:
static int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page)
{
+ int expected_count;
void **pslot;
if (!mapping) {
@@ -314,14 +323,20 @@ static int migrate_page_move_mapping(struct address_space *mapping,
return 0;
}
- write_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
pslot = radix_tree_lookup_slot(&mapping->page_tree,
page_index(page));
- if (page_count(page) != 2 + !!PagePrivate(page) ||
+ expected_count = 2 + !!PagePrivate(page);
+ if (page_count(page) != expected_count ||
(struct page *)radix_tree_deref_slot(pslot) != page) {
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
+ return -EAGAIN;
+ }
+
+ if (!page_freeze_refs(page, expected_count)) {
+ spin_unlock_irq(&mapping->tree_lock);
return -EAGAIN;
}
@@ -338,6 +353,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
radix_tree_replace_slot(pslot, newpage);
+ page_unfreeze_refs(page, expected_count);
/*
* Drop cache reference from old page.
* We know this isn't the last reference.
@@ -357,7 +373,9 @@ static int migrate_page_move_mapping(struct address_space *mapping,
__dec_zone_page_state(page, NR_FILE_PAGES);
__inc_zone_page_state(newpage, NR_FILE_PAGES);
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
+ if (!PageSwapCache(newpage))
+ mem_cgroup_uncharge_cache_page(page);
return 0;
}
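page_freeze_refs() and page_unfreeze_refs() are the write side of the speculative-reference protocol: the count is atomically swapped from its expected value down to zero, so concurrent page_cache_get_speculative() callers fail and retry while the radix-tree slot is replaced, and is then restored. A sketch of the pair as added to include/linux/pagemap.h in this series:

    static inline int page_freeze_refs(struct page *page, int count)
    {
            /* Succeeds only if nobody else holds, or is taking, a ref. */
            return likely(atomic_cmpxchg(&page->_count, count, 0) == count);
    }

    static inline void page_unfreeze_refs(struct page *page, int count)
    {
            VM_BUG_ON(page_count(page) != 0);
            VM_BUG_ON(count == 0);
            atomic_set(&page->_count, count);
    }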
@@ -611,7 +629,6 @@ static int move_to_new_page(struct page *newpage, struct page *page)
rc = fallback_migrate_page(mapping, newpage, page);
if (!rc) {
- mem_cgroup_page_migration(page, newpage);
remove_migration_ptes(page, newpage);
} else
newpage->mapping = NULL;
@@ -641,6 +658,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
/* page was freed from under us. So we are done. */
goto move_newpage;
+ charge = mem_cgroup_prepare_migration(page, newpage);
+ if (charge == -ENOMEM) {
+ rc = -ENOMEM;
+ goto move_newpage;
+ }
+ /* prepare cgroup just returns 0 or -ENOMEM */
+ BUG_ON(charge);
+
rc = -EAGAIN;
if (TestSetPageLocked(page)) {
if (!force)
@@ -692,19 +717,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
goto rcu_unlock;
}
- charge = mem_cgroup_prepare_migration(page);
/* Establish migration ptes or remove ptes */
try_to_unmap(page, 1);
if (!page_mapped(page))
rc = move_to_new_page(newpage, page);
- if (rc) {
+ if (rc)
remove_migration_ptes(page, page);
- if (charge)
- mem_cgroup_end_migration(page);
- } else if (charge)
- mem_cgroup_end_migration(newpage);
rcu_unlock:
if (rcu_locked)
rcu_read_unlock();
@@ -725,6 +745,8 @@ unlock:
}
move_newpage:
+ if (!charge)
+ mem_cgroup_end_migration(newpage);
/*
* Move the new page to the LRU. If migration was not successful
* then this will free the page.
diff --git a/mm/nommu.c b/mm/nommu.c
index 4462b6a3fcb9..5edccd9c9218 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -22,7 +22,7 @@
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <linux/ptrace.h>
+#include <linux/tracehook.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
@@ -745,7 +745,7 @@ static unsigned long determine_vm_flags(struct file *file,
* it's being traced - otherwise breakpoints set in it may interfere
* with another untraced process
*/
- if ((flags & MAP_PRIVATE) && (current->ptrace & PT_PTRACED))
+ if ((flags & MAP_PRIVATE) && tracehook_expect_breakpoints(current))
vm_flags &= ~VM_MAYSHARE;
return vm_flags;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 94c6d8988ab3..24de8b65fdbd 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1088,7 +1088,7 @@ int __set_page_dirty_nobuffers(struct page *page)
if (!mapping)
return 1;
- write_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
mapping2 = page_mapping(page);
if (mapping2) { /* Race with truncate? */
BUG_ON(mapping2 != mapping);
@@ -1102,7 +1102,7 @@ int __set_page_dirty_nobuffers(struct page *page)
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
if (mapping->host) {
/* !PageAnon && !swapper_space */
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -1258,7 +1258,7 @@ int test_clear_page_writeback(struct page *page)
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags;
- write_lock_irqsave(&mapping->tree_lock, flags);
+ spin_lock_irqsave(&mapping->tree_lock, flags);
ret = TestClearPageWriteback(page);
if (ret) {
radix_tree_tag_clear(&mapping->page_tree,
@@ -1269,7 +1269,7 @@ int test_clear_page_writeback(struct page *page)
__bdi_writeout_inc(bdi);
}
}
- write_unlock_irqrestore(&mapping->tree_lock, flags);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
} else {
ret = TestClearPageWriteback(page);
}
@@ -1287,7 +1287,7 @@ int test_set_page_writeback(struct page *page)
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags;
- write_lock_irqsave(&mapping->tree_lock, flags);
+ spin_lock_irqsave(&mapping->tree_lock, flags);
ret = TestSetPageWriteback(page);
if (!ret) {
radix_tree_tag_set(&mapping->page_tree,
@@ -1300,7 +1300,7 @@ int test_set_page_writeback(struct page *page)
radix_tree_tag_clear(&mapping->page_tree,
page_index(page),
PAGECACHE_TAG_DIRTY);
- write_unlock_irqrestore(&mapping->tree_lock, flags);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
} else {
ret = TestSetPageWriteback(page);
}
diff --git a/mm/pdflush.c b/mm/pdflush.c
index 9d834aa4b979..0cbe0c60c6bf 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -130,7 +130,7 @@ static int __pdflush(struct pdflush_work *my_work)
* Thread creation: For how long have there been zero
* available threads?
*/
- if (jiffies - last_empty_jifs > 1 * HZ) {
+ if (time_after(jiffies, last_empty_jifs + 1 * HZ)) {
/* unlocked list_empty() test is OK here */
if (list_empty(&pdflush_list)) {
/* unlocked test is OK here */
@@ -151,7 +151,7 @@ static int __pdflush(struct pdflush_work *my_work)
if (nr_pdflush_threads <= MIN_PDFLUSH_THREADS)
continue;
pdf = list_entry(pdflush_list.prev, struct pdflush_work, list);
- if (jiffies - pdf->when_i_went_to_sleep > 1 * HZ) {
+ if (time_after(jiffies, pdf->when_i_went_to_sleep + 1 * HZ)) {
/* Limit exit rate */
pdf->when_i_went_to_sleep = jiffies;
break; /* exeunt */
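time_after() expresses the same one-second threshold through the wraparound-safe signed comparison and adds type checking of its operands. For reference, the include/linux/jiffies.h definition:

    #define time_after(a,b)             \
            (typecheck(unsigned long, a) && \
             typecheck(unsigned long, b) && \
             ((long)(b) - (long)(a) < 0))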
diff --git a/mm/readahead.c b/mm/readahead.c
index d8723a5f6496..77e8ddf945e9 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -382,9 +382,9 @@ ondemand_readahead(struct address_space *mapping,
if (hit_readahead_marker) {
pgoff_t start;
- read_lock_irq(&mapping->tree_lock);
- start = radix_tree_next_hole(&mapping->page_tree, offset, max+1);
- read_unlock_irq(&mapping->tree_lock);
+ rcu_read_lock();
+ start = radix_tree_next_hole(&mapping->page_tree, offset, max+1);
+ rcu_read_unlock();
if (!start || start - offset > max)
return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index bf0a5b7cfb8e..39ae5a9bf382 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -138,7 +138,7 @@ void anon_vma_unlink(struct vm_area_struct *vma)
anon_vma_free(anon_vma);
}
-static void anon_vma_ctor(struct kmem_cache *cachep, void *data)
+static void anon_vma_ctor(void *data)
{
struct anon_vma *anon_vma = data;
@@ -576,14 +576,8 @@ void page_add_anon_rmap(struct page *page,
VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
if (atomic_inc_and_test(&page->_mapcount))
__page_set_anon_rmap(page, vma, address);
- else {
+ else
__page_check_anon_rmap(page, vma, address);
- /*
- * We unconditionally charged during prepare, we uncharge here
- * This takes care of balancing the reference counts
- */
- mem_cgroup_uncharge_page(page);
- }
}
/**
@@ -614,12 +608,6 @@ void page_add_file_rmap(struct page *page)
{
if (atomic_inc_and_test(&page->_mapcount))
__inc_zone_page_state(page, NR_FILE_MAPPED);
- else
- /*
- * We unconditionally charged during prepare, we uncharge here
- * This takes care of balancing the reference counts
- */
- mem_cgroup_uncharge_page(page);
}
#ifdef CONFIG_DEBUG_VM
diff --git a/mm/shmem.c b/mm/shmem.c
index 9ffbea9b79e1..952d361774bb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,20 +922,26 @@ found:
error = 1;
if (!inode)
goto out;
- /* Precharge page while we can wait, compensate afterwards */
+ /* Precharge page using GFP_KERNEL while we can wait */
error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
if (error)
goto out;
error = radix_tree_preload(GFP_KERNEL);
- if (error)
- goto uncharge;
+ if (error) {
+ mem_cgroup_uncharge_cache_page(page);
+ goto out;
+ }
error = 1;
spin_lock(&info->lock);
ptr = shmem_swp_entry(info, idx, NULL);
- if (ptr && ptr->val == entry.val)
- error = add_to_page_cache(page, inode->i_mapping,
+ if (ptr && ptr->val == entry.val) {
+ error = add_to_page_cache_locked(page, inode->i_mapping,
idx, GFP_NOWAIT);
+ /* does mem_cgroup_uncharge_cache_page on error */
+ } else /* we must compensate for our precharge above */
+ mem_cgroup_uncharge_cache_page(page);
+
if (error == -EEXIST) {
struct page *filepage = find_get_page(inode->i_mapping, idx);
error = 1;
@@ -961,8 +967,6 @@ found:
shmem_swp_unmap(ptr);
spin_unlock(&info->lock);
radix_tree_preload_end();
-uncharge:
- mem_cgroup_uncharge_page(page);
out:
unlock_page(page);
page_cache_release(page);
@@ -1297,8 +1301,8 @@ repeat:
SetPageUptodate(filepage);
set_page_dirty(filepage);
swap_free(swap);
- } else if (!(error = add_to_page_cache(
- swappage, mapping, idx, GFP_NOWAIT))) {
+ } else if (!(error = add_to_page_cache_locked(swappage, mapping,
+ idx, GFP_NOWAIT))) {
info->flags |= SHMEM_PAGEIN;
shmem_swp_set(info, entry, 0);
shmem_swp_unmap(entry);
@@ -1311,17 +1315,14 @@ repeat:
shmem_swp_unmap(entry);
spin_unlock(&info->lock);
unlock_page(swappage);
+ page_cache_release(swappage);
if (error == -ENOMEM) {
/* allow reclaim from this memory cgroup */
- error = mem_cgroup_cache_charge(swappage,
- current->mm, gfp & ~__GFP_HIGHMEM);
- if (error) {
- page_cache_release(swappage);
+ error = mem_cgroup_shrink_usage(current->mm,
+ gfp);
+ if (error)
goto failed;
- }
- mem_cgroup_uncharge_page(swappage);
}
- page_cache_release(swappage);
goto repeat;
}
} else if (sgp == SGP_READ && !filepage) {
@@ -1358,6 +1359,8 @@ repeat:
}
if (!filepage) {
+ int ret;
+
spin_unlock(&info->lock);
filepage = shmem_alloc_page(gfp, info, idx);
if (!filepage) {
@@ -1386,10 +1389,18 @@ repeat:
swap = *entry;
shmem_swp_unmap(entry);
}
- if (error || swap.val || 0 != add_to_page_cache_lru(
- filepage, mapping, idx, GFP_NOWAIT)) {
+ ret = error || swap.val;
+ if (ret)
+ mem_cgroup_uncharge_cache_page(filepage);
+ else
+ ret = add_to_page_cache_lru(filepage, mapping,
+ idx, GFP_NOWAIT);
+ /*
+ * On add_to_page_cache_lru() failure, the uncharge is
+ * done automatically.
+ */
+ if (ret) {
spin_unlock(&info->lock);
- mem_cgroup_uncharge_page(filepage);
page_cache_release(filepage);
shmem_unacct_blocks(info->flags, 1);
shmem_free_blocks(inode, 1);
@@ -1398,7 +1409,6 @@ repeat:
goto failed;
goto repeat;
}
- mem_cgroup_uncharge_page(filepage);
info->flags |= SHMEM_PAGEIN;
}
@@ -2342,7 +2352,7 @@ static void shmem_destroy_inode(struct inode *inode)
kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}
-static void init_once(struct kmem_cache *cachep, void *foo)
+static void init_once(void *foo)
{
struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
index f5664c5b9eb1..8e5aadd7dcd6 100644
--- a/mm/shmem_acl.c
+++ b/mm/shmem_acl.c
@@ -191,7 +191,7 @@ shmem_check_acl(struct inode *inode, int mask)
* shmem_permission - permission() inode operation
*/
int
-shmem_permission(struct inode *inode, int mask, struct nameidata *nd)
+shmem_permission(struct inode *inode, int mask)
{
return generic_permission(inode, mask, shmem_check_acl);
}
diff --git a/mm/slab.c b/mm/slab.c
index 052e7d64537e..918f04f7fef1 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -406,7 +406,7 @@ struct kmem_cache {
unsigned int dflags; /* dynamic flags */
/* constructor func */
- void (*ctor)(struct kmem_cache *, void *);
+ void (*ctor)(void *obj);
/* 5) cache creation/removal */
const char *name;
@@ -2137,8 +2137,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
*/
struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
- unsigned long flags,
- void (*ctor)(struct kmem_cache *, void *))
+ unsigned long flags, void (*ctor)(void *))
{
size_t left_over, slab_size, ralign;
struct kmem_cache *cachep = NULL, *pc;
@@ -2653,7 +2652,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
* They must also be threaded.
*/
if (cachep->ctor && !(cachep->flags & SLAB_POISON))
- cachep->ctor(cachep, objp + obj_offset(cachep));
+ cachep->ctor(objp + obj_offset(cachep));
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
@@ -2669,7 +2668,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
cachep->buffer_size / PAGE_SIZE, 0);
#else
if (cachep->ctor)
- cachep->ctor(cachep, objp);
+ cachep->ctor(objp);
#endif
slab_bufctl(slabp)[i] = i + 1;
}
@@ -3093,7 +3092,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
#endif
objp += obj_offset(cachep);
if (cachep->ctor && cachep->flags & SLAB_POISON)
- cachep->ctor(cachep, objp);
+ cachep->ctor(objp);
#if ARCH_SLAB_MINALIGN
if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
diff --git a/mm/slob.c b/mm/slob.c
index de268eb7ac70..d8fbd4d1bfa7 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -525,12 +525,11 @@ struct kmem_cache {
unsigned int size, align;
unsigned long flags;
const char *name;
- void (*ctor)(struct kmem_cache *, void *);
+ void (*ctor)(void *);
};
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
- size_t align, unsigned long flags,
- void (*ctor)(struct kmem_cache *, void *))
+ size_t align, unsigned long flags, void (*ctor)(void *))
{
struct kmem_cache *c;
@@ -575,7 +574,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
b = slob_new_page(flags, get_order(c->size), node);
if (c->ctor)
- c->ctor(c, b);
+ c->ctor(b);
return b;
}
diff --git a/mm/slub.c b/mm/slub.c
index 77c21cf53ff9..b7e2cd5d82db 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1012,7 +1012,7 @@ __setup("slub_debug", setup_slub_debug);
static unsigned long kmem_cache_flags(unsigned long objsize,
unsigned long flags, const char *name,
- void (*ctor)(struct kmem_cache *, void *))
+ void (*ctor)(void *))
{
/*
* Enable debugging if selected on the kernel commandline.
@@ -1040,7 +1040,7 @@ static inline int check_object(struct kmem_cache *s, struct page *page,
static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
static inline unsigned long kmem_cache_flags(unsigned long objsize,
unsigned long flags, const char *name,
- void (*ctor)(struct kmem_cache *, void *))
+ void (*ctor)(void *))
{
return flags;
}
@@ -1103,7 +1103,7 @@ static void setup_object(struct kmem_cache *s, struct page *page,
{
setup_object_debug(s, page, object);
if (unlikely(s->ctor))
- s->ctor(s, object);
+ s->ctor(object);
}
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -2286,7 +2286,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
const char *name, size_t size,
size_t align, unsigned long flags,
- void (*ctor)(struct kmem_cache *, void *))
+ void (*ctor)(void *))
{
memset(s, 0, kmem_size);
s->name = name;
@@ -3042,7 +3042,7 @@ static int slab_unmergeable(struct kmem_cache *s)
static struct kmem_cache *find_mergeable(size_t size,
size_t align, unsigned long flags, const char *name,
- void (*ctor)(struct kmem_cache *, void *))
+ void (*ctor)(void *))
{
struct kmem_cache *s;
@@ -3082,8 +3082,7 @@ static struct kmem_cache *find_mergeable(size_t size,
}
struct kmem_cache *kmem_cache_create(const char *name, size_t size,
- size_t align, unsigned long flags,
- void (*ctor)(struct kmem_cache *, void *))
+ size_t align, unsigned long flags, void (*ctor)(void *))
{
struct kmem_cache *s;
diff --git a/mm/sparse.c b/mm/sparse.c
index 8ffc08990008..5d9dbbb9d39e 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -377,7 +377,7 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
-struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
+static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
{
struct page *map;
struct mem_section *ms = __nr_to_section(pnum);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index d8aadaf2a0ba..b8035b055129 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,7 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = {
struct address_space swapper_space = {
.page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
- .tree_lock = __RW_LOCK_UNLOCKED(swapper_space.tree_lock),
+ .tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock),
.a_ops = &swap_aops,
.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
.backing_dev_info = &swap_backing_dev_info,
@@ -56,7 +56,8 @@ static struct {
void show_swap_cache_info(void)
{
- printk("Swap cache: add %lu, delete %lu, find %lu/%lu\n",
+ printk("%lu pages in swap cache\n", total_swapcache_pages);
+ printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
swap_cache_info.add_total, swap_cache_info.del_total,
swap_cache_info.find_success, swap_cache_info.find_total);
printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
@@ -64,7 +65,7 @@ void show_swap_cache_info(void)
}
/*
- * add_to_swap_cache resembles add_to_page_cache on swapper_space,
+ * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
* but sets SwapCache flag and private instead of mapping and index.
*/
int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
@@ -76,19 +77,26 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
BUG_ON(PagePrivate(page));
error = radix_tree_preload(gfp_mask);
if (!error) {
- write_lock_irq(&swapper_space.tree_lock);
+ page_cache_get(page);
+ SetPageSwapCache(page);
+ set_page_private(page, entry.val);
+
+ spin_lock_irq(&swapper_space.tree_lock);
error = radix_tree_insert(&swapper_space.page_tree,
entry.val, page);
- if (!error) {
- page_cache_get(page);
- SetPageSwapCache(page);
- set_page_private(page, entry.val);
+ if (likely(!error)) {
total_swapcache_pages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
INC_CACHE_INFO(add_total);
}
- write_unlock_irq(&swapper_space.tree_lock);
+ spin_unlock_irq(&swapper_space.tree_lock);
radix_tree_preload_end();
+
+ if (unlikely(error)) {
+ set_page_private(page, 0UL);
+ ClearPageSwapCache(page);
+ page_cache_release(page);
+ }
}
return error;
}
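The reordering here mirrors the add_to_page_cache_locked() rework above: the page must be fully set up (reference taken, PG_swapcache set, private pointing at the swap entry) before the radix-tree insert makes it visible, since lockless lookups can find it the moment the insert succeeds; on failure that setup is unwound in reverse.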
@@ -175,9 +183,9 @@ void delete_from_swap_cache(struct page *page)
entry.val = page_private(page);
- write_lock_irq(&swapper_space.tree_lock);
+ spin_lock_irq(&swapper_space.tree_lock);
__delete_from_swap_cache(page);
- write_unlock_irq(&swapper_space.tree_lock);
+ spin_unlock_irq(&swapper_space.tree_lock);
swap_free(entry);
page_cache_release(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2f33edb8bee9..6beb6251e99d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -33,8 +33,8 @@
#include <asm/tlbflush.h>
#include <linux/swapops.h>
-DEFINE_SPINLOCK(swap_lock);
-unsigned int nr_swapfiles;
+static DEFINE_SPINLOCK(swap_lock);
+static unsigned int nr_swapfiles;
long total_swap_pages;
static int swap_overflow;
static int least_priority;
@@ -44,7 +44,7 @@ static const char Unused_file[] = "Unused swap file entry ";
static const char Bad_offset[] = "Bad swap offset entry ";
static const char Unused_offset[] = "Unused swap offset entry ";
-struct swap_list_t swap_list = {-1, -1};
+static struct swap_list_t swap_list = {-1, -1};
static struct swap_info_struct swap_info[MAX_SWAPFILES];
@@ -369,13 +369,13 @@ int remove_exclusive_swap_page(struct page *page)
retval = 0;
if (p->swap_map[swp_offset(entry)] == 1) {
/* Recheck the page count with the swapcache lock held.. */
- write_lock_irq(&swapper_space.tree_lock);
+ spin_lock_irq(&swapper_space.tree_lock);
if ((page_count(page) == 2) && !PageWriteback(page)) {
__delete_from_swap_cache(page);
SetPageDirty(page);
retval = 1;
}
- write_unlock_irq(&swapper_space.tree_lock);
+ spin_unlock_irq(&swapper_space.tree_lock);
}
spin_unlock(&swap_lock);
diff --git a/mm/truncate.c b/mm/truncate.c
index b8961cb63414..e68443d74567 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -349,18 +349,18 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
return 0;
- write_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
if (PageDirty(page))
goto failed;
BUG_ON(PagePrivate(page));
__remove_from_page_cache(page);
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
ClearPageUptodate(page);
page_cache_release(page); /* pagecache ref */
return 1;
failed:
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
return 0;
}
diff --git a/mm/util.c b/mm/util.c
index 8f18683825bc..9341ca77bd88 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1,7 +1,9 @@
+#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/err.h>
+#include <linux/sched.h>
#include <asm/uaccess.h>
/**
@@ -68,25 +70,22 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp)
EXPORT_SYMBOL(kmemdup);
/**
- * krealloc - reallocate memory. The contents will remain unchanged.
+ * __krealloc - like krealloc() but don't free @p.
* @p: object to reallocate memory for.
* @new_size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
*
- * The contents of the object pointed to are preserved up to the
- * lesser of the new and old sizes. If @p is %NULL, krealloc()
- * behaves exactly like kmalloc(). If @size is 0 and @p is not a
- * %NULL pointer, the object pointed to is freed.
+ * This function is like krealloc() except it never frees the originally
+ * allocated buffer. Use this if you don't want to free the buffer immediately
+ * like, for example, with RCU.
*/
-void *krealloc(const void *p, size_t new_size, gfp_t flags)
+void *__krealloc(const void *p, size_t new_size, gfp_t flags)
{
void *ret;
size_t ks = 0;
- if (unlikely(!new_size)) {
- kfree(p);
+ if (unlikely(!new_size))
return ZERO_SIZE_PTR;
- }
if (p)
ks = ksize(p);
@@ -95,10 +94,37 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags)
return (void *)p;
ret = kmalloc_track_caller(new_size, flags);
- if (ret && p) {
+ if (ret && p)
memcpy(ret, p, ks);
+
+ return ret;
+}
+EXPORT_SYMBOL(__krealloc);
+
+/**
+ * krealloc - reallocate memory. The contents will remain unchanged.
+ * @p: object to reallocate memory for.
+ * @new_size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate.
+ *
+ * The contents of the object pointed to are preserved up to the
+ * lesser of the new and old sizes. If @p is %NULL, krealloc()
+ * behaves exactly like kmalloc(). If @size is 0 and @p is not a
+ * %NULL pointer, the object pointed to is freed.
+ */
+void *krealloc(const void *p, size_t new_size, gfp_t flags)
+{
+ void *ret;
+
+ if (unlikely(!new_size)) {
kfree(p);
+ return ZERO_SIZE_PTR;
}
+
+ ret = __krealloc(p, new_size, flags);
+ if (ret && p != ret)
+ kfree(p);
+
return ret;
}
EXPORT_SYMBOL(krealloc);
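__krealloc() exists for callers that must keep the old buffer alive past the resize, the motivating case being RCU: resize into a new buffer, publish it, and free the old copy only after readers are done. A hedged sketch of that pattern; the struct and names here are illustrative, not from this patch:

    struct cfg {
            int nr;
            int vals[];
    };

    static struct cfg *active_cfg;      /* dereferenced under rcu_read_lock() */

    static int grow_cfg(int new_nr)
    {
            struct cfg *old = active_cfg;
            struct cfg *new = __krealloc(old, sizeof(*new) +
                                         new_nr * sizeof(int), GFP_KERNEL);

            if (!new)
                    return -ENOMEM;
            new->nr = new_nr;
            rcu_assign_pointer(active_cfg, new);
            if (new != old) {
                    synchronize_rcu();  /* wait out readers of the old copy */
                    kfree(old);
            }
            return 0;
    }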
@@ -136,3 +162,12 @@ char *strndup_user(const char __user *s, long n)
return p;
}
EXPORT_SYMBOL(strndup_user);
+
+#ifndef HAVE_ARCH_PICK_MMAP_LAYOUT
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+ mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+}
+#endif
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 35f293816294..85b9a0d2c877 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -381,16 +381,14 @@ static void __vunmap(const void *addr, int deallocate_pages)
return;
if ((PAGE_SIZE-1) & (unsigned long)addr) {
- printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
- WARN_ON(1);
+ WARN(1, KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
return;
}
area = remove_vm_area(addr);
if (unlikely(!area)) {
- printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
+ WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
addr);
- WARN_ON(1);
return;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 967d30ccd92b..8f71761bc4b7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -38,6 +38,7 @@
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/memcontrol.h>
+#include <linux/delayacct.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -390,17 +391,15 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
}
/*
- * Attempt to detach a locked page from its ->mapping. If it is dirty or if
- * someone else has a ref on the page, abort and return 0. If it was
- * successfully detached, return 1. Assumes the caller has a single ref on
- * this page.
+ * Same as remove_mapping, but if the page is removed from the mapping, it
+ * gets returned with a refcount of 0.
*/
-int remove_mapping(struct address_space *mapping, struct page *page)
+static int __remove_mapping(struct address_space *mapping, struct page *page)
{
BUG_ON(!PageLocked(page));
BUG_ON(mapping != page_mapping(page));
- write_lock_irq(&mapping->tree_lock);
+ spin_lock_irq(&mapping->tree_lock);
/*
* The non racy check for a busy page.
*
@@ -426,28 +425,48 @@ int remove_mapping(struct address_space *mapping, struct page *page)
* Note that if SetPageDirty is always performed via set_page_dirty,
* and thus under tree_lock, then this ordering is not required.
*/
- if (unlikely(page_count(page) != 2))
+ if (!page_freeze_refs(page, 2))
goto cannot_free;
- smp_rmb();
- if (unlikely(PageDirty(page)))
+ /* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
+ if (unlikely(PageDirty(page))) {
+ page_unfreeze_refs(page, 2);
goto cannot_free;
+ }
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
__delete_from_swap_cache(page);
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
swap_free(swap);
- __put_page(page); /* The pagecache ref */
- return 1;
+ } else {
+ __remove_from_page_cache(page);
+ spin_unlock_irq(&mapping->tree_lock);
}
- __remove_from_page_cache(page);
- write_unlock_irq(&mapping->tree_lock);
- __put_page(page);
return 1;
cannot_free:
- write_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irq(&mapping->tree_lock);
+ return 0;
+}
+
+/*
+ * Attempt to detach a locked page from its ->mapping. If it is dirty or if
+ * someone else has a ref on the page, abort and return 0. If it was
+ * successfully detached, return 1. Assumes the caller has a single ref on
+ * this page.
+ */
+int remove_mapping(struct address_space *mapping, struct page *page)
+{
+ if (__remove_mapping(mapping, page)) {
+ /*
+ * Unfreezing the refcount with 1 rather than 2 effectively
+ * drops the pagecache ref for us without requiring another
+ * atomic operation.
+ */
+ page_unfreeze_refs(page, 1);
+ return 1;
+ }
return 0;
}
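Note the division of labour: __remove_mapping() returns with the page's reference count frozen at zero, and shrink_page_list() below frees such pages directly. remove_mapping() instead thaws the count to 1, which simultaneously revives the page and swallows the pagecache reference, leaving exactly the caller's single reference without a second atomic operation.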
@@ -597,18 +616,34 @@ static unsigned long shrink_page_list(struct list_head *page_list,
if (PagePrivate(page)) {
if (!try_to_release_page(page, sc->gfp_mask))
goto activate_locked;
- if (!mapping && page_count(page) == 1)
- goto free_it;
+ if (!mapping && page_count(page) == 1) {
+ unlock_page(page);
+ if (put_page_testzero(page))
+ goto free_it;
+ else {
+ /*
+ * rare race with speculative reference.
+ * the speculative reference will free
+ * this page shortly, so we may
+ * increment nr_reclaimed here (and
+ * leave it off the LRU).
+ */
+ nr_reclaimed++;
+ continue;
+ }
+ }
}
- if (!mapping || !remove_mapping(mapping, page))
+ if (!mapping || !__remove_mapping(mapping, page))
goto keep_locked;
-free_it:
unlock_page(page);
+free_it:
nr_reclaimed++;
- if (!pagevec_add(&freed_pvec, page))
- __pagevec_release_nonlru(&freed_pvec);
+ if (!pagevec_add(&freed_pvec, page)) {
+ __pagevec_free(&freed_pvec);
+ pagevec_reinit(&freed_pvec);
+ }
continue;
activate_locked:
@@ -622,7 +657,7 @@ keep:
}
list_splice(&ret_pages, page_list);
if (pagevec_count(&freed_pvec))
- __pagevec_release_nonlru(&freed_pvec);
+ __pagevec_free(&freed_pvec);
count_vm_events(PGACTIVATE, pgactivate);
return nr_reclaimed;
}
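The switch from __pagevec_release_nonlru() to __pagevec_free() follows from the refcount freezing: pages that reach free_it via __remove_mapping() have a reference count of zero, so they cannot go through a put_page()-style release and must be handed straight back to the page allocator.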
@@ -1316,6 +1351,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
+ delayacct_freepages_start();
+
if (scan_global_lru(sc))
count_vm_event(ALLOCSTALL);
/*
@@ -1396,6 +1433,8 @@ out:
} else
mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
+ delayacct_freepages_end();
+
return ret;
}