From 3167760f83899ccda312b9ad9306ec9e5dda06d4 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Wed, 21 Sep 2011 11:56:28 -0400 Subject: mm: cleancache: s/flush/invalidate/ Per akpm suggestions alter the use of the term flush to be invalidate. The next patch will do this across all MM. This change is completely cosmetic. [v9: akpm@linux-foundation.org: change "flush" to "invalidate", part 3] Signed-off-by: Dan Magenheimer Cc: Kamezawa Hiroyuki Cc: Jan Beulich Reviewed-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel Cc: Andrew Morton [v10: Fixed fs: move code out of buffer.c conflict change] Signed-off-by: Konrad Rzeszutek Wilk --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/filemap.c') diff --git a/mm/filemap.c b/mm/filemap.c index a0701e6eec10..0aa3faa48219 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -123,7 +123,7 @@ void __delete_from_page_cache(struct page *page) if (PageUptodate(page) && PageMappedToDisk(page)) cleancache_put_page(page); else - cleancache_flush_page(mapping, page); + cleancache_invalidate_page(mapping, page); radix_tree_delete(&mapping->page_tree, page->index); page->mapping = NULL; -- cgit v1.2.3 From 9b04c5fec43c0da610a2c37f70c5b013101a6ad7 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 25 Nov 2011 23:14:39 +0800 Subject: mm: remove the second argument of k[un]map_atomic() Signed-off-by: Cong Wang --- mm/filemap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/filemap.c') diff --git a/mm/filemap.c b/mm/filemap.c index b66275757c28..2f8165075a5a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1318,10 +1318,10 @@ int file_read_actor(read_descriptor_t *desc, struct page *page, * taking the kmap. */ if (!fault_in_pages_writeable(desc->arg.buf, size)) { - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); left = __copy_to_user_inatomic(desc->arg.buf, kaddr + offset, size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); if (left == 0) goto success; } @@ -2045,7 +2045,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, size_t copied; BUG_ON(!in_atomic()); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); if (likely(i->nr_segs == 1)) { int left; char __user *buf = i->iov->iov_base + i->iov_offset; @@ -2055,7 +2055,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, copied = __iovec_copy_from_user_inatomic(kaddr + offset, i->iov, i->iov_offset, bytes); } - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return copied; } -- cgit v1.2.3 From 1010bb1b80edb0713415dfe1f97114d320f58c4f Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Wed, 21 Mar 2012 16:34:08 -0700 Subject: mm: don't set __GFP_WRITE on ramfs/sysfs writes There is not much point in skipping zones during allocation based on the dirty usage which they'll never contribute to. And we'd like to avoid page reclaim waits when writing to ramfs/sysfs etc. Signed-off-by: Fengguang Wu Acked-by: Johannes Weiner Cc: Jan Kara Cc: Greg Thelen Cc: Ying Han Cc: KAMEZAWA Hiroyuki Cc: Rik van Riel Cc: Mel Gorman Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'mm/filemap.c') diff --git a/mm/filemap.c b/mm/filemap.c index 2f8165075a5a..e8cf8aefd6f6 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2341,7 +2341,9 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, struct page *page; gfp_t gfp_notmask = 0; - gfp_mask = mapping_gfp_mask(mapping) | __GFP_WRITE; + gfp_mask = mapping_gfp_mask(mapping); + if (mapping_cap_account_dirty(mapping)) + gfp_mask |= __GFP_WRITE; if (flags & AOP_FLAG_NOFS) gfp_notmask = __GFP_FS; repeat: -- cgit v1.2.3 From 9a3c531df9462df6cc2b060f749651723ffc180c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 21 Mar 2012 16:34:09 -0700 Subject: mm: update stale lock ordering comment for memory-failure.c When i_mmap_lock changed to a mutex the locking order in memory failure was changed to take the sleeping lock first. But the big fat mm lock ordering comment (BFMLO) wasn't updated. Do this here. Pointed out by Andrew. Signed-off-by: Andi Kleen Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'mm/filemap.c') diff --git a/mm/filemap.c b/mm/filemap.c index e8cf8aefd6f6..f3230604006c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -101,9 +101,8 @@ * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->__set_page_dirty_buffers) * - * (code doesn't rely on that order, so you could switch it around) - * ->tasklist_lock (memory_failure, collect_procs_ao) - * ->i_mmap_mutex + * ->i_mmap_mutex + * ->tasklist_lock (memory_failure, collect_procs_ao) */ /* -- cgit v1.2.3 From cc9a6c8776615f9c194ccf0b63a0aa5628235545 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 21 Mar 2012 16:34:11 -0700 Subject: cpuset: mm: reduce large amounts of memory barrier related damage v3 Commit c0ff7453bb5c ("cpuset,mm: fix no node to alloc memory when changing cpuset's mems") wins a super prize for the largest number of memory barriers entered into fast paths for one commit. [get|put]_mems_allowed is incredibly heavy with pairs of full memory barriers inserted into a number of hot paths. This was detected while investigating at large page allocator slowdown introduced some time after 2.6.32. The largest portion of this overhead was shown by oprofile to be at an mfence introduced by this commit into the page allocator hot path. For extra style points, the commit introduced the use of yield() in an implementation of what looks like a spinning mutex. This patch replaces the full memory barriers on both read and write sides with a sequence counter with just read barriers on the fast path side. This is much cheaper on some architectures, including x86. The main bulk of the patch is the retry logic if the nodemask changes in a manner that can cause a false failure. While updating the nodemask, a check is made to see if a false failure is a risk. If it is, the sequence number gets bumped and parallel allocators will briefly stall while the nodemask update takes place. In a page fault test microbenchmark, oprofile samples from __alloc_pages_nodemask went from 4.53% of all samples to 1.15%. The actual results were 3.3.0-rc3 3.3.0-rc3 rc3-vanilla nobarrier-v2r1 Clients 1 UserTime 0.07 ( 0.00%) 0.08 (-14.19%) Clients 2 UserTime 0.07 ( 0.00%) 0.07 ( 2.72%) Clients 4 UserTime 0.08 ( 0.00%) 0.07 ( 3.29%) Clients 1 SysTime 0.70 ( 0.00%) 0.65 ( 6.65%) Clients 2 SysTime 0.85 ( 0.00%) 0.82 ( 3.65%) Clients 4 SysTime 1.41 ( 0.00%) 1.41 ( 0.32%) Clients 1 WallTime 0.77 ( 0.00%) 0.74 ( 4.19%) Clients 2 WallTime 0.47 ( 0.00%) 0.45 ( 3.73%) Clients 4 WallTime 0.38 ( 0.00%) 0.37 ( 1.58%) Clients 1 Flt/sec/cpu 497620.28 ( 0.00%) 520294.53 ( 4.56%) Clients 2 Flt/sec/cpu 414639.05 ( 0.00%) 429882.01 ( 3.68%) Clients 4 Flt/sec/cpu 257959.16 ( 0.00%) 258761.48 ( 0.31%) Clients 1 Flt/sec 495161.39 ( 0.00%) 517292.87 ( 4.47%) Clients 2 Flt/sec 820325.95 ( 0.00%) 850289.77 ( 3.65%) Clients 4 Flt/sec 1020068.93 ( 0.00%) 1022674.06 ( 0.26%) MMTests Statistics: duration Sys Time Running Test (seconds) 135.68 132.17 User+Sys Time Running Test (seconds) 164.2 160.13 Total Elapsed Time (seconds) 123.46 120.87 The overall improvement is small but the System CPU time is much improved and roughly in correlation to what oprofile reported (these performance figures are without profiling so skew is expected). The actual number of page faults is noticeably improved. For benchmarks like kernel builds, the overall benefit is marginal but the system CPU time is slightly reduced. To test the actual bug the commit fixed I opened two terminals. The first ran within a cpuset and continually ran a small program that faulted 100M of anonymous data. In a second window, the nodemask of the cpuset was continually randomised in a loop. Without the commit, the program would fail every so often (usually within 10 seconds) and obviously with the commit everything worked fine. With this patch applied, it also worked fine so the fix should be functionally equivalent. Signed-off-by: Mel Gorman Cc: Miao Xie Cc: David Rientjes Cc: Peter Zijlstra Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'mm/filemap.c') diff --git a/mm/filemap.c b/mm/filemap.c index f3230604006c..843042045dc9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -499,10 +499,13 @@ struct page *__page_cache_alloc(gfp_t gfp) struct page *page; if (cpuset_do_page_mem_spread()) { - get_mems_allowed(); - n = cpuset_mem_spread_node(); - page = alloc_pages_exact_node(n, gfp, 0); - put_mems_allowed(); + unsigned int cpuset_mems_cookie; + do { + cpuset_mems_cookie = get_mems_allowed(); + n = cpuset_mem_spread_node(); + page = alloc_pages_exact_node(n, gfp, 0); + } while (!put_mems_allowed(cpuset_mems_cookie) && !page); + return page; } return alloc_pages(gfp, 0); -- cgit v1.2.3 From 0fc9d1040313047edf6a39fd4d7c7defdca97c62 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 28 Mar 2012 14:42:54 -0700 Subject: radix-tree: use iterators in find_get_pages* functions Replace radix_tree_gang_lookup_slot() and radix_tree_gang_lookup_tag_slot() in page-cache lookup functions with brand-new radix-tree direct iterating. This avoids the double-scanning and pointer copying. Iterator don't stop after nr_pages page-get fails in a row, it continue lookup till the radix-tree end. Thus we can safely remove these restart conditions. Unfortunately, old implementation didn't forbid nr_pages == 0, this corner case does not fit into new code, so the patch adds an extra check at the beginning. Signed-off-by: Konstantin Khlebnikov Tested-by: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 86 +++++++++++++++++++++++++++--------------------------------- 1 file changed, 38 insertions(+), 48 deletions(-) (limited to 'mm/filemap.c') diff --git a/mm/filemap.c b/mm/filemap.c index c3811bc6b9e3..79c4b2b0b14e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -813,20 +813,19 @@ EXPORT_SYMBOL(find_or_create_page); unsigned find_get_pages(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages) { - unsigned int i; - unsigned int ret; - unsigned int nr_found, nr_skip; + struct radix_tree_iter iter; + void **slot; + unsigned ret = 0; + + if (unlikely(!nr_pages)) + return 0; rcu_read_lock(); restart: - nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, NULL, start, nr_pages); - ret = 0; - nr_skip = 0; - for (i = 0; i < nr_found; i++) { + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { struct page *page; repeat: - page = radix_tree_deref_slot((void **)pages[i]); + page = radix_tree_deref_slot(slot); if (unlikely(!page)) continue; @@ -837,7 +836,7 @@ repeat: * when entry at index 0 moves out of or back * to root: none yet gotten, safe to restart. */ - WARN_ON(start | i); + WARN_ON(iter.index); goto restart; } /* @@ -845,7 +844,6 @@ repeat: * here as an exceptional entry: so skip over it - * we only reach this from invalidate_mapping_pages(). */ - nr_skip++; continue; } @@ -853,21 +851,16 @@ repeat: goto repeat; /* Has the page moved? */ - if (unlikely(page != *((void **)pages[i]))) { + if (unlikely(page != *slot)) { page_cache_release(page); goto repeat; } pages[ret] = page; - ret++; + if (++ret == nr_pages) + break; } - /* - * If all entries were removed before we could secure them, - * try again, because callers stop trying once 0 is returned. - */ - if (unlikely(!ret && nr_found > nr_skip)) - goto restart; rcu_read_unlock(); return ret; } @@ -887,21 +880,22 @@ repeat: unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, unsigned int nr_pages, struct page **pages) { - unsigned int i; - unsigned int ret; - unsigned int nr_found; + struct radix_tree_iter iter; + void **slot; + unsigned int ret = 0; + + if (unlikely(!nr_pages)) + return 0; rcu_read_lock(); restart: - nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree, - (void ***)pages, NULL, index, nr_pages); - ret = 0; - for (i = 0; i < nr_found; i++) { + radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) { struct page *page; repeat: - page = radix_tree_deref_slot((void **)pages[i]); + page = radix_tree_deref_slot(slot); + /* The hole, there no reason to continue */ if (unlikely(!page)) - continue; + break; if (radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) { @@ -924,7 +918,7 @@ repeat: goto repeat; /* Has the page moved? */ - if (unlikely(page != *((void **)pages[i]))) { + if (unlikely(page != *slot)) { page_cache_release(page); goto repeat; } @@ -934,14 +928,14 @@ repeat: * otherwise we can get both false positives and false * negatives, which is just confusing to the caller. */ - if (page->mapping == NULL || page->index != index) { + if (page->mapping == NULL || page->index != iter.index) { page_cache_release(page); break; } pages[ret] = page; - ret++; - index++; + if (++ret == nr_pages) + break; } rcu_read_unlock(); return ret; @@ -962,19 +956,20 @@ EXPORT_SYMBOL(find_get_pages_contig); unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, int tag, unsigned int nr_pages, struct page **pages) { - unsigned int i; - unsigned int ret; - unsigned int nr_found; + struct radix_tree_iter iter; + void **slot; + unsigned ret = 0; + + if (unlikely(!nr_pages)) + return 0; rcu_read_lock(); restart: - nr_found = radix_tree_gang_lookup_tag_slot(&mapping->page_tree, - (void ***)pages, *index, nr_pages, tag); - ret = 0; - for (i = 0; i < nr_found; i++) { + radix_tree_for_each_tagged(slot, &mapping->page_tree, + &iter, *index, tag) { struct page *page; repeat: - page = radix_tree_deref_slot((void **)pages[i]); + page = radix_tree_deref_slot(slot); if (unlikely(!page)) continue; @@ -998,21 +993,16 @@ repeat: goto repeat; /* Has the page moved? */ - if (unlikely(page != *((void **)pages[i]))) { + if (unlikely(page != *slot)) { page_cache_release(page); goto repeat; } pages[ret] = page; - ret++; + if (++ret == nr_pages) + break; } - /* - * If all entries were removed before we could secure them, - * try again, because callers stop trying once 0 is returned. - */ - if (unlikely(!ret && nr_found)) - goto restart; rcu_read_unlock(); if (ret) -- cgit v1.2.3