Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 186
1 file changed, 156 insertions, 30 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0c114e2b01d3..ff408638fd95 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -104,6 +104,13 @@ struct scan_control {
 
 	/* Number of pages freed so far during a call to shrink_zones() */
 	unsigned long nr_reclaimed;
+
+	/*
+	 * Reclaim pages from a vma. If the page is shared by other tasks
+	 * it is zapped from a vma without reclaim so it ends up remaining
+	 * on memory until last task zap it.
+	 */
+	struct vm_area_struct *target_vma;
 };
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -146,6 +153,12 @@ int vm_swappiness = 60;
  */
 unsigned long vm_total_pages;
 
+#ifdef CONFIG_KSWAPD_CPU_AFFINITY_MASK
+char *kswapd_cpu_mask = CONFIG_KSWAPD_CPU_AFFINITY_MASK;
+#else
+char *kswapd_cpu_mask = NULL;
+#endif
+
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
@@ -277,6 +290,10 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 	int nid = shrinkctl->nid;
 	long batch_size = shrinker->batch ? shrinker->batch
 					  : SHRINK_BATCH;
+	long min_cache_size = batch_size;
+
+	if (current_is_kswapd())
+		min_cache_size = 0;
 
 	freeable = shrinker->count_objects(shrinker, shrinkctl);
 	if (freeable == 0)
@@ -342,7 +359,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 	 * scanning at high prio and therefore should try to reclaim as much as
 	 * possible.
 	 */
-	while (total_scan >= batch_size ||
+	while (total_scan > min_cache_size ||
 	       total_scan >= freeable) {
 		unsigned long ret;
 		unsigned long nr_to_scan = min(batch_size, total_scan);
@@ -904,7 +921,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		struct address_space *mapping;
 		struct page *page;
 		int may_enter_fs;
-		enum page_references references = PAGEREF_RECLAIM_CLEAN;
+		enum page_references references = PAGEREF_RECLAIM;
 		bool dirty, writeback;
 
 		cond_resched();
@@ -916,7 +933,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep;
 
 		VM_BUG_ON_PAGE(PageActive(page), page);
-		VM_BUG_ON_PAGE(page_zone(page) != zone, page);
+		if (zone)
+			VM_BUG_ON_PAGE(page_zone(page) != zone, page);
 
 		sc->nr_scanned++;
 
@@ -995,7 +1013,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			/* Case 1 above */
 			if (current_is_kswapd() &&
 			    PageReclaim(page) &&
-			    test_bit(ZONE_WRITEBACK, &zone->flags)) {
+			    (zone && test_bit(ZONE_WRITEBACK, &zone->flags))) {
 				nr_immediate++;
 				goto keep_locked;
 
@@ -1061,7 +1079,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 */
 		if (page_mapped(page) && mapping) {
 			switch (try_to_unmap(page,
-					ttu_flags|TTU_BATCH_FLUSH)) {
+					ttu_flags|TTU_BATCH_FLUSH,
+					sc->target_vma)) {
 			case SWAP_FAIL:
 				goto activate_locked;
 			case SWAP_AGAIN:
@@ -1081,7 +1100,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			 */
 			if (page_is_file_cache(page) &&
 					(!current_is_kswapd() ||
-					 !test_bit(ZONE_DIRTY, &zone->flags))) {
+					 (zone &&
+					 !test_bit(ZONE_DIRTY, &zone->flags)))) {
 				/*
 				 * Immediately reclaim when written back.
 				 * Similar in principal to deactivate_page()
@@ -1193,6 +1213,13 @@ free_it:
 		 * appear not as the counts should be low
 		 */
 		list_add(&page->lru, &free_pages);
+		/*
+		 * If pagelist are from multiple zones, we should decrease
+		 * NR_ISOLATED_ANON + x on freed pages in here.
+		 */
+		if (!zone)
+			dec_zone_page_state(page, NR_ISOLATED_ANON +
+					page_is_file_cache(page));
 		continue;
 
 cull_mlocked:
@@ -1204,7 +1231,7 @@ cull_mlocked:
 
 activate_locked:
 		/* Not a candidate for swapping, so reclaim swap space. */
-		if (PageSwapCache(page) && vm_swap_full())
+		if (PageSwapCache(page) && vm_swap_full(page_swap_info(page)))
 			try_to_free_swap(page);
 		VM_BUG_ON_PAGE(PageActive(page), page);
 		SetPageActive(page);
@@ -1238,6 +1265,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 		.gfp_mask = GFP_KERNEL,
 		.priority = DEF_PRIORITY,
 		.may_unmap = 1,
+		/* Doesn't allow to write out dirty page */
+		.may_writepage = 0,
 	};
 	unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5;
 	struct page *page, *next;
@@ -1259,6 +1288,42 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 	return ret;
 }
 
+#ifdef CONFIG_PROCESS_RECLAIM
+unsigned long reclaim_pages_from_list(struct list_head *page_list,
+					struct vm_area_struct *vma)
+{
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.priority = DEF_PRIORITY,
+		.may_writepage = 1,
+		.may_unmap = 1,
+		.may_swap = 1,
+		.target_vma = vma,
+	};
+
+	unsigned long nr_reclaimed;
+	struct page *page;
+	unsigned long dummy1, dummy2, dummy3, dummy4, dummy5;
+
+	list_for_each_entry(page, page_list, lru)
+		ClearPageActive(page);
+
+	nr_reclaimed = shrink_page_list(page_list, NULL, &sc,
+			TTU_UNMAP|TTU_IGNORE_ACCESS,
+			&dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
+
+	while (!list_empty(page_list)) {
+		page = lru_to_page(page_list);
+		list_del(&page->lru);
+		dec_zone_page_state(page, NR_ISOLATED_ANON +
+				page_is_file_cache(page));
+		putback_lru_page(page);
+	}
+
+	return nr_reclaimed;
+}
+#endif
+
 /*
  * Attempt to remove the specified page from its LRU. Only take this page
  * if it is of the appropriate PageActive status. Pages which are being
@@ -1445,6 +1510,44 @@ int isolate_lru_page(struct page *page)
 	return ret;
 }
 
+static int __too_many_isolated(struct zone *zone, int file,
+			struct scan_control *sc, int safe)
+{
+	unsigned long inactive, isolated;
+
+	if (file) {
+		if (safe) {
+			inactive = zone_page_state_snapshot(zone,
+					NR_INACTIVE_FILE);
+			isolated = zone_page_state_snapshot(zone,
+					NR_ISOLATED_FILE);
+		} else {
+			inactive = zone_page_state(zone, NR_INACTIVE_FILE);
+			isolated = zone_page_state(zone, NR_ISOLATED_FILE);
+		}
+	} else {
+		if (safe) {
+			inactive = zone_page_state_snapshot(zone,
+					NR_INACTIVE_ANON);
+			isolated = zone_page_state_snapshot(zone,
+					NR_ISOLATED_ANON);
+		} else {
+			inactive = zone_page_state(zone, NR_INACTIVE_ANON);
+			isolated = zone_page_state(zone, NR_ISOLATED_ANON);
+		}
+	}
+
+	/*
+	 * GFP_NOIO/GFP_NOFS callers are allowed to isolate more pages, so they
+	 * won't get blocked by normal direct-reclaimers, forming a circular
+	 * deadlock.
+	 */
+	if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
+		inactive >>= 3;
+
+	return isolated > inactive;
+}
+
 /*
  * A direct reclaimer may isolate SWAP_CLUSTER_MAX pages from the LRU list and
  * then get resheduled. When there are massive number of tasks doing page
@@ -1453,33 +1556,22 @@ int isolate_lru_page(struct page *page)
  * unnecessary swapping, thrashing and OOM.
  */
 static int too_many_isolated(struct zone *zone, int file,
-		struct scan_control *sc)
+		struct scan_control *sc, int safe)
 {
-	unsigned long inactive, isolated;
-
 	if (current_is_kswapd())
 		return 0;
 
 	if (!sane_reclaim(sc))
 		return 0;
 
-	if (file) {
-		inactive = zone_page_state(zone, NR_INACTIVE_FILE);
-		isolated = zone_page_state(zone, NR_ISOLATED_FILE);
-	} else {
-		inactive = zone_page_state(zone, NR_INACTIVE_ANON);
-		isolated = zone_page_state(zone, NR_ISOLATED_ANON);
+	if (unlikely(__too_many_isolated(zone, file, sc, 0))) {
+		if (safe)
+			return __too_many_isolated(zone, file, sc, safe);
+		else
+			return 1;
 	}
 
-	/*
-	 * GFP_NOIO/GFP_NOFS callers are allowed to isolate more pages, so they
-	 * won't get blocked by normal direct-reclaimers, forming a circular
-	 * deadlock.
-	 */
-	if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
-		inactive >>= 3;
-
-	return isolated > inactive;
+	return 0;
 }
 
 static noinline_for_stack void
@@ -1495,6 +1587,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 	while (!list_empty(page_list)) {
 		struct page *page = lru_to_page(page_list);
 		int lru;
+		int file;
 
 		VM_BUG_ON_PAGE(PageLRU(page), page);
 		list_del(&page->lru);
@@ -1511,8 +1604,11 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 		lru = page_lru(page);
 		add_page_to_lru_list(page, lruvec, lru);
 
+		file = is_file_lru(lru);
+		if (IS_ENABLED(CONFIG_ZCACHE))
+			if (file)
+				SetPageWasActive(page);
 		if (is_active_lru(lru)) {
-			int file = is_file_lru(lru);
 			int numpages = hpage_nr_pages(page);
 			reclaim_stat->recent_rotated[file] += numpages;
 		}
@@ -1569,15 +1665,18 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	unsigned long nr_immediate = 0;
 	isolate_mode_t isolate_mode = 0;
 	int file = is_file_lru(lru);
+	int safe = 0;
 	struct zone *zone = lruvec_zone(lruvec);
 	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
 
-	while (unlikely(too_many_isolated(zone, file, sc))) {
+	while (unlikely(too_many_isolated(zone, file, sc, safe))) {
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
 
 		/* We are about to die and free our memory. Return now. */
 		if (fatal_signal_pending(current))
 			return SWAP_CLUSTER_MAX;
+
+		safe = 1;
 	}
 
 	lru_add_drain();
@@ -1834,6 +1933,12 @@ static void shrink_active_list(unsigned long nr_to_scan,
 		}
 
 		ClearPageActive(page);	/* we are de-activating */
+		if (IS_ENABLED(CONFIG_ZCACHE))
+			/*
+			 * For zcache to know whether the page is from active
+			 * file list
+			 */
+			SetPageWasActive(page);
 		list_add(&page->lru, &l_inactive);
 	}
 
@@ -2049,7 +2154,8 @@ static void get_scan_count(struct lruvec *lruvec, int swappiness,
 	 * There is enough inactive page cache, do not reclaim
 	 * anything from the anonymous working set right now.
 	 */
-	if (!inactive_file_is_low(lruvec)) {
+	if (!IS_ENABLED(CONFIG_BALANCE_ANON_FILE_RECLAIM) &&
+			!inactive_file_is_low(lruvec)) {
 		scan_balance = SCAN_FILE;
 		goto out;
 	}
@@ -3425,7 +3531,7 @@ static int kswapd(void *p)
 
 	lockdep_set_current_reclaim_state(GFP_KERNEL);
 
-	if (!cpumask_empty(cpumask))
+	if (kswapd_cpu_mask == NULL && !cpumask_empty(cpumask))
 		set_cpus_allowed_ptr(tsk, cpumask);
 	current->reclaim_state = &reclaim_state;
 
@@ -3595,6 +3701,22 @@ static int cpu_callback(struct notifier_block *nfb, unsigned long action,
 	return NOTIFY_OK;
 }
 
+static int set_kswapd_cpu_mask(pg_data_t *pgdat)
+{
+	int ret = 0;
+	cpumask_t tmask;
+
+	if (!kswapd_cpu_mask)
+		return 0;
+
+	cpumask_clear(&tmask);
+	ret = cpumask_parse(kswapd_cpu_mask, &tmask);
+	if (ret)
+		return ret;
+
+	return set_cpus_allowed_ptr(pgdat->kswapd, &tmask);
+}
+
 /*
  * This kswapd start function will be called by init and node-hot-add.
  * On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
@@ -3614,6 +3736,9 @@ int kswapd_run(int nid)
 		pr_err("Failed to start kswapd on node %d\n", nid);
 		ret = PTR_ERR(pgdat->kswapd);
 		pgdat->kswapd = NULL;
+	} else if (kswapd_cpu_mask) {
+		if (set_kswapd_cpu_mask(pgdat))
+			pr_warn("error setting kswapd cpu affinity mask\n");
 	}
 	return ret;
 }
@@ -3639,7 +3764,8 @@ static int __init kswapd_init(void)
 	swap_setup();
 	for_each_node_state(nid, N_MEMORY)
 		kswapd_run(nid);
-	hotcpu_notifier(cpu_callback, 0);
+	if (kswapd_cpu_mask == NULL)
+		hotcpu_notifier(cpu_callback, 0);
 	return 0;
 }
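Usage note (not part of the diff above): reclaim_pages_from_list() expects its caller to have already isolated the pages from the LRU and bumped the NR_ISOLATED_* counters, since both the !zone free path in shrink_page_list() and the putback loop decrement them. The sketch below is a hypothetical caller written under those assumptions; the real consumer of this interface (a per-process reclaim path behind CONFIG_PROCESS_RECLAIM) is not shown in this diff, and example_reclaim_vma_pages() is an illustrative name only.

/*
 * Hypothetical caller sketch, not part of this patch: isolate a VMA's
 * pages and hand them to reclaim_pages_from_list().  The accounting
 * mirrors what the new code expects: the caller increments
 * NR_ISOLATED_*, and shrink_page_list()/the putback loop decrement it.
 */
static unsigned long example_reclaim_vma_pages(struct vm_area_struct *vma,
					       struct page **pages, int nr)
{
	LIST_HEAD(page_list);
	int i;

	for (i = 0; i < nr; i++) {
		struct page *page = pages[i];

		/* isolate_lru_page() requires a reference held on the page */
		if (isolate_lru_page(page))
			continue;
		/* keep NR_ISOLATED_* balanced with the decrements above */
		inc_zone_page_state(page, NR_ISOLATED_ANON +
				page_is_file_cache(page));
		list_add(&page->lru, &page_list);
	}

	/* shared pages are only unmapped from @vma, not freed */
	return reclaim_pages_from_list(&page_list, vma);
}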