Diffstat (limited to 'mm/process_reclaim.c')
-rw-r--r-- | mm/process_reclaim.c | 256
1 file changed, 256 insertions, 0 deletions
diff --git a/mm/process_reclaim.c b/mm/process_reclaim.c
new file mode 100644
index 000000000000..98e5af190fe0
--- /dev/null
+++ b/mm/process_reclaim.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/sort.h>
+#include <linux/oom.h>
+#include <linux/sched.h>
+#include <linux/rcupdate.h>
+#include <linux/notifier.h>
+#include <linux/vmpressure.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/process_reclaim.h>
+
+#define MAX_SWAP_TASKS SWAP_CLUSTER_MAX
+
+static void swap_fn(struct work_struct *work);
+DECLARE_WORK(swap_work, swap_fn);
+
+/* User knob to enable/disable process reclaim feature */
+static int enable_process_reclaim;
+module_param_named(enable_process_reclaim, enable_process_reclaim, int,
+		S_IRUGO | S_IWUSR);
+
+/* The max number of pages tried to be reclaimed in a single run */
+int per_swap_size = SWAP_CLUSTER_MAX * 32;
+module_param_named(per_swap_size, per_swap_size, int, S_IRUGO | S_IWUSR);
+
+int reclaim_avg_efficiency;
+module_param_named(reclaim_avg_efficiency, reclaim_avg_efficiency,
+			int, S_IRUGO);
+
+/* The vmpressure region where process reclaim operates */
+static unsigned long pressure_min = 50;
+static unsigned long pressure_max = 90;
+module_param_named(pressure_min, pressure_min, ulong, S_IRUGO | S_IWUSR);
+module_param_named(pressure_max, pressure_max, ulong, S_IRUGO | S_IWUSR);
+
+static short min_score_adj = 360;
+module_param_named(min_score_adj, min_score_adj, short,
+			S_IRUGO | S_IWUSR);
+
+/*
+ * Scheduling process reclaim workqueue unecessarily
+ * when the reclaim efficiency is low does not make
+ * sense. We try to detect a drop in efficiency and
+ * disable reclaim for a time period. This period and the
+ * period for which we monitor a drop in efficiency is
+ * defined by swap_eff_win. swap_opt_eff is the optimal
+ * efficincy used as theshold for this.
+ */
+static int swap_eff_win = 2;
+module_param_named(swap_eff_win, swap_eff_win, int, S_IRUGO | S_IWUSR);
+
+static int swap_opt_eff = 50;
+module_param_named(swap_opt_eff, swap_opt_eff, int, S_IRUGO | S_IWUSR);
+
+static atomic_t skip_reclaim = ATOMIC_INIT(0);
+/* Not atomic since only a single instance of swap_fn run at a time */
+static int monitor_eff;
+
+struct selected_task {
+	struct task_struct *p;
+	int tasksize;
+	short oom_score_adj;
+};
+
+int selected_cmp(const void *a, const void *b)
+{
+	const struct selected_task *x = a;
+	const struct selected_task *y = b;
+	int ret;
+
+	ret = x->tasksize < y->tasksize ? -1 : 1;
+
+	return ret;
+}
+
+static int test_task_flag(struct task_struct *p, int flag)
+{
+	struct task_struct *t = p;
+
+	rcu_read_lock();
+	for_each_thread(p, t) {
+		task_lock(t);
+		if (test_tsk_thread_flag(t, flag)) {
+			task_unlock(t);
+			rcu_read_unlock();
+			return 1;
+		}
+		task_unlock(t);
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static void swap_fn(struct work_struct *work)
+{
+	struct task_struct *tsk;
+	struct reclaim_param rp;
+
+	/* Pick the best MAX_SWAP_TASKS tasks in terms of anon size */
+	struct selected_task selected[MAX_SWAP_TASKS] = {{0, 0, 0},};
+	int si = 0;
+	int i;
+	int tasksize;
+	int total_sz = 0;
+	int total_scan = 0;
+	int total_reclaimed = 0;
+	int nr_to_reclaim;
+	int efficiency;
+
+	rcu_read_lock();
+	for_each_process(tsk) {
+		struct task_struct *p;
+		short oom_score_adj;
+
+		if (tsk->flags & PF_KTHREAD)
+			continue;
+
+		if (test_task_flag(tsk, TIF_MEMDIE))
+			continue;
+
+		p = find_lock_task_mm(tsk);
+		if (!p)
+			continue;
+
+		oom_score_adj = p->signal->oom_score_adj;
+		if (oom_score_adj < min_score_adj) {
+			task_unlock(p);
+			continue;
+		}
+
+		tasksize = get_mm_counter(p->mm, MM_ANONPAGES);
+		task_unlock(p);
+
+		if (tasksize <= 0)
+			continue;
+
+		if (si == MAX_SWAP_TASKS) {
+			sort(&selected[0], MAX_SWAP_TASKS,
+					sizeof(struct selected_task),
+					&selected_cmp, NULL);
+			if (tasksize < selected[0].tasksize)
+				continue;
+			selected[0].p = p;
+			selected[0].oom_score_adj = oom_score_adj;
+			selected[0].tasksize = tasksize;
+		} else {
+			selected[si].p = p;
+			selected[si].oom_score_adj = oom_score_adj;
+			selected[si].tasksize = tasksize;
+			si++;
+		}
+	}
+
+	for (i = 0; i < si; i++)
+		total_sz += selected[i].tasksize;
+
+	/* Skip reclaim if total size is too less */
+	if (total_sz < SWAP_CLUSTER_MAX) {
+		rcu_read_unlock();
+		return;
+	}
+
+	for (i = 0; i < si; i++)
+		get_task_struct(selected[i].p);
+
+	rcu_read_unlock();
+
+	while (si--) {
+		nr_to_reclaim =
+			(selected[si].tasksize * per_swap_size) / total_sz;
+		/* scan atleast a page */
+		if (!nr_to_reclaim)
+			nr_to_reclaim = 1;
+
+		rp = reclaim_task_anon(selected[si].p, nr_to_reclaim);
+
+		trace_process_reclaim(selected[si].tasksize,
+				selected[si].oom_score_adj, rp.nr_scanned,
+				rp.nr_reclaimed, per_swap_size, total_sz,
+				nr_to_reclaim);
+		total_scan += rp.nr_scanned;
+		total_reclaimed += rp.nr_reclaimed;
+		put_task_struct(selected[si].p);
+	}
+
+	if (total_scan) {
+		efficiency = (total_reclaimed * 100) / total_scan;
+
+		if (efficiency < swap_opt_eff) {
+			if (++monitor_eff == swap_eff_win) {
+				atomic_set(&skip_reclaim, swap_eff_win);
+				monitor_eff = 0;
+			}
+		} else {
+			monitor_eff = 0;
+		}
+
+		reclaim_avg_efficiency =
+			(efficiency + reclaim_avg_efficiency) / 2;
+		trace_process_reclaim_eff(efficiency, reclaim_avg_efficiency);
+	}
+}
+
+static int vmpressure_notifier(struct notifier_block *nb,
+			unsigned long action, void *data)
+{
+	unsigned long pressure = action;
+
+	if (!enable_process_reclaim)
+		return 0;
+
+	if (!current_is_kswapd())
+		return 0;
+
+	if (atomic_dec_if_positive(&skip_reclaim) >= 0)
+		return 0;
+
+	if ((pressure >= pressure_min) && (pressure < pressure_max))
+		if (!work_pending(&swap_work))
+			queue_work(system_unbound_wq, &swap_work);
+	return 0;
+}
+
+static struct notifier_block vmpr_nb = {
+	.notifier_call = vmpressure_notifier,
+};
+
+static int __init process_reclaim_init(void)
+{
+	vmpressure_notifier_register(&vmpr_nb);
+	return 0;
+}
+
+static void __exit process_reclaim_exit(void)
+{
+	vmpressure_notifier_unregister(&vmpr_nb);
+}
+
+module_init(process_reclaim_init);
+module_exit(process_reclaim_exit);
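
For reference, the two calculations that drive swap_fn(), the proportional split of per_swap_size across the selected tasks and the efficiency window that arms skip_reclaim, can be exercised in isolation. The sketch below is a standalone userspace C program, not kernel code; the task sizes and scan/reclaim counts are hypothetical values chosen only to illustrate the arithmetic.

#include <stdio.h>

#define PER_SWAP_SIZE	1024	/* default per_swap_size: SWAP_CLUSTER_MAX (32) * 32 */
#define SWAP_EFF_WIN	2
#define SWAP_OPT_EFF	50

int main(void)
{
	/* Anon RSS of the selected tasks, in pages; hypothetical values. */
	int tasksize[]  = { 5000, 1200, 300 };
	int scanned[]   = { 1024, 1024 };
	int reclaimed[] = { 300, 400 };
	int total_sz = 0, monitor_eff = 0, skip_reclaim = 0;
	int i;

	for (i = 0; i < 3; i++)
		total_sz += tasksize[i];

	/* Each task gets a share of the per-run budget proportional to its
	 * anon size, but never less than one page (same formula as swap_fn). */
	for (i = 0; i < 3; i++) {
		int nr_to_reclaim = (tasksize[i] * PER_SWAP_SIZE) / total_sz;

		if (!nr_to_reclaim)
			nr_to_reclaim = 1;
		printf("task %d: anon=%d pages, reclaim target=%d pages\n",
		       i, tasksize[i], nr_to_reclaim);
	}

	/* Efficiency throttle: swap_eff_win consecutive runs below
	 * swap_opt_eff percent arm skip_reclaim, which then makes the
	 * notifier ignore the next swap_eff_win vmpressure events. */
	for (i = 0; i < 2; i++) {
		int efficiency = (reclaimed[i] * 100) / scanned[i];

		if (efficiency < SWAP_OPT_EFF) {
			if (++monitor_eff == SWAP_EFF_WIN) {
				skip_reclaim = SWAP_EFF_WIN;
				monitor_eff = 0;
			}
		} else {
			monitor_eff = 0;
		}
		printf("run %d: efficiency=%d%%, skip_reclaim=%d\n",
		       i, efficiency, skip_reclaim);
	}
	return 0;
}

Run as an ordinary program, this prints per-task targets of 787, 189 and 47 pages for the values above, and shows skip_reclaim being armed only on the second consecutive run below 50% efficiency.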
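
Because every tunable above is declared with module_param_named(), the knobs should appear under /sys/module/process_reclaim/parameters/ at runtime (that directory name is an assumption based on the object name; the commit itself does not state it). A minimal userspace sketch that enables the feature and adjusts the vmpressure window could look like the following; the chosen values are examples only.

#include <stdio.h>

/* Hypothetical helper: the sysfs path assumes the code is built with
 * KBUILD_MODNAME "process_reclaim". */
static int write_param(const char *name, const char *value)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/module/process_reclaim/parameters/%s", name);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	fputs(value, f);
	fclose(f);
	return 0;
}

int main(void)
{
	write_param("enable_process_reclaim", "1");	/* turn the feature on */
	write_param("pressure_min", "60");		/* react between 60 ... */
	write_param("pressure_max", "90");		/* ... and 90 vmpressure */
	write_param("per_swap_size", "512");		/* smaller per-run budget */
	return 0;
}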