From a5be2d0d1a8746e7be5210e3d6b904455000443c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: rename thaw_process() to __thaw_task() and simplify the implementation thaw_process() now has only internal users - system and cgroup freezers. Remove the unnecessary return value, rename, unexport and collapse __thaw_process() into it. This will help further updates to the freezer code. -v3: oom_kill grew a use of thaw_process() while this patch was pending. Convert it to use __thaw_task() for now. In the longer term, this should be handled by allowing tasks to die if killed even if it's frozen. -v2: minor style update as suggested by Matt. Signed-off-by: Tejun Heo Cc: Paul Menage Cc: Matt Helsley --- mm/oom_kill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/oom_kill.c') diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 76f2c5ae908e..3134ee2fb2e8 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -328,7 +328,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, */ if (test_tsk_thread_flag(p, TIF_MEMDIE)) { if (unlikely(frozen(p))) - thaw_process(p); + __thaw_task(p); return ERR_PTR(-1UL); } if (!p->mm) -- cgit v1.2.3 From ff05b6f7ae762b6eb464183eec994b28ea09f6dd Mon Sep 17 00:00:00 2001 From: Frantisek Hrbata Date: Mon, 19 Dec 2011 17:11:59 -0800 Subject: oom: fix integer overflow of points in oom_badness An integer overflow will happen on 64bit archs if task's sum of rss, swapents and nr_ptes exceeds (2^31)/1000 value. This was introduced by commit f755a04 oom: use pte pages in OOM score where the oom score computation was divided into several steps and it's no longer computed as one expression in unsigned long(rss, swapents, nr_pte are unsigned long), where the result value assigned to points(int) is in range(1..1000). So there could be an int overflow while computing 176 points *= 1000; and points may have negative value. Meaning the oom score for a mem hog task will be one. 196 if (points <= 0) 197 return 1; For example: [ 3366] 0 3366 35390480 24303939 5 0 0 oom01 Out of memory: Kill process 3366 (oom01) score 1 or sacrifice child Here the oom1 process consumes more than 24303939(rss)*4096~=92GB physical memory, but it's oom score is one. In this situation the mem hog task is skipped and oom killer kills another and most probably innocent task with oom score greater than one. The points variable should be of type long instead of int to prevent the int overflow. Signed-off-by: Frantisek Hrbata Acked-by: KOSAKI Motohiro Acked-by: Oleg Nesterov Acked-by: David Rientjes Cc: [2.6.36+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/oom_kill.c') diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 76f2c5ae908e..069b64e521fc 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -176,7 +176,7 @@ static bool oom_unkillable_task(struct task_struct *p, unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, const nodemask_t *nodemask, unsigned long totalpages) { - int points; + long points; if (oom_unkillable_task(p, mem, nodemask)) return 0; -- cgit v1.2.3 From 43d2b113241d6797b890318767e0af78e313414b Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 10 Jan 2012 15:08:09 -0800 Subject: tracepoint: add tracepoints for debugging oom_score_adj oom_score_adj is used for guarding processes from OOM-Killer. One of problem is that it's inherited at fork(). When a daemon set oom_score_adj and make children, it's hard to know where the value is set. This patch adds some tracepoints useful for debugging. This patch adds 3 trace points. - creating new task - renaming a task (exec) - set oom_score_adj To debug, users need to enable some trace pointer. Maybe filtering is useful as # EVENT=/sys/kernel/debug/tracing/events/task/ # echo "oom_score_adj != 0" > $EVENT/task_newtask/filter # echo "oom_score_adj != 0" > $EVENT/task_rename/filter # echo 1 > $EVENT/enable # EVENT=/sys/kernel/debug/tracing/events/oom/ # echo 1 > $EVENT/enable output will be like this. # grep oom /sys/kernel/debug/tracing/trace bash-7699 [007] d..3 5140.744510: oom_score_adj_update: pid=7699 comm=bash oom_score_adj=-1000 bash-7699 [007] ...1 5151.818022: task_newtask: pid=7729 comm=bash clone_flags=1200011 oom_score_adj=-1000 ls-7729 [003] ...2 5151.818504: task_rename: pid=7729 oldcomm=bash newcomm=ls oom_score_adj=-1000 bash-7699 [002] ...1 5175.701468: task_newtask: pid=7730 comm=bash clone_flags=1200011 oom_score_adj=-1000 grep-7730 [007] ...2 5175.701993: task_rename: pid=7730 oldcomm=bash newcomm=grep oom_score_adj=-1000 Signed-off-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'mm/oom_kill.c') diff --git a/mm/oom_kill.c b/mm/oom_kill.c index eeb27e27dce3..7c122faa05c5 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -33,6 +33,10 @@ #include #include #include +#include + +#define CREATE_TRACE_POINTS +#include int sysctl_panic_on_oom; int sysctl_oom_kill_allocating_task; @@ -55,6 +59,7 @@ void compare_swap_oom_score_adj(int old_val, int new_val) spin_lock_irq(&sighand->siglock); if (current->signal->oom_score_adj == old_val) current->signal->oom_score_adj = new_val; + trace_oom_score_adj_update(current); spin_unlock_irq(&sighand->siglock); } @@ -74,6 +79,7 @@ int test_set_oom_score_adj(int new_val) spin_lock_irq(&sighand->siglock); old_val = current->signal->oom_score_adj; current->signal->oom_score_adj = new_val; + trace_oom_score_adj_update(current); spin_unlock_irq(&sighand->siglock); return old_val; -- cgit v1.2.3 From ec0fffd84b162e0563a28a81aa049f946b31a8e2 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 12 Jan 2012 17:18:29 -0800 Subject: mm: oom_kill: remove memcg argument from oom_kill_task() The memcg argument of oom_kill_task() hasn't been used since 341aea2 'oom-kill: remove boost_dying_task_prio()'. Kill it. Signed-off-by: Johannes Weiner Acked-by: David Rientjes Acked-by: KAMEZAWA Hiroyuki Acked-by: Michal Hocko Cc: Balbir Singh Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/oom_kill.c') diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 7c122faa05c5..05c0f27d4ed1 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -434,7 +434,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, } #define K(x) ((x) << (PAGE_SHIFT-10)) -static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) +static int oom_kill_task(struct task_struct *p) { struct task_struct *q; struct mm_struct *mm; @@ -533,7 +533,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, } } while_each_thread(p, t); - return oom_kill_task(victim, mem); + return oom_kill_task(victim); } /* -- cgit v1.2.3 From 72835c86ca15d0126354b73d5f29ce9194931c9b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 12 Jan 2012 17:18:32 -0800 Subject: mm: unify remaining mem_cont, mem, etc. variable names to memcg Signed-off-by: Johannes Weiner Acked-by: David Rientjes Acked-by: KAMEZAWA Hiroyuki Acked-by: Michal Hocko Cc: Balbir Singh Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'mm/oom_kill.c') diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 05c0f27d4ed1..2958fd8e7c9a 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -152,7 +152,7 @@ struct task_struct *find_lock_task_mm(struct task_struct *p) /* return true if the task is not adequate as candidate victim task. */ static bool oom_unkillable_task(struct task_struct *p, - const struct mem_cgroup *mem, const nodemask_t *nodemask) + const struct mem_cgroup *memcg, const nodemask_t *nodemask) { if (is_global_init(p)) return true; @@ -160,7 +160,7 @@ static bool oom_unkillable_task(struct task_struct *p, return true; /* When mem_cgroup_out_of_memory() and p is not member of the group */ - if (mem && !task_in_mem_cgroup(p, mem)) + if (memcg && !task_in_mem_cgroup(p, memcg)) return true; /* p may not have freeable memory in nodemask */ @@ -179,12 +179,12 @@ static bool oom_unkillable_task(struct task_struct *p, * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. */ -unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, +unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages) { long points; - if (oom_unkillable_task(p, mem, nodemask)) + if (oom_unkillable_task(p, memcg, nodemask)) return 0; p = find_lock_task_mm(p); @@ -308,7 +308,7 @@ static enum oom_constraint constrained_alloc(struct zonelist *zonelist, * (not docbooked, we don't want this one cluttering up the manual) */ static struct task_struct *select_bad_process(unsigned int *ppoints, - unsigned long totalpages, struct mem_cgroup *mem, + unsigned long totalpages, struct mem_cgroup *memcg, const nodemask_t *nodemask) { struct task_struct *g, *p; @@ -320,7 +320,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, if (p->exit_state) continue; - if (oom_unkillable_task(p, mem, nodemask)) + if (oom_unkillable_task(p, memcg, nodemask)) continue; /* @@ -364,7 +364,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, } } - points = oom_badness(p, mem, nodemask, totalpages); + points = oom_badness(p, memcg, nodemask, totalpages); if (points > *ppoints) { chosen = p; *ppoints = points; @@ -387,14 +387,14 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, * * Call with tasklist_lock read-locked. */ -static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask) +static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask) { struct task_struct *p; struct task_struct *task; pr_info("[ pid ] uid tgid total_vm rss cpu oom_adj oom_score_adj name\n"); for_each_process(p) { - if (oom_unkillable_task(p, mem, nodemask)) + if (oom_unkillable_task(p, memcg, nodemask)) continue; task = find_lock_task_mm(p); @@ -417,7 +417,7 @@ static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask) } static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, - struct mem_cgroup *mem, const nodemask_t *nodemask) + struct mem_cgroup *memcg, const nodemask_t *nodemask) { task_lock(current); pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " @@ -427,10 +427,10 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, cpuset_print_task_mems_allowed(current); task_unlock(current); dump_stack(); - mem_cgroup_print_oom_info(mem, p); + mem_cgroup_print_oom_info(memcg, p); show_mem(SHOW_MEM_FILTER_NODES); if (sysctl_oom_dump_tasks) - dump_tasks(mem, nodemask); + dump_tasks(memcg, nodemask); } #define K(x) ((x) << (PAGE_SHIFT-10)) @@ -484,7 +484,7 @@ static int oom_kill_task(struct task_struct *p) static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned int points, unsigned long totalpages, - struct mem_cgroup *mem, nodemask_t *nodemask, + struct mem_cgroup *memcg, nodemask_t *nodemask, const char *message) { struct task_struct *victim = p; @@ -493,7 +493,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned int victim_points = 0; if (printk_ratelimit()) - dump_header(p, gfp_mask, order, mem, nodemask); + dump_header(p, gfp_mask, order, memcg, nodemask); /* * If the task is already exiting, don't alarm the sysadmin or kill @@ -524,7 +524,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, /* * oom_badness() returns 0 if the thread is unkillable */ - child_points = oom_badness(child, mem, nodemask, + child_points = oom_badness(child, memcg, nodemask, totalpages); if (child_points > victim_points) { victim = child; @@ -561,7 +561,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, } #ifdef CONFIG_CGROUP_MEM_RES_CTLR -void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) +void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask) { unsigned long limit; unsigned int points = 0; @@ -578,14 +578,14 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) } check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL); - limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT; + limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT; read_lock(&tasklist_lock); retry: - p = select_bad_process(&points, limit, mem, NULL); + p = select_bad_process(&points, limit, memcg, NULL); if (!p || PTR_ERR(p) == -1UL) goto out; - if (oom_kill_process(p, gfp_mask, 0, points, limit, mem, NULL, + if (oom_kill_process(p, gfp_mask, 0, points, limit, memcg, NULL, "Memory cgroup out of memory")) goto retry; out: -- cgit v1.2.3