From 0bb943c7a2136716757a263f604d26309fd98042 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 14 Nov 2008 19:05:31 +0100 Subject: tracing: kernel/trace/trace.c: introduce missing kfree() Impact: fix memory leak Error handling code following a kzalloc should free the allocated data. The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,l; position p1,p2; expression *ptr != NULL; @@ ( if ((x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...)) == NULL) S | x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S ) <... when != x when != if (...) { <+...x...+> } x->f = E ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 697eda36b86a..d86e3252f300 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1936,6 +1936,7 @@ __tracing_open(struct inode *inode, struct file *file, int *ret) ring_buffer_read_finish(iter->buffer_iter[cpu]); } mutex_unlock(&trace_types_lock); + kfree(iter); return ERR_PTR(-ENOMEM); } -- cgit v1.2.3 From 641d2f63cfe24539e154efa2f932937934c27dde Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 18 Nov 2008 19:22:13 +0100 Subject: trace: introduce missing mutex_unlock() Impact: fix tracing buffer mutex leak in case of allocation failure This error was spotted by this semantic patch: http://www.emn.fr/x-info/coccinelle/mut.html It looks correct as far as I can tell. Please review. Signed-off-by: Vegard Nossum Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ring_buffer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 036456cbb4f7..f780e9552f91 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -617,6 +617,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size) list_del_init(&page->list); free_buffer_page(page); } + mutex_unlock(&buffer->mutex); return -ENOMEM; } -- cgit v1.2.3 From f10ed36ec1118c6f9523cd7e53cb0aadb53efe9f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Nov 2008 22:36:02 -0500 Subject: ftrace: fix set_ftrace_filter Impact: fix of output of set_ftrace_filter The commit "ftrace: do not show freed records in available_filter_functions" Removed a bit too much from the set_ftrace_filter code, where we now see all functions in the set_ftrace_filter file even when we set a filter. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4a39d24568c8..dcac7418f688 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -738,6 +738,9 @@ t_next(struct seq_file *m, void *v, loff_t *pos) ((iter->flags & FTRACE_ITER_FAILURES) && !(rec->flags & FTRACE_FL_FAILED)) || + ((iter->flags & FTRACE_ITER_FILTER) && + !(rec->flags & FTRACE_FL_FILTER)) || + ((iter->flags & FTRACE_ITER_NOTRACE) && !(rec->flags & FTRACE_FL_NOTRACE))) { rec = NULL; -- cgit v1.2.3 From 820432783190b4096499e38a4a4d7095c511913d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 18 Nov 2008 23:57:14 -0500 Subject: ftrace: make filtered functions effective on setting Impact: fix filter selection to apply when set It can be confusing when the set_filter_functions is set (or cleared) and the functions being recorded by the dynamic tracer does not match. This patch causes the code to be updated if the function tracer is enabled and the filter is changed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index dcac7418f688..5cbddb59e99f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1189,7 +1189,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftrace_start_lock); - if (iter->filtered && ftrace_start && ftrace_enabled) + if (ftrace_start && ftrace_enabled) ftrace_run_update_code(FTRACE_ENABLE_CALLS); mutex_unlock(&ftrace_start_lock); mutex_unlock(&ftrace_sysctl_lock); -- cgit v1.2.3 From 32464779a1b8c15e9aa9aa0306b2f735080df9d8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 18 Nov 2008 20:33:02 -0500 Subject: ftrace: fix dyn ftrace filter selection Impact: clean up and fix for dyn ftrace filter selection The previous logic of the dynamic ftrace selection of enabling or disabling functions was complex and incorrect. This patch simplifies the code and corrects the usage. This simplification also makes the code more robust. Here is the correct logic: Given a function that can be traced by dynamic ftrace: If the function is not to be traced, disable it if it was enabled. (this is if the function is in the set_ftrace_notrace file) (filter is on if there exists any functions in set_ftrace_filter file) If the filter is on, and we are enabling functions: If the function is in set_ftrace_filter, enable it if it is not already enabled. If the function is not in set_ftrace_filter, disable it if it is not already disabled. Otherwise, if the filter is off and we are enabling function tracing: Enable the function if it is not already enabled. Otherwise, if we are disabling function tracing: Disable the function if it is not already disabled. This code now sets or clears the ENABLED flag in the record, and at the end it will enable the function if the flag is set, or disable the function if the flag is cleared. The parameters for the function that does the above logic is also simplified. Instead of passing in confusing "new" and "old" where they might be swapped if the "enabled" flag is not set. The old logic even had one of the above always NULL and had to be filled in. The new logic simply passes in one parameter called "nop". A "call" is calculated in the code, and at the end of the logic, when we know we need to either disable or enable the function, we can then use the "nop" and "call" properly. This code is more robust than the previous version. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 108 +++++++++++++++++++++++--------------------------- 1 file changed, 50 insertions(+), 58 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5cbddb59e99f..fdaab04a0282 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -327,96 +327,89 @@ ftrace_record_ip(unsigned long ip) static int __ftrace_replace_code(struct dyn_ftrace *rec, - unsigned char *old, unsigned char *new, int enable) + unsigned char *nop, int enable) { unsigned long ip, fl; + unsigned char *call, *old, *new; ip = rec->ip; - if (ftrace_filtered && enable) { + /* + * If this record is not to be traced and + * it is not enabled then do nothing. + * + * If this record is not to be traced and + * it is enabled then disabled it. + * + */ + if (rec->flags & FTRACE_FL_NOTRACE) { + if (rec->flags & FTRACE_FL_ENABLED) + rec->flags &= ~FTRACE_FL_ENABLED; + else + return 0; + + } else if (ftrace_filtered && enable) { /* - * If filtering is on: - * - * If this record is set to be filtered and - * is enabled then do nothing. - * - * If this record is set to be filtered and - * it is not enabled, enable it. - * - * If this record is not set to be filtered - * and it is not enabled do nothing. - * - * If this record is set not to trace then - * do nothing. - * - * If this record is set not to trace and - * it is enabled then disable it. - * - * If this record is not set to be filtered and - * it is enabled, disable it. + * Filtering is on: */ - fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE | - FTRACE_FL_ENABLED); + fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); - if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || - (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) || - !fl || (fl == FTRACE_FL_NOTRACE)) + /* Record is filtered and enabled, do nothing */ + if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) return 0; - /* - * If it is enabled disable it, - * otherwise enable it! - */ - if (fl & FTRACE_FL_ENABLED) { - /* swap new and old */ - new = old; - old = ftrace_call_replace(ip, FTRACE_ADDR); + /* Record is not filtered and is not enabled do nothing */ + if (!fl) + return 0; + + /* Record is not filtered but enabled, disable it */ + if (fl == FTRACE_FL_ENABLED) rec->flags &= ~FTRACE_FL_ENABLED; - } else { - new = ftrace_call_replace(ip, FTRACE_ADDR); + else + /* Otherwise record is filtered but not enabled, enable it */ rec->flags |= FTRACE_FL_ENABLED; - } } else { + /* Disable or not filtered */ if (enable) { - /* - * If this record is set not to trace and is - * not enabled, do nothing. - */ - fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); - if (fl == FTRACE_FL_NOTRACE) - return 0; - - new = ftrace_call_replace(ip, FTRACE_ADDR); - } else - old = ftrace_call_replace(ip, FTRACE_ADDR); - - if (enable) { + /* if record is enabled, do nothing */ if (rec->flags & FTRACE_FL_ENABLED) return 0; + rec->flags |= FTRACE_FL_ENABLED; + } else { + + /* if record is not enabled do nothing */ if (!(rec->flags & FTRACE_FL_ENABLED)) return 0; + rec->flags &= ~FTRACE_FL_ENABLED; } } + call = ftrace_call_replace(ip, FTRACE_ADDR); + + if (rec->flags & FTRACE_FL_ENABLED) { + old = nop; + new = call; + } else { + old = call; + new = nop; + } + return ftrace_modify_code(ip, old, new); } static void ftrace_replace_code(int enable) { int i, failed; - unsigned char *new = NULL, *old = NULL; + unsigned char *nop = NULL; struct dyn_ftrace *rec; struct ftrace_page *pg; - if (enable) - old = ftrace_nop_replace(); - else - new = ftrace_nop_replace(); + nop = ftrace_nop_replace(); for (pg = ftrace_pages_start; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { @@ -434,7 +427,7 @@ static void ftrace_replace_code(int enable) unfreeze_record(rec); } - failed = __ftrace_replace_code(rec, old, new, enable); + failed = __ftrace_replace_code(rec, nop, enable); if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { rec->flags |= FTRACE_FL_FAILED; if ((system_state == SYSTEM_BOOTING) || @@ -538,8 +531,7 @@ static void ftrace_startup(void) mutex_lock(&ftrace_start_lock); ftrace_start++; - if (ftrace_start == 1) - command |= FTRACE_ENABLE_CALLS; + command |= FTRACE_ENABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; -- cgit v1.2.3 From de11defebf00007677fb7ee91d9b089b78786fbb Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 19 Nov 2008 15:36:14 -0800 Subject: reintroduce accept4 Introduce a new accept4() system call. The addition of this system call matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added new system calls that differed from analogous traditional system calls in adding a flags argument that can be used to access additional functionality. The accept4() system call is exactly the same as accept(), except that it adds a flags bit-mask argument. Two flags are initially implemented. (Most of the new system calls in 2.6.27 also had both of these flags.) SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled for the new file descriptor returned by accept4(). This is a useful security feature to avoid leaking information in a multithreaded program where one thread is doing an accept() at the same time as another thread is doing a fork() plus exec(). More details here: http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling", Ulrich Drepper). The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag to be enabled on the new open file description created by accept4(). (This flag is merely a convenience, saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result. Here's a test program. Works on x86-32. Should work on x86-64, but I (mtk) don't have a system to hand to test with. It tests accept4() with each of the four possible combinations of SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies that the appropriate flags are set on the file descriptor/open file description returned by accept4(). I tested Ulrich's patch in this thread by applying against 2.6.28-rc2, and it passes according to my test program. /* test_accept4.c Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk Licensed under the GNU GPLv2 or later. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #define PORT_NUM 33333 #define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0) /**********************************************************************/ /* The following is what we need until glibc gets a wrapper for accept4() */ /* Flags for socket(), socketpair(), accept4() */ #ifndef SOCK_CLOEXEC #define SOCK_CLOEXEC O_CLOEXEC #endif #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif #ifdef __x86_64__ #define SYS_accept4 288 #elif __i386__ #define USE_SOCKETCALL 1 #define SYS_ACCEPT4 18 #else #error "Sorry -- don't know the syscall # on this architecture" #endif static int accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags) { printf("Calling accept4(): flags = %x", flags); if (flags != 0) { printf(" ("); if (flags & SOCK_CLOEXEC) printf("SOCK_CLOEXEC"); if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK)) printf(" "); if (flags & SOCK_NONBLOCK) printf("SOCK_NONBLOCK"); printf(")"); } printf("\n"); #if USE_SOCKETCALL long args[6]; args[0] = fd; args[1] = (long) sockaddr; args[2] = (long) addrlen; args[3] = flags; return syscall(SYS_socketcall, SYS_ACCEPT4, args); #else return syscall(SYS_accept4, fd, sockaddr, addrlen, flags); #endif } /**********************************************************************/ static int do_test(int lfd, struct sockaddr_in *conn_addr, int closeonexec_flag, int nonblock_flag) { int connfd, acceptfd; int fdf, flf, fdf_pass, flf_pass; struct sockaddr_in claddr; socklen_t addrlen; printf("=======================================\n"); connfd = socket(AF_INET, SOCK_STREAM, 0); if (connfd == -1) die("socket"); if (connect(connfd, (struct sockaddr *) conn_addr, sizeof(struct sockaddr_in)) == -1) die("connect"); addrlen = sizeof(struct sockaddr_in); acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen, closeonexec_flag | nonblock_flag); if (acceptfd == -1) { perror("accept4()"); close(connfd); return 0; } fdf = fcntl(acceptfd, F_GETFD); if (fdf == -1) die("fcntl:F_GETFD"); fdf_pass = ((fdf & FD_CLOEXEC) != 0) == ((closeonexec_flag & SOCK_CLOEXEC) != 0); printf("Close-on-exec flag is %sset (%s); ", (fdf & FD_CLOEXEC) ? "" : "not ", fdf_pass ? "OK" : "failed"); flf = fcntl(acceptfd, F_GETFL); if (flf == -1) die("fcntl:F_GETFD"); flf_pass = ((flf & O_NONBLOCK) != 0) == ((nonblock_flag & SOCK_NONBLOCK) !=0); printf("nonblock flag is %sset (%s)\n", (flf & O_NONBLOCK) ? "" : "not ", flf_pass ? "OK" : "failed"); close(acceptfd); close(connfd); printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL"); return fdf_pass && flf_pass; } static int create_listening_socket(int port_num) { struct sockaddr_in svaddr; int lfd; int optval; memset(&svaddr, 0, sizeof(struct sockaddr_in)); svaddr.sin_family = AF_INET; svaddr.sin_addr.s_addr = htonl(INADDR_ANY); svaddr.sin_port = htons(port_num); lfd = socket(AF_INET, SOCK_STREAM, 0); if (lfd == -1) die("socket"); optval = 1; if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) == -1) die("setsockopt"); if (bind(lfd, (struct sockaddr *) &svaddr, sizeof(struct sockaddr_in)) == -1) die("bind"); if (listen(lfd, 5) == -1) die("listen"); return lfd; } int main(int argc, char *argv[]) { struct sockaddr_in conn_addr; int lfd; int port_num; int passed; passed = 1; port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM; memset(&conn_addr, 0, sizeof(struct sockaddr_in)); conn_addr.sin_family = AF_INET; conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); conn_addr.sin_port = htons(port_num); lfd = create_listening_socket(port_num); if (!do_test(lfd, &conn_addr, 0, 0)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0)) passed = 0; if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK)) passed = 0; close(lfd); exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); } [mtk.manpages@gmail.com: rewrote changelog, updated test program] Signed-off-by: Ulrich Drepper Tested-by: Michael Kerrisk Acked-by: Michael Kerrisk Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys_ni.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index a77b27b11b04..e14a23281707 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -31,7 +31,7 @@ cond_syscall(sys_socketpair); cond_syscall(sys_bind); cond_syscall(sys_listen); cond_syscall(sys_accept); -cond_syscall(sys_paccept); +cond_syscall(sys_accept4); cond_syscall(sys_connect); cond_syscall(sys_getsockname); cond_syscall(sys_getpeername); -- cgit v1.2.3 From f481891fdc49d3d1b8a9674a1825d183069a805f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 19 Nov 2008 15:36:30 -0800 Subject: cpuset: update top cpuset's mems after adding a node After adding a node into the machine, top cpuset's mems isn't updated. By reviewing the code, we found that the update function cpuset_track_online_nodes() was invoked after node_states[N_ONLINE] changes. It is wrong because N_ONLINE just means node has pgdat, and if node has/added memory, we use N_HIGH_MEMORY. So, We should invoke the update function after node_states[N_HIGH_MEMORY] changes, just like its commit says. This patch fixes it. And we use notifier of memory hotplug instead of direct calling of cpuset_track_online_nodes(). Signed-off-by: Miao Xie Acked-by: Yasunori Goto Cc: David Rientjes Cc: Paul Menage Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 81fc6791a296..da7ff6137f37 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -2015,12 +2016,23 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, * Call this routine anytime after node_states[N_HIGH_MEMORY] changes. * See also the previous routine cpuset_track_online_cpus(). */ -void cpuset_track_online_nodes(void) +static int cpuset_track_online_nodes(struct notifier_block *self, + unsigned long action, void *arg) { cgroup_lock(); - top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; - scan_for_empty_cpusets(&top_cpuset); + switch (action) { + case MEM_ONLINE: + top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; + break; + case MEM_OFFLINE: + top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; + scan_for_empty_cpusets(&top_cpuset); + break; + default: + break; + } cgroup_unlock(); + return NOTIFY_OK; } #endif @@ -2036,6 +2048,7 @@ void __init cpuset_init_smp(void) top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; hotcpu_notifier(cpuset_track_online_cpus, 0); + hotplug_memory_notifier(cpuset_track_online_nodes, 10); } /** -- cgit v1.2.3 From 3fa59dfbc3b223f02c26593be69ce6fc9a940405 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 19 Nov 2008 15:36:34 -0800 Subject: cgroup: fix potential deadlock in pre_destroy As Balbir pointed out, memcg's pre_destroy handler has potential deadlock. It has following lock sequence. cgroup_mutex (cgroup_rmdir) -> pre_destroy -> mem_cgroup_pre_destroy-> force_empty -> cpu_hotplug.lock. (lru_add_drain_all-> schedule_work-> get_online_cpus) But, cpuset has following. cpu_hotplug.lock (call notifier) -> cgroup_mutex. (within notifier) Then, this lock sequence should be fixed. Considering how pre_destroy works, it's not necessary to holding cgroup_mutex() while calling it. As a side effect, we don't have to wait at this mutex while memcg's force_empty works.(it can be long when there are tons of pages.) Signed-off-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Cc: Li Zefan Cc: Paul Menage Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 358e77564e6f..1a06be61dcd0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2472,10 +2472,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) mutex_unlock(&cgroup_mutex); return -EBUSY; } - - parent = cgrp->parent; - root = cgrp->root; - sb = root->sb; + mutex_unlock(&cgroup_mutex); /* * Call pre_destroy handlers of subsys. Notify subsystems @@ -2483,7 +2480,14 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) */ cgroup_call_pre_destroy(cgrp); - if (cgroup_has_css_refs(cgrp)) { + mutex_lock(&cgroup_mutex); + parent = cgrp->parent; + root = cgrp->root; + sb = root->sb; + + if (atomic_read(&cgrp->count) + || !list_empty(&cgrp->children) + || cgroup_has_css_refs(cgrp)) { mutex_unlock(&cgroup_mutex); return -EBUSY; } -- cgit v1.2.3 From 966c8c12dc9e77f931e2281ba25d2f0244b06949 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 19 Nov 2008 15:36:36 -0800 Subject: sprint_symbol(): use less stack sprint_symbol(), itself used when dumping stacks, has been wasting 128 bytes of stack: lookup the symbol directly into the buffer supplied by the caller, instead of using a locally declared namebuf. I believe the name != buffer strcpy() is obsolete: the design here dates from when module symbol lookup pointed into a supposedly const but sadly volatile table; nowadays it copies, but an uncalled strcpy() looks better here than the risk of a recursive BUG_ON(). Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kallsyms.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 5072cf1685a2..7b8b0f21a5b1 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -304,17 +304,24 @@ int sprint_symbol(char *buffer, unsigned long address) char *modname; const char *name; unsigned long offset, size; - char namebuf[KSYM_NAME_LEN]; + int len; - name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); + name = kallsyms_lookup(address, &size, &offset, &modname, buffer); if (!name) return sprintf(buffer, "0x%lx", address); + if (name != buffer) + strcpy(buffer, name); + len = strlen(buffer); + buffer += len; + if (modname) - return sprintf(buffer, "%s+%#lx/%#lx [%s]", name, offset, - size, modname); + len += sprintf(buffer, "+%#lx/%#lx [%s]", + offset, size, modname); else - return sprintf(buffer, "%s+%#lx/%#lx", name, offset, size); + len += sprintf(buffer, "+%#lx/%#lx", offset, size); + + return len; } /* Look up a kernel symbol and print it to the kernel messages. */ -- cgit v1.2.3 From 33d283bef23132c48195eafc21449f8ba88fce6b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Nov 2008 15:36:48 -0800 Subject: cgroups: fix a serious bug in cgroupstats Try this, and you'll get oops immediately: # cd Documentation/accounting/ # gcc -o getdelays getdelays.c # mount -t cgroup -o debug xxx /mnt # ./getdelays -C /mnt/tasks Because a normal file's dentry->d_fsdata is a pointer to struct cftype, not struct cgroup. After the patch, it returns EINVAL if we try to get cgroupstats from a normal file. Cc: Balbir Singh Signed-off-by: Li Zefan Acked-by: Paul Menage Cc: [2.6.25.x, 2.6.26.x, 2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1a06be61dcd0..fe00b3b983a8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2039,10 +2039,13 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) struct cgroup *cgrp; struct cgroup_iter it; struct task_struct *tsk; + /* - * Validate dentry by checking the superblock operations + * Validate dentry by checking the superblock operations, + * and make sure it's a directory. */ - if (dentry->d_sb->s_op != &cgroup_ops) + if (dentry->d_sb->s_op != &cgroup_ops || + !S_ISDIR(dentry->d_inode->i_mode)) goto err; ret = 0; -- cgit v1.2.3