From a8f072c1d624a627b67f2ace2f0c25d856ef4e54 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Jun 2011 11:13:59 +0200 Subject: job control: rename signal->group_stop and flags to jobctl and update them signal->group_stop currently hosts mostly group stop related flags; however, it's gonna be used for wider purposes and the GROUP_STOP_ flag prefix becomes confusing. Rename signal->group_stop to signal->jobctl and rename all GROUP_STOP_* flags to JOBCTL_*. Bit position macros JOBCTL_*_BIT are defined and JOBCTL_* flags are defined in terms of them to allow using bitops later. While at it, reassign JOBCTL_TRAPPING to bit 22 to better accomodate future additions. This doesn't cause any functional change. -v2: JOBCTL_*_BIT macros added as suggested by Linus. Signed-off-by: Tejun Heo Cc: Linus Torvalds Signed-off-by: Oleg Nesterov --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index ea5f748906a8..8986bb0f9dc2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1772,7 +1772,7 @@ static int zap_process(struct task_struct *start, int exit_code) t = start; do { - task_clear_group_stop_pending(t); + task_clear_jobctl_stop_pending(t); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); -- cgit v1.2.3 From 3759a0d94c18764247b66511d1038f2b93aa95de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Jun 2011 11:14:00 +0200 Subject: job control: introduce JOBCTL_PENDING_MASK and task_clear_jobctl_pending() This patch introduces JOBCTL_PENDING_MASK and replaces task_clear_jobctl_stop_pending() with task_clear_jobctl_pending() which takes an extra @mask argument. JOBCTL_PENDING_MASK is currently equal to JOBCTL_STOP_PENDING but future patches will add more bits. recalc_sigpending_tsk() is updated to use JOBCTL_PENDING_MASK instead. task_clear_jobctl_pending() takes @mask which in subset of JOBCTL_PENDING_MASK and clears the relevant jobctl bits. If JOBCTL_STOP_PENDING is set, other STOP bits are cleared together. All task_clear_jobctl_stop_pending() users are updated to call task_clear_jobctl_pending() with JOBCTL_STOP_PENDING which is functionally identical to task_clear_jobctl_stop_pending(). This patch doesn't cause any functional change. Signed-off-by: Tejun Heo Signed-off-by: Oleg Nesterov --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 8986bb0f9dc2..4402105287cb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1772,7 +1772,7 @@ static int zap_process(struct task_struct *start, int exit_code) t = start; do { - task_clear_jobctl_stop_pending(t); + task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); -- cgit v1.2.3 From 6dfca32984237a8a011b5bf367e53341a265b2a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Jun 2011 11:14:00 +0200 Subject: job control: make task_clear_jobctl_pending() clear TRAPPING automatically JOBCTL_TRAPPING indicates that ptracer is waiting for tracee to (re)transit into TRACED. task_clear_jobctl_pending() must be called when either tracee enters TRACED or the transition is cancelled for some reason. The former is achieved by explicitly calling task_clear_jobctl_pending() in ptrace_stop() and the latter by calling it at the end of do_signal_stop(). Calling task_clear_jobctl_trapping() at the end of do_signal_stop() limits the scope TRAPPING can be used and is fragile in that seemingly unrelated changes to tracee's control flow can lead to stuck TRAPPING. We already have task_clear_jobctl_pending() calls on those cancelling events to clear JOBCTL_STOP_PENDING. Cancellations can be handled by making those call sites use JOBCTL_PENDING_MASK instead and updating task_clear_jobctl_pending() such that task_clear_jobctl_trapping() is called automatically if no stop/trap is pending. This patch makes the above changes and removes the fallback task_clear_jobctl_trapping() call from do_signal_stop(). Signed-off-by: Tejun Heo Signed-off-by: Oleg Nesterov --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 4402105287cb..a9f2b3631bdb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1772,7 +1772,7 @@ static int zap_process(struct task_struct *start, int exit_code) t = start; do { - task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); + task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); if (t != current && t->mm) { sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); -- cgit v1.2.3 From dac853ae89043f1b7752875300faf614de43c74b Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 9 Jun 2011 20:05:18 +0200 Subject: exec: delay address limit change until point of no return Unconditionally changing the address limit to USER_DS and not restoring it to its old value in the error path is wrong because it prevents us using kernel memory on repeated calls to this function. This, in fact, breaks the fallback of hard coded paths to the init program from being ever successful if the first candidate fails to load. With this patch applied switching to USER_DS is delayed until the point of no return is reached which makes it possible to have a multi-arch rootfs with one arch specific init binary for each of the (hard coded) probed paths. Since the address limit is already set to USER_DS when start_thread() will be invoked, this redundancy can be safely removed. Signed-off-by: Mathias Krause Cc: Al Viro Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/exec.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index ea5f748906a8..97e0d52d72fd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1093,6 +1093,7 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ + set_fs(USER_DS); current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); flush_thread(); current->personality &= ~bprm->per_clear; @@ -1357,10 +1358,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (retval) return retval; - /* kernel module loader fixup */ - /* so we don't try to load run modprobe in kernel space. */ - set_fs(USER_DS); - retval = audit_bprm(bprm); if (retval) return retval; -- cgit v1.2.3 From 7f81c8890c15a10f5220bebae3b6dfae4961962a Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 15 Jun 2011 15:08:11 -0700 Subject: fs/exec.c: use BUILD_BUG_ON for VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP Commit a8bef8ff6ea1 ("mm: migration: avoid race between shift_arg_pages() and rmap_walk() during migration by not migrating temporary stacks") introduced a BUG_ON() to ensure that VM_STACK_FLAGS and VM_STACK_INCOMPLETE_SETUP do not overlap. The check is a compile time one, so BUILD_BUG_ON is more appropriate. Signed-off-by: Michal Hocko Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 97e0d52d72fd..b54f74f3cd80 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -277,7 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ - BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); + BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; -- cgit v1.2.3 From 13fca640bb8ab611a50e0ba120b186faa2994d6c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 15 Jun 2011 21:53:52 -0700 Subject: Revert "fs/exec.c: use BUILD_BUG_ON for VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP" This reverts commit 7f81c8890c15a10f5220bebae3b6dfae4961962a. It turns out that it's not actually a build-time check on x86-64 UML, which does some seriously crazy stuff with VM_STACK_FLAGS. The VM_STACK_FLAGS define depends on the arch-supplied VM_STACK_DEFAULT_FLAGS value, and on x86-64 UML we have arch/um/sys-x86_64/shared/sysdep/vm-flags.h: #define VM_STACK_DEFAULT_FLAGS \ (test_thread_flag(TIF_IA32) ? vm_stack_flags32 : vm_stack_flags) #define VM_STACK_DEFAULT_FLAGS vm_stack_flags (yes, seriously: two different #define's for that thing, with the first one being inside an "#ifdef TIF_IA32") It's possible that it is UML that should just be fixed in this area, but for now let's just undo the (very small) optimization. Reported-by: Randy Dunlap Acked-by: Andrew Morton Cc: Michal Hocko Cc: Richard Weinberger Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index b54f74f3cd80..97e0d52d72fd 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -277,7 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ - BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); + BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; -- cgit v1.2.3 From 879669961b11e7f40b518784863a259f735a72bf Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 17 Jun 2011 11:25:59 +0100 Subject: KEYS/DNS: Fix ____call_usermodehelper() to not lose the session keyring ____call_usermodehelper() now erases any credentials set by the subprocess_inf::init() function. The problem is that commit 17f60a7da150 ("capabilites: allow the application of capability limits to usermode helpers") creates and commits new credentials with prepare_kernel_cred() after the call to the init() function. This wipes all keyrings after umh_keys_init() is called. The best way to deal with this is to put the init() call just prior to the commit_creds() call, and pass the cred pointer to init(). That means that umh_keys_init() and suchlike can modify the credentials _before_ they are published and potentially in use by the rest of the system. This prevents request_key() from working as it is prevented from passing the session keyring it set up with the authorisation token to /sbin/request-key, and so the latter can't assume the authority to instantiate the key. This causes the in-kernel DNS resolver to fail with ENOKEY unconditionally. Signed-off-by: David Howells Acked-by: Eric Paris Tested-by: Jeff Layton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 97e0d52d72fd..6075a1e727ae 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1996,7 +1996,7 @@ static void wait_for_dump_helpers(struct file *file) * is a special value that we use to trap recursive * core dumps */ -static int umh_pipe_setup(struct subprocess_info *info) +static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { struct file *rp, *wp; struct fdtable *fdt; -- cgit v1.2.3 From a288eecce5253cc1565d400a52b9b476a157e040 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2011 16:50:37 +0200 Subject: ptrace: kill trivial tracehooks At this point, tracehooks aren't useful to mainline kernel and mostly just add an extra layer of obfuscation. Although they have comments, without actual in-kernel users, it is difficult to tell what are their assumptions and they're actually trying to achieve. To mainline kernel, they just aren't worth keeping around. This patch kills the following trivial tracehooks. * Ones testing whether task is ptraced. Replace with ->ptrace test. tracehook_expect_breakpoints() tracehook_consider_ignored_signal() tracehook_consider_fatal_signal() * ptrace_event() wrappers. Call directly. tracehook_report_exec() tracehook_report_exit() tracehook_report_vfork_done() * ptrace_release_task() wrapper. Call directly. tracehook_finish_release_task() * noop tracehook_prepare_release_task() tracehook_report_death() This doesn't introduce any behavior change. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Martin Schwidefsky Signed-off-by: Oleg Nesterov --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index a9f2b3631bdb..b37030d0a50b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1384,7 +1384,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) - tracehook_report_exec(fmt, bprm, regs); + ptrace_event(PTRACE_EVENT_EXEC, 0); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) -- cgit v1.2.3 From 4b9d33e6d83cc05a8005a8f9a8b9677fa0f53626 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 17 Jun 2011 16:50:38 +0200 Subject: ptrace: kill clone/exec tracehooks At this point, tracehooks aren't useful to mainline kernel and mostly just add an extra layer of obfuscation. Although they have comments, without actual in-kernel users, it is difficult to tell what are their assumptions and they're actually trying to achieve. To mainline kernel, they just aren't worth keeping around. This patch kills the following clone and exec related tracehooks. tracehook_prepare_clone() tracehook_finish_clone() tracehook_report_clone() tracehook_report_clone_complete() tracehook_unsafe_exec() The changes are mostly trivial - logic is moved to the caller and comments are merged and adjusted appropriately. The only exception is in check_unsafe_exec() where LSM_UNSAFE_PTRACE* are OR'd to bprm->unsafe instead of setting it, which produces the same result as the field is always zero on entry. It also tests p->ptrace instead of (p->ptrace & PT_PTRACED) for consistency, which also gives the same result. This doesn't introduce any behavior change. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Oleg Nesterov --- fs/exec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index b37030d0a50b..8dca45b0dae8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1224,7 +1224,12 @@ int check_unsafe_exec(struct linux_binprm *bprm) unsigned n_fs; int res = 0; - bprm->unsafe = tracehook_unsafe_exec(p); + if (p->ptrace) { + if (p->ptrace & PT_PTRACE_CAP) + bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP; + else + bprm->unsafe |= LSM_UNSAFE_PTRACE; + } n_fs = 1; spin_lock(&p->fs->lock); -- cgit v1.2.3 From 087806b1281563e4ae7a5bce3155f894af5f4118 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 22 Jun 2011 23:10:26 +0200 Subject: redefine thread_group_leader() as exit_signal >= 0 Change de_thread() to set old_leader->exit_signal = -1. This is good for the consistency, it is no longer the leader and all sub-threads have exit_signal = -1 set by copy_process(CLONE_THREAD). And this allows us to micro-optimize thread_group_leader(), it can simply check exit_signal >= 0. This also makes sense because we should move ->group_leader from task_struct to signal_struct. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo --- fs/exec.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 8dca45b0dae8..c3d517bfdd27 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -963,6 +963,7 @@ static int de_thread(struct task_struct *tsk) leader->group_leader = tsk; tsk->exit_signal = SIGCHLD; + leader->exit_signal = -1; BUG_ON(leader->exit_state != EXIT_ZOMBIE); leader->exit_state = EXIT_DEAD; -- cgit v1.2.3 From bb188d7e64deb0e9cf13a99f44ae0065de5352d6 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 29 Jun 2011 04:13:39 +0200 Subject: ptrace: make former thread ID available via PTRACE_GETEVENTMSG after PTRACE_EVENT_EXEC stop When multithreaded program execs under ptrace, all traced threads report WIFEXITED status, except for thread group leader and the thread which execs. Unless tracer tracks thread group relationship between tracees, which is a nontrivial task, it will not detect that execed thread no longer exists. This patch allows tracer to figure out which thread performed this exec, by requesting PTRACE_GETEVENTMSG in PTRACE_EVENT_EXEC stop. Another, samller problem which is solved by this patch is that tracer now can figure out which of the several concurrent execs in multithreaded program succeeded. Signed-off-by: Denys Vlasenko Signed-off-by: Oleg Nesterov --- fs/exec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index c3d517bfdd27..b08367abf30e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1358,6 +1358,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) unsigned int depth = bprm->recursion_depth; int try,retval; struct linux_binfmt *fmt; + pid_t old_pid; retval = security_bprm_check(bprm); if (retval) @@ -1371,6 +1372,11 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (retval) return retval; + /* Need to fetch pid before load_binary changes it */ + rcu_read_lock(); + old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); + rcu_read_unlock(); + retval = -ENOENT; for (try=0; try<2; try++) { read_lock(&binfmt_lock); @@ -1390,7 +1396,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) - ptrace_event(PTRACE_EVENT_EXEC, 0); + ptrace_event(PTRACE_EVENT_EXEC, + old_pid); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) -- cgit v1.2.3 From 1b5d783c94c328d406e801566f161adcfb018dda Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 19 Jun 2011 12:49:47 -0400 Subject: consolidate BINPRM_FLAGS_ENFORCE_NONDUMP handling new helper: would_dump(bprm, file). Checks if we are allowed to read the file and if we are not - sets ENFORCE_NODUMP. Exported, used in places that previously open-coded the same logics. Signed-off-by: Al Viro --- fs/exec.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 6075a1e727ae..f9f12ad299af 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1105,6 +1105,13 @@ out: } EXPORT_SYMBOL(flush_old_exec); +void would_dump(struct linux_binprm *bprm, struct file *file) +{ + if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0) + bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; +} +EXPORT_SYMBOL(would_dump); + void setup_new_exec(struct linux_binprm * bprm) { int i, ch; @@ -1144,9 +1151,10 @@ void setup_new_exec(struct linux_binprm * bprm) if (bprm->cred->uid != current_euid() || bprm->cred->gid != current_egid()) { current->pdeath_signal = 0; - } else if (file_permission(bprm->file, MAY_READ) || - bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) { - set_dumpable(current->mm, suid_dumpable); + } else { + would_dump(bprm, bprm->file); + if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) + set_dumpable(current->mm, suid_dumpable); } /* -- cgit v1.2.3 From eac1b5e57d7abc836e78fd3fbcf77dbeed01edc9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 21 Jul 2011 20:00:43 +0200 Subject: ptrace: do_wait(traced_leader_killed_by_mt_exec) can block forever Test-case: void *tfunc(void *arg) { execvp("true", NULL); return NULL; } int main(void) { int pid; if (fork()) { pthread_t t; kill(getpid(), SIGSTOP); pthread_create(&t, NULL, tfunc, NULL); for (;;) pause(); } pid = getppid(); assert(ptrace(PTRACE_ATTACH, pid, 0,0) == 0); while (wait(NULL) > 0) ptrace(PTRACE_CONT, pid, 0,0); return 0; } It is racy, exit_notify() does __wake_up_parent() too. But in the likely case it triggers the problem: de_thread() does release_task() and the old leader goes away without the notification, the tracer sleeps in do_wait() without children/tracees. Change de_thread() to do __wake_up_parent(traced_leader->parent). Since it is already EXIT_DEAD we can do this without ptrace_unlink(), EXIT_DEAD threads do not exist from do_wait's pov. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- fs/exec.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index b08367abf30e..d219541db06c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -967,6 +967,14 @@ static int de_thread(struct task_struct *tsk) BUG_ON(leader->exit_state != EXIT_ZOMBIE); leader->exit_state = EXIT_DEAD; + + /* + * We are going to release_task()->ptrace_unlink() silently, + * the tracer can sleep in do_wait(). EXIT_DEAD guarantees + * the tracer wont't block again waiting for this thread. + */ + if (unlikely(leader->ptrace)) + __wake_up_parent(leader, leader->parent); write_unlock_irq(&tasklist_lock); release_task(leader); -- cgit v1.2.3 From 3141c8b165644774eb0e83d8330fbe47e45b37bf Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 26 Jul 2011 16:08:32 -0700 Subject: coredump: use task comm instead of (unknown) If we don't know the file corresponding to the binary (i.e. exe_file is unknown), use "task->comm (path unknown)" instead of simple "(unknown)" as suggested by ak. The fallback is the same as %e except it will append "(path unknown)". Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: Al Viro Cc: Andi Kleen Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 842d5700c155..a682624de572 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1657,7 +1657,7 @@ static int cn_print_exe_file(struct core_name *cn) exe_file = get_mm_exe_file(current->mm); if (!exe_file) - return cn_printf(cn, "(unknown)"); + return cn_printf(cn, "%s (path unknown)", current->comm); pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); if (!pathbuf) { -- cgit v1.2.3 From 2c563731fee0f625924f72e854957bc77601e8b3 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 26 Jul 2011 16:08:33 -0700 Subject: coredump: escape / in hostname and comm Change every occurence of / in comm and hostname to !. If the process changes its name to contain /, the core is not dumped (if the directory tree doesn't exist like that). The same with hostname being something like myhost/3. Fix this behaviour by using the escape loop used in %E. (We extract it to a separate function.) Now both with comm == myprocess/1 and hostname == myhost/1, the core is dumped like (kernel.core_pattern='core.%p.%e.%h): core.2349.myprocess!1.myhost!1 Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: Al Viro Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index a682624de572..27d487f913d3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1649,15 +1649,26 @@ expand_fail: return ret; } +static void cn_escape(char *str) +{ + for (; *str; str++) + if (*str == '/') + *str = '!'; +} + static int cn_print_exe_file(struct core_name *cn) { struct file *exe_file; - char *pathbuf, *path, *p; + char *pathbuf, *path; int ret; exe_file = get_mm_exe_file(current->mm); - if (!exe_file) - return cn_printf(cn, "%s (path unknown)", current->comm); + if (!exe_file) { + char *commstart = cn->corename + cn->used; + ret = cn_printf(cn, "%s (path unknown)", current->comm); + cn_escape(commstart); + return ret; + } pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); if (!pathbuf) { @@ -1671,9 +1682,7 @@ static int cn_print_exe_file(struct core_name *cn) goto free_buf; } - for (p = path; *p; p++) - if (*p == '/') - *p = '!'; + cn_escape(path); ret = cn_printf(cn, "%s", path); @@ -1745,16 +1754,22 @@ static int format_corename(struct core_name *cn, long signr) break; } /* hostname */ - case 'h': + case 'h': { + char *namestart = cn->corename + cn->used; down_read(&uts_sem); err = cn_printf(cn, "%s", utsname()->nodename); up_read(&uts_sem); + cn_escape(namestart); break; + } /* executable */ - case 'e': + case 'e': { + char *commstart = cn->corename + cn->used; err = cn_printf(cn, "%s", current->comm); + cn_escape(commstart); break; + } case 'E': err = cn_print_exe_file(cn); break; -- cgit v1.2.3 From 99b64567486716d18b2156cad188d86478816e4f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 26 Jul 2011 16:08:34 -0700 Subject: do_coredump: fix the "ispipe" error check do_coredump() assumes that if format_corename() fails it should return -ENOMEM. This is not true, for example cn_print_exe_file() can propagate the error from d_path. Even if it was true, this is too fragile. Change the code to check "ispipe < 0". Signed-off-by: Oleg Nesterov Signed-off-by: Jiri Slaby Reviewed-by: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 27d487f913d3..f8fad7fc0e5f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2133,16 +2133,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) ispipe = format_corename(&cn, signr); - if (ispipe == -ENOMEM) { - printk(KERN_WARNING "format_corename failed\n"); - printk(KERN_WARNING "Aborting core\n"); - goto fail_corename; - } - if (ispipe) { int dump_count; char **helper_argv; + if (ispipe < 0) { + printk(KERN_WARNING "format_corename failed\n"); + printk(KERN_WARNING "Aborting core\n"); + goto fail_corename; + } + if (cprm.limit == 1) { /* * Normally core limits are irrelevant to pipes, since -- cgit v1.2.3 From aacb3d17a73f6447c04e4d769391238dcf85568d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 26 Jul 2011 16:08:40 -0700 Subject: fs/exec.c: use BUILD_BUG_ON for VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP Commit a8bef8ff6ea1 ("mm: migration: avoid race between shift_arg_pages() and rmap_walk() during migration by not migrating temporary stacks") introduced a BUG_ON() to ensure that VM_STACK_FLAGS and VM_STACK_INCOMPLETE_SETUP do not overlap. The check is a compile time one, so BUILD_BUG_ON is more appropriate. Signed-off-by: Michal Hocko Cc: Mel Gorman Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index f8fad7fc0e5f..01829a1cb766 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -277,7 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ - BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); + BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; -- cgit v1.2.3 From 912193521b719fbfc2f16776febf5232fe8ba261 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 26 Jul 2011 16:08:41 -0700 Subject: exec: do not call request_module() twice from search_binary_handler() Currently, search_binary_handler() tries to load binary loader module using request_module() if a loader for the requested program is not yet loaded. But second attempt of request_module() does not affect the result of search_binary_handler(). If request_module() triggered recursion, calling request_module() twice causes 2 to the power of MAX_KMOD_CONCURRENT (= 50) repetitions. It is not an infinite loop but is sufficient for users to consider as a hang up. Therefore, this patch changes not to call request_module() twice, making 1 to the power of MAX_KMOD_CONCURRENT repetitions in case of recursion. Signed-off-by: Tetsuo Handa Reported-by: Richard Weinberger Tested-by: Richard Weinberger Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 01829a1cb766..e6770a526f34 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1440,6 +1440,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) printable(bprm->buf[2]) && printable(bprm->buf[3])) break; /* -ENOEXEC */ + if (try) + break; /* -ENOEXEC */ request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); #endif } -- cgit v1.2.3 From b4edf8bd06916645b57df23a720b17cae4051c43 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 26 Jul 2011 16:08:42 -0700 Subject: exec: do not retry load_binary method if CONFIG_MODULES=n If CONFIG_MODULES=n, it makes no sense to retry the list of binary formats handler because the list will not be modified by request_module(). Signed-off-by: Tetsuo Handa Cc: Richard Weinberger Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index e6770a526f34..0e8e59939d09 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1430,9 +1430,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) } } read_unlock(&binfmt_lock); +#ifdef CONFIG_MODULES if (retval != -ENOEXEC || bprm->mm == NULL) { break; -#ifdef CONFIG_MODULES } else { #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) if (printable(bprm->buf[0]) && @@ -1443,8 +1443,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (try) break; /* -ENOEXEC */ request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); -#endif } +#else + break; +#endif } return retval; } -- cgit v1.2.3 From 32e107f71e4a993ac438f0049aa4019457911ffb Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 26 Jul 2011 16:08:43 -0700 Subject: fs/exec.c:acct_arg_size(): ptl is no longer needed for add_mm_counter() acct_arg_size() takes ->page_table_lock around add_mm_counter() if !SPLIT_RSS_COUNTING. This is not needed after commit 172703b08cd0 ("mm: delete non-atomic mm counter implementation"). Signed-off-by: Oleg Nesterov Reviewed-by: Matt Fleming Cc: Dave Hansen Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 0e8e59939d09..da80612a35f4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -181,14 +181,7 @@ static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) return; bprm->vma_pages = pages; - -#ifdef SPLIT_RSS_COUNTING - add_mm_counter(mm, MM_ANONPAGES, diff); -#else - spin_lock(&mm->page_table_lock); add_mm_counter(mm, MM_ANONPAGES, diff); - spin_unlock(&mm->page_table_lock); -#endif } static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, -- cgit v1.2.3