From 188a1eafa03aaa5e5fe6f53e637e704cd2c31c7c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Sep 2005 13:22:21 -0700 Subject: Make sure SIGKILL gets proper respect Bhavesh P. Davda noticed that SIGKILL wouldn't properly kill a process under just the right cicumstances: a stopped task that already had another signal queued would get the SIGKILL queued onto the shared queue, and there it would remain until SIGCONT. This simplifies the signal acceptance logic, and fixes the bug in the process. Losely based on an earlier patch by Bhavesh. Signed-off-by: Linus Torvalds --- kernel/signal.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index b92c3c9f8b9a..5a274705ba19 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -936,34 +936,31 @@ force_sig_specific(int sig, struct task_struct *t) * as soon as they're available, so putting the signal on the shared queue * will be equivalent to sending it to one such thread. */ -#define wants_signal(sig, p, mask) \ - (!sigismember(&(p)->blocked, sig) \ - && !((p)->state & mask) \ - && !((p)->flags & PF_EXITING) \ - && (task_curr(p) || !signal_pending(p))) - +static inline int wants_signal(int sig, struct task_struct *p) +{ + if (sigismember(&p->blocked, sig)) + return 0; + if (p->flags & PF_EXITING) + return 0; + if (sig == SIGKILL) + return 1; + if (p->state & (TASK_STOPPED | TASK_TRACED)) + return 0; + return task_curr(p) || !signal_pending(p); +} static void __group_complete_signal(int sig, struct task_struct *p) { - unsigned int mask; struct task_struct *t; - /* - * Don't bother traced and stopped tasks (but - * SIGKILL will punch through that). - */ - mask = TASK_STOPPED | TASK_TRACED; - if (sig == SIGKILL) - mask = 0; - /* * Now find a thread we can wake up to take the signal off the queue. * * If the main thread wants the signal, it gets first crack. * Probably the least surprising to the average bear. */ - if (wants_signal(sig, p, mask)) + if (wants_signal(sig, p)) t = p; else if (thread_group_empty(p)) /* @@ -981,7 +978,7 @@ __group_complete_signal(int sig, struct task_struct *p) t = p->signal->curr_target = p; BUG_ON(t->tgid != p->tgid); - while (!wants_signal(sig, t, mask)) { + while (!wants_signal(sig, t)) { t = next_thread(t); if (t == p->signal->curr_target) /* -- cgit v1.2.3 From 254b54771cc4c00002f796be65d2885191dca9dc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 27 Sep 2005 21:45:31 -0700 Subject: [PATCH] swsusp: remove wrong code from data_free The following patch removes some wrong code from the data_free() function in swsusp. This function could only be called if there's an error while writing the suspend image to swap, so it is not triggered easily. However, if triggered, it would probably corrupt some memory. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 1cc9ff25e479..8aef1b49150f 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -402,15 +402,14 @@ static int write_page(unsigned long addr, swp_entry_t * loc) static void data_free(void) { swp_entry_t entry; - int i; + struct pbe * p; - for (i = 0; i < nr_copy_pages; i++) { - entry = (pagedir_nosave + i)->swap_address; + for_each_pbe(p, pagedir_nosave) { + entry = p->swap_address; if (entry.val) swap_free(entry); else break; - (pagedir_nosave + i)->swap_address = (swp_entry_t){0}; } } -- cgit v1.2.3 From f2d613799af915da1fd78463ba8ec5086a0d6f92 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 27 Sep 2005 21:45:32 -0700 Subject: [PATCH] swsusp: prevent possible memory leak Prevent swsusp from leaking some memory in case of an error in read_pagedir(). It also prevents the BUG_ON() from triggering if there's an error while reading swap. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 8aef1b49150f..0dfb24948907 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -1437,9 +1437,9 @@ static int read_pagedir(struct pbe *pblist) } if (error) - free_page((unsigned long)pblist); - - BUG_ON(i != swsusp_info.pagedir_pages); + free_pagedir(pblist); + else + BUG_ON(i != swsusp_info.pagedir_pages); return error; } -- cgit v1.2.3 From f36462f078403c1859a7e58177b28e01b3a179e4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 27 Sep 2005 21:45:34 -0700 Subject: [PATCH] Ignore trailing whitespace on kernel parameters correctly Dave Jones says: ... if the modprobe.conf has trailing whitespace, modules fail to load with the following helpful message.. snd_intel8x0: Unknown parameter `' Previous version truncated last argument. Signed-off-by: Rusty Russell Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/params.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/params.c b/kernel/params.c index fbf173215fd2..1a8614bac5d5 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -80,8 +80,6 @@ static char *next_arg(char *args, char **param, char **val) int in_quote = 0, quoted = 0; char *next; - /* Chew any extra spaces */ - while (*args == ' ') args++; if (*args == '"') { args++; in_quote = 1; @@ -121,6 +119,10 @@ static char *next_arg(char *args, char **param, char **val) next = args + i + 1; } else next = args + i; + + /* Chew up trailing spaces. */ + while (*next == ' ') + next++; return next; } @@ -135,6 +137,10 @@ int parse_args(const char *name, DEBUGP("Parsing ARGS: %s\n", args); + /* Chew leading spaces */ + while (*args == ' ') + args++; + while (*args) { int ret; -- cgit v1.2.3 From 0f7347c20c410c300be0db4c132945fd02e54110 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 27 Sep 2005 21:45:43 -0700 Subject: [PATCH] swsusp: avoid problems if there are too many pages to save The following patch makes swsusp avoid problems during resume if there are too many pages to save on suspend. It adds a constant that allows us to verify if we are going to save too many pages and implements the check (this is done as early as we can tell that the check will trigger, which is in swsusp_alloc()). Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/power.h | 5 ++++- kernel/power/swsusp.c | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/power/power.h b/kernel/power/power.h index 9c9167d910dd..6748de23e83c 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -9,6 +9,9 @@ #define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1) #endif +#define MAX_PBES ((PAGE_SIZE - sizeof(struct new_utsname) \ + - 4 - 3*sizeof(unsigned long) - sizeof(int) \ + - sizeof(void *)) / sizeof(swp_entry_t)) struct swsusp_info { struct new_utsname uts; @@ -18,7 +21,7 @@ struct swsusp_info { unsigned long image_pages; unsigned long pagedir_pages; suspend_pagedir_t * suspend_pagedir; - swp_entry_t pagedir[768]; + swp_entry_t pagedir[MAX_PBES]; } __attribute__((aligned(PAGE_SIZE))); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 0dfb24948907..acf79ac1cb6d 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -931,6 +931,10 @@ static int swsusp_alloc(void) if (!enough_swap()) return -ENOSPC; + if (MAX_PBES < nr_copy_pages / PBES_PER_PAGE + + !!(nr_copy_pages % PBES_PER_PAGE)) + return -ENOSPC; + if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) { printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); return -ENOMEM; -- cgit v1.2.3 From 5134fc15b643dc36eb9aa77e4318b886844a9ac5 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 28 Sep 2005 06:42:24 -0700 Subject: [PATCH] cpuset read past eof memory leak fix Don't leak a page of memory if user reads a cpuset file past eof. Signed-off-by: KUROSAWA Takahiro Signed-off-by: Paul Jackson Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 79866bc6b3a1..6a6e87b2f2fd 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -969,7 +969,7 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, ssize_t retval = 0; char *s; char *start; - size_t n; + ssize_t n; if (!(page = (char *)__get_free_page(GFP_KERNEL))) return -ENOMEM; @@ -999,12 +999,13 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, *s++ = '\n'; *s = '\0'; - /* Do nothing if *ppos is at the eof or beyond the eof. */ - if (s - page <= *ppos) - return 0; - start = page + *ppos; n = s - start; + + /* Do nothing if *ppos is at the eof or beyond the eof. */ + if (n <= 0) + goto out; + retval = n - copy_to_user(buf, start, min(n, nbytes)); *ppos += retval; out: -- cgit v1.2.3 From 5acbc5cb507e6c381b70093b1081854708e82b16 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 29 Sep 2005 14:54:42 -0700 Subject: [PATCH] Fix task state testing properly in do_signal_stop() Any tests using < TASK_STOPPED or the like are left over from the time when the TASK_ZOMBIE and TASK_DEAD bits were in the same word, and it served to check for "stopped or dead". I think this one in do_signal_stop is the only such case. It has been buggy ever since exit_state was separated, and isn't testing the exit_state value. Signed-off-by: Roland McGrath Signed-off-by: Linus Torvalds --- kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 5a274705ba19..619b027e92b5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1763,7 +1763,8 @@ do_signal_stop(int signr) * stop is always done with the siglock held, * so this check has no races. */ - if (t->state < TASK_STOPPED) { + if (!t->exit_state && + !(t->state & (TASK_STOPPED|TASK_TRACED))) { stop_count++; signal_wake_up(t, 0); } -- cgit v1.2.3 From eacaa1f5aa4a41a48349f55abcd9258506943e76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 30 Sep 2005 03:26:43 +0100 Subject: [PATCH] cpuset crapectomy Switched cpuset_common_file_read() to simple_read_from_buffer(), killed a bunch of useless (and not quite correct - e.g. min(size_t,ssize_t)) code. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 6a6e87b2f2fd..45a5719a0104 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -968,8 +968,6 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, char *page; ssize_t retval = 0; char *s; - char *start; - ssize_t n; if (!(page = (char *)__get_free_page(GFP_KERNEL))) return -ENOMEM; @@ -999,15 +997,7 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, *s++ = '\n'; *s = '\0'; - start = page + *ppos; - n = s - start; - - /* Do nothing if *ppos is at the eof or beyond the eof. */ - if (n <= 0) - goto out; - - retval = n - copy_to_user(buf, start, min(n, nbytes)); - *ppos += retval; + retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page); out: free_page((unsigned long)page); return retval; -- cgit v1.2.3 From 14bf01bb0599c89fc7f426d20353b76e12555308 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 1 Oct 2005 11:04:18 -0700 Subject: Fix inequality comparison against "task->state" We should always use bitmask ops, rather than depend on some ordering of the different states. With the TASK_NONINTERACTIVE flag, the inequality doesn't really work. Oleg Nesterov argues (likely correctly) that this test is unnecessary in the first place. However, the minimal fix for now is to at least make it work in the presense of TASK_NONINTERACTIVE. Waiting for consensus from Roland & co on potential bigger cleanups. Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index ee6d8b8abef5..43077732619b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1203,7 +1203,7 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap, exit_code = p->exit_code; if (unlikely(!exit_code) || - unlikely(p->state > TASK_STOPPED)) + unlikely(p->state & TASK_TRACED)) goto bail_ref; return wait_noreap_copyout(p, pid, uid, why, (exit_code << 8) | 0x7f, -- cgit v1.2.3 From 788e05a67c343fa22f2ae1d3ca264e7f15c25eaf Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 7 Oct 2005 17:46:19 +0400 Subject: [PATCH] fix do_coredump() vs SIGSTOP race Let's suppose we have 2 threads in thread group: A - does coredump B - has pending SIGSTOP thread A thread B do_coredump: get_signal_to_deliver: lock(->sighand) ->signal->flags = SIGNAL_GROUP_EXIT unlock(->sighand) lock(->sighand) signr = dequeue_signal() ->signal->flags |= SIGNAL_STOP_DEQUEUED return SIGSTOP; do_signal_stop: unlock(->sighand) coredump_wait: zap_threads: lock(tasklist_lock) send SIGKILL to B // signal_wake_up() does nothing unlock(tasklist_lock) lock(tasklist_lock) lock(->sighand) re-check sig->flags & SIGNAL_STOP_DEQUEUED, yes set_current_state(TASK_STOPPED); finish_stop: schedule(); // ->state == TASK_STOPPED wait_for_completion(&startup_done) // waits for complete() from B, // ->state == TASK_UNINTERRUPTIBLE We can't wake up 'B' in any way: SIGCONT will be ignored because handle_stop_signal() sees ->signal->flags & SIGNAL_GROUP_EXIT. sys_kill(SIGKILL)->__group_complete_signal() will choose uninterruptible 'A', so it can't help. sys_tkill(B, SIGKILL) will be ignored by specific_send_sig_info() because B already has pending SIGKILL. This scenario is not possbile if 'A' does do_group_exit(), because it sets sig->flags = SIGNAL_GROUP_EXIT and delivers SIGKILL to subthreads atomically, holding both tasklist_lock and sighand->lock. That means that do_signal_stop() will notice !SIGNAL_STOP_DEQUEUED after re-locking ->sighand. And it is not possible to any other thread to re-add SIGNAL_STOP_DEQUEUED later, because dequeue_signal() can only return SIGKILL. I think it is better to change do_coredump() to do sigaddset(SIGKILL) and signal_wake_up() under sighand->lock, but this patch is much simpler. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index 619b027e92b5..c135f5aa2c2d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -578,7 +578,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * is to alert stop-signal processing code when another * processor has come along and cleared the flag. */ - tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; + if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) + tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; } if ( signr && ((info->si_code & __SI_MASK) == __SI_TIMER) && -- cgit v1.2.3 From dd0fc66fb33cd610bc1a5db8a5e232d34879b4d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Oct 2005 07:46:04 +0100 Subject: [PATCH] gfp flags annotations - part 1 - added typedef unsigned int __nocast gfp_t; - replaced __nocast uses for gfp flags with gfp_t - it gives exactly the same warnings as far as sparse is concerned, doesn't change generated code (from gcc point of view we replaced unsigned int with typedef) and documents what's going on far better. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- kernel/audit.c | 2 +- kernel/cpuset.c | 2 +- kernel/kfifo.c | 4 ++-- kernel/signal.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 83096b67510a..aefa73a8a586 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -560,7 +560,7 @@ static void audit_buffer_free(struct audit_buffer *ab) } static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx, - unsigned int __nocast gfp_mask, int type) + gfp_t gfp_mask, int type) { unsigned long flags; struct audit_buffer *ab = NULL; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 45a5719a0104..28176d083f7b 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1670,7 +1670,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * GFP_USER - only nodes in current tasks mems allowed ok. **/ -int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask) +int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { int node; /* node that zone z is on */ const struct cpuset *cs; /* current cpuset ancestors */ diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 179baafcdd96..64ab045c3d9d 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -36,7 +36,7 @@ * struct kfifo with kfree(). */ struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size, - unsigned int __nocast gfp_mask, spinlock_t *lock) + gfp_t gfp_mask, spinlock_t *lock) { struct kfifo *fifo; @@ -64,7 +64,7 @@ EXPORT_SYMBOL(kfifo_init); * * The size will be rounded-up to a power of 2. */ -struct kfifo *kfifo_alloc(unsigned int size, unsigned int __nocast gfp_mask, spinlock_t *lock) +struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock) { unsigned char *buffer; struct kfifo *ret; diff --git a/kernel/signal.c b/kernel/signal.c index c135f5aa2c2d..cba193ceda0d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -262,7 +262,7 @@ next_signal(struct sigpending *pending, sigset_t *mask) return sig; } -static struct sigqueue *__sigqueue_alloc(struct task_struct *t, unsigned int __nocast flags, +static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) { struct sigqueue *q = NULL; -- cgit v1.2.3 From 3dd083255ddcfa87751fa8e32f61a9547a15a541 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 9 Oct 2005 21:19:40 +0200 Subject: [PATCH] x86_64: Set up safe page tables during resume The following patch makes swsusp avoid the possible temporary corruption of page translation tables during resume on x86-64. This is achieved by creating a copy of the relevant page tables that will not be modified by swsusp and can be safely used by it on resume. The problem is that during resume on x86-64 swsusp may temporarily corrupt the page tables used for the direct mapping of RAM. If that happens, a page fault occurs and cannot be handled properly, which leads to the solid hang of the affected system. This leads to the loss of the system's state from before suspend and may result in the loss of data or the corruption of filesystems, so it is a serious issue. Also, it appears to happen quite often (for me, as often as 50% of the time). The problem is related to the fact that (at least) one of the PMD entries used in the direct memory mapping (starting at PAGE_OFFSET) points to a page table the physical address of which is much greater than the physical address of the PMD entry itself. Moreover, unfortunately, the physical address of the page table before suspend (i.e. the one stored in the suspend image) happens to be different to the physical address of the corresponding page table used during resume (i.e. the one that is valid right before swsusp_arch_resume() in arch/x86_64/kernel/suspend_asm.S is executed). Thus while the image is restored, the "offending" PMD entry gets overwritten, so it does not point to the right physical address any more (i.e. there's no page table at the address pointed to by it, because it points to the address the page table has been at during suspend). Consequently, if the PMD entry is used later on, and it _is_ used in the process of copying the image pages, a page fault occurs, but it cannot be handled in the normal way and the system hangs. In principle we can call create_resume_mapping() from swsusp_arch_resume() (ie. from suspend_asm.S), but then the memory allocations in create_resume_mapping(), resume_pud_mapping(), and resume_pmd_mapping() must be made carefully so that we use _only_ NosaveFree pages in them (the other pages are overwritten by the loop in swsusp_arch_resume()). Additionally, we are in atomic context at that time, so we cannot use GFP_KERNEL. Moreover, if one of the allocations fails, we should free all of the allocated pages, so we need to trace them somehow. All of this is done in the appended patch, except that the functions populating the page tables are located in arch/x86_64/kernel/suspend.c rather than in init.c. It may be done in a more elegan way in the future, with the help of some swsusp patches that are in the works now. [AK: move some externs into headers, renamed a function] Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index acf79ac1cb6d..2d5c45676442 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -1095,7 +1095,7 @@ static inline void eat_page(void *page) *eaten_memory = c; } -static unsigned long get_usable_page(unsigned gfp_mask) +unsigned long get_usable_page(unsigned gfp_mask) { unsigned long m; @@ -1109,7 +1109,7 @@ static unsigned long get_usable_page(unsigned gfp_mask) return m; } -static void free_eaten_memory(void) +void free_eaten_memory(void) { unsigned long m; void **c; @@ -1481,11 +1481,12 @@ static int read_suspend_image(void) /* Allocate memory for the image and read the data from swap */ error = check_pagedir(pagedir_nosave); - free_eaten_memory(); + if (!error) error = data_read(pagedir_nosave); if (error) { /* We fail cleanly */ + free_eaten_memory(); for_each_pbe (p, pagedir_nosave) if (p->address) { free_page(p->address); -- cgit v1.2.3 From 46113830a18847cff8da73005e57bc49c2f95a56 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Mon, 10 Oct 2005 19:44:29 +0200 Subject: [PATCH] Fix signal sending in usbdevio on async URB completion If a process issues an URB from userspace and (starts to) terminate before the URB comes back, we run into the issue described above. This is because the urb saves a pointer to "current" when it is posted to the device, but there's no guarantee that this pointer is still valid afterwards. In fact, there are three separate issues: 1) the pointer to "current" can become invalid, since the task could be completely gone when the URB completion comes back from the device. 2) Even if the saved task pointer is still pointing to a valid task_struct, task_struct->sighand could have gone meanwhile. 3) Even if the process is perfectly fine, permissions may have changed, and we can no longer send it a signal. So what we do instead, is to save the PID and uid's of the process, and introduce a new kill_proc_info_as_uid() function. Signed-off-by: Harald Welte [ Fixed up types and added symbol exports ] Signed-off-by: Linus Torvalds --- kernel/signal.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'kernel') diff --git a/kernel/signal.c b/kernel/signal.c index cba193ceda0d..50c992643771 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1193,6 +1193,40 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) return error; } +/* like kill_proc_info(), but doesn't use uid/euid of "current" */ +int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid, + uid_t uid, uid_t euid) +{ + int ret = -EINVAL; + struct task_struct *p; + + if (!valid_signal(sig)) + return ret; + + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + if (!p) { + ret = -ESRCH; + goto out_unlock; + } + if ((!info || ((unsigned long)info != 1 && + (unsigned long)info != 2 && SI_FROMUSER(info))) + && (euid != p->suid) && (euid != p->uid) + && (uid != p->suid) && (uid != p->uid)) { + ret = -EPERM; + goto out_unlock; + } + if (sig && p->sighand) { + unsigned long flags; + spin_lock_irqsave(&p->sighand->siglock, flags); + ret = __group_send_sig_info(sig, info, p); + spin_unlock_irqrestore(&p->sighand->siglock, flags); + } +out_unlock: + read_unlock(&tasklist_lock); + return ret; +} +EXPORT_SYMBOL_GPL(kill_proc_info_as_uid); /* * kill_something_info() interprets pid in interesting ways just like kill(2). -- cgit v1.2.3 From c6ecf7ed3131961e5aeedb0efd217afa0808798f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 14 Oct 2005 15:59:03 -0700 Subject: [PATCH] Add missing export of getnstimeofday() Adds the missing EXPORT_SYMBOL_GPL for getnstimeofday() when CONFIG_TIME_INTERPOLATION isn't set. Needed by drivers/char/mmtimer.c Signed-off-by: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/time.c b/kernel/time.c index dd5ae1162a8f..40c2410ac99a 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -570,6 +570,7 @@ void getnstimeofday(struct timespec *tv) tv->tv_sec = x.tv_sec; tv->tv_nsec = x.tv_usec * NSEC_PER_USEC; } +EXPORT_SYMBOL_GPL(getnstimeofday); #endif #if (BITS_PER_LONG < 64) -- cgit v1.2.3 From 2cc78eb52bc1ae89f0a4fa5a00eb998dffde4a9f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 17 Oct 2005 09:10:15 -0700 Subject: Increase default RCU batching sharply Dipankar made RCU limit the batch size to improve latency, but that approach is unworkable: it can cause the RCU queues to grow without bounds, since the batch limiter ended up limiting the callbacks. So make the limit much higher, and start planning on instead limiting the batch size by doing RCU callbacks more often if the queue looks like it might be growing too long. Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index bef3b6901b76..dd99415b1551 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -71,7 +71,7 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; /* Fake initialization required by compiler */ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; -static int maxbatch = 10; +static int maxbatch = 10000; #ifndef __HAVE_ARCH_CMPXCHG /* -- cgit v1.2.3 From 47d6b08334a43fafa61a587f721fa21ef65d81be Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Oct 2005 18:49:42 +0400 Subject: [PATCH] posix-timers: fix task accounting Make sure we release the task struct properly when releasing pending timers. release_task() does write_lock_irq(&tasklist_lock), so it can't race with run_posix_cpu_timers() on any cpu. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index ad85d3f0dcc4..7a51a5597c33 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -424,6 +424,7 @@ static void cleanup_timers(struct list_head *head, cputime_t ptime = cputime_add(utime, stime); list_for_each_entry_safe(timer, next, head, entry) { + put_task_struct(timer->task); timer->task = NULL; list_del_init(&timer->entry); if (cputime_lt(timer->expires.cpu, ptime)) { @@ -436,6 +437,7 @@ static void cleanup_timers(struct list_head *head, ++head; list_for_each_entry_safe(timer, next, head, entry) { + put_task_struct(timer->task); timer->task = NULL; list_del_init(&timer->entry); if (cputime_lt(timer->expires.cpu, utime)) { @@ -448,6 +450,7 @@ static void cleanup_timers(struct list_head *head, ++head; list_for_each_entry_safe(timer, next, head, entry) { + put_task_struct(timer->task); timer->task = NULL; list_del_init(&timer->entry); if (timer->expires.sched < sched_time) { -- cgit v1.2.3 From 5ee832dbc6770135ec8d63296af0a4374557bb79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Oct 2005 20:01:21 +0200 Subject: [PATCH] rcu: keep rcu callback event counter This makes call_rcu() keep track of how many events there are on the RCU list, and cause a reschedule event when the list gets too long. This helps keep RCU event lists down. Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index dd99415b1551..2559d4b8f23f 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -109,6 +109,10 @@ void fastcall call_rcu(struct rcu_head *head, rdp = &__get_cpu_var(rcu_data); *rdp->nxttail = head; rdp->nxttail = &head->next; + + if (unlikely(++rdp->count > 10000)) + set_need_resched(); + local_irq_restore(flags); } @@ -140,6 +144,12 @@ void fastcall call_rcu_bh(struct rcu_head *head, rdp = &__get_cpu_var(rcu_bh_data); *rdp->nxttail = head; rdp->nxttail = &head->next; + rdp->count++; +/* + * Should we directly call rcu_do_batch() here ? + * if (unlikely(rdp->count > 10000)) + * rcu_do_batch(rdp); + */ local_irq_restore(flags); } @@ -157,6 +167,7 @@ static void rcu_do_batch(struct rcu_data *rdp) next = rdp->donelist = list->next; list->func(list); list = next; + rdp->count--; if (++count >= maxbatch) break; } -- cgit v1.2.3