From 188a1eafa03aaa5e5fe6f53e637e704cd2c31c7c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Fri, 23 Sep 2005 13:22:21 -0700
Subject: Make sure SIGKILL gets proper respect

Bhavesh P. Davda <bhavesh@avaya.com> noticed that SIGKILL wouldn't
properly kill a process under just the right cicumstances: a stopped
task that already had another signal queued would get the SIGKILL
queued onto the shared queue, and there it would remain until SIGCONT.

This simplifies the signal acceptance logic, and fixes the bug in the
process.

Losely based on an earlier patch by Bhavesh.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/signal.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index b92c3c9f8b9a..5a274705ba19 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -936,34 +936,31 @@ force_sig_specific(int sig, struct task_struct *t)
  * as soon as they're available, so putting the signal on the shared queue
  * will be equivalent to sending it to one such thread.
  */
-#define wants_signal(sig, p, mask) 			\
-	(!sigismember(&(p)->blocked, sig)		\
-	 && !((p)->state & mask)			\
-	 && !((p)->flags & PF_EXITING)			\
-	 && (task_curr(p) || !signal_pending(p)))
-
+static inline int wants_signal(int sig, struct task_struct *p)
+{
+	if (sigismember(&p->blocked, sig))
+		return 0;
+	if (p->flags & PF_EXITING)
+		return 0;
+	if (sig == SIGKILL)
+		return 1;
+	if (p->state & (TASK_STOPPED | TASK_TRACED))
+		return 0;
+	return task_curr(p) || !signal_pending(p);
+}
 
 static void
 __group_complete_signal(int sig, struct task_struct *p)
 {
-	unsigned int mask;
 	struct task_struct *t;
 
-	/*
-	 * Don't bother traced and stopped tasks (but
-	 * SIGKILL will punch through that).
-	 */
-	mask = TASK_STOPPED | TASK_TRACED;
-	if (sig == SIGKILL)
-		mask = 0;
-
 	/*
 	 * Now find a thread we can wake up to take the signal off the queue.
 	 *
 	 * If the main thread wants the signal, it gets first crack.
 	 * Probably the least surprising to the average bear.
 	 */
-	if (wants_signal(sig, p, mask))
+	if (wants_signal(sig, p))
 		t = p;
 	else if (thread_group_empty(p))
 		/*
@@ -981,7 +978,7 @@ __group_complete_signal(int sig, struct task_struct *p)
 			t = p->signal->curr_target = p;
 		BUG_ON(t->tgid != p->tgid);
 
-		while (!wants_signal(sig, t, mask)) {
+		while (!wants_signal(sig, t)) {
 			t = next_thread(t);
 			if (t == p->signal->curr_target)
 				/*
-- 
cgit v1.2.3


From 254b54771cc4c00002f796be65d2885191dca9dc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 27 Sep 2005 21:45:31 -0700
Subject: [PATCH] swsusp: remove wrong code from data_free

The following patch removes some wrong code from the data_free() function
in swsusp.

This function could only be called if there's an error while writing the
suspend image to swap, so it is not triggered easily.  However, if
triggered, it would probably corrupt some memory.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/swsusp.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 1cc9ff25e479..8aef1b49150f 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -402,15 +402,14 @@ static int write_page(unsigned long addr, swp_entry_t * loc)
 static void data_free(void)
 {
 	swp_entry_t entry;
-	int i;
+	struct pbe * p;
 
-	for (i = 0; i < nr_copy_pages; i++) {
-		entry = (pagedir_nosave + i)->swap_address;
+	for_each_pbe(p, pagedir_nosave) {
+		entry = p->swap_address;
 		if (entry.val)
 			swap_free(entry);
 		else
 			break;
-		(pagedir_nosave + i)->swap_address = (swp_entry_t){0};
 	}
 }
 
-- 
cgit v1.2.3


From f2d613799af915da1fd78463ba8ec5086a0d6f92 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 27 Sep 2005 21:45:32 -0700
Subject: [PATCH] swsusp: prevent possible memory leak

Prevent swsusp from leaking some memory in case of an error in
read_pagedir().  It also prevents the BUG_ON() from triggering if there's
an error while reading swap.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/swsusp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 8aef1b49150f..0dfb24948907 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -1437,9 +1437,9 @@ static int read_pagedir(struct pbe *pblist)
 	}
 
 	if (error)
-		free_page((unsigned long)pblist);
-
-	BUG_ON(i != swsusp_info.pagedir_pages);
+		free_pagedir(pblist);
+	else
+		BUG_ON(i != swsusp_info.pagedir_pages);
 
 	return error;
 }
-- 
cgit v1.2.3


From f36462f078403c1859a7e58177b28e01b3a179e4 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 27 Sep 2005 21:45:34 -0700
Subject: [PATCH] Ignore trailing whitespace on kernel parameters correctly

Dave Jones says:

... if the modprobe.conf has trailing whitespace, modules fail to load
with the following helpful message..

	snd_intel8x0: Unknown parameter `'

Previous version truncated last argument.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/params.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/params.c b/kernel/params.c
index fbf173215fd2..1a8614bac5d5 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -80,8 +80,6 @@ static char *next_arg(char *args, char **param, char **val)
 	int in_quote = 0, quoted = 0;
 	char *next;
 
-	/* Chew any extra spaces */
-	while (*args == ' ') args++;
 	if (*args == '"') {
 		args++;
 		in_quote = 1;
@@ -121,6 +119,10 @@ static char *next_arg(char *args, char **param, char **val)
 		next = args + i + 1;
 	} else
 		next = args + i;
+
+	/* Chew up trailing spaces. */
+	while (*next == ' ')
+		next++;
 	return next;
 }
 
@@ -135,6 +137,10 @@ int parse_args(const char *name,
 
 	DEBUGP("Parsing ARGS: %s\n", args);
 
+	/* Chew leading spaces */
+	while (*args == ' ')
+		args++;
+
 	while (*args) {
 		int ret;
 
-- 
cgit v1.2.3


From 0f7347c20c410c300be0db4c132945fd02e54110 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 27 Sep 2005 21:45:43 -0700
Subject: [PATCH] swsusp: avoid problems if there are too many pages to save

The following patch makes swsusp avoid problems during resume if there are
too many pages to save on suspend.  It adds a constant that allows us to
verify if we are going to save too many pages and implements the check
(this is done as early as we can tell that the check will trigger, which is
in swsusp_alloc()).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/power.h  | 5 ++++-
 kernel/power/swsusp.c | 4 ++++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/power/power.h b/kernel/power/power.h
index 9c9167d910dd..6748de23e83c 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -9,6 +9,9 @@
 #define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)
 #endif
 
+#define MAX_PBES	((PAGE_SIZE - sizeof(struct new_utsname) \
+			- 4 - 3*sizeof(unsigned long) - sizeof(int) \
+			- sizeof(void *)) / sizeof(swp_entry_t))
 
 struct swsusp_info {
 	struct new_utsname	uts;
@@ -18,7 +21,7 @@ struct swsusp_info {
 	unsigned long		image_pages;
 	unsigned long		pagedir_pages;
 	suspend_pagedir_t	* suspend_pagedir;
-	swp_entry_t		pagedir[768];
+	swp_entry_t		pagedir[MAX_PBES];
 } __attribute__((aligned(PAGE_SIZE)));
 
 
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 0dfb24948907..acf79ac1cb6d 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -931,6 +931,10 @@ static int swsusp_alloc(void)
 	if (!enough_swap())
 		return -ENOSPC;
 
+	if (MAX_PBES < nr_copy_pages / PBES_PER_PAGE +
+	    !!(nr_copy_pages % PBES_PER_PAGE))
+		return -ENOSPC;
+
 	if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) {
 		printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
 		return -ENOMEM;
-- 
cgit v1.2.3


From 5134fc15b643dc36eb9aa77e4318b886844a9ac5 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Wed, 28 Sep 2005 06:42:24 -0700
Subject: [PATCH] cpuset read past eof memory leak fix

Don't leak a page of memory if user reads a cpuset file past eof.

Signed-off-by: KUROSAWA Takahiro <kurosawa@valinux.co.jp>
Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/cpuset.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 79866bc6b3a1..6a6e87b2f2fd 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -969,7 +969,7 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 	ssize_t retval = 0;
 	char *s;
 	char *start;
-	size_t n;
+	ssize_t n;
 
 	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
 		return -ENOMEM;
@@ -999,12 +999,13 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 	*s++ = '\n';
 	*s = '\0';
 
-	/* Do nothing if *ppos is at the eof or beyond the eof. */
-	if (s - page <= *ppos)
-		return 0;
-
 	start = page + *ppos;
 	n = s - start;
+
+	/* Do nothing if *ppos is at the eof or beyond the eof. */
+	if (n <= 0)
+		goto out;
+
 	retval = n - copy_to_user(buf, start, min(n, nbytes));
 	*ppos += retval;
 out:
-- 
cgit v1.2.3


From 5acbc5cb507e6c381b70093b1081854708e82b16 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Thu, 29 Sep 2005 14:54:42 -0700
Subject: [PATCH] Fix task state testing properly in do_signal_stop()

Any tests using < TASK_STOPPED or the like are left over from the time
when the TASK_ZOMBIE and TASK_DEAD bits were in the same word, and it
served to check for "stopped or dead".  I think this one in
do_signal_stop is the only such case.  It has been buggy ever since
exit_state was separated, and isn't testing the exit_state value.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/signal.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 5a274705ba19..619b027e92b5 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1763,7 +1763,8 @@ do_signal_stop(int signr)
 				 * stop is always done with the siglock held,
 				 * so this check has no races.
 				 */
-				if (t->state < TASK_STOPPED) {
+				if (!t->exit_state &&
+				    !(t->state & (TASK_STOPPED|TASK_TRACED))) {
 					stop_count++;
 					signal_wake_up(t, 0);
 				}
-- 
cgit v1.2.3


From eacaa1f5aa4a41a48349f55abcd9258506943e76 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Fri, 30 Sep 2005 03:26:43 +0100
Subject: [PATCH] cpuset crapectomy

Switched cpuset_common_file_read() to simple_read_from_buffer(), killed
a bunch of useless (and not quite correct - e.g.  min(size_t,ssize_t))
code.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/cpuset.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6a6e87b2f2fd..45a5719a0104 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -968,8 +968,6 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 	char *page;
 	ssize_t retval = 0;
 	char *s;
-	char *start;
-	ssize_t n;
 
 	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
 		return -ENOMEM;
@@ -999,15 +997,7 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 	*s++ = '\n';
 	*s = '\0';
 
-	start = page + *ppos;
-	n = s - start;
-
-	/* Do nothing if *ppos is at the eof or beyond the eof. */
-	if (n <= 0)
-		goto out;
-
-	retval = n - copy_to_user(buf, start, min(n, nbytes));
-	*ppos += retval;
+	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
 out:
 	free_page((unsigned long)page);
 	return retval;
-- 
cgit v1.2.3


From 14bf01bb0599c89fc7f426d20353b76e12555308 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Sat, 1 Oct 2005 11:04:18 -0700
Subject: Fix inequality comparison against "task->state"

We should always use bitmask ops, rather than depend on some ordering of
the different states.  With the TASK_NONINTERACTIVE flag, the inequality
doesn't really work.

Oleg Nesterov argues (likely correctly) that this test is unnecessary in
the first place.  However, the minimal fix for now is to at least make
it work in the presense of TASK_NONINTERACTIVE.  Waiting for consensus
from Roland & co on potential bigger cleanups.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index ee6d8b8abef5..43077732619b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1203,7 +1203,7 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap,
 
 		exit_code = p->exit_code;
 		if (unlikely(!exit_code) ||
-		    unlikely(p->state > TASK_STOPPED))
+		    unlikely(p->state & TASK_TRACED))
 			goto bail_ref;
 		return wait_noreap_copyout(p, pid, uid,
 					   why, (exit_code << 8) | 0x7f,
-- 
cgit v1.2.3


From 788e05a67c343fa22f2ae1d3ca264e7f15c25eaf Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Fri, 7 Oct 2005 17:46:19 +0400
Subject: [PATCH] fix do_coredump() vs SIGSTOP race

Let's suppose we have 2 threads in thread group:
	A - does coredump
	B - has pending SIGSTOP

thread A						thread B

do_coredump:						get_signal_to_deliver:

  lock(->sighand)
  ->signal->flags = SIGNAL_GROUP_EXIT
  unlock(->sighand)

							lock(->sighand)
							signr = dequeue_signal()
								->signal->flags |= SIGNAL_STOP_DEQUEUED
								return SIGSTOP;

							do_signal_stop:
							    unlock(->sighand)

  coredump_wait:

      zap_threads:
          lock(tasklist_lock)
          send SIGKILL to B
              // signal_wake_up() does nothing
          unlock(tasklist_lock)

							    lock(tasklist_lock)
							    lock(->sighand)
							    re-check sig->flags & SIGNAL_STOP_DEQUEUED, yes
							    set_current_state(TASK_STOPPED);
							    finish_stop:
							        schedule();
							            // ->state == TASK_STOPPED

      wait_for_completion(&startup_done)
         // waits for complete() from B,
         // ->state == TASK_UNINTERRUPTIBLE

We can't wake up 'B' in any way:

	SIGCONT will be ignored because handle_stop_signal() sees
	->signal->flags & SIGNAL_GROUP_EXIT.

	sys_kill(SIGKILL)->__group_complete_signal() will choose
	uninterruptible 'A', so it can't help.

	sys_tkill(B, SIGKILL) will be ignored by specific_send_sig_info()
	because B already has pending SIGKILL.

This scenario is not possbile if 'A' does do_group_exit(), because
it sets sig->flags = SIGNAL_GROUP_EXIT and delivers SIGKILL to
subthreads atomically, holding both tasklist_lock and sighand->lock.
That means that do_signal_stop() will notice !SIGNAL_STOP_DEQUEUED
after re-locking ->sighand. And it is not possible to any other
thread to re-add SIGNAL_STOP_DEQUEUED later, because dequeue_signal()
can only return SIGKILL.

I think it is better to change do_coredump() to do sigaddset(SIGKILL)
and signal_wake_up() under sighand->lock, but this patch is much
simpler.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/signal.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index 619b027e92b5..c135f5aa2c2d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -578,7 +578,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
  		 * is to alert stop-signal processing code when another
  		 * processor has come along and cleared the flag.
  		 */
- 		tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
+ 		if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT))
+ 			tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
  	}
 	if ( signr &&
 	     ((info->si_code & __SI_MASK) == __SI_TIMER) &&
-- 
cgit v1.2.3


From dd0fc66fb33cd610bc1a5db8a5e232d34879b4d7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Fri, 7 Oct 2005 07:46:04 +0100
Subject: [PATCH] gfp flags annotations - part 1

 - added typedef unsigned int __nocast gfp_t;

 - replaced __nocast uses for gfp flags with gfp_t - it gives exactly
   the same warnings as far as sparse is concerned, doesn't change
   generated code (from gcc point of view we replaced unsigned int with
   typedef) and documents what's going on far better.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/audit.c  | 2 +-
 kernel/cpuset.c | 2 +-
 kernel/kfifo.c  | 4 ++--
 kernel/signal.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index 83096b67510a..aefa73a8a586 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -560,7 +560,7 @@ static void audit_buffer_free(struct audit_buffer *ab)
 }
 
 static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx,
-						unsigned int __nocast gfp_mask, int type)
+						gfp_t gfp_mask, int type)
 {
 	unsigned long flags;
 	struct audit_buffer *ab = NULL;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 45a5719a0104..28176d083f7b 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1670,7 +1670,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  *	GFP_USER     - only nodes in current tasks mems allowed ok.
  **/
 
-int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
+int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 {
 	int node;			/* node that zone z is on */
 	const struct cpuset *cs;	/* current cpuset ancestors */
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 179baafcdd96..64ab045c3d9d 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -36,7 +36,7 @@
  * struct kfifo with kfree().
  */
 struct kfifo *kfifo_init(unsigned char *buffer, unsigned int size,
-			 unsigned int __nocast gfp_mask, spinlock_t *lock)
+			 gfp_t gfp_mask, spinlock_t *lock)
 {
 	struct kfifo *fifo;
 
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(kfifo_init);
  *
  * The size will be rounded-up to a power of 2.
  */
-struct kfifo *kfifo_alloc(unsigned int size, unsigned int __nocast gfp_mask, spinlock_t *lock)
+struct kfifo *kfifo_alloc(unsigned int size, gfp_t gfp_mask, spinlock_t *lock)
 {
 	unsigned char *buffer;
 	struct kfifo *ret;
diff --git a/kernel/signal.c b/kernel/signal.c
index c135f5aa2c2d..cba193ceda0d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -262,7 +262,7 @@ next_signal(struct sigpending *pending, sigset_t *mask)
 	return sig;
 }
 
-static struct sigqueue *__sigqueue_alloc(struct task_struct *t, unsigned int __nocast flags,
+static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
 					 int override_rlimit)
 {
 	struct sigqueue *q = NULL;
-- 
cgit v1.2.3


From 3dd083255ddcfa87751fa8e32f61a9547a15a541 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 9 Oct 2005 21:19:40 +0200
Subject: [PATCH] x86_64: Set up safe page tables during resume

The following patch makes swsusp avoid the possible temporary corruption
of page translation tables during resume on x86-64.  This is achieved by
creating a copy of the relevant page tables that will not be modified by
swsusp and can be safely used by it on resume.

The problem is that during resume on x86-64 swsusp may temporarily
corrupt the page tables used for the direct mapping of RAM.  If that
happens, a page fault occurs and cannot be handled properly, which leads
to the solid hang of the affected system.  This leads to the loss of the
system's state from before suspend and may result in the loss of data or
the corruption of filesystems, so it is a serious issue.  Also, it
appears to happen quite often (for me, as often as 50% of the time).

The problem is related to the fact that (at least) one of the PMD
entries used in the direct memory mapping (starting at PAGE_OFFSET)
points to a page table the physical address of which is much greater
than the physical address of the PMD entry itself.  Moreover,
unfortunately, the physical address of the page table before suspend
(i.e.  the one stored in the suspend image) happens to be different to
the physical address of the corresponding page table used during resume
(i.e.  the one that is valid right before swsusp_arch_resume() in
arch/x86_64/kernel/suspend_asm.S is executed).  Thus while the image is
restored, the "offending" PMD entry gets overwritten, so it does not
point to the right physical address any more (i.e.  there's no page
table at the address pointed to by it, because it points to the address
the page table has been at during suspend).  Consequently, if the PMD
entry is used later on, and it _is_ used in the process of copying the
image pages, a page fault occurs, but it cannot be handled in the normal
way and the system hangs.

In principle we can call create_resume_mapping() from
swsusp_arch_resume() (ie.  from suspend_asm.S), but then the memory
allocations in create_resume_mapping(), resume_pud_mapping(), and
resume_pmd_mapping() must be made carefully so that we use _only_
NosaveFree pages in them (the other pages are overwritten by the loop in
swsusp_arch_resume()).  Additionally, we are in atomic context at that
time, so we cannot use GFP_KERNEL.  Moreover, if one of the allocations
fails, we should free all of the allocated pages, so we need to trace
them somehow.

All of this is done in the appended patch, except that the functions
populating the page tables are located in arch/x86_64/kernel/suspend.c
rather than in init.c.  It may be done in a more elegan way in the
future, with the help of some swsusp patches that are in the works now.

[AK: move some externs into headers, renamed a function]

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/swsusp.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index acf79ac1cb6d..2d5c45676442 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -1095,7 +1095,7 @@ static inline void eat_page(void *page)
 	*eaten_memory = c;
 }
 
-static unsigned long get_usable_page(unsigned gfp_mask)
+unsigned long get_usable_page(unsigned gfp_mask)
 {
 	unsigned long m;
 
@@ -1109,7 +1109,7 @@ static unsigned long get_usable_page(unsigned gfp_mask)
 	return m;
 }
 
-static void free_eaten_memory(void)
+void free_eaten_memory(void)
 {
 	unsigned long m;
 	void **c;
@@ -1481,11 +1481,12 @@ static int read_suspend_image(void)
 	/* Allocate memory for the image and read the data from swap */
 
 	error = check_pagedir(pagedir_nosave);
-	free_eaten_memory();
+
 	if (!error)
 		error = data_read(pagedir_nosave);
 
 	if (error) { /* We fail cleanly */
+		free_eaten_memory();
 		for_each_pbe (p, pagedir_nosave)
 			if (p->address) {
 				free_page(p->address);
-- 
cgit v1.2.3


From 46113830a18847cff8da73005e57bc49c2f95a56 Mon Sep 17 00:00:00 2001
From: Harald Welte <laforge@gnumonks.org>
Date: Mon, 10 Oct 2005 19:44:29 +0200
Subject: [PATCH] Fix signal sending in usbdevio on async URB completion

If a process issues an URB from userspace and (starts to) terminate
before the URB comes back, we run into the issue described above.  This
is because the urb saves a pointer to "current" when it is posted to the
device, but there's no guarantee that this pointer is still valid
afterwards.

In fact, there are three separate issues:

1) the pointer to "current" can become invalid, since the task could be
   completely gone when the URB completion comes back from the device.

2) Even if the saved task pointer is still pointing to a valid task_struct,
   task_struct->sighand could have gone meanwhile.

3) Even if the process is perfectly fine, permissions may have changed,
   and we can no longer send it a signal.

So what we do instead, is to save the PID and uid's of the process, and
introduce a new kill_proc_info_as_uid() function.

Signed-off-by: Harald Welte <laforge@gnumonks.org>
[ Fixed up types and added symbol exports ]
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/signal.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'kernel')

diff --git a/kernel/signal.c b/kernel/signal.c
index cba193ceda0d..50c992643771 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1193,6 +1193,40 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 	return error;
 }
 
+/* like kill_proc_info(), but doesn't use uid/euid of "current" */
+int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
+		      uid_t uid, uid_t euid)
+{
+	int ret = -EINVAL;
+	struct task_struct *p;
+
+	if (!valid_signal(sig))
+		return ret;
+
+	read_lock(&tasklist_lock);
+	p = find_task_by_pid(pid);
+	if (!p) {
+		ret = -ESRCH;
+		goto out_unlock;
+	}
+	if ((!info || ((unsigned long)info != 1 &&
+			(unsigned long)info != 2 && SI_FROMUSER(info)))
+	    && (euid != p->suid) && (euid != p->uid)
+	    && (uid != p->suid) && (uid != p->uid)) {
+		ret = -EPERM;
+		goto out_unlock;
+	}
+	if (sig && p->sighand) {
+		unsigned long flags;
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		ret = __group_send_sig_info(sig, info, p);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	}
+out_unlock:
+	read_unlock(&tasklist_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kill_proc_info_as_uid);
 
 /*
  * kill_something_info() interprets pid in interesting ways just like kill(2).
-- 
cgit v1.2.3


From c6ecf7ed3131961e5aeedb0efd217afa0808798f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 14 Oct 2005 15:59:03 -0700
Subject: [PATCH] Add missing export of getnstimeofday()

Adds the missing EXPORT_SYMBOL_GPL for getnstimeofday() when
CONFIG_TIME_INTERPOLATION isn't set.  Needed by drivers/char/mmtimer.c

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/time.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/time.c b/kernel/time.c
index dd5ae1162a8f..40c2410ac99a 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -570,6 +570,7 @@ void getnstimeofday(struct timespec *tv)
 	tv->tv_sec = x.tv_sec;
 	tv->tv_nsec = x.tv_usec * NSEC_PER_USEC;
 }
+EXPORT_SYMBOL_GPL(getnstimeofday);
 #endif
 
 #if (BITS_PER_LONG < 64)
-- 
cgit v1.2.3


From 2cc78eb52bc1ae89f0a4fa5a00eb998dffde4a9f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Mon, 17 Oct 2005 09:10:15 -0700
Subject: Increase default RCU batching sharply

Dipankar made RCU limit the batch size to improve latency, but that
approach is unworkable: it can cause the RCU queues to grow without
bounds, since the batch limiter ended up limiting the callbacks.

So make the limit much higher, and start planning on instead limiting
the batch size by doing RCU callbacks more often if the queue looks like
it might be growing too long.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/rcupdate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index bef3b6901b76..dd99415b1551 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -71,7 +71,7 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 
 /* Fake initialization required by compiler */
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
-static int maxbatch = 10;
+static int maxbatch = 10000;
 
 #ifndef __HAVE_ARCH_CMPXCHG
 /*
-- 
cgit v1.2.3


From 47d6b08334a43fafa61a587f721fa21ef65d81be Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 17 Oct 2005 18:49:42 +0400
Subject: [PATCH] posix-timers: fix task accounting

Make sure we release the task struct properly when releasing pending
timers.

release_task() does write_lock_irq(&tasklist_lock), so it can't race
with run_posix_cpu_timers() on any cpu.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/posix-cpu-timers.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index ad85d3f0dcc4..7a51a5597c33 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -424,6 +424,7 @@ static void cleanup_timers(struct list_head *head,
 	cputime_t ptime = cputime_add(utime, stime);
 
 	list_for_each_entry_safe(timer, next, head, entry) {
+		put_task_struct(timer->task);
 		timer->task = NULL;
 		list_del_init(&timer->entry);
 		if (cputime_lt(timer->expires.cpu, ptime)) {
@@ -436,6 +437,7 @@ static void cleanup_timers(struct list_head *head,
 
 	++head;
 	list_for_each_entry_safe(timer, next, head, entry) {
+		put_task_struct(timer->task);
 		timer->task = NULL;
 		list_del_init(&timer->entry);
 		if (cputime_lt(timer->expires.cpu, utime)) {
@@ -448,6 +450,7 @@ static void cleanup_timers(struct list_head *head,
 
 	++head;
 	list_for_each_entry_safe(timer, next, head, entry) {
+		put_task_struct(timer->task);
 		timer->task = NULL;
 		list_del_init(&timer->entry);
 		if (timer->expires.sched < sched_time) {
-- 
cgit v1.2.3


From 5ee832dbc6770135ec8d63296af0a4374557bb79 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 17 Oct 2005 20:01:21 +0200
Subject: [PATCH] rcu: keep rcu callback event counter

This makes call_rcu() keep track of how many events there are on the RCU
list, and cause a reschedule event when the list gets too long.

This helps keep RCU event lists down.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/rcupdate.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'kernel')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index dd99415b1551..2559d4b8f23f 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -109,6 +109,10 @@ void fastcall call_rcu(struct rcu_head *head,
 	rdp = &__get_cpu_var(rcu_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
+
+	if (unlikely(++rdp->count > 10000))
+		set_need_resched();
+
 	local_irq_restore(flags);
 }
 
@@ -140,6 +144,12 @@ void fastcall call_rcu_bh(struct rcu_head *head,
 	rdp = &__get_cpu_var(rcu_bh_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
+	rdp->count++;
+/*
+ *  Should we directly call rcu_do_batch() here ?
+ *  if (unlikely(rdp->count > 10000))
+ *      rcu_do_batch(rdp);
+ */
 	local_irq_restore(flags);
 }
 
@@ -157,6 +167,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		next = rdp->donelist = list->next;
 		list->func(list);
 		list = next;
+		rdp->count--;
 		if (++count >= maxbatch)
 			break;
 	}
-- 
cgit v1.2.3