From a271c241a6036d4d583d0f47a02ba5f18b8b92b5 Mon Sep 17 00:00:00 2001
From: Tom 'spot' Callaway <tcallawa@redhat.com>
Date: Sun, 24 Apr 2005 20:38:02 -0700
Subject: [SPARC]: Stop-A printk cleanup

This patch is incredibly trivial, but it does resolve some of the user
confusion as to what "L1-A" actually is.

Clarify printk message to refer to Stop-A (L1-A).

Gentoo has a virtually identical patch in their kernel sources.

Signed-off-by: Tom 'spot' Callaway <tcallawa@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/panic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/panic.c b/kernel/panic.c
index 0fa3f3a66fb6..081f7465fc8d 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -102,9 +102,9 @@ NORET_TYPE void panic(const char * fmt, ...)
 #ifdef __sparc__
 	{
 		extern int stop_a_enabled;
-		/* Make sure the user can actually press L1-A */
+		/* Make sure the user can actually press Stop-A (L1-A) */
 		stop_a_enabled = 1;
-		printk(KERN_EMERG "Press L1-A to return to the boot prom\n");
+		printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
 	}
 #endif
 #if defined(CONFIG_ARCH_S390)
-- 
cgit v1.2.3


From 9acf6597c533f3d5c991f730c6a1be296679018e Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Thu, 28 Apr 2005 08:13:58 -0700
Subject: [PATCH] time interpolator: Fix settimeofday inaccuracy

settimeofday will set the time a little bit too early on systems using
time interpolation since it subtracts the current interpolator offset
from the time. This used to be necessary with the code in 2.6.9 and earlier
but the new code resets the time interpolator after setting the time.
Thus the time is set too early and gettimeofday will return a time slightly
before the time specified with settimeofday if invoked immeditely after
settimeofday.

This removes the obsolete subtraction of the time interpolator offset
and makes settimeofday set the time accurately.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/time.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time.c b/kernel/time.c
index 96fd0f499631..d4335c1c884c 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -516,14 +516,6 @@ int do_settimeofday (struct timespec *tv)
 
 	write_seqlock_irq(&xtime_lock);
 	{
-		/*
-		 * This is revolting. We need to set "xtime" correctly. However, the value
-		 * in this location is the value at the most recent update of wall time.
-		 * Discover what correction gettimeofday would have done, and then undo
-		 * it!
-		 */
-		nsec -= time_interpolator_get_offset();
-
 		wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
 		wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
 
-- 
cgit v1.2.3


From 83c7d09173fdb6b06b109e65895392db3e49ac9c Mon Sep 17 00:00:00 2001
From:  <dwmw2@shinybook.infradead.org>
Date: Fri, 29 Apr 2005 15:54:44 +0100
Subject: AUDIT: Avoid log pollution by untrusted strings.

We log strings from userspace, such as arguments to open(). These could
be formatted to contain \n followed by fake audit log entries. Provide
a function for logging such strings, which gives a hex dump when the
string contains anything but basic printable ASCII characters. Use it
for logging filenames.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 kernel/audit.c   | 23 +++++++++++++++++++++++
 kernel/auditsc.c |  7 ++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index 0f84dd7af2c8..dca7b99615d2 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -720,6 +720,29 @@ void audit_log_format(struct audit_buffer *ab, const char *fmt, ...)
 	va_end(args);
 }
 
+void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len)
+{
+	int i;
+
+	for (i=0; i<len; i++)
+		audit_log_format(ab, "%02x", buf[i]);
+}
+
+void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
+{
+	const char *p = string;
+
+	while (*p) {
+		if (*p == '"' || *p == ' ' || *p < 0x20 || *p > 0x7f) {
+			audit_log_hex(ab, string, strlen(string));
+			return;
+		}
+		p++;
+	}
+	audit_log_format(ab, "\"%s\"", string);
+}
+
+
 /* This is a helper-function to print the d_path without using a static
  * buffer or allocating another buffer in addition to the one in
  * audit_buffer. */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 6f1931381bc9..00e87ffff13b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -696,9 +696,10 @@ static void audit_log_exit(struct audit_context *context)
 		if (!ab)
 			continue; /* audit_panic has been called */
 		audit_log_format(ab, "item=%d", i);
-		if (context->names[i].name)
-			audit_log_format(ab, " name=%s",
-					 context->names[i].name);
+		if (context->names[i].name) {
+			audit_log_format(ab, " name=");
+			audit_log_untrustedstring(ab, context->names[i].name);
+		}
 		if (context->names[i].ino != (unsigned long)-1)
 			audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#o"
 					     " uid=%d gid=%d rdev=%02x:%02x",
-- 
cgit v1.2.3


From 81b7854d52d35ed2353dd47033ae630d18322a2d Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 29 Apr 2005 15:59:11 +0100
Subject: audit_log_untrustedstring() warning fix

kernel/audit.c: In function `audit_log_untrustedstring':
kernel/audit.c:736: warning: comparison is always false due to limited range of data type

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 kernel/audit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index dca7b99615d2..e7bff8000d23 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -730,7 +730,7 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len
 
 void audit_log_untrustedstring(struct audit_buffer *ab, const char *string)
 {
-	const char *p = string;
+	const unsigned char *p = string;
 
 	while (*p) {
 		if (*p == '"' || *p == ' ' || *p < 0x20 || *p > 0x7f) {
-- 
cgit v1.2.3


From 2fd6f58ba6efc82ea2c9c2630f7ff5ed9eeaf34a Mon Sep 17 00:00:00 2001
From:  <dwmw2@shinybook.infradead.org>
Date: Fri, 29 Apr 2005 16:08:28 +0100
Subject: [AUDIT] Don't allow ptrace to fool auditing, log arch of audited
 syscalls.

We were calling ptrace_notify() after auditing the syscall and arguments,
but the debugger could have _changed_ them before the syscall was actually
invoked. Reorder the calls to fix that.

While we're touching ever call to audit_syscall_entry(), we also make it
take an extra argument: the architecture of the syscall which was made,
because some architectures allow more than one type of syscall.

Also add an explicit success/failure flag to audit_syscall_exit(), for
the benefit of architectures which return that in a condition register
rather than only returning a single register.

Change type of syscall return value to 'long' not 'int'.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 kernel/auditsc.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 00e87ffff13b..77e92592de57 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -123,7 +123,7 @@ struct audit_context {
 	int		    major;      /* syscall number */
 	unsigned long	    argv[4];    /* syscall arguments */
 	int		    return_valid; /* return code is valid */
-	int		    return_code;/* syscall return code */
+	long		    return_code;/* syscall return code */
 	int		    auditable;  /* 1 if record should be written */
 	int		    name_count;
 	struct audit_names  names[AUDIT_NAMES];
@@ -135,6 +135,7 @@ struct audit_context {
 	uid_t		    uid, euid, suid, fsuid;
 	gid_t		    gid, egid, sgid, fsgid;
 	unsigned long	    personality;
+	int		    arch;
 
 #if AUDIT_DEBUG
 	int		    put_count;
@@ -348,6 +349,10 @@ static int audit_filter_rules(struct task_struct *tsk,
 		case AUDIT_PERS:
 			result = (tsk->personality == value);
 			break;
+		case AUDIT_ARCH:
+			if (ctx) 
+				result = (ctx->arch == value);
+			break;
 
 		case AUDIT_EXIT:
 			if (ctx && ctx->return_valid)
@@ -355,7 +360,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 			break;
 		case AUDIT_SUCCESS:
 			if (ctx && ctx->return_valid)
-				result = (ctx->return_code >= 0);
+				result = (ctx->return_valid == AUDITSC_SUCCESS);
 			break;
 		case AUDIT_DEVMAJOR:
 			if (ctx) {
@@ -648,8 +653,11 @@ static void audit_log_exit(struct audit_context *context)
 	audit_log_format(ab, "syscall=%d", context->major);
 	if (context->personality != PER_LINUX)
 		audit_log_format(ab, " per=%lx", context->personality);
+	audit_log_format(ab, " arch=%x", context->arch);
 	if (context->return_valid)
-		audit_log_format(ab, " exit=%d", context->return_code);
+		audit_log_format(ab, " success=%s exit=%ld", 
+				 (context->return_valid==AUDITSC_SUCCESS)?"yes":"no",
+				 context->return_code);
 	audit_log_format(ab,
 		  " a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
 		  " pid=%d loginuid=%d uid=%d gid=%d"
@@ -773,7 +781,7 @@ static inline unsigned int audit_serial(void)
  * then the record will be written at syscall exit time (otherwise, it
  * will only be written if another part of the kernel requests that it
  * be written). */
-void audit_syscall_entry(struct task_struct *tsk, int major,
+void audit_syscall_entry(struct task_struct *tsk, int arch, int major,
 			 unsigned long a1, unsigned long a2,
 			 unsigned long a3, unsigned long a4)
 {
@@ -827,6 +835,7 @@ void audit_syscall_entry(struct task_struct *tsk, int major,
 	if (!audit_enabled)
 		return;
 
+	context->arch	    = arch;
 	context->major      = major;
 	context->argv[0]    = a1;
 	context->argv[1]    = a2;
@@ -850,13 +859,13 @@ void audit_syscall_entry(struct task_struct *tsk, int major,
  * filtering, or because some other part of the kernel write an audit
  * message), then write out the syscall information.  In call cases,
  * free the names stored from getname(). */
-void audit_syscall_exit(struct task_struct *tsk, int return_code)
+void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
 {
 	struct audit_context *context;
 
 	get_task_struct(tsk);
 	task_lock(tsk);
-	context = audit_get_context(tsk, 1, return_code);
+	context = audit_get_context(tsk, valid, return_code);
 	task_unlock(tsk);
 
 	/* Not having a context here is ok, since the parent may have
@@ -869,6 +878,7 @@ void audit_syscall_exit(struct task_struct *tsk, int return_code)
 
 	context->in_syscall = 0;
 	context->auditable  = 0;
+
 	if (context->previous) {
 		struct audit_context *new_context = context->previous;
 		context->previous  = NULL;
-- 
cgit v1.2.3


From d812ddbb89e323d054a7d073466225966c8350c8 Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Fri, 29 Apr 2005 16:09:52 +0100
Subject: [AUDIT] Fix signedness of 'serial' in various routines.

Attached is a patch that corrects a signed/unsigned warning. I also noticed
that we needlessly init serial to 0. That only needs to occur if the kernel
was compiled without the audit system.

-Steve Grubb

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 kernel/audit.c   | 6 ++++--
 kernel/auditsc.c | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index e7bff8000d23..aa35422c0c42 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -620,7 +620,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx)
 	struct audit_buffer	*ab	= NULL;
 	unsigned long		flags;
 	struct timespec		t;
-	int			serial	= 0;
+	unsigned int		serial;
 
 	if (!audit_initialized)
 		return NULL;
@@ -669,8 +669,10 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx)
 		audit_get_stamp(ab->ctx, &t, &serial);
 	else
 #endif
+	{
 		t = CURRENT_TIME;
-
+		serial = 0;
+	}
 	audit_log_format(ab, "audit(%lu.%03lu:%u): ",
 			 t.tv_sec, t.tv_nsec/1000000, serial);
 	return ab;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 77e92592de57..49ecd707b953 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -992,7 +992,7 @@ void audit_inode(const char *name, const struct inode *inode)
 }
 
 void audit_get_stamp(struct audit_context *ctx,
-		     struct timespec *t, int *serial)
+		     struct timespec *t, unsigned int *serial)
 {
 	if (ctx) {
 		t->tv_sec  = ctx->ctime.tv_sec;
-- 
cgit v1.2.3


From c7fcb0ee74ef4cfdea02befacb55945c93641e44 Mon Sep 17 00:00:00 2001
From: Peter Martuccelli <peterm@redhat.com>
Date: Fri, 29 Apr 2005 16:10:24 +0100
Subject: [AUDIT] Avoid using %*.*s format strings.

They don't seem to work correctly (investigation ongoing), but we don't
actually need to do it anyway.

Patch from Peter Martuccelli <peterm@redhat.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 kernel/audit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index aa35422c0c42..42ce282728b6 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -540,8 +540,8 @@ static inline int audit_log_drain(struct audit_buffer *ab)
 		if (!audit_pid) { /* No daemon */
 			int offset = ab->nlh ? NLMSG_SPACE(0) : 0;
 			int len    = skb->len - offset;
-			printk(KERN_ERR "%*.*s\n",
-			       len, len, skb->data + offset);
+			skb->data[offset + len] = '\0';
+			printk(KERN_ERR "%s\n", skb->data + offset);
 		}
 		kfree_skb(skb);
 		ab->nlh = NULL;
-- 
cgit v1.2.3


From 85c8721ff3bc96b702427a440616079e8daf8a2f Mon Sep 17 00:00:00 2001
From:  <dwmw2@shinybook.infradead.org>
Date: Fri, 29 Apr 2005 16:23:29 +0100
Subject: audit: update pointer to userspace tools, remove emacs mode tags

---
 kernel/audit.c   | 4 ++--
 kernel/auditsc.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index 42ce282728b6..58c7d7e47299 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1,4 +1,4 @@
-/* audit.c -- Auditing support -*- linux-c -*-
+/* audit.c -- Auditing support
  * Gateway between the kernel (e.g., selinux) and the user-space audit daemon.
  * System-call specific features have moved to auditsc.c
  *
@@ -38,7 +38,7 @@
  *	  6) Support low-overhead kernel-based filtering to minimize the
  *	     information that must be passed to user-space.
  *
- * Example user-space utilities: http://people.redhat.com/faith/audit/
+ * Example user-space utilities: http://people.redhat.com/sgrubb/audit/
  */
 
 #include <linux/init.h>
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 49ecd707b953..9ff2c1b1033e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1,4 +1,4 @@
-/* auditsc.c -- System-call auditing support -*- linux-c -*-
+/* auditsc.c -- System-call auditing support
  * Handles all system-call specific auditing features.
  *
  * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
-- 
cgit v1.2.3


From c94c257c88c517f251da273a15c654224c7b6e21 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serue@us.ibm.com>
Date: Fri, 29 Apr 2005 16:27:17 +0100
Subject: Add audit uid to netlink credentials

Most audit control messages are sent over netlink.In order to properly
log the identity of the sender of audit control messages, we would like
to add the loginuid to the netlink_creds structure, as per the attached
patch.

Signed-off-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 kernel/audit.c   | 46 +++++++++++++++++++++++++---------------------
 kernel/auditsc.c |  5 ++++-
 2 files changed, 29 insertions(+), 22 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index 58c7d7e47299..587d3b2eba7f 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -239,36 +239,36 @@ void audit_log_lost(const char *message)
 
 }
 
-static int audit_set_rate_limit(int limit)
+static int audit_set_rate_limit(int limit, uid_t loginuid)
 {
 	int old		 = audit_rate_limit;
 	audit_rate_limit = limit;
-	audit_log(current->audit_context, "audit_rate_limit=%d old=%d",
-		  audit_rate_limit, old);
+	audit_log(NULL, "audit_rate_limit=%d old=%d by auid %u",
+			audit_rate_limit, old, loginuid);
 	return old;
 }
 
-static int audit_set_backlog_limit(int limit)
+static int audit_set_backlog_limit(int limit, uid_t loginuid)
 {
 	int old		 = audit_backlog_limit;
 	audit_backlog_limit = limit;
-	audit_log(current->audit_context, "audit_backlog_limit=%d old=%d",
-		  audit_backlog_limit, old);
+	audit_log(NULL, "audit_backlog_limit=%d old=%d by auid %u",
+			audit_backlog_limit, old, loginuid);
 	return old;
 }
 
-static int audit_set_enabled(int state)
+static int audit_set_enabled(int state, uid_t loginuid)
 {
 	int old		 = audit_enabled;
 	if (state != 0 && state != 1)
 		return -EINVAL;
 	audit_enabled = state;
-	audit_log(current->audit_context, "audit_enabled=%d old=%d",
-		  audit_enabled, old);
+	audit_log(NULL, "audit_enabled=%d old=%d by auid %u",
+		  audit_enabled, old, loginuid);
 	return old;
 }
 
-static int audit_set_failure(int state)
+static int audit_set_failure(int state, uid_t loginuid)
 {
 	int old		 = audit_failure;
 	if (state != AUDIT_FAIL_SILENT
@@ -276,8 +276,8 @@ static int audit_set_failure(int state)
 	    && state != AUDIT_FAIL_PANIC)
 		return -EINVAL;
 	audit_failure = state;
-	audit_log(current->audit_context, "audit_failure=%d old=%d",
-		  audit_failure, old);
+	audit_log(NULL, "audit_failure=%d old=%d by auid %u",
+		  audit_failure, old, loginuid);
 	return old;
 }
 
@@ -344,6 +344,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	int			err;
 	struct audit_buffer	*ab;
 	u16			msg_type = nlh->nlmsg_type;
+	uid_t			loginuid; /* loginuid of sender */
 
 	err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
 	if (err)
@@ -351,6 +352,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	pid  = NETLINK_CREDS(skb)->pid;
 	uid  = NETLINK_CREDS(skb)->uid;
+	loginuid = NETLINK_CB(skb).loginuid;
 	seq  = nlh->nlmsg_seq;
 	data = NLMSG_DATA(nlh);
 
@@ -371,34 +373,36 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			return -EINVAL;
 		status_get   = (struct audit_status *)data;
 		if (status_get->mask & AUDIT_STATUS_ENABLED) {
-			err = audit_set_enabled(status_get->enabled);
+			err = audit_set_enabled(status_get->enabled, loginuid);
 			if (err < 0) return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_FAILURE) {
-			err = audit_set_failure(status_get->failure);
+			err = audit_set_failure(status_get->failure, loginuid);
 			if (err < 0) return err;
 		}
 		if (status_get->mask & AUDIT_STATUS_PID) {
 			int old   = audit_pid;
 			audit_pid = status_get->pid;
-			audit_log(current->audit_context,
-				  "audit_pid=%d old=%d", audit_pid, old);
+			audit_log(NULL, "audit_pid=%d old=%d by auid %u",
+				  audit_pid, old, loginuid);
 		}
 		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
-			audit_set_rate_limit(status_get->rate_limit);
+			audit_set_rate_limit(status_get->rate_limit, loginuid);
 		if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
-			audit_set_backlog_limit(status_get->backlog_limit);
+			audit_set_backlog_limit(status_get->backlog_limit,
+							loginuid);
 		break;
 	case AUDIT_USER:
 		ab = audit_log_start(NULL);
 		if (!ab)
 			break;	/* audit_panic has been called */
 		audit_log_format(ab,
-				 "user pid=%d uid=%d length=%d msg='%.1024s'",
+				 "user pid=%d uid=%d length=%d loginuid=%u"
+				 " msg='%.1024s'",
 				 pid, uid,
 				 (int)(nlh->nlmsg_len
 				       - ((char *)data - (char *)nlh)),
-				 (char *)data);
+				 loginuid, (char *)data);
 		ab->type = AUDIT_USER;
 		ab->pid  = pid;
 		audit_log_end(ab);
@@ -411,7 +415,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	case AUDIT_LIST:
 #ifdef CONFIG_AUDITSYSCALL
 		err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
-					   uid, seq, data);
+					   uid, seq, data, loginuid);
 #else
 		err = -EOPNOTSUPP;
 #endif
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9ff2c1b1033e..66148f81d783 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -251,7 +251,8 @@ static int audit_copy_rule(struct audit_rule *d, struct audit_rule *s)
 	return 0;
 }
 
-int audit_receive_filter(int type, int pid, int uid, int seq, void *data)
+int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
+							uid_t loginuid)
 {
 	u32		   flags;
 	struct audit_entry *entry;
@@ -286,6 +287,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data)
 			err = audit_add_rule(entry, &audit_entlist);
 		if (!err && (flags & AUDIT_AT_EXIT))
 			err = audit_add_rule(entry, &audit_extlist);
+		audit_log(NULL, "auid %u added an audit rule\n", loginuid);
 		break;
 	case AUDIT_DEL:
 		flags =((struct audit_rule *)data)->flags;
@@ -295,6 +297,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data)
 			err = audit_del_rule(data, &audit_entlist);
 		if (!err && (flags & AUDIT_AT_EXIT))
 			err = audit_del_rule(data, &audit_extlist);
+		audit_log(NULL, "auid %u removed an audit rule\n", loginuid);
 		break;
 	default:
 		return -EINVAL;
-- 
cgit v1.2.3


From 37509e749dc2072e667db806ef24b9e897f61b8a Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@osdl.org>
Date: Fri, 29 Apr 2005 17:19:14 +0100
Subject: [AUDIT] Requeue messages at head of queue, up to audit_backlog

If netlink_unicast() fails, requeue the skb back at the head of the queue
it just came from, instead of the tail. And do so unless we've exceeded
the audit_backlog limit; not according to some other arbitrary limit.

From: Chris Wright <chrisw@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 kernel/audit.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index 587d3b2eba7f..4a697c73faec 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -142,7 +142,6 @@ struct audit_buffer {
 	int		     total;
 	int		     type;
 	int		     pid;
-	int		     count; /* Times requeued */
 };
 
 void audit_set_type(struct audit_buffer *ab, int type)
@@ -526,9 +525,9 @@ static inline int audit_log_drain(struct audit_buffer *ab)
 			retval = netlink_unicast(audit_sock, skb, audit_pid,
 						 MSG_DONTWAIT);
 		}
-		if (retval == -EAGAIN && ab->count < 5) {
-			++ab->count;
-			skb_queue_tail(&ab->sklist, skb);
+		if (retval == -EAGAIN &&
+		    (atomic_read(&audit_backlog)) < audit_backlog_limit) {
+			skb_queue_head(&ab->sklist, skb);
 			audit_log_end_irq(ab);
 			return 1;
 		}
@@ -666,7 +665,6 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx)
 	ab->total = 0;
 	ab->type  = AUDIT_KERNEL;
 	ab->pid   = 0;
-	ab->count = 0;
 
 #ifdef CONFIG_AUDITSYSCALL
 	if (ab->ctx)
-- 
cgit v1.2.3


From 456be6cd90dbbb9b0ea01d56932d56d110d51cf7 Mon Sep 17 00:00:00 2001
From: Steve Grubb <sgrubb@redhat.com>
Date: Fri, 29 Apr 2005 17:30:07 +0100
Subject: [AUDIT] LOGIN message credentials

Attached is a new patch that solves the issue of getting valid credentials
into the LOGIN message. The current code was assuming that the audit context
had already been copied. This is not always the case for LOGIN messages.

To solve the problem, the patch passes the task struct to the function that
emits the message where it can get valid credentials.

Signed-off-by: Steve Grubb <sgrubb@redhat.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 kernel/auditsc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 66148f81d783..37b3ac94bc47 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1010,20 +1010,21 @@ void audit_get_stamp(struct audit_context *ctx,
 
 extern int audit_set_type(struct audit_buffer *ab, int type);
 
-int audit_set_loginuid(struct audit_context *ctx, uid_t loginuid)
+int audit_set_loginuid(struct task_struct *task, uid_t loginuid)
 {
-	if (ctx) {
+	if (task->audit_context) {
 		struct audit_buffer *ab;
 
 		ab = audit_log_start(NULL);
 		if (ab) {
 			audit_log_format(ab, "login pid=%d uid=%u "
 				"old loginuid=%u new loginuid=%u",
-				ctx->pid, ctx->uid, ctx->loginuid, loginuid);
+				task->pid, task->uid, 
+				task->audit_context->loginuid, loginuid);
 			audit_set_type(ab, AUDIT_LOGIN);
 			audit_log_end(ab);
 		}
-		ctx->loginuid = loginuid;
+		task->audit_context->loginuid = loginuid;
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From c06fec5022ebe014af876da2df4a0eee836e97c8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@ppc970.osdl.org>
Date: Fri, 29 Apr 2005 09:37:07 -0700
Subject: Remove bogus BUG() in kernel/exit.c

It's old sanity checking that may have been useful for debugging, but
is just bogus these days.

Noticed by Mattia Belletti.
---
 kernel/exit.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 39d35935b371..93851bcd9584 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -517,8 +517,6 @@ static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_re
 	 */
 	BUG_ON(p == reaper || reaper->exit_state >= EXIT_ZOMBIE);
 	p->real_parent = reaper;
-	if (p->parent == p->real_parent)
-		BUG();
 }
 
 static inline void reparent_thread(task_t *p, task_t *father, int traced)
-- 
cgit v1.2.3


From d59dd4620fb8d6422555a9e2b82a707718e68327 Mon Sep 17 00:00:00 2001
From: "akpm@osdl.org" <akpm@osdl.org>
Date: Sun, 1 May 2005 08:58:47 -0700
Subject: [PATCH] use smp_mb/wmb/rmb where possible

Replace a number of memory barriers with smp_ variants.  This means we won't
take the unnecessary hit on UP machines.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/kthread.c      |  2 +-
 kernel/profile.c      |  2 +-
 kernel/ptrace.c       |  2 +-
 kernel/stop_machine.c | 10 +++++-----
 kernel/sys.c          | 20 ++++++++++----------
 kernel/timer.c        |  2 +-
 6 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kthread.c b/kernel/kthread.c
index e377e2244103..f50f174e92da 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -174,7 +174,7 @@ int kthread_stop(struct task_struct *k)
 
 	/* Must init completion *before* thread sees kthread_stop_info.k */
 	init_completion(&kthread_stop_info.done);
-	wmb();
+	smp_wmb();
 
 	/* Now set kthread_should_stop() to true, and wake it up. */
 	kthread_stop_info.k = k;
diff --git a/kernel/profile.c b/kernel/profile.c
index a38fa70075fe..a66be468c422 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -522,7 +522,7 @@ static int __init create_hash_tables(void)
 	return 0;
 out_cleanup:
 	prof_on = 0;
-	mb();
+	smp_mb();
 	on_each_cpu(profile_nop, NULL, 0, 1);
 	for_each_online_cpu(cpu) {
 		struct page *page;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 88b306c4e841..f5cc1cec0fb4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -135,7 +135,7 @@ int ptrace_attach(struct task_struct *task)
  	    (current->gid != task->sgid) ||
  	    (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
 		goto bad;
-	rmb();
+	smp_rmb();
 	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
 		goto bad;
 	/* the same process cannot be attached many times */
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index c39ed70af174..6116b25aa7cf 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -33,7 +33,7 @@ static int stopmachine(void *cpu)
 	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
 
 	/* Ack: we are alive */
-	mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
+	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
 	atomic_inc(&stopmachine_thread_ack);
 
 	/* Simple state machine */
@@ -43,14 +43,14 @@ static int stopmachine(void *cpu)
 			local_irq_disable();
 			irqs_disabled = 1;
 			/* Ack: irqs disabled. */
-			mb(); /* Must read state first. */
+			smp_mb(); /* Must read state first. */
 			atomic_inc(&stopmachine_thread_ack);
 		} else if (stopmachine_state == STOPMACHINE_PREPARE
 			   && !prepared) {
 			/* Everyone is in place, hold CPU. */
 			preempt_disable();
 			prepared = 1;
-			mb(); /* Must read state first. */
+			smp_mb(); /* Must read state first. */
 			atomic_inc(&stopmachine_thread_ack);
 		}
 		/* Yield in first stage: migration threads need to
@@ -62,7 +62,7 @@ static int stopmachine(void *cpu)
 	}
 
 	/* Ack: we are exiting. */
-	mb(); /* Must read state first. */
+	smp_mb(); /* Must read state first. */
 	atomic_inc(&stopmachine_thread_ack);
 
 	if (irqs_disabled)
@@ -77,7 +77,7 @@ static int stopmachine(void *cpu)
 static void stopmachine_set_state(enum stopmachine_state state)
 {
 	atomic_set(&stopmachine_thread_ack, 0);
-	wmb();
+	smp_wmb();
 	stopmachine_state = state;
 	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
 		cpu_relax();
diff --git a/kernel/sys.c b/kernel/sys.c
index 462d78d55895..df2ddcc6863b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -525,7 +525,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 	if (new_egid != old_egid)
 	{
 		current->mm->dumpable = 0;
-		wmb();
+		smp_wmb();
 	}
 	if (rgid != (gid_t) -1 ||
 	    (egid != (gid_t) -1 && egid != old_rgid))
@@ -556,7 +556,7 @@ asmlinkage long sys_setgid(gid_t gid)
 		if(old_egid != gid)
 		{
 			current->mm->dumpable=0;
-			wmb();
+			smp_wmb();
 		}
 		current->gid = current->egid = current->sgid = current->fsgid = gid;
 	}
@@ -565,7 +565,7 @@ asmlinkage long sys_setgid(gid_t gid)
 		if(old_egid != gid)
 		{
 			current->mm->dumpable=0;
-			wmb();
+			smp_wmb();
 		}
 		current->egid = current->fsgid = gid;
 	}
@@ -596,7 +596,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
 	if(dumpclear)
 	{
 		current->mm->dumpable = 0;
-		wmb();
+		smp_wmb();
 	}
 	current->uid = new_ruid;
 	return 0;
@@ -653,7 +653,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 	if (new_euid != old_euid)
 	{
 		current->mm->dumpable=0;
-		wmb();
+		smp_wmb();
 	}
 	current->fsuid = current->euid = new_euid;
 	if (ruid != (uid_t) -1 ||
@@ -703,7 +703,7 @@ asmlinkage long sys_setuid(uid_t uid)
 	if (old_euid != uid)
 	{
 		current->mm->dumpable = 0;
-		wmb();
+		smp_wmb();
 	}
 	current->fsuid = current->euid = uid;
 	current->suid = new_suid;
@@ -748,7 +748,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 		if (euid != current->euid)
 		{
 			current->mm->dumpable = 0;
-			wmb();
+			smp_wmb();
 		}
 		current->euid = euid;
 	}
@@ -798,7 +798,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 		if (egid != current->egid)
 		{
 			current->mm->dumpable = 0;
-			wmb();
+			smp_wmb();
 		}
 		current->egid = egid;
 	}
@@ -845,7 +845,7 @@ asmlinkage long sys_setfsuid(uid_t uid)
 		if (uid != old_fsuid)
 		{
 			current->mm->dumpable = 0;
-			wmb();
+			smp_wmb();
 		}
 		current->fsuid = uid;
 	}
@@ -875,7 +875,7 @@ asmlinkage long sys_setfsgid(gid_t gid)
 		if (gid != old_fsgid)
 		{
 			current->mm->dumpable = 0;
-			wmb();
+			smp_wmb();
 		}
 		current->fsgid = gid;
 		key_fsgid_changed(current);
diff --git a/kernel/timer.c b/kernel/timer.c
index ecb3d67c0e14..207aa4f0aa10 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1007,7 +1007,7 @@ asmlinkage long sys_getppid(void)
 		 * Make sure we read the pid before re-reading the
 		 * parent pointer:
 		 */
-		rmb();
+		smp_rmb();
 		parent = me->group_leader->real_parent;
 		if (old != parent)
 			continue;
-- 
cgit v1.2.3


From e43379f10b42194b8a6e1de342cfb44463c0f6da Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Sun, 1 May 2005 08:59:00 -0700
Subject: [PATCH] nice and rt-prio rlimits

Add a pair of rlimits for allowing non-root tasks to raise nice and rt
priorities. Defaults to traditional behavior. Originally written by
Chris Wright.

The patch implements a simple rlimit ceiling for the RT (and nice) priorities
a task can set.  The rlimit defaults to 0, meaning no change in behavior by
default.  A value of 50 means RT priority levels 1-50 are allowed.  A value of
100 means all 99 privilege levels from 1 to 99 are allowed.  CAP_SYS_NICE is
blanket permission.

(akpm: see http://www.uwsg.iu.edu/hypermail/linux/kernel/0503.1/1921.html for
tips on integrating this with PAM).

Signed-off-by: Matt Mackall <mpm@selenic.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 25 +++++++++++++++++++------
 kernel/sys.c   |  2 +-
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 9bb7489ee645..5dadcc6df7dd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3223,6 +3223,19 @@ out_unlock:
 
 EXPORT_SYMBOL(set_user_nice);
 
+/*
+ * can_nice - check if a task can reduce its nice value
+ * @p: task
+ * @nice: nice value
+ */
+int can_nice(const task_t *p, const int nice)
+{
+	/* convert nice value [19,-20] to rlimit style value [0,39] */
+	int nice_rlim = 19 - nice;
+	return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
+		capable(CAP_SYS_NICE));
+}
+
 #ifdef __ARCH_WANT_SYS_NICE
 
 /*
@@ -3242,12 +3255,8 @@ asmlinkage long sys_nice(int increment)
 	 * We don't have to worry. Conceptually one call occurs first
 	 * and we have a single winner.
 	 */
-	if (increment < 0) {
-		if (!capable(CAP_SYS_NICE))
-			return -EPERM;
-		if (increment < -40)
-			increment = -40;
-	}
+	if (increment < -40)
+		increment = -40;
 	if (increment > 40)
 		increment = 40;
 
@@ -3257,6 +3266,9 @@ asmlinkage long sys_nice(int increment)
 	if (nice > 19)
 		nice = 19;
 
+	if (increment < 0 && !can_nice(current, nice))
+		return -EPERM;
+
 	retval = security_task_setnice(current, nice);
 	if (retval)
 		return retval;
@@ -3372,6 +3384,7 @@ recheck:
 		return -EINVAL;
 
 	if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
+	    param->sched_priority > p->signal->rlim[RLIMIT_RTPRIO].rlim_cur &&
 	    !capable(CAP_SYS_NICE))
 		return -EPERM;
 	if ((current->euid != p->euid) && (current->euid != p->uid) &&
diff --git a/kernel/sys.c b/kernel/sys.c
index df2ddcc6863b..7f43d6e62c7a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -227,7 +227,7 @@ static int set_one_prio(struct task_struct *p, int niceval, int error)
 		error = -EPERM;
 		goto out;
 	}
-	if (niceval < task_nice(p) && !capable(CAP_SYS_NICE)) {
+	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
 		error = -EACCES;
 		goto out;
 	}
-- 
cgit v1.2.3


From d59745ce3e7aa13856bca16d3bcbb95041775ff6 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Sun, 1 May 2005 08:59:02 -0700
Subject: [PATCH] clean up kernel messages

Arrange for all kernel printks to be no-ops.  Only available if
CONFIG_EMBEDDED.

This patch saves about 375k on my laptop config and nearly 100k on minimal
configs.

Signed-off-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/printk.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index 1498689548d1..290a07ce2c8a 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -85,10 +85,6 @@ static int console_locked;
  */
 static DEFINE_SPINLOCK(logbuf_lock);
 
-static char __log_buf[__LOG_BUF_LEN];
-static char *log_buf = __log_buf;
-static int log_buf_len = __LOG_BUF_LEN;
-
 #define LOG_BUF_MASK	(log_buf_len-1)
 #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK])
 
@@ -99,7 +95,6 @@ static int log_buf_len = __LOG_BUF_LEN;
 static unsigned long log_start;	/* Index into log_buf: next char to be read by syslog() */
 static unsigned long con_start;	/* Index into log_buf: next char to be sent to consoles */
 static unsigned long log_end;	/* Index into log_buf: most-recently-written-char + 1 */
-static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */
 
 /*
  *	Array of consoles built from command line options (console=)
@@ -120,6 +115,13 @@ static int preferred_console = -1;
 /* Flag: console code may call schedule() */
 static int console_may_schedule;
 
+#ifdef CONFIG_PRINTK
+
+static char __log_buf[__LOG_BUF_LEN];
+static char *log_buf = __log_buf;
+static int log_buf_len = __LOG_BUF_LEN;
+static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */
+
 /*
  *	Setup a list of consoles. Called from init/main.c
  */
@@ -535,6 +537,7 @@ __setup("time", printk_time_setup);
  * then changes console_loglevel may break. This is because console_loglevel
  * is inspected when the actual printing occurs.
  */
+
 asmlinkage int printk(const char *fmt, ...)
 {
 	va_list args;
@@ -655,6 +658,18 @@ out:
 EXPORT_SYMBOL(printk);
 EXPORT_SYMBOL(vprintk);
 
+#else
+
+asmlinkage long sys_syslog(int type, char __user * buf, int len)
+{
+	return 0;
+}
+
+int do_syslog(int type, char __user * buf, int len) { return 0; }
+static void call_console_drivers(unsigned long start, unsigned long end) {}
+
+#endif
+
 /**
  * acquire_console_sem - lock the console system for exclusive use.
  *
@@ -931,7 +946,7 @@ int unregister_console(struct console * console)
 	return res;
 }
 EXPORT_SYMBOL(unregister_console);
-	
+
 /**
  * tty_write_message - write a message to a certain tty, not just the console.
  *
-- 
cgit v1.2.3


From 66cf8f1443301a1d5bc9c21709e4264e6919a3e1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@us.ibm.com>
Date: Sun, 1 May 2005 08:59:03 -0700
Subject: [PATCH] kernel/rcupdate.c: make the exports EXPORT_SYMBOL_GPL

The gpl exports need to be put back.  Moving them to GPL -- but in a
measured manner, as I proposed on this list some months ago -- is fine.
Changing these particular exports precipitously is most definitely -not-
fine.  Here is my earlier proposal:

	http://marc.theaimsgroup.com/?l=linux-kernel&m=110520930301813&w=2

See below for a patch that puts the exports back, along with an updated
version of my earlier patch that starts the process of moving them to GPL.
I will also be following this message with RFC patches that introduce two
(EXPORT_SYMBOL_GPL) interfaces to replace synchronize_kernel(), which then
becomes deprecated.

Signed-off-by: <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/rcupdate.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index d00eded75d71..ad497722f04f 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -465,6 +465,6 @@ void synchronize_kernel(void)
 }
 
 module_param(maxbatch, int, 0);
-EXPORT_SYMBOL_GPL(call_rcu);
-EXPORT_SYMBOL_GPL(call_rcu_bh);
-EXPORT_SYMBOL_GPL(synchronize_kernel);
+EXPORT_SYMBOL(call_rcu);  /* WARNING: GPL-only in April 2006. */
+EXPORT_SYMBOL(call_rcu_bh);  /* WARNING: GPL-only in April 2006. */
+EXPORT_SYMBOL(synchronize_kernel);  /* WARNING: GPL-only in April 2006. */
-- 
cgit v1.2.3


From 9b06e818985d139fd9e82c28297f7744e1b484e1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@us.ibm.com>
Date: Sun, 1 May 2005 08:59:04 -0700
Subject: [PATCH] Deprecate synchronize_kernel, GPL replacement

The synchronize_kernel() primitive is used for quite a few different purposes:
waiting for RCU readers, waiting for NMIs, waiting for interrupts, and so on.
This makes RCU code harder to read, since synchronize_kernel() might or might
not have matching rcu_read_lock()s.  This patch creates a new
synchronize_rcu() that is to be used for RCU readers and a new
synchronize_sched() that is used for the rest.  These two new primitives
currently have the same implementation, but this is might well change with
additional real-time support.  Both new primitives are GPL-only, the old
primitive is deprecated.

Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/rcupdate.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index ad497722f04f..f436993bd590 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -444,15 +444,18 @@ static void wakeme_after_rcu(struct rcu_head  *head)
 }
 
 /**
- * synchronize_kernel - wait until a grace period has elapsed.
+ * synchronize_rcu - wait until a grace period has elapsed.
  *
  * Control will return to the caller some time after a full grace
  * period has elapsed, in other words after all currently executing RCU
  * read-side critical sections have completed.  RCU read-side critical
  * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
  * and may be nested.
+ *
+ * If your read-side code is not protected by rcu_read_lock(), do -not-
+ * use synchronize_rcu().
  */
-void synchronize_kernel(void)
+void synchronize_rcu(void)
 {
 	struct rcu_synchronize rcu;
 
@@ -464,7 +467,16 @@ void synchronize_kernel(void)
 	wait_for_completion(&rcu.completion);
 }
 
+/*
+ * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
+ */
+void synchronize_kernel(void)
+{
+	synchronize_rcu();
+}
+
 module_param(maxbatch, int, 0);
 EXPORT_SYMBOL(call_rcu);  /* WARNING: GPL-only in April 2006. */
 EXPORT_SYMBOL(call_rcu_bh);  /* WARNING: GPL-only in April 2006. */
+EXPORT_SYMBOL_GPL(synchronize_rcu);
 EXPORT_SYMBOL(synchronize_kernel);  /* WARNING: GPL-only in April 2006. */
-- 
cgit v1.2.3


From fbd568a3e61a7decb8a754ad952aaa5b5c82e9e5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@us.ibm.com>
Date: Sun, 1 May 2005 08:59:04 -0700
Subject: [PATCH] Change synchronize_kernel to _rcu and _sched

This patch changes calls to synchronize_kernel(), deprecated in the earlier
"Deprecate synchronize_kernel, GPL replacement" patch to instead call the new
synchronize_rcu() and synchronize_sched() APIs.

Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/module.c  | 2 +-
 kernel/profile.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/module.c b/kernel/module.c
index 2dbfa0773faf..5734ab09d3f9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1801,7 +1801,7 @@ sys_init_module(void __user *umod,
 		/* Init routine failed: abort.  Try to protect us from
                    buggy refcounters. */
 		mod->state = MODULE_STATE_GOING;
-		synchronize_kernel();
+		synchronize_sched();
 		if (mod->unsafe)
 			printk(KERN_ERR "%s: module is now stuck!\n",
 			       mod->name);
diff --git a/kernel/profile.c b/kernel/profile.c
index a66be468c422..0221a50ca867 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -184,7 +184,7 @@ void unregister_timer_hook(int (*hook)(struct pt_regs *))
 	WARN_ON(hook != timer_hook);
 	timer_hook = NULL;
 	/* make sure all CPUs see the NULL hook */
-	synchronize_kernel();
+	synchronize_sched();  /* Allow ongoing interrupts to complete. */
 }
 
 EXPORT_SYMBOL_GPL(register_timer_hook);
-- 
cgit v1.2.3


From 7d87e14c236d6c4cab66d87cf0bc1e0f0375d308 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Sun, 1 May 2005 08:59:12 -0700
Subject: [PATCH] consolidate sys_shmat

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sys_ni.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 1802a311dd3f..0dda70ed1f98 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -52,6 +52,7 @@ cond_syscall(sys_msgsnd);
 cond_syscall(sys_msgrcv);
 cond_syscall(sys_msgctl);
 cond_syscall(sys_shmget);
+cond_syscall(sys_shmat);
 cond_syscall(sys_shmdt);
 cond_syscall(sys_shmctl);
 cond_syscall(sys_mq_open);
-- 
cgit v1.2.3


From 7ed20e1ad521b5f5df61bf6559ae60738e393741 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <juhl-lkml@dif.dk>
Date: Sun, 1 May 2005 08:59:14 -0700
Subject: [PATCH] convert that currently tests _NSIG directly to use
 valid_signal()

Convert most of the current code that uses _NSIG directly to instead use
valid_signal().  This avoids gcc -W warnings and off-by-one errors.

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c   | 5 +++--
 kernel/futex.c  | 3 ++-
 kernel/ptrace.c | 3 ++-
 kernel/signal.c | 9 +++++----
 kernel/sys.c    | 3 ++-
 5 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 93851bcd9584..eb8da36e13df 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -27,6 +27,7 @@
 #include <linux/mempolicy.h>
 #include <linux/cpuset.h>
 #include <linux/syscalls.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -277,7 +278,7 @@ void set_special_pids(pid_t session, pid_t pgrp)
  */
 int allow_signal(int sig)
 {
-	if (sig < 1 || sig > _NSIG)
+	if (!valid_signal(sig) || sig < 1)
 		return -EINVAL;
 
 	spin_lock_irq(&current->sighand->siglock);
@@ -298,7 +299,7 @@ EXPORT_SYMBOL(allow_signal);
 
 int disallow_signal(int sig)
 {
-	if (sig < 1 || sig > _NSIG)
+	if (!valid_signal(sig) || sig < 1)
 		return -EINVAL;
 
 	spin_lock_irq(&current->sighand->siglock);
diff --git a/kernel/futex.c b/kernel/futex.c
index 7b54a672d0ad..c7130f86106c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -39,6 +39,7 @@
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
+#include <linux/signal.h>
 
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
@@ -654,7 +655,7 @@ static int futex_fd(unsigned long uaddr, int signal)
 	int ret, err;
 
 	ret = -EINVAL;
-	if (signal < 0 || signal > _NSIG)
+	if (!valid_signal(signal))
 		goto out;
 
 	ret = get_unused_fd();
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index f5cc1cec0fb4..8dcb8f6288bc 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -16,6 +16,7 @@
 #include <linux/smp_lock.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -166,7 +167,7 @@ bad:
 
 int ptrace_detach(struct task_struct *child, unsigned int data)
 {
-	if ((unsigned long) data > _NSIG)
+	if (!valid_signal(data))
 		return	-EIO;
 
 	/* Architecture-specific hardware disable .. */
diff --git a/kernel/signal.c b/kernel/signal.c
index e6567d7f2b62..8f3debc77c5b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -23,6 +23,7 @@
 #include <linux/syscalls.h>
 #include <linux/ptrace.h>
 #include <linux/posix-timers.h>
+#include <linux/signal.h>
 #include <asm/param.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -646,7 +647,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
 				 struct task_struct *t)
 {
 	int error = -EINVAL;
-	if (sig < 0 || sig > _NSIG)
+	if (!valid_signal(sig))
 		return error;
 	error = -EPERM;
 	if ((!info || ((unsigned long)info != 1 &&
@@ -1245,7 +1246,7 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 	 * Make sure legacy kernel users don't send in bad values
 	 * (normal paths check this in check_kill_permission).
 	 */
-	if (sig < 0 || sig > _NSIG)
+	if (!valid_signal(sig))
 		return -EINVAL;
 
 	/*
@@ -1520,7 +1521,7 @@ void do_notify_parent(struct task_struct *tsk, int sig)
 		if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
 			sig = 0;
 	}
-	if (sig > 0 && sig <= _NSIG)
+	if (valid_signal(sig) && sig > 0)
 		__group_send_sig_info(sig, &info, tsk->parent);
 	__wake_up_parent(tsk, tsk->parent);
 	spin_unlock_irqrestore(&psig->siglock, flags);
@@ -2364,7 +2365,7 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
 {
 	struct k_sigaction *k;
 
-	if (sig < 1 || sig > _NSIG || (act && sig_kernel_only(sig)))
+	if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
 		return -EINVAL;
 
 	k = &current->sighand->action[sig-1];
diff --git a/kernel/sys.c b/kernel/sys.c
index 7f43d6e62c7a..f64e97cabe25 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -25,6 +25,7 @@
 #include <linux/dcookies.h>
 #include <linux/suspend.h>
 #include <linux/tty.h>
+#include <linux/signal.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -1637,7 +1638,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 	switch (option) {
 		case PR_SET_PDEATHSIG:
 			sig = arg2;
-			if (sig < 0 || sig > _NSIG) {
+			if (!valid_signal(sig)) {
 				error = -EINVAL;
 				break;
 			}
-- 
cgit v1.2.3


From 4dc3b16ba18c0f967ad100c52fa65b01a4f76ff0 Mon Sep 17 00:00:00 2001
From: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Date: Sun, 1 May 2005 08:59:25 -0700
Subject: [PATCH] DocBook: changes and extensions to the kernel documentation

I have recompiled Linux kernel 2.6.11.5 documentation for me and our
university students again.  The documentation could be extended for more
sources which are equipped by structured comments for recent 2.6 kernels.  I
have tried to proceed with that task.  I have done that more times from 2.6.0
time and it gets boring to do same changes again and again.  Linux kernel
compiles after changes for i386 and ARM targets.  I have added references to
some more files into kernel-api book, I have added some section names as well.
 So please, check that changes do not break something and that categories are
not too much skewed.

I have changed kernel-doc to accept "fastcall" and "asmlinkage" words reserved
by kernel convention.  Most of the other changes are modifications in the
comments to make kernel-doc happy, accept some parameters description and do
not bail out on errors.  Changed <pid> to @pid in the description, moved some
#ifdef before comments to correct function to comments bindings, etc.

You can see result of the modified documentation build at
  http://cmp.felk.cvut.cz/~pisa/linux/lkdb-2.6.11.tar.gz

Some more sources are ready to be included into kernel-doc generated
documentation.  Sources has been added into kernel-api for now.  Some more
section names added and probably some more chaos introduced as result of quick
cleanup work.

Signed-off-by: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Signed-off-by: Martin Waitz <tali@admingilde.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c         | 2 +-
 kernel/power/swsusp.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index eb8da36e13df..419d9d3c4c48 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -210,7 +210,7 @@ static inline int has_stopped_jobs(int pgrp)
 }
 
 /**
- * reparent_to_init() - Reparent the calling kernel thread to the init task.
+ * reparent_to_init - Reparent the calling kernel thread to the init task.
  *
  * If a kernel thread is launched as a result of a system call, or if
  * it ever exits, it should generally reparent itself to init so that
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index ae5bebc3b18f..90b3b68dee3f 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -1099,7 +1099,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
 	return pblist;
 }
 
-/**
+/*
  *	Using bio to read from swap.
  *	This code requires a bit more work than just using buffer heads
  *	but, it is the recommended way for 2.5/2.6.
-- 
cgit v1.2.3


From 67be2dd1bace0ec7ce2dbc1bba3f8df3d7be597e Mon Sep 17 00:00:00 2001
From: Martin Waitz <tali@admingilde.org>
Date: Sun, 1 May 2005 08:59:26 -0700
Subject: [PATCH] DocBook: fix some descriptions

Some KernelDoc descriptions are updated to match the current code.
No code changes.

Signed-off-by: Martin Waitz <tali@admingilde.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c  | 3 ++-
 kernel/sysctl.c | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 5dadcc6df7dd..0dc3158667a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2906,6 +2906,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * @q: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
+ * @key: is directly passed to the wakeup function
  */
 void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
 				int nr_exclusive, void *key)
@@ -2928,7 +2929,7 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
 }
 
 /**
- * __wake_up - sync- wake up threads blocked on a waitqueue.
+ * __wake_up_sync - wake up threads blocked on a waitqueue.
  * @q: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 79dbd93bd697..701d12c63068 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1991,6 +1991,8 @@ int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
  * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
+ * @ppos: file position
+ * @ppos: the current position in the file
  *
  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
  * values from/to the user buffer, treated as an ASCII string. 
-- 
cgit v1.2.3


From 408b664a7d394a5e4315fbd14aca49b042cb2b08 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Sun, 1 May 2005 08:59:29 -0700
Subject: [PATCH] make lots of things static

Another large rollup of various patches from Adrian which make things static
where they were needlessly exported.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 419d9d3c4c48..7be283d98983 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -39,6 +39,8 @@ extern struct task_struct *child_reaper;
 
 int getrusage(struct task_struct *, int, struct rusage __user *);
 
+static void exit_mm(struct task_struct * tsk);
+
 static void __unhash_process(struct task_struct *p)
 {
 	nr_threads--;
@@ -474,7 +476,7 @@ EXPORT_SYMBOL_GPL(exit_fs);
  * Turn us into a lazy TLB process if we
  * aren't already..
  */
-void exit_mm(struct task_struct * tsk)
+static void exit_mm(struct task_struct * tsk)
 {
 	struct mm_struct *mm = tsk->mm;
 
-- 
cgit v1.2.3


From 0dd8e06bdaa0a97e706ee1a489a1f6176c4ddc64 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@osdl.org>
Date: Tue, 3 May 2005 14:01:15 +0100
Subject: [PATCH] add new audit data to last skb

When adding more formatted audit data to an skb for delivery to userspace,
the kernel will attempt to reuse an skb that has spare room.  However, if
the audit message has already been fragmented to multiple skb's, the search
for spare room in the skb uses the head of the list.  This will corrupt the
audit message with trailing bytes being placed midway through the stream.
Fix is to look at the end of the list.

Signed-off-by: Chris Wright <chrisw@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 kernel/audit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index 4a697c73faec..00455a9cf027 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -486,7 +486,7 @@ static void audit_log_move(struct audit_buffer *ab)
 	if (ab->len == 0)
 		return;
 
-	skb = skb_peek(&ab->sklist);
+	skb = skb_peek_tail(&ab->sklist);
 	if (!skb || skb_tailroom(skb) <= ab->len + extra) {
 		skb = alloc_skb(2 * ab->len + extra, GFP_ATOMIC);
 		if (!skb) {
-- 
cgit v1.2.3


From 012914dad25bd5cacf88af4429eecda62a06020d Mon Sep 17 00:00:00 2001
From: Russ Anderson <(rja@sgi.com)>
Date: Sat, 23 Apr 2005 00:08:00 -0700
Subject: [patch] MCA recovery module undefined symbol fix

The patch "MCA recovery improvements" added do_exit to mca_drv.c.
That's fine when the mca recovery code is built in the kernel
(CONFIG_IA64_MCA_RECOVERY=y) but breaks building the mca recovery
code as a module (CONFIG_IA64_MCA_RECOVERY=m).

Most users are currently building this as a module, as loading
and unloading the module provides a very convenient way to turn
on/off error recovery.

This patch exports do_exit, so mca_drv.c can build as a module.

Signed-off-by: Russ Anderson (rja@sgi.com)
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 kernel/exit.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 7be283d98983..edaa50b5bbfa 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -846,6 +846,8 @@ fastcall NORET_TYPE void do_exit(long code)
 	for (;;) ;
 }
 
+EXPORT_SYMBOL_GPL(do_exit);
+
 NORET_TYPE void complete_and_exit(struct completion *comp, long code)
 {
 	if (comp)
-- 
cgit v1.2.3


From 2a0a6ebee1d68552152ae8d4aeda91d806995dec Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 3 May 2005 14:55:09 -0700
Subject: [NETLINK]: Synchronous message processing.

Let's recap the problem.  The current asynchronous netlink kernel
message processing is vulnerable to these attacks:

1) Hit and run: Attacker sends one or more messages and then exits
before they're processed.  This may confuse/disable the next netlink
user that gets the netlink address of the attacker since it may
receive the responses to the attacker's messages.

Proposed solutions:

a) Synchronous processing.
b) Stream mode socket.
c) Restrict/prohibit binding.

2) Starvation: Because various netlink rcv functions were written
to not return until all messages have been processed on a socket,
it is possible for these functions to execute for an arbitrarily
long period of time.  If this is successfully exploited it could
also be used to hold rtnl forever.

Proposed solutions:

a) Synchronous processing.
b) Stream mode socket.

Firstly let's cross off solution c).  It only solves the first
problem and it has user-visible impacts.  In particular, it'll
break user space applications that expect to bind or communicate
with specific netlink addresses (pid's).

So we're left with a choice of synchronous processing versus
SOCK_STREAM for netlink.

For the moment I'm sticking with the synchronous approach as
suggested by Alexey since it's simpler and I'd rather spend
my time working on other things.

However, it does have a number of deficiencies compared to the
stream mode solution:

1) User-space to user-space netlink communication is still vulnerable.

2) Inefficient use of resources.  This is especially true for rtnetlink
since the lock is shared with other users such as networking drivers.
The latter could hold the rtnl while communicating with hardware which
causes the rtnetlink user to wait when it could be doing other things.

3) It is still possible to DoS all netlink users by flooding the kernel
netlink receive queue.  The attacker simply fills the receive socket
with a single netlink message that fills up the entire queue.  The
attacker then continues to call sendmsg with the same message in a loop.

Point 3) can be countered by retransmissions in user-space code, however
it is pretty messy.

In light of these problems (in particular, point 3), we should implement
stream mode netlink at some point.  In the mean time, here is a patch
that implements synchronous processing.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/audit.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/audit.c b/kernel/audit.c
index 0f84dd7af2c8..ac26d4d960d3 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -427,7 +427,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 /* Get message from skb (based on rtnetlink_rcv_skb).  Each message is
  * processed by audit_receive_msg.  Malformed skbs with wrong length are
  * discarded silently.  */
-static int audit_receive_skb(struct sk_buff *skb)
+static void audit_receive_skb(struct sk_buff *skb)
 {
 	int		err;
 	struct nlmsghdr	*nlh;
@@ -436,7 +436,7 @@ static int audit_receive_skb(struct sk_buff *skb)
 	while (skb->len >= NLMSG_SPACE(0)) {
 		nlh = (struct nlmsghdr *)skb->data;
 		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
-			return 0;
+			return;
 		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
 		if (rlen > skb->len)
 			rlen = skb->len;
@@ -446,23 +446,20 @@ static int audit_receive_skb(struct sk_buff *skb)
 			netlink_ack(skb, nlh, 0);
 		skb_pull(skb, rlen);
 	}
-	return 0;
 }
 
 /* Receive messages from netlink socket. */
 static void audit_receive(struct sock *sk, int length)
 {
 	struct sk_buff  *skb;
+	unsigned int qlen;
 
-	if (down_trylock(&audit_netlink_sem))
-		return;
+	down(&audit_netlink_sem);
 
-				/* FIXME: this must not cause starvation */
-	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
-		if (audit_receive_skb(skb) && skb->len)
-			skb_queue_head(&sk->sk_receive_queue, skb);
-		else
-			kfree_skb(skb);
+	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		audit_receive_skb(skb);
+		kfree_skb(skb);
 	}
 	up(&audit_netlink_sem);
 }
-- 
cgit v1.2.3


From 075d6eb16d273dab7b7b4b83fcee8bce4ee387ed Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Thu, 5 May 2005 16:15:09 -0700
Subject: [PATCH] ppc32: platform-specific functions missing from kallsyms.

The PPC32 kernel puts platform-specific functions into separate sections so
that unneeded parts of it can be freed when we've booted and actually
worked out what we're running on today.

This makes kallsyms ignore those functions, because they're not between
_[se]text or _[se]inittext.  Rather than teaching kallsyms about the
various pmac/chrp/etc sections, this patch adds '_[se]extratext' markers
for kallsyms.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/kallsyms.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 1627f8d6e0cd..13bcec151b57 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -46,6 +46,14 @@ static inline int is_kernel_inittext(unsigned long addr)
 	return 0;
 }
 
+static inline int is_kernel_extratext(unsigned long addr)
+{
+	if (addr >= (unsigned long)_sextratext
+	    && addr <= (unsigned long)_eextratext)
+		return 1;
+	return 0;
+}
+
 static inline int is_kernel_text(unsigned long addr)
 {
 	if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext)
@@ -169,8 +177,9 @@ const char *kallsyms_lookup(unsigned long addr,
 	namebuf[0] = 0;
 
 	if ((all_var && is_kernel(addr)) ||
-	    (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr)))) {
-		unsigned long symbol_end=0;
+	    (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr) ||
+				is_kernel_extratext(addr)))) {
+		unsigned long symbol_end = 0;
 
 		/* do a binary search on the sorted kallsyms_addresses array */
 		low = 0;
-- 
cgit v1.2.3


From 7d12e522ba13ce718b7ec32b75803dece8adb072 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 5 May 2005 16:15:11 -0700
Subject: [PATCH] ppc64: remove hidden -fno-omit-frame-pointer for schedule.c

While looking at code generated by gcc4.0 I noticed some functions still
had frame pointers, even after we stopped ppc64 from defining
CONFIG_FRAME_POINTER.  It turns out kernel/Makefile hardwires
-fno-omit-frame-pointer on when compiling schedule.c.

Create CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER and define it on architectures
that dont require frame pointers in sched.c code.

(akpm: blame me for the name)

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/Makefile b/kernel/Makefile
index eb88b446c2cc..b01d26fe8db7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -29,7 +29,7 @@ obj-$(CONFIG_SYSFS) += ksysfs.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 
-ifneq ($(CONFIG_IA64),y)
+ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 # needed for x86 only.  Why this used to be enabled for all architectures is beyond
 # me.  I suspect most platforms don't need this, but until we know that for sure
-- 
cgit v1.2.3


From 04dea5f93231204cc3ca0ab793ce76dbb10c86ba Mon Sep 17 00:00:00 2001
From: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Date: Thu, 5 May 2005 16:15:41 -0700
Subject: [PATCH] Kprobes: Oops! in unregister_kprobe()

kernel oops!  when unregister_kprobe() is called on a non-registered
kprobe.  This patch fixes the above problem by checking if the probe exists
before unregistering.

Signed-off-by: Prasanna S Panchamukhi <prasanna@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/kprobes.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1d5dd1337bd1..d8903e60c99a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -107,13 +107,17 @@ rm_kprobe:
 void unregister_kprobe(struct kprobe *p)
 {
 	unsigned long flags;
-	arch_remove_kprobe(p);
 	spin_lock_irqsave(&kprobe_lock, flags);
+	if (!get_kprobe(p->addr)) {
+		spin_unlock_irqrestore(&kprobe_lock, flags);
+		return;
+	}
 	*p->addr = p->opcode;
 	hlist_del(&p->hlist);
 	flush_icache_range((unsigned long) p->addr,
 			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
 	spin_unlock_irqrestore(&kprobe_lock, flags);
+	arch_remove_kprobe(p);
 }
 
 static struct notifier_block kprobe_exceptions_nb = {
-- 
cgit v1.2.3


From 64f562c6df3cfc5d1b2b4bdbcb7951457df9c237 Mon Sep 17 00:00:00 2001
From: Ananth N Mavinakayanahalli <amavin@redhat.com>
Date: Thu, 5 May 2005 16:15:42 -0700
Subject: [PATCH] kprobes: Allow multiple kprobes at the same address

Allow registration of multiple kprobes at an address in an architecture
agnostic way.  Corresponding handlers will be invoked in a sequence.  But,
a kprobe and a jprobe can't (yet) co-exist at the same address.

Signed-off-by: Ananth N Mavinakayanahalli <amavin@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/kprobes.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 131 insertions(+), 13 deletions(-)

(limited to 'kernel')

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d8903e60c99a..037142b72a49 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -44,6 +44,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 
 unsigned int kprobe_cpu = NR_CPUS;
 static DEFINE_SPINLOCK(kprobe_lock);
+static struct kprobe *curr_kprobe;
 
 /* Locks kprobe: irqs must be disabled */
 void lock_kprobes(void)
@@ -73,22 +74,139 @@ struct kprobe *get_kprobe(void *addr)
 	return NULL;
 }
 
+/*
+ * Aggregate handlers for multiple kprobes support - these handlers
+ * take care of invoking the individual kprobe handlers on p->list
+ */
+int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kprobe *kp;
+
+	list_for_each_entry(kp, &p->list, list) {
+		if (kp->pre_handler) {
+			curr_kprobe = kp;
+			kp->pre_handler(kp, regs);
+			curr_kprobe = NULL;
+		}
+	}
+	return 0;
+}
+
+void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
+		unsigned long flags)
+{
+	struct kprobe *kp;
+
+	list_for_each_entry(kp, &p->list, list) {
+		if (kp->post_handler) {
+			curr_kprobe = kp;
+			kp->post_handler(kp, regs, flags);
+			curr_kprobe = NULL;
+		}
+	}
+	return;
+}
+
+int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr)
+{
+	/*
+	 * if we faulted "during" the execution of a user specified
+	 * probe handler, invoke just that probe's fault handler
+	 */
+	if (curr_kprobe && curr_kprobe->fault_handler) {
+		if (curr_kprobe->fault_handler(curr_kprobe, regs, trapnr))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Fill in the required fields of the "manager kprobe". Replace the
+ * earlier kprobe in the hlist with the manager kprobe
+ */
+static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
+{
+	ap->addr = p->addr;
+	ap->opcode = p->opcode;
+	memcpy(&ap->ainsn, &p->ainsn, sizeof(struct arch_specific_insn));
+
+	ap->pre_handler = aggr_pre_handler;
+	ap->post_handler = aggr_post_handler;
+	ap->fault_handler = aggr_fault_handler;
+
+	INIT_LIST_HEAD(&ap->list);
+	list_add(&p->list, &ap->list);
+
+	INIT_HLIST_NODE(&ap->hlist);
+	hlist_del(&p->hlist);
+	hlist_add_head(&ap->hlist,
+		&kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]);
+}
+
+/*
+ * This is the second or subsequent kprobe at the address - handle
+ * the intricacies
+ * TODO: Move kcalloc outside the spinlock
+ */
+static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
+{
+	int ret = 0;
+	struct kprobe *ap;
+
+	if (old_p->break_handler || p->break_handler) {
+		ret = -EEXIST;	/* kprobe and jprobe can't (yet) coexist */
+	} else if (old_p->pre_handler == aggr_pre_handler) {
+		list_add(&p->list, &old_p->list);
+	} else {
+		ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC);
+		if (!ap)
+			return -ENOMEM;
+		add_aggr_kprobe(ap, old_p);
+		list_add(&p->list, &ap->list);
+	}
+	return ret;
+}
+
+/* kprobe removal house-keeping routines */
+static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags)
+{
+	*p->addr = p->opcode;
+	hlist_del(&p->hlist);
+	flush_icache_range((unsigned long) p->addr,
+		   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+	spin_unlock_irqrestore(&kprobe_lock, flags);
+	arch_remove_kprobe(p);
+}
+
+static inline void cleanup_aggr_kprobe(struct kprobe *old_p,
+		struct kprobe *p, unsigned long flags)
+{
+	list_del(&p->list);
+	if (list_empty(&old_p->list)) {
+		cleanup_kprobe(old_p, flags);
+		kfree(old_p);
+	} else
+		spin_unlock_irqrestore(&kprobe_lock, flags);
+}
+
 int register_kprobe(struct kprobe *p)
 {
 	int ret = 0;
 	unsigned long flags = 0;
+	struct kprobe *old_p;
 
 	if ((ret = arch_prepare_kprobe(p)) != 0) {
 		goto rm_kprobe;
 	}
 	spin_lock_irqsave(&kprobe_lock, flags);
-	INIT_HLIST_NODE(&p->hlist);
-	if (get_kprobe(p->addr)) {
-		ret = -EEXIST;
+	old_p = get_kprobe(p->addr);
+	if (old_p) {
+		ret = register_aggr_kprobe(old_p, p);
 		goto out;
 	}
-	arch_copy_kprobe(p);
 
+	arch_copy_kprobe(p);
+	INIT_HLIST_NODE(&p->hlist);
 	hlist_add_head(&p->hlist,
 		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
 
@@ -107,17 +225,17 @@ rm_kprobe:
 void unregister_kprobe(struct kprobe *p)
 {
 	unsigned long flags;
+	struct kprobe *old_p;
+
 	spin_lock_irqsave(&kprobe_lock, flags);
-	if (!get_kprobe(p->addr)) {
+	old_p = get_kprobe(p->addr);
+	if (old_p) {
+		if (old_p->pre_handler == aggr_pre_handler)
+			cleanup_aggr_kprobe(old_p, p, flags);
+		else
+			cleanup_kprobe(p, flags);
+	} else
 		spin_unlock_irqrestore(&kprobe_lock, flags);
-		return;
-	}
-	*p->addr = p->opcode;
-	hlist_del(&p->hlist);
-	flush_icache_range((unsigned long) p->addr,
-			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
-	spin_unlock_irqrestore(&kprobe_lock, flags);
-	arch_remove_kprobe(p);
 }
 
 static struct notifier_block kprobe_exceptions_nb = {
-- 
cgit v1.2.3


From b7e4e85337060354f8b860cc38066725559313a4 Mon Sep 17 00:00:00 2001
From: Paulo Marques <pmarques@grupopie.com>
Date: Thu, 5 May 2005 16:15:49 -0700
Subject: [PATCH] setitimer timer expires too early

It seems that the code responsible for this is in kernel/itimer.c:126:

	p->signal->real_timer.expires = jiffies + interval;
	add_timer(&p->signal->real_timer);

If you request an interval of, lets say 900 usecs, the interval given by
timeval_to_jiffies will be 1.

If you request this when we are half-way between two timer ticks, the
interval will only give 400 usecs.

If we want to guarantee that we never ever give intervals less than
requested, the simple solution would be to change that to:

	p->signal->real_timer.expires = jiffies + interval + 1;

This however will produce pathological cases, like having a idle system
being requested 1 ms timeouts will give systematically 2 ms timeouts,
whereas currently it simply gives a few usecs less than 1 ms.

The complex (and more computationally expensive) solution would be to
check the gettimeofday time, and compute the correct number of jiffies.
This way, if we request a 300 usecs timer 200 usecs inside the timer
tick, we can wait just one tick, but not if we are 800 usecs inside the
tick. This would also mean that we would have to lock preemption during
these computations to avoid races, etc.

I've searched the archives but couldn't find this particular issue being
discussed before.

Attached is a patch to do the simple solution, in case anybody thinks
that it should be used.

Signed-Off-By: Paulo Marques <pmarques@grupopie.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/itimer.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/itimer.c b/kernel/itimer.c
index e9a40e947e07..1dc988e0d2c7 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -123,7 +123,11 @@ static inline void it_real_arm(struct task_struct *p, unsigned long interval)
 		return;
 	if (interval > (unsigned long) LONG_MAX)
 		interval = LONG_MAX;
-	p->signal->real_timer.expires = jiffies + interval;
+	/* the "+ 1" below makes sure that the timer doesn't go off before
+	 * the interval requested. This could happen if
+	 * time requested % (usecs per jiffy) is more than the usecs left
+	 * in the current jiffy */
+	p->signal->real_timer.expires = jiffies + interval + 1;
 	add_timer(&p->signal->real_timer);
 }
 
-- 
cgit v1.2.3


From ebe8b54134314cc31331f6e26f42276cd947d1df Mon Sep 17 00:00:00 2001
From: Domen Puncer <domen@coderock.org>
Date: Thu, 5 May 2005 16:16:19 -0700
Subject: [PATCH] correctly name the Shell sort

As per http://www.nist.gov/dads/HTML/shellsort.html, this should be
referred to as a Shell sort.  Shell-Metzner is a misnomer.

Signed-off-by: Daniel Dickman <didickman@yahoo.com>
Signed-off-by: Domen Puncer <domen@coderock.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sys.c b/kernel/sys.c
index f64e97cabe25..f006632c2ba7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1195,7 +1195,7 @@ static int groups_from_user(struct group_info *group_info,
 	return 0;
 }
 
-/* a simple shell-metzner sort */
+/* a simple Shell sort */
 static void groups_sort(struct group_info *group_info)
 {
 	int base, max, stride;
-- 
cgit v1.2.3