From a271c241a6036d4d583d0f47a02ba5f18b8b92b5 Mon Sep 17 00:00:00 2001 From: Tom 'spot' Callaway Date: Sun, 24 Apr 2005 20:38:02 -0700 Subject: [SPARC]: Stop-A printk cleanup This patch is incredibly trivial, but it does resolve some of the user confusion as to what "L1-A" actually is. Clarify printk message to refer to Stop-A (L1-A). Gentoo has a virtually identical patch in their kernel sources. Signed-off-by: Tom 'spot' Callaway Signed-off-by: David S. Miller --- kernel/panic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index 0fa3f3a66fb6..081f7465fc8d 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -102,9 +102,9 @@ NORET_TYPE void panic(const char * fmt, ...) #ifdef __sparc__ { extern int stop_a_enabled; - /* Make sure the user can actually press L1-A */ + /* Make sure the user can actually press Stop-A (L1-A) */ stop_a_enabled = 1; - printk(KERN_EMERG "Press L1-A to return to the boot prom\n"); + printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n"); } #endif #if defined(CONFIG_ARCH_S390) -- cgit v1.2.3 From 9acf6597c533f3d5c991f730c6a1be296679018e Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 28 Apr 2005 08:13:58 -0700 Subject: [PATCH] time interpolator: Fix settimeofday inaccuracy settimeofday will set the time a little bit too early on systems using time interpolation since it subtracts the current interpolator offset from the time. This used to be necessary with the code in 2.6.9 and earlier but the new code resets the time interpolator after setting the time. Thus the time is set too early and gettimeofday will return a time slightly before the time specified with settimeofday if invoked immeditely after settimeofday. This removes the obsolete subtraction of the time interpolator offset and makes settimeofday set the time accurately. Signed-off-by: Christoph Lameter Signed-off-by: Linus Torvalds --- kernel/time.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'kernel') diff --git a/kernel/time.c b/kernel/time.c index 96fd0f499631..d4335c1c884c 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -516,14 +516,6 @@ int do_settimeofday (struct timespec *tv) write_seqlock_irq(&xtime_lock); { - /* - * This is revolting. We need to set "xtime" correctly. However, the value - * in this location is the value at the most recent update of wall time. - * Discover what correction gettimeofday would have done, and then undo - * it! - */ - nsec -= time_interpolator_get_offset(); - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); -- cgit v1.2.3 From 83c7d09173fdb6b06b109e65895392db3e49ac9c Mon Sep 17 00:00:00 2001 From: Date: Fri, 29 Apr 2005 15:54:44 +0100 Subject: AUDIT: Avoid log pollution by untrusted strings. We log strings from userspace, such as arguments to open(). These could be formatted to contain \n followed by fake audit log entries. Provide a function for logging such strings, which gives a hex dump when the string contains anything but basic printable ASCII characters. Use it for logging filenames. Signed-off-by: David Woodhouse --- kernel/audit.c | 23 +++++++++++++++++++++++ kernel/auditsc.c | 7 ++++--- 2 files changed, 27 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 0f84dd7af2c8..dca7b99615d2 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -720,6 +720,29 @@ void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) va_end(args); } +void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len) +{ + int i; + + for (i=0; i 0x7f) { + audit_log_hex(ab, string, strlen(string)); + return; + } + p++; + } + audit_log_format(ab, "\"%s\"", string); +} + + /* This is a helper-function to print the d_path without using a static * buffer or allocating another buffer in addition to the one in * audit_buffer. */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 6f1931381bc9..00e87ffff13b 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -696,9 +696,10 @@ static void audit_log_exit(struct audit_context *context) if (!ab) continue; /* audit_panic has been called */ audit_log_format(ab, "item=%d", i); - if (context->names[i].name) - audit_log_format(ab, " name=%s", - context->names[i].name); + if (context->names[i].name) { + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, context->names[i].name); + } if (context->names[i].ino != (unsigned long)-1) audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#o" " uid=%d gid=%d rdev=%02x:%02x", -- cgit v1.2.3 From 81b7854d52d35ed2353dd47033ae630d18322a2d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 29 Apr 2005 15:59:11 +0100 Subject: audit_log_untrustedstring() warning fix kernel/audit.c: In function `audit_log_untrustedstring': kernel/audit.c:736: warning: comparison is always false due to limited range of data type Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- kernel/audit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index dca7b99615d2..e7bff8000d23 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -730,7 +730,7 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf, size_t len void audit_log_untrustedstring(struct audit_buffer *ab, const char *string) { - const char *p = string; + const unsigned char *p = string; while (*p) { if (*p == '"' || *p == ' ' || *p < 0x20 || *p > 0x7f) { -- cgit v1.2.3 From 2fd6f58ba6efc82ea2c9c2630f7ff5ed9eeaf34a Mon Sep 17 00:00:00 2001 From: Date: Fri, 29 Apr 2005 16:08:28 +0100 Subject: [AUDIT] Don't allow ptrace to fool auditing, log arch of audited syscalls. We were calling ptrace_notify() after auditing the syscall and arguments, but the debugger could have _changed_ them before the syscall was actually invoked. Reorder the calls to fix that. While we're touching ever call to audit_syscall_entry(), we also make it take an extra argument: the architecture of the syscall which was made, because some architectures allow more than one type of syscall. Also add an explicit success/failure flag to audit_syscall_exit(), for the benefit of architectures which return that in a condition register rather than only returning a single register. Change type of syscall return value to 'long' not 'int'. Signed-off-by: David Woodhouse --- kernel/auditsc.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 00e87ffff13b..77e92592de57 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -123,7 +123,7 @@ struct audit_context { int major; /* syscall number */ unsigned long argv[4]; /* syscall arguments */ int return_valid; /* return code is valid */ - int return_code;/* syscall return code */ + long return_code;/* syscall return code */ int auditable; /* 1 if record should be written */ int name_count; struct audit_names names[AUDIT_NAMES]; @@ -135,6 +135,7 @@ struct audit_context { uid_t uid, euid, suid, fsuid; gid_t gid, egid, sgid, fsgid; unsigned long personality; + int arch; #if AUDIT_DEBUG int put_count; @@ -348,6 +349,10 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_PERS: result = (tsk->personality == value); break; + case AUDIT_ARCH: + if (ctx) + result = (ctx->arch == value); + break; case AUDIT_EXIT: if (ctx && ctx->return_valid) @@ -355,7 +360,7 @@ static int audit_filter_rules(struct task_struct *tsk, break; case AUDIT_SUCCESS: if (ctx && ctx->return_valid) - result = (ctx->return_code >= 0); + result = (ctx->return_valid == AUDITSC_SUCCESS); break; case AUDIT_DEVMAJOR: if (ctx) { @@ -648,8 +653,11 @@ static void audit_log_exit(struct audit_context *context) audit_log_format(ab, "syscall=%d", context->major); if (context->personality != PER_LINUX) audit_log_format(ab, " per=%lx", context->personality); + audit_log_format(ab, " arch=%x", context->arch); if (context->return_valid) - audit_log_format(ab, " exit=%d", context->return_code); + audit_log_format(ab, " success=%s exit=%ld", + (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", + context->return_code); audit_log_format(ab, " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" " pid=%d loginuid=%d uid=%d gid=%d" @@ -773,7 +781,7 @@ static inline unsigned int audit_serial(void) * then the record will be written at syscall exit time (otherwise, it * will only be written if another part of the kernel requests that it * be written). */ -void audit_syscall_entry(struct task_struct *tsk, int major, +void audit_syscall_entry(struct task_struct *tsk, int arch, int major, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4) { @@ -827,6 +835,7 @@ void audit_syscall_entry(struct task_struct *tsk, int major, if (!audit_enabled) return; + context->arch = arch; context->major = major; context->argv[0] = a1; context->argv[1] = a2; @@ -850,13 +859,13 @@ void audit_syscall_entry(struct task_struct *tsk, int major, * filtering, or because some other part of the kernel write an audit * message), then write out the syscall information. In call cases, * free the names stored from getname(). */ -void audit_syscall_exit(struct task_struct *tsk, int return_code) +void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code) { struct audit_context *context; get_task_struct(tsk); task_lock(tsk); - context = audit_get_context(tsk, 1, return_code); + context = audit_get_context(tsk, valid, return_code); task_unlock(tsk); /* Not having a context here is ok, since the parent may have @@ -869,6 +878,7 @@ void audit_syscall_exit(struct task_struct *tsk, int return_code) context->in_syscall = 0; context->auditable = 0; + if (context->previous) { struct audit_context *new_context = context->previous; context->previous = NULL; -- cgit v1.2.3 From d812ddbb89e323d054a7d073466225966c8350c8 Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Fri, 29 Apr 2005 16:09:52 +0100 Subject: [AUDIT] Fix signedness of 'serial' in various routines. Attached is a patch that corrects a signed/unsigned warning. I also noticed that we needlessly init serial to 0. That only needs to occur if the kernel was compiled without the audit system. -Steve Grubb Signed-off-by: David Woodhouse --- kernel/audit.c | 6 ++++-- kernel/auditsc.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index e7bff8000d23..aa35422c0c42 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -620,7 +620,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx) struct audit_buffer *ab = NULL; unsigned long flags; struct timespec t; - int serial = 0; + unsigned int serial; if (!audit_initialized) return NULL; @@ -669,8 +669,10 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx) audit_get_stamp(ab->ctx, &t, &serial); else #endif + { t = CURRENT_TIME; - + serial = 0; + } audit_log_format(ab, "audit(%lu.%03lu:%u): ", t.tv_sec, t.tv_nsec/1000000, serial); return ab; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 77e92592de57..49ecd707b953 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -992,7 +992,7 @@ void audit_inode(const char *name, const struct inode *inode) } void audit_get_stamp(struct audit_context *ctx, - struct timespec *t, int *serial) + struct timespec *t, unsigned int *serial) { if (ctx) { t->tv_sec = ctx->ctime.tv_sec; -- cgit v1.2.3 From c7fcb0ee74ef4cfdea02befacb55945c93641e44 Mon Sep 17 00:00:00 2001 From: Peter Martuccelli Date: Fri, 29 Apr 2005 16:10:24 +0100 Subject: [AUDIT] Avoid using %*.*s format strings. They don't seem to work correctly (investigation ongoing), but we don't actually need to do it anyway. Patch from Peter Martuccelli Signed-off-by: David Woodhouse --- kernel/audit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index aa35422c0c42..42ce282728b6 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -540,8 +540,8 @@ static inline int audit_log_drain(struct audit_buffer *ab) if (!audit_pid) { /* No daemon */ int offset = ab->nlh ? NLMSG_SPACE(0) : 0; int len = skb->len - offset; - printk(KERN_ERR "%*.*s\n", - len, len, skb->data + offset); + skb->data[offset + len] = '\0'; + printk(KERN_ERR "%s\n", skb->data + offset); } kfree_skb(skb); ab->nlh = NULL; -- cgit v1.2.3 From 85c8721ff3bc96b702427a440616079e8daf8a2f Mon Sep 17 00:00:00 2001 From: Date: Fri, 29 Apr 2005 16:23:29 +0100 Subject: audit: update pointer to userspace tools, remove emacs mode tags --- kernel/audit.c | 4 ++-- kernel/auditsc.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 42ce282728b6..58c7d7e47299 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1,4 +1,4 @@ -/* audit.c -- Auditing support -*- linux-c -*- +/* audit.c -- Auditing support * Gateway between the kernel (e.g., selinux) and the user-space audit daemon. * System-call specific features have moved to auditsc.c * @@ -38,7 +38,7 @@ * 6) Support low-overhead kernel-based filtering to minimize the * information that must be passed to user-space. * - * Example user-space utilities: http://people.redhat.com/faith/audit/ + * Example user-space utilities: http://people.redhat.com/sgrubb/audit/ */ #include diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 49ecd707b953..9ff2c1b1033e 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1,4 +1,4 @@ -/* auditsc.c -- System-call auditing support -*- linux-c -*- +/* auditsc.c -- System-call auditing support * Handles all system-call specific auditing features. * * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. -- cgit v1.2.3 From c94c257c88c517f251da273a15c654224c7b6e21 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Fri, 29 Apr 2005 16:27:17 +0100 Subject: Add audit uid to netlink credentials Most audit control messages are sent over netlink.In order to properly log the identity of the sender of audit control messages, we would like to add the loginuid to the netlink_creds structure, as per the attached patch. Signed-off-by: Serge Hallyn Signed-off-by: David Woodhouse --- kernel/audit.c | 46 +++++++++++++++++++++++++--------------------- kernel/auditsc.c | 5 ++++- 2 files changed, 29 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 58c7d7e47299..587d3b2eba7f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -239,36 +239,36 @@ void audit_log_lost(const char *message) } -static int audit_set_rate_limit(int limit) +static int audit_set_rate_limit(int limit, uid_t loginuid) { int old = audit_rate_limit; audit_rate_limit = limit; - audit_log(current->audit_context, "audit_rate_limit=%d old=%d", - audit_rate_limit, old); + audit_log(NULL, "audit_rate_limit=%d old=%d by auid %u", + audit_rate_limit, old, loginuid); return old; } -static int audit_set_backlog_limit(int limit) +static int audit_set_backlog_limit(int limit, uid_t loginuid) { int old = audit_backlog_limit; audit_backlog_limit = limit; - audit_log(current->audit_context, "audit_backlog_limit=%d old=%d", - audit_backlog_limit, old); + audit_log(NULL, "audit_backlog_limit=%d old=%d by auid %u", + audit_backlog_limit, old, loginuid); return old; } -static int audit_set_enabled(int state) +static int audit_set_enabled(int state, uid_t loginuid) { int old = audit_enabled; if (state != 0 && state != 1) return -EINVAL; audit_enabled = state; - audit_log(current->audit_context, "audit_enabled=%d old=%d", - audit_enabled, old); + audit_log(NULL, "audit_enabled=%d old=%d by auid %u", + audit_enabled, old, loginuid); return old; } -static int audit_set_failure(int state) +static int audit_set_failure(int state, uid_t loginuid) { int old = audit_failure; if (state != AUDIT_FAIL_SILENT @@ -276,8 +276,8 @@ static int audit_set_failure(int state) && state != AUDIT_FAIL_PANIC) return -EINVAL; audit_failure = state; - audit_log(current->audit_context, "audit_failure=%d old=%d", - audit_failure, old); + audit_log(NULL, "audit_failure=%d old=%d by auid %u", + audit_failure, old, loginuid); return old; } @@ -344,6 +344,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; + uid_t loginuid; /* loginuid of sender */ err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type); if (err) @@ -351,6 +352,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) pid = NETLINK_CREDS(skb)->pid; uid = NETLINK_CREDS(skb)->uid; + loginuid = NETLINK_CB(skb).loginuid; seq = nlh->nlmsg_seq; data = NLMSG_DATA(nlh); @@ -371,34 +373,36 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; status_get = (struct audit_status *)data; if (status_get->mask & AUDIT_STATUS_ENABLED) { - err = audit_set_enabled(status_get->enabled); + err = audit_set_enabled(status_get->enabled, loginuid); if (err < 0) return err; } if (status_get->mask & AUDIT_STATUS_FAILURE) { - err = audit_set_failure(status_get->failure); + err = audit_set_failure(status_get->failure, loginuid); if (err < 0) return err; } if (status_get->mask & AUDIT_STATUS_PID) { int old = audit_pid; audit_pid = status_get->pid; - audit_log(current->audit_context, - "audit_pid=%d old=%d", audit_pid, old); + audit_log(NULL, "audit_pid=%d old=%d by auid %u", + audit_pid, old, loginuid); } if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) - audit_set_rate_limit(status_get->rate_limit); + audit_set_rate_limit(status_get->rate_limit, loginuid); if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT) - audit_set_backlog_limit(status_get->backlog_limit); + audit_set_backlog_limit(status_get->backlog_limit, + loginuid); break; case AUDIT_USER: ab = audit_log_start(NULL); if (!ab) break; /* audit_panic has been called */ audit_log_format(ab, - "user pid=%d uid=%d length=%d msg='%.1024s'", + "user pid=%d uid=%d length=%d loginuid=%u" + " msg='%.1024s'", pid, uid, (int)(nlh->nlmsg_len - ((char *)data - (char *)nlh)), - (char *)data); + loginuid, (char *)data); ab->type = AUDIT_USER; ab->pid = pid; audit_log_end(ab); @@ -411,7 +415,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_LIST: #ifdef CONFIG_AUDITSYSCALL err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, - uid, seq, data); + uid, seq, data, loginuid); #else err = -EOPNOTSUPP; #endif diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9ff2c1b1033e..66148f81d783 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -251,7 +251,8 @@ static int audit_copy_rule(struct audit_rule *d, struct audit_rule *s) return 0; } -int audit_receive_filter(int type, int pid, int uid, int seq, void *data) +int audit_receive_filter(int type, int pid, int uid, int seq, void *data, + uid_t loginuid) { u32 flags; struct audit_entry *entry; @@ -286,6 +287,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data) err = audit_add_rule(entry, &audit_entlist); if (!err && (flags & AUDIT_AT_EXIT)) err = audit_add_rule(entry, &audit_extlist); + audit_log(NULL, "auid %u added an audit rule\n", loginuid); break; case AUDIT_DEL: flags =((struct audit_rule *)data)->flags; @@ -295,6 +297,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data) err = audit_del_rule(data, &audit_entlist); if (!err && (flags & AUDIT_AT_EXIT)) err = audit_del_rule(data, &audit_extlist); + audit_log(NULL, "auid %u removed an audit rule\n", loginuid); break; default: return -EINVAL; -- cgit v1.2.3 From 37509e749dc2072e667db806ef24b9e897f61b8a Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Fri, 29 Apr 2005 17:19:14 +0100 Subject: [AUDIT] Requeue messages at head of queue, up to audit_backlog If netlink_unicast() fails, requeue the skb back at the head of the queue it just came from, instead of the tail. And do so unless we've exceeded the audit_backlog limit; not according to some other arbitrary limit. From: Chris Wright Signed-off-by: David Woodhouse --- kernel/audit.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 587d3b2eba7f..4a697c73faec 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -142,7 +142,6 @@ struct audit_buffer { int total; int type; int pid; - int count; /* Times requeued */ }; void audit_set_type(struct audit_buffer *ab, int type) @@ -526,9 +525,9 @@ static inline int audit_log_drain(struct audit_buffer *ab) retval = netlink_unicast(audit_sock, skb, audit_pid, MSG_DONTWAIT); } - if (retval == -EAGAIN && ab->count < 5) { - ++ab->count; - skb_queue_tail(&ab->sklist, skb); + if (retval == -EAGAIN && + (atomic_read(&audit_backlog)) < audit_backlog_limit) { + skb_queue_head(&ab->sklist, skb); audit_log_end_irq(ab); return 1; } @@ -666,7 +665,6 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx) ab->total = 0; ab->type = AUDIT_KERNEL; ab->pid = 0; - ab->count = 0; #ifdef CONFIG_AUDITSYSCALL if (ab->ctx) -- cgit v1.2.3 From 456be6cd90dbbb9b0ea01d56932d56d110d51cf7 Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Fri, 29 Apr 2005 17:30:07 +0100 Subject: [AUDIT] LOGIN message credentials Attached is a new patch that solves the issue of getting valid credentials into the LOGIN message. The current code was assuming that the audit context had already been copied. This is not always the case for LOGIN messages. To solve the problem, the patch passes the task struct to the function that emits the message where it can get valid credentials. Signed-off-by: Steve Grubb Signed-off-by: David Woodhouse --- kernel/auditsc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 66148f81d783..37b3ac94bc47 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1010,20 +1010,21 @@ void audit_get_stamp(struct audit_context *ctx, extern int audit_set_type(struct audit_buffer *ab, int type); -int audit_set_loginuid(struct audit_context *ctx, uid_t loginuid) +int audit_set_loginuid(struct task_struct *task, uid_t loginuid) { - if (ctx) { + if (task->audit_context) { struct audit_buffer *ab; ab = audit_log_start(NULL); if (ab) { audit_log_format(ab, "login pid=%d uid=%u " "old loginuid=%u new loginuid=%u", - ctx->pid, ctx->uid, ctx->loginuid, loginuid); + task->pid, task->uid, + task->audit_context->loginuid, loginuid); audit_set_type(ab, AUDIT_LOGIN); audit_log_end(ab); } - ctx->loginuid = loginuid; + task->audit_context->loginuid = loginuid; } return 0; } -- cgit v1.2.3 From c06fec5022ebe014af876da2df4a0eee836e97c8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 29 Apr 2005 09:37:07 -0700 Subject: Remove bogus BUG() in kernel/exit.c It's old sanity checking that may have been useful for debugging, but is just bogus these days. Noticed by Mattia Belletti. --- kernel/exit.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 39d35935b371..93851bcd9584 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -517,8 +517,6 @@ static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_re */ BUG_ON(p == reaper || reaper->exit_state >= EXIT_ZOMBIE); p->real_parent = reaper; - if (p->parent == p->real_parent) - BUG(); } static inline void reparent_thread(task_t *p, task_t *father, int traced) -- cgit v1.2.3 From d59dd4620fb8d6422555a9e2b82a707718e68327 Mon Sep 17 00:00:00 2001 From: "akpm@osdl.org" Date: Sun, 1 May 2005 08:58:47 -0700 Subject: [PATCH] use smp_mb/wmb/rmb where possible Replace a number of memory barriers with smp_ variants. This means we won't take the unnecessary hit on UP machines. Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kthread.c | 2 +- kernel/profile.c | 2 +- kernel/ptrace.c | 2 +- kernel/stop_machine.c | 10 +++++----- kernel/sys.c | 20 ++++++++++---------- kernel/timer.c | 2 +- 6 files changed, 19 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/kthread.c b/kernel/kthread.c index e377e2244103..f50f174e92da 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -174,7 +174,7 @@ int kthread_stop(struct task_struct *k) /* Must init completion *before* thread sees kthread_stop_info.k */ init_completion(&kthread_stop_info.done); - wmb(); + smp_wmb(); /* Now set kthread_should_stop() to true, and wake it up. */ kthread_stop_info.k = k; diff --git a/kernel/profile.c b/kernel/profile.c index a38fa70075fe..a66be468c422 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -522,7 +522,7 @@ static int __init create_hash_tables(void) return 0; out_cleanup: prof_on = 0; - mb(); + smp_mb(); on_each_cpu(profile_nop, NULL, 0, 1); for_each_online_cpu(cpu) { struct page *page; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 88b306c4e841..f5cc1cec0fb4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -135,7 +135,7 @@ int ptrace_attach(struct task_struct *task) (current->gid != task->sgid) || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) goto bad; - rmb(); + smp_rmb(); if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) goto bad; /* the same process cannot be attached many times */ diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index c39ed70af174..6116b25aa7cf 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -33,7 +33,7 @@ static int stopmachine(void *cpu) set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu)); /* Ack: we are alive */ - mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ + smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */ atomic_inc(&stopmachine_thread_ack); /* Simple state machine */ @@ -43,14 +43,14 @@ static int stopmachine(void *cpu) local_irq_disable(); irqs_disabled = 1; /* Ack: irqs disabled. */ - mb(); /* Must read state first. */ + smp_mb(); /* Must read state first. */ atomic_inc(&stopmachine_thread_ack); } else if (stopmachine_state == STOPMACHINE_PREPARE && !prepared) { /* Everyone is in place, hold CPU. */ preempt_disable(); prepared = 1; - mb(); /* Must read state first. */ + smp_mb(); /* Must read state first. */ atomic_inc(&stopmachine_thread_ack); } /* Yield in first stage: migration threads need to @@ -62,7 +62,7 @@ static int stopmachine(void *cpu) } /* Ack: we are exiting. */ - mb(); /* Must read state first. */ + smp_mb(); /* Must read state first. */ atomic_inc(&stopmachine_thread_ack); if (irqs_disabled) @@ -77,7 +77,7 @@ static int stopmachine(void *cpu) static void stopmachine_set_state(enum stopmachine_state state) { atomic_set(&stopmachine_thread_ack, 0); - wmb(); + smp_wmb(); stopmachine_state = state; while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) cpu_relax(); diff --git a/kernel/sys.c b/kernel/sys.c index 462d78d55895..df2ddcc6863b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -525,7 +525,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) if (new_egid != old_egid) { current->mm->dumpable = 0; - wmb(); + smp_wmb(); } if (rgid != (gid_t) -1 || (egid != (gid_t) -1 && egid != old_rgid)) @@ -556,7 +556,7 @@ asmlinkage long sys_setgid(gid_t gid) if(old_egid != gid) { current->mm->dumpable=0; - wmb(); + smp_wmb(); } current->gid = current->egid = current->sgid = current->fsgid = gid; } @@ -565,7 +565,7 @@ asmlinkage long sys_setgid(gid_t gid) if(old_egid != gid) { current->mm->dumpable=0; - wmb(); + smp_wmb(); } current->egid = current->fsgid = gid; } @@ -596,7 +596,7 @@ static int set_user(uid_t new_ruid, int dumpclear) if(dumpclear) { current->mm->dumpable = 0; - wmb(); + smp_wmb(); } current->uid = new_ruid; return 0; @@ -653,7 +653,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) if (new_euid != old_euid) { current->mm->dumpable=0; - wmb(); + smp_wmb(); } current->fsuid = current->euid = new_euid; if (ruid != (uid_t) -1 || @@ -703,7 +703,7 @@ asmlinkage long sys_setuid(uid_t uid) if (old_euid != uid) { current->mm->dumpable = 0; - wmb(); + smp_wmb(); } current->fsuid = current->euid = uid; current->suid = new_suid; @@ -748,7 +748,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) if (euid != current->euid) { current->mm->dumpable = 0; - wmb(); + smp_wmb(); } current->euid = euid; } @@ -798,7 +798,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) if (egid != current->egid) { current->mm->dumpable = 0; - wmb(); + smp_wmb(); } current->egid = egid; } @@ -845,7 +845,7 @@ asmlinkage long sys_setfsuid(uid_t uid) if (uid != old_fsuid) { current->mm->dumpable = 0; - wmb(); + smp_wmb(); } current->fsuid = uid; } @@ -875,7 +875,7 @@ asmlinkage long sys_setfsgid(gid_t gid) if (gid != old_fsgid) { current->mm->dumpable = 0; - wmb(); + smp_wmb(); } current->fsgid = gid; key_fsgid_changed(current); diff --git a/kernel/timer.c b/kernel/timer.c index ecb3d67c0e14..207aa4f0aa10 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1007,7 +1007,7 @@ asmlinkage long sys_getppid(void) * Make sure we read the pid before re-reading the * parent pointer: */ - rmb(); + smp_rmb(); parent = me->group_leader->real_parent; if (old != parent) continue; -- cgit v1.2.3 From e43379f10b42194b8a6e1de342cfb44463c0f6da Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Sun, 1 May 2005 08:59:00 -0700 Subject: [PATCH] nice and rt-prio rlimits Add a pair of rlimits for allowing non-root tasks to raise nice and rt priorities. Defaults to traditional behavior. Originally written by Chris Wright. The patch implements a simple rlimit ceiling for the RT (and nice) priorities a task can set. The rlimit defaults to 0, meaning no change in behavior by default. A value of 50 means RT priority levels 1-50 are allowed. A value of 100 means all 99 privilege levels from 1 to 99 are allowed. CAP_SYS_NICE is blanket permission. (akpm: see http://www.uwsg.iu.edu/hypermail/linux/kernel/0503.1/1921.html for tips on integrating this with PAM). Signed-off-by: Matt Mackall Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 25 +++++++++++++++++++------ kernel/sys.c | 2 +- 2 files changed, 20 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 9bb7489ee645..5dadcc6df7dd 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3223,6 +3223,19 @@ out_unlock: EXPORT_SYMBOL(set_user_nice); +/* + * can_nice - check if a task can reduce its nice value + * @p: task + * @nice: nice value + */ +int can_nice(const task_t *p, const int nice) +{ + /* convert nice value [19,-20] to rlimit style value [0,39] */ + int nice_rlim = 19 - nice; + return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur || + capable(CAP_SYS_NICE)); +} + #ifdef __ARCH_WANT_SYS_NICE /* @@ -3242,12 +3255,8 @@ asmlinkage long sys_nice(int increment) * We don't have to worry. Conceptually one call occurs first * and we have a single winner. */ - if (increment < 0) { - if (!capable(CAP_SYS_NICE)) - return -EPERM; - if (increment < -40) - increment = -40; - } + if (increment < -40) + increment = -40; if (increment > 40) increment = 40; @@ -3257,6 +3266,9 @@ asmlinkage long sys_nice(int increment) if (nice > 19) nice = 19; + if (increment < 0 && !can_nice(current, nice)) + return -EPERM; + retval = security_task_setnice(current, nice); if (retval) return retval; @@ -3372,6 +3384,7 @@ recheck: return -EINVAL; if ((policy == SCHED_FIFO || policy == SCHED_RR) && + param->sched_priority > p->signal->rlim[RLIMIT_RTPRIO].rlim_cur && !capable(CAP_SYS_NICE)) return -EPERM; if ((current->euid != p->euid) && (current->euid != p->uid) && diff --git a/kernel/sys.c b/kernel/sys.c index df2ddcc6863b..7f43d6e62c7a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -227,7 +227,7 @@ static int set_one_prio(struct task_struct *p, int niceval, int error) error = -EPERM; goto out; } - if (niceval < task_nice(p) && !capable(CAP_SYS_NICE)) { + if (niceval < task_nice(p) && !can_nice(p, niceval)) { error = -EACCES; goto out; } -- cgit v1.2.3 From d59745ce3e7aa13856bca16d3bcbb95041775ff6 Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Sun, 1 May 2005 08:59:02 -0700 Subject: [PATCH] clean up kernel messages Arrange for all kernel printks to be no-ops. Only available if CONFIG_EMBEDDED. This patch saves about 375k on my laptop config and nearly 100k on minimal configs. Signed-off-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 1498689548d1..290a07ce2c8a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -85,10 +85,6 @@ static int console_locked; */ static DEFINE_SPINLOCK(logbuf_lock); -static char __log_buf[__LOG_BUF_LEN]; -static char *log_buf = __log_buf; -static int log_buf_len = __LOG_BUF_LEN; - #define LOG_BUF_MASK (log_buf_len-1) #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) @@ -99,7 +95,6 @@ static int log_buf_len = __LOG_BUF_LEN; static unsigned long log_start; /* Index into log_buf: next char to be read by syslog() */ static unsigned long con_start; /* Index into log_buf: next char to be sent to consoles */ static unsigned long log_end; /* Index into log_buf: most-recently-written-char + 1 */ -static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */ /* * Array of consoles built from command line options (console=) @@ -120,6 +115,13 @@ static int preferred_console = -1; /* Flag: console code may call schedule() */ static int console_may_schedule; +#ifdef CONFIG_PRINTK + +static char __log_buf[__LOG_BUF_LEN]; +static char *log_buf = __log_buf; +static int log_buf_len = __LOG_BUF_LEN; +static unsigned long logged_chars; /* Number of chars produced since last read+clear operation */ + /* * Setup a list of consoles. Called from init/main.c */ @@ -535,6 +537,7 @@ __setup("time", printk_time_setup); * then changes console_loglevel may break. This is because console_loglevel * is inspected when the actual printing occurs. */ + asmlinkage int printk(const char *fmt, ...) { va_list args; @@ -655,6 +658,18 @@ out: EXPORT_SYMBOL(printk); EXPORT_SYMBOL(vprintk); +#else + +asmlinkage long sys_syslog(int type, char __user * buf, int len) +{ + return 0; +} + +int do_syslog(int type, char __user * buf, int len) { return 0; } +static void call_console_drivers(unsigned long start, unsigned long end) {} + +#endif + /** * acquire_console_sem - lock the console system for exclusive use. * @@ -931,7 +946,7 @@ int unregister_console(struct console * console) return res; } EXPORT_SYMBOL(unregister_console); - + /** * tty_write_message - write a message to a certain tty, not just the console. * -- cgit v1.2.3 From 66cf8f1443301a1d5bc9c21709e4264e6919a3e1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 1 May 2005 08:59:03 -0700 Subject: [PATCH] kernel/rcupdate.c: make the exports EXPORT_SYMBOL_GPL The gpl exports need to be put back. Moving them to GPL -- but in a measured manner, as I proposed on this list some months ago -- is fine. Changing these particular exports precipitously is most definitely -not- fine. Here is my earlier proposal: http://marc.theaimsgroup.com/?l=linux-kernel&m=110520930301813&w=2 See below for a patch that puts the exports back, along with an updated version of my earlier patch that starts the process of moving them to GPL. I will also be following this message with RFC patches that introduce two (EXPORT_SYMBOL_GPL) interfaces to replace synchronize_kernel(), which then becomes deprecated. Signed-off-by: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index d00eded75d71..ad497722f04f 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -465,6 +465,6 @@ void synchronize_kernel(void) } module_param(maxbatch, int, 0); -EXPORT_SYMBOL_GPL(call_rcu); -EXPORT_SYMBOL_GPL(call_rcu_bh); -EXPORT_SYMBOL_GPL(synchronize_kernel); +EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL(synchronize_kernel); /* WARNING: GPL-only in April 2006. */ -- cgit v1.2.3 From 9b06e818985d139fd9e82c28297f7744e1b484e1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 1 May 2005 08:59:04 -0700 Subject: [PATCH] Deprecate synchronize_kernel, GPL replacement The synchronize_kernel() primitive is used for quite a few different purposes: waiting for RCU readers, waiting for NMIs, waiting for interrupts, and so on. This makes RCU code harder to read, since synchronize_kernel() might or might not have matching rcu_read_lock()s. This patch creates a new synchronize_rcu() that is to be used for RCU readers and a new synchronize_sched() that is used for the rest. These two new primitives currently have the same implementation, but this is might well change with additional real-time support. Both new primitives are GPL-only, the old primitive is deprecated. Signed-off-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index ad497722f04f..f436993bd590 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -444,15 +444,18 @@ static void wakeme_after_rcu(struct rcu_head *head) } /** - * synchronize_kernel - wait until a grace period has elapsed. + * synchronize_rcu - wait until a grace period has elapsed. * * Control will return to the caller some time after a full grace * period has elapsed, in other words after all currently executing RCU * read-side critical sections have completed. RCU read-side critical * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. + * + * If your read-side code is not protected by rcu_read_lock(), do -not- + * use synchronize_rcu(). */ -void synchronize_kernel(void) +void synchronize_rcu(void) { struct rcu_synchronize rcu; @@ -464,7 +467,16 @@ void synchronize_kernel(void) wait_for_completion(&rcu.completion); } +/* + * Deprecated, use synchronize_rcu() or synchronize_sched() instead. + */ +void synchronize_kernel(void) +{ + synchronize_rcu(); +} + module_param(maxbatch, int, 0); EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL_GPL(synchronize_rcu); EXPORT_SYMBOL(synchronize_kernel); /* WARNING: GPL-only in April 2006. */ -- cgit v1.2.3 From fbd568a3e61a7decb8a754ad952aaa5b5c82e9e5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 1 May 2005 08:59:04 -0700 Subject: [PATCH] Change synchronize_kernel to _rcu and _sched This patch changes calls to synchronize_kernel(), deprecated in the earlier "Deprecate synchronize_kernel, GPL replacement" patch to instead call the new synchronize_rcu() and synchronize_sched() APIs. Signed-off-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 2 +- kernel/profile.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 2dbfa0773faf..5734ab09d3f9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1801,7 +1801,7 @@ sys_init_module(void __user *umod, /* Init routine failed: abort. Try to protect us from buggy refcounters. */ mod->state = MODULE_STATE_GOING; - synchronize_kernel(); + synchronize_sched(); if (mod->unsafe) printk(KERN_ERR "%s: module is now stuck!\n", mod->name); diff --git a/kernel/profile.c b/kernel/profile.c index a66be468c422..0221a50ca867 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -184,7 +184,7 @@ void unregister_timer_hook(int (*hook)(struct pt_regs *)) WARN_ON(hook != timer_hook); timer_hook = NULL; /* make sure all CPUs see the NULL hook */ - synchronize_kernel(); + synchronize_sched(); /* Allow ongoing interrupts to complete. */ } EXPORT_SYMBOL_GPL(register_timer_hook); -- cgit v1.2.3 From 7d87e14c236d6c4cab66d87cf0bc1e0f0375d308 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sun, 1 May 2005 08:59:12 -0700 Subject: [PATCH] consolidate sys_shmat Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys_ni.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 1802a311dd3f..0dda70ed1f98 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -52,6 +52,7 @@ cond_syscall(sys_msgsnd); cond_syscall(sys_msgrcv); cond_syscall(sys_msgctl); cond_syscall(sys_shmget); +cond_syscall(sys_shmat); cond_syscall(sys_shmdt); cond_syscall(sys_shmctl); cond_syscall(sys_mq_open); -- cgit v1.2.3 From 7ed20e1ad521b5f5df61bf6559ae60738e393741 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sun, 1 May 2005 08:59:14 -0700 Subject: [PATCH] convert that currently tests _NSIG directly to use valid_signal() Convert most of the current code that uses _NSIG directly to instead use valid_signal(). This avoids gcc -W warnings and off-by-one errors. Signed-off-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 5 +++-- kernel/futex.c | 3 ++- kernel/ptrace.c | 3 ++- kernel/signal.c | 9 +++++---- kernel/sys.c | 3 ++- 5 files changed, 14 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 93851bcd9584..eb8da36e13df 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -277,7 +278,7 @@ void set_special_pids(pid_t session, pid_t pgrp) */ int allow_signal(int sig) { - if (sig < 1 || sig > _NSIG) + if (!valid_signal(sig) || sig < 1) return -EINVAL; spin_lock_irq(¤t->sighand->siglock); @@ -298,7 +299,7 @@ EXPORT_SYMBOL(allow_signal); int disallow_signal(int sig) { - if (sig < 1 || sig > _NSIG) + if (!valid_signal(sig) || sig < 1) return -EINVAL; spin_lock_irq(¤t->sighand->siglock); diff --git a/kernel/futex.c b/kernel/futex.c index 7b54a672d0ad..c7130f86106c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -39,6 +39,7 @@ #include #include #include +#include #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) @@ -654,7 +655,7 @@ static int futex_fd(unsigned long uaddr, int signal) int ret, err; ret = -EINVAL; - if (signal < 0 || signal > _NSIG) + if (!valid_signal(signal)) goto out; ret = get_unused_fd(); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index f5cc1cec0fb4..8dcb8f6288bc 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -166,7 +167,7 @@ bad: int ptrace_detach(struct task_struct *child, unsigned int data) { - if ((unsigned long) data > _NSIG) + if (!valid_signal(data)) return -EIO; /* Architecture-specific hardware disable .. */ diff --git a/kernel/signal.c b/kernel/signal.c index e6567d7f2b62..8f3debc77c5b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -646,7 +647,7 @@ static int check_kill_permission(int sig, struct siginfo *info, struct task_struct *t) { int error = -EINVAL; - if (sig < 0 || sig > _NSIG) + if (!valid_signal(sig)) return error; error = -EPERM; if ((!info || ((unsigned long)info != 1 && @@ -1245,7 +1246,7 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p) * Make sure legacy kernel users don't send in bad values * (normal paths check this in check_kill_permission). */ - if (sig < 0 || sig > _NSIG) + if (!valid_signal(sig)) return -EINVAL; /* @@ -1520,7 +1521,7 @@ void do_notify_parent(struct task_struct *tsk, int sig) if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) sig = 0; } - if (sig > 0 && sig <= _NSIG) + if (valid_signal(sig) && sig > 0) __group_send_sig_info(sig, &info, tsk->parent); __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); @@ -2364,7 +2365,7 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact) { struct k_sigaction *k; - if (sig < 1 || sig > _NSIG || (act && sig_kernel_only(sig))) + if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig))) return -EINVAL; k = ¤t->sighand->action[sig-1]; diff --git a/kernel/sys.c b/kernel/sys.c index 7f43d6e62c7a..f64e97cabe25 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -1637,7 +1638,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, switch (option) { case PR_SET_PDEATHSIG: sig = arg2; - if (sig < 0 || sig > _NSIG) { + if (!valid_signal(sig)) { error = -EINVAL; break; } -- cgit v1.2.3 From 4dc3b16ba18c0f967ad100c52fa65b01a4f76ff0 Mon Sep 17 00:00:00 2001 From: Pavel Pisa Date: Sun, 1 May 2005 08:59:25 -0700 Subject: [PATCH] DocBook: changes and extensions to the kernel documentation I have recompiled Linux kernel 2.6.11.5 documentation for me and our university students again. The documentation could be extended for more sources which are equipped by structured comments for recent 2.6 kernels. I have tried to proceed with that task. I have done that more times from 2.6.0 time and it gets boring to do same changes again and again. Linux kernel compiles after changes for i386 and ARM targets. I have added references to some more files into kernel-api book, I have added some section names as well. So please, check that changes do not break something and that categories are not too much skewed. I have changed kernel-doc to accept "fastcall" and "asmlinkage" words reserved by kernel convention. Most of the other changes are modifications in the comments to make kernel-doc happy, accept some parameters description and do not bail out on errors. Changed to @pid in the description, moved some #ifdef before comments to correct function to comments bindings, etc. You can see result of the modified documentation build at http://cmp.felk.cvut.cz/~pisa/linux/lkdb-2.6.11.tar.gz Some more sources are ready to be included into kernel-doc generated documentation. Sources has been added into kernel-api for now. Some more section names added and probably some more chaos introduced as result of quick cleanup work. Signed-off-by: Pavel Pisa Signed-off-by: Martin Waitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- kernel/power/swsusp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index eb8da36e13df..419d9d3c4c48 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -210,7 +210,7 @@ static inline int has_stopped_jobs(int pgrp) } /** - * reparent_to_init() - Reparent the calling kernel thread to the init task. + * reparent_to_init - Reparent the calling kernel thread to the init task. * * If a kernel thread is launched as a result of a system call, or if * it ever exits, it should generally reparent itself to init so that diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index ae5bebc3b18f..90b3b68dee3f 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -1099,7 +1099,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) return pblist; } -/** +/* * Using bio to read from swap. * This code requires a bit more work than just using buffer heads * but, it is the recommended way for 2.5/2.6. -- cgit v1.2.3 From 67be2dd1bace0ec7ce2dbc1bba3f8df3d7be597e Mon Sep 17 00:00:00 2001 From: Martin Waitz Date: Sun, 1 May 2005 08:59:26 -0700 Subject: [PATCH] DocBook: fix some descriptions Some KernelDoc descriptions are updated to match the current code. No code changes. Signed-off-by: Martin Waitz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 3 ++- kernel/sysctl.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 5dadcc6df7dd..0dc3158667a2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2906,6 +2906,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, * @q: the waitqueue * @mode: which threads * @nr_exclusive: how many wake-one or wake-many threads to wake up + * @key: is directly passed to the wakeup function */ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, void *key) @@ -2928,7 +2929,7 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode) } /** - * __wake_up - sync- wake up threads blocked on a waitqueue. + * __wake_up_sync - wake up threads blocked on a waitqueue. * @q: the waitqueue * @mode: which threads * @nr_exclusive: how many wake-one or wake-many threads to wake up diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 79dbd93bd697..701d12c63068 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1991,6 +1991,8 @@ int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: file position + * @ppos: the current position in the file * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer * values from/to the user buffer, treated as an ASCII string. -- cgit v1.2.3 From 408b664a7d394a5e4315fbd14aca49b042cb2b08 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 1 May 2005 08:59:29 -0700 Subject: [PATCH] make lots of things static Another large rollup of various patches from Adrian which make things static where they were needlessly exported. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 419d9d3c4c48..7be283d98983 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -39,6 +39,8 @@ extern struct task_struct *child_reaper; int getrusage(struct task_struct *, int, struct rusage __user *); +static void exit_mm(struct task_struct * tsk); + static void __unhash_process(struct task_struct *p) { nr_threads--; @@ -474,7 +476,7 @@ EXPORT_SYMBOL_GPL(exit_fs); * Turn us into a lazy TLB process if we * aren't already.. */ -void exit_mm(struct task_struct * tsk) +static void exit_mm(struct task_struct * tsk) { struct mm_struct *mm = tsk->mm; -- cgit v1.2.3 From 0dd8e06bdaa0a97e706ee1a489a1f6176c4ddc64 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Tue, 3 May 2005 14:01:15 +0100 Subject: [PATCH] add new audit data to last skb When adding more formatted audit data to an skb for delivery to userspace, the kernel will attempt to reuse an skb that has spare room. However, if the audit message has already been fragmented to multiple skb's, the search for spare room in the skb uses the head of the list. This will corrupt the audit message with trailing bytes being placed midway through the stream. Fix is to look at the end of the list. Signed-off-by: Chris Wright Signed-off-by: David Woodhouse --- kernel/audit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 4a697c73faec..00455a9cf027 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -486,7 +486,7 @@ static void audit_log_move(struct audit_buffer *ab) if (ab->len == 0) return; - skb = skb_peek(&ab->sklist); + skb = skb_peek_tail(&ab->sklist); if (!skb || skb_tailroom(skb) <= ab->len + extra) { skb = alloc_skb(2 * ab->len + extra, GFP_ATOMIC); if (!skb) { -- cgit v1.2.3 From 012914dad25bd5cacf88af4429eecda62a06020d Mon Sep 17 00:00:00 2001 From: Russ Anderson <(rja@sgi.com)> Date: Sat, 23 Apr 2005 00:08:00 -0700 Subject: [patch] MCA recovery module undefined symbol fix The patch "MCA recovery improvements" added do_exit to mca_drv.c. That's fine when the mca recovery code is built in the kernel (CONFIG_IA64_MCA_RECOVERY=y) but breaks building the mca recovery code as a module (CONFIG_IA64_MCA_RECOVERY=m). Most users are currently building this as a module, as loading and unloading the module provides a very convenient way to turn on/off error recovery. This patch exports do_exit, so mca_drv.c can build as a module. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck --- kernel/exit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 7be283d98983..edaa50b5bbfa 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -846,6 +846,8 @@ fastcall NORET_TYPE void do_exit(long code) for (;;) ; } +EXPORT_SYMBOL_GPL(do_exit); + NORET_TYPE void complete_and_exit(struct completion *comp, long code) { if (comp) -- cgit v1.2.3 From 2a0a6ebee1d68552152ae8d4aeda91d806995dec Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 3 May 2005 14:55:09 -0700 Subject: [NETLINK]: Synchronous message processing. Let's recap the problem. The current asynchronous netlink kernel message processing is vulnerable to these attacks: 1) Hit and run: Attacker sends one or more messages and then exits before they're processed. This may confuse/disable the next netlink user that gets the netlink address of the attacker since it may receive the responses to the attacker's messages. Proposed solutions: a) Synchronous processing. b) Stream mode socket. c) Restrict/prohibit binding. 2) Starvation: Because various netlink rcv functions were written to not return until all messages have been processed on a socket, it is possible for these functions to execute for an arbitrarily long period of time. If this is successfully exploited it could also be used to hold rtnl forever. Proposed solutions: a) Synchronous processing. b) Stream mode socket. Firstly let's cross off solution c). It only solves the first problem and it has user-visible impacts. In particular, it'll break user space applications that expect to bind or communicate with specific netlink addresses (pid's). So we're left with a choice of synchronous processing versus SOCK_STREAM for netlink. For the moment I'm sticking with the synchronous approach as suggested by Alexey since it's simpler and I'd rather spend my time working on other things. However, it does have a number of deficiencies compared to the stream mode solution: 1) User-space to user-space netlink communication is still vulnerable. 2) Inefficient use of resources. This is especially true for rtnetlink since the lock is shared with other users such as networking drivers. The latter could hold the rtnl while communicating with hardware which causes the rtnetlink user to wait when it could be doing other things. 3) It is still possible to DoS all netlink users by flooding the kernel netlink receive queue. The attacker simply fills the receive socket with a single netlink message that fills up the entire queue. The attacker then continues to call sendmsg with the same message in a loop. Point 3) can be countered by retransmissions in user-space code, however it is pretty messy. In light of these problems (in particular, point 3), we should implement stream mode netlink at some point. In the mean time, here is a patch that implements synchronous processing. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- kernel/audit.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/audit.c b/kernel/audit.c index 0f84dd7af2c8..ac26d4d960d3 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -427,7 +427,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) /* Get message from skb (based on rtnetlink_rcv_skb). Each message is * processed by audit_receive_msg. Malformed skbs with wrong length are * discarded silently. */ -static int audit_receive_skb(struct sk_buff *skb) +static void audit_receive_skb(struct sk_buff *skb) { int err; struct nlmsghdr *nlh; @@ -436,7 +436,7 @@ static int audit_receive_skb(struct sk_buff *skb) while (skb->len >= NLMSG_SPACE(0)) { nlh = (struct nlmsghdr *)skb->data; if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) - return 0; + return; rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) rlen = skb->len; @@ -446,23 +446,20 @@ static int audit_receive_skb(struct sk_buff *skb) netlink_ack(skb, nlh, 0); skb_pull(skb, rlen); } - return 0; } /* Receive messages from netlink socket. */ static void audit_receive(struct sock *sk, int length) { struct sk_buff *skb; + unsigned int qlen; - if (down_trylock(&audit_netlink_sem)) - return; + down(&audit_netlink_sem); - /* FIXME: this must not cause starvation */ - while ((skb = skb_dequeue(&sk->sk_receive_queue))) { - if (audit_receive_skb(skb) && skb->len) - skb_queue_head(&sk->sk_receive_queue, skb); - else - kfree_skb(skb); + for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { + skb = skb_dequeue(&sk->sk_receive_queue); + audit_receive_skb(skb); + kfree_skb(skb); } up(&audit_netlink_sem); } -- cgit v1.2.3 From 075d6eb16d273dab7b7b4b83fcee8bce4ee387ed Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 5 May 2005 16:15:09 -0700 Subject: [PATCH] ppc32: platform-specific functions missing from kallsyms. The PPC32 kernel puts platform-specific functions into separate sections so that unneeded parts of it can be freed when we've booted and actually worked out what we're running on today. This makes kallsyms ignore those functions, because they're not between _[se]text or _[se]inittext. Rather than teaching kallsyms about the various pmac/chrp/etc sections, this patch adds '_[se]extratext' markers for kallsyms. Signed-off-by: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kallsyms.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 1627f8d6e0cd..13bcec151b57 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -46,6 +46,14 @@ static inline int is_kernel_inittext(unsigned long addr) return 0; } +static inline int is_kernel_extratext(unsigned long addr) +{ + if (addr >= (unsigned long)_sextratext + && addr <= (unsigned long)_eextratext) + return 1; + return 0; +} + static inline int is_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) @@ -169,8 +177,9 @@ const char *kallsyms_lookup(unsigned long addr, namebuf[0] = 0; if ((all_var && is_kernel(addr)) || - (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr)))) { - unsigned long symbol_end=0; + (!all_var && (is_kernel_text(addr) || is_kernel_inittext(addr) || + is_kernel_extratext(addr)))) { + unsigned long symbol_end = 0; /* do a binary search on the sorted kallsyms_addresses array */ low = 0; -- cgit v1.2.3 From 7d12e522ba13ce718b7ec32b75803dece8adb072 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 5 May 2005 16:15:11 -0700 Subject: [PATCH] ppc64: remove hidden -fno-omit-frame-pointer for schedule.c While looking at code generated by gcc4.0 I noticed some functions still had frame pointers, even after we stopped ppc64 from defining CONFIG_FRAME_POINTER. It turns out kernel/Makefile hardwires -fno-omit-frame-pointer on when compiling schedule.c. Create CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER and define it on architectures that dont require frame pointers in sched.c code. (akpm: blame me for the name) Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/Makefile b/kernel/Makefile index eb88b446c2cc..b01d26fe8db7 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_SYSFS) += ksysfs.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o -ifneq ($(CONFIG_IA64),y) +ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is # needed for x86 only. Why this used to be enabled for all architectures is beyond # me. I suspect most platforms don't need this, but until we know that for sure -- cgit v1.2.3 From 04dea5f93231204cc3ca0ab793ce76dbb10c86ba Mon Sep 17 00:00:00 2001 From: Prasanna S Panchamukhi Date: Thu, 5 May 2005 16:15:41 -0700 Subject: [PATCH] Kprobes: Oops! in unregister_kprobe() kernel oops! when unregister_kprobe() is called on a non-registered kprobe. This patch fixes the above problem by checking if the probe exists before unregistering. Signed-off-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1d5dd1337bd1..d8903e60c99a 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -107,13 +107,17 @@ rm_kprobe: void unregister_kprobe(struct kprobe *p) { unsigned long flags; - arch_remove_kprobe(p); spin_lock_irqsave(&kprobe_lock, flags); + if (!get_kprobe(p->addr)) { + spin_unlock_irqrestore(&kprobe_lock, flags); + return; + } *p->addr = p->opcode; hlist_del(&p->hlist); flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); spin_unlock_irqrestore(&kprobe_lock, flags); + arch_remove_kprobe(p); } static struct notifier_block kprobe_exceptions_nb = { -- cgit v1.2.3 From 64f562c6df3cfc5d1b2b4bdbcb7951457df9c237 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Thu, 5 May 2005 16:15:42 -0700 Subject: [PATCH] kprobes: Allow multiple kprobes at the same address Allow registration of multiple kprobes at an address in an architecture agnostic way. Corresponding handlers will be invoked in a sequence. But, a kprobe and a jprobe can't (yet) co-exist at the same address. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kprobes.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 131 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d8903e60c99a..037142b72a49 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -44,6 +44,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; unsigned int kprobe_cpu = NR_CPUS; static DEFINE_SPINLOCK(kprobe_lock); +static struct kprobe *curr_kprobe; /* Locks kprobe: irqs must be disabled */ void lock_kprobes(void) @@ -73,22 +74,139 @@ struct kprobe *get_kprobe(void *addr) return NULL; } +/* + * Aggregate handlers for multiple kprobes support - these handlers + * take care of invoking the individual kprobe handlers on p->list + */ +int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe *kp; + + list_for_each_entry(kp, &p->list, list) { + if (kp->pre_handler) { + curr_kprobe = kp; + kp->pre_handler(kp, regs); + curr_kprobe = NULL; + } + } + return 0; +} + +void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + struct kprobe *kp; + + list_for_each_entry(kp, &p->list, list) { + if (kp->post_handler) { + curr_kprobe = kp; + kp->post_handler(kp, regs, flags); + curr_kprobe = NULL; + } + } + return; +} + +int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr) +{ + /* + * if we faulted "during" the execution of a user specified + * probe handler, invoke just that probe's fault handler + */ + if (curr_kprobe && curr_kprobe->fault_handler) { + if (curr_kprobe->fault_handler(curr_kprobe, regs, trapnr)) + return 1; + } + return 0; +} + +/* + * Fill in the required fields of the "manager kprobe". Replace the + * earlier kprobe in the hlist with the manager kprobe + */ +static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) +{ + ap->addr = p->addr; + ap->opcode = p->opcode; + memcpy(&ap->ainsn, &p->ainsn, sizeof(struct arch_specific_insn)); + + ap->pre_handler = aggr_pre_handler; + ap->post_handler = aggr_post_handler; + ap->fault_handler = aggr_fault_handler; + + INIT_LIST_HEAD(&ap->list); + list_add(&p->list, &ap->list); + + INIT_HLIST_NODE(&ap->hlist); + hlist_del(&p->hlist); + hlist_add_head(&ap->hlist, + &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); +} + +/* + * This is the second or subsequent kprobe at the address - handle + * the intricacies + * TODO: Move kcalloc outside the spinlock + */ +static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) +{ + int ret = 0; + struct kprobe *ap; + + if (old_p->break_handler || p->break_handler) { + ret = -EEXIST; /* kprobe and jprobe can't (yet) coexist */ + } else if (old_p->pre_handler == aggr_pre_handler) { + list_add(&p->list, &old_p->list); + } else { + ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC); + if (!ap) + return -ENOMEM; + add_aggr_kprobe(ap, old_p); + list_add(&p->list, &ap->list); + } + return ret; +} + +/* kprobe removal house-keeping routines */ +static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) +{ + *p->addr = p->opcode; + hlist_del(&p->hlist); + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); + spin_unlock_irqrestore(&kprobe_lock, flags); + arch_remove_kprobe(p); +} + +static inline void cleanup_aggr_kprobe(struct kprobe *old_p, + struct kprobe *p, unsigned long flags) +{ + list_del(&p->list); + if (list_empty(&old_p->list)) { + cleanup_kprobe(old_p, flags); + kfree(old_p); + } else + spin_unlock_irqrestore(&kprobe_lock, flags); +} + int register_kprobe(struct kprobe *p) { int ret = 0; unsigned long flags = 0; + struct kprobe *old_p; if ((ret = arch_prepare_kprobe(p)) != 0) { goto rm_kprobe; } spin_lock_irqsave(&kprobe_lock, flags); - INIT_HLIST_NODE(&p->hlist); - if (get_kprobe(p->addr)) { - ret = -EEXIST; + old_p = get_kprobe(p->addr); + if (old_p) { + ret = register_aggr_kprobe(old_p, p); goto out; } - arch_copy_kprobe(p); + arch_copy_kprobe(p); + INIT_HLIST_NODE(&p->hlist); hlist_add_head(&p->hlist, &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); @@ -107,17 +225,17 @@ rm_kprobe: void unregister_kprobe(struct kprobe *p) { unsigned long flags; + struct kprobe *old_p; + spin_lock_irqsave(&kprobe_lock, flags); - if (!get_kprobe(p->addr)) { + old_p = get_kprobe(p->addr); + if (old_p) { + if (old_p->pre_handler == aggr_pre_handler) + cleanup_aggr_kprobe(old_p, p, flags); + else + cleanup_kprobe(p, flags); + } else spin_unlock_irqrestore(&kprobe_lock, flags); - return; - } - *p->addr = p->opcode; - hlist_del(&p->hlist); - flush_icache_range((unsigned long) p->addr, - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); - spin_unlock_irqrestore(&kprobe_lock, flags); - arch_remove_kprobe(p); } static struct notifier_block kprobe_exceptions_nb = { -- cgit v1.2.3 From b7e4e85337060354f8b860cc38066725559313a4 Mon Sep 17 00:00:00 2001 From: Paulo Marques Date: Thu, 5 May 2005 16:15:49 -0700 Subject: [PATCH] setitimer timer expires too early It seems that the code responsible for this is in kernel/itimer.c:126: p->signal->real_timer.expires = jiffies + interval; add_timer(&p->signal->real_timer); If you request an interval of, lets say 900 usecs, the interval given by timeval_to_jiffies will be 1. If you request this when we are half-way between two timer ticks, the interval will only give 400 usecs. If we want to guarantee that we never ever give intervals less than requested, the simple solution would be to change that to: p->signal->real_timer.expires = jiffies + interval + 1; This however will produce pathological cases, like having a idle system being requested 1 ms timeouts will give systematically 2 ms timeouts, whereas currently it simply gives a few usecs less than 1 ms. The complex (and more computationally expensive) solution would be to check the gettimeofday time, and compute the correct number of jiffies. This way, if we request a 300 usecs timer 200 usecs inside the timer tick, we can wait just one tick, but not if we are 800 usecs inside the tick. This would also mean that we would have to lock preemption during these computations to avoid races, etc. I've searched the archives but couldn't find this particular issue being discussed before. Attached is a patch to do the simple solution, in case anybody thinks that it should be used. Signed-Off-By: Paulo Marques Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/itimer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/itimer.c b/kernel/itimer.c index e9a40e947e07..1dc988e0d2c7 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -123,7 +123,11 @@ static inline void it_real_arm(struct task_struct *p, unsigned long interval) return; if (interval > (unsigned long) LONG_MAX) interval = LONG_MAX; - p->signal->real_timer.expires = jiffies + interval; + /* the "+ 1" below makes sure that the timer doesn't go off before + * the interval requested. This could happen if + * time requested % (usecs per jiffy) is more than the usecs left + * in the current jiffy */ + p->signal->real_timer.expires = jiffies + interval + 1; add_timer(&p->signal->real_timer); } -- cgit v1.2.3 From ebe8b54134314cc31331f6e26f42276cd947d1df Mon Sep 17 00:00:00 2001 From: Domen Puncer Date: Thu, 5 May 2005 16:16:19 -0700 Subject: [PATCH] correctly name the Shell sort As per http://www.nist.gov/dads/HTML/shellsort.html, this should be referred to as a Shell sort. Shell-Metzner is a misnomer. Signed-off-by: Daniel Dickman Signed-off-by: Domen Puncer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index f64e97cabe25..f006632c2ba7 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1195,7 +1195,7 @@ static int groups_from_user(struct group_info *group_info, return 0; } -/* a simple shell-metzner sort */ +/* a simple Shell sort */ static void groups_sort(struct group_info *group_info) { int base, max, stride; -- cgit v1.2.3