From 2ef9481e666b4654159ac9f847e6963809e3c470 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 23 Jan 2006 10:58:20 -0600 Subject: [PATCH] powerpc: trivial: modify comments to refer to new location of files This patch removes all self references and fixes references to files in the now defunct arch/ppc64 tree. I think this accomplises everything wanted, though there might be a few references I missed. Signed-off-by: Jon Mason Signed-off-by: Paul Mackerras --- kernel/auditsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 685c25175d96..3e376202dd48 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -958,7 +958,7 @@ void audit_syscall_entry(struct task_struct *tsk, int arch, int major, * * i386 no * x86_64 no - * ppc64 yes (see arch/ppc64/kernel/misc.S) + * ppc64 yes (see arch/powerpc/platforms/iseries/misc.S) * * This also happens with vm86 emulation in a non-nested manner * (entries without exits), so this case must be caught. -- cgit v1.2.3 From 1fa44ecad2b86475e038aed81b0bf333fa484f8b Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 23 Feb 2006 12:43:43 -0600 Subject: [SCSI] add execute_in_process_context() API We have several points in the SCSI stack (primarily for our device functions) where we need to guarantee process context, but (given the place where the last reference was released) we cannot guarantee this. This API gets around the issue by executing the function directly if the caller has process context, but scheduling a workqueue to execute in process context if the caller doesn't have it. Signed-off-by: James Bottomley --- kernel/workqueue.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b052e2c4c710..e9e464a90376 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * The per-CPU workqueue (if single thread, we always use the first @@ -476,6 +477,34 @@ void cancel_rearming_delayed_work(struct work_struct *work) } EXPORT_SYMBOL(cancel_rearming_delayed_work); +/** + * execute_in_process_context - reliably execute the routine with user context + * @fn: the function to execute + * @data: data to pass to the function + * @ew: guaranteed storage for the execute work structure (must + * be available when the work executes) + * + * Executes the function immediately if process context is available, + * otherwise schedules the function for delayed execution. + * + * Returns: 0 - function was executed + * 1 - function was scheduled for execution + */ +int execute_in_process_context(void (*fn)(void *data), void *data, + struct execute_work *ew) +{ + if (!in_interrupt()) { + fn(data); + return 0; + } + + INIT_WORK(&ew->work, fn, data); + schedule_work(&ew->work); + + return 1; +} +EXPORT_SYMBOL_GPL(execute_in_process_context); + int keventd_up(void) { return keventd_wq != NULL; -- cgit v1.2.3 From d2b176ed878d4d5fcc0bd35656dfd373f3702af9 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 28 Feb 2006 09:42:23 -0800 Subject: [IA64] sysctl option to silence unaligned trap warnings Allow sysadmin to disable all warnings about userland apps making unaligned accesses by using: # echo 1 > /proc/sys/kernel/ignore-unaligned-usertrap Rather than having to use prctl on a process by process basis. Default behaivour leaves the warnings enabled. Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- kernel/sysctl.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c05a2b7125e1..acf6c1550f27 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -124,6 +124,10 @@ extern int sysctl_hz_timer; extern int acct_parm[]; #endif +#ifdef CONFIG_IA64 +extern int no_unaligned_warning; +#endif + static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, ctl_table *, void **); static int proc_doutsstring(ctl_table *table, int write, struct file *filp, @@ -665,6 +669,16 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif +#ifdef CONFIG_IA64 + { + .ctl_name = KERN_IA64_UNALIGNED, + .procname = "ignore-unaligned-usertrap", + .data = &no_unaligned_warning, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif { .ctl_name = 0 } }; -- cgit v1.2.3 From 7f99f06f01aa9460b5a18f1b0e0900c90d0a84fc Mon Sep 17 00:00:00 2001 From: Stefan Seyfried Date: Thu, 2 Mar 2006 02:54:34 -0800 Subject: [PATCH] fix acpi_video_flags on x86-64 acpi_video_flags variable is unsigned long, so it should be set as such. This actually matters on x86-64. Signed-off-by: Stefan Seyfried Signed-off-by: Pavel Machek Cc: "Brown, Len" Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index acf6c1550f27..de2d9109194e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -667,7 +667,7 @@ static ctl_table kern_table[] = { .data = &acpi_video_flags, .maxlen = sizeof (unsigned long), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, #endif #ifdef CONFIG_IA64 -- cgit v1.2.3 From 685db65e422bfa523b8a9dacb5a658b42b254f05 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 2 Mar 2006 02:54:35 -0800 Subject: [PATCH] time_interpolator: Use readq_relaxed() instead of readq(). On some platforms readq performs additional work to make sure I/O is done in a coherent way. This is not needed for time retrieval as done by the time interpolator. So we can use readq_relaxed instead which will improve performance. It affects sparc64 and ia64 only. Apparently it makes a significant difference on ia64. Signed-off-by: Christoph Lameter Cc: john stultz Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index fe3a9a9f8328..fc6646fd5aab 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1351,10 +1351,10 @@ static inline u64 time_interpolator_get_cycles(unsigned int src) return x(); case TIME_SOURCE_MMIO64 : - return readq((void __iomem *) time_interpolator->addr); + return readq_relaxed((void __iomem *)time_interpolator->addr); case TIME_SOURCE_MMIO32 : - return readl((void __iomem *) time_interpolator->addr); + return readl_relaxed((void __iomem *)time_interpolator->addr); default: return get_cycles(); } -- cgit v1.2.3 From 8ba7b0a14b2ec19583bedbcdbea7f1c5008fc922 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 6 Mar 2006 17:38:49 -0800 Subject: Add early-boot-safety check to cond_resched() Just to be safe, we should not trigger a conditional reschedule during the early boot sequence. We've historically done some questionable early on, and the safety warnings in __might_sleep() are generally turned off during that period, so there might be problems lurking. This affects CONFIG_PREEMPT_VOLUNTARY, which takes over might_sleep() to cause a voluntary conditional reschedule. Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/sched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 12d291bf3379..3454bb869fd0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4028,6 +4028,8 @@ static inline void __cond_resched(void) */ if (unlikely(preempt_count())) return; + if (unlikely(system_state != SYSTEM_RUNNING)) + return; do { add_preempt_count(PREEMPT_ACTIVE); schedule(); -- cgit v1.2.3 From 69239749e1ac4f3496906aa4267cb9f61ce52c9c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 6 Mar 2006 15:42:45 -0800 Subject: [PATCH] fix next_timer_interrupt() for hrtimer Also from Thomas Gleixner Function next_timer_interrupt() got broken with a recent patch 6ba1b91213e81aa92b5cf7539f7d2a94ff54947c as sys_nanosleep() was moved to hrtimer. This broke things as next_timer_interrupt() did not check hrtimer tree for next event. Function next_timer_interrupt() is needed with dyntick (CONFIG_NO_IDLE_HZ, VST) implementations, as the system can be in idle when next hrtimer event was supposed to happen. At least ARM and S390 currently use next_timer_interrupt(). Signed-off-by: Thomas Gleixner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 35 +++++++++++++++++++++++++++++++++++ kernel/timer.c | 16 ++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 5ae51f1bc7c8..14bc9cfa6399 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -505,6 +505,41 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) return rem; } +#ifdef CONFIG_NO_IDLE_HZ +/** + * hrtimer_get_next_event - get the time until next expiry event + * + * Returns the delta to the next expiry event or KTIME_MAX if no timer + * is pending. + */ +ktime_t hrtimer_get_next_event(void) +{ + struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); + ktime_t delta, mindelta = { .tv64 = KTIME_MAX }; + unsigned long flags; + int i; + + for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) { + struct hrtimer *timer; + + spin_lock_irqsave(&base->lock, flags); + if (!base->first) { + spin_unlock_irqrestore(&base->lock, flags); + continue; + } + timer = rb_entry(base->first, struct hrtimer, node); + delta.tv64 = timer->expires.tv64; + spin_unlock_irqrestore(&base->lock, flags); + delta = ktime_sub(delta, base->get_time()); + if (delta.tv64 < mindelta.tv64) + mindelta.tv64 = delta.tv64; + } + if (mindelta.tv64 < 0) + mindelta.tv64 = 0; + return mindelta; +} +#endif + /** * hrtimer_init - initialize a timer to the given clock * diff --git a/kernel/timer.c b/kernel/timer.c index fc6646fd5aab..8256f3f5ec0d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -489,9 +489,21 @@ unsigned long next_timer_interrupt(void) struct list_head *list; struct timer_list *nte; unsigned long expires; + unsigned long hr_expires = MAX_JIFFY_OFFSET; + ktime_t hr_delta; tvec_t *varray[4]; int i, j; + hr_delta = hrtimer_get_next_event(); + if (hr_delta.tv64 != KTIME_MAX) { + struct timespec tsdelta; + tsdelta = ktime_to_timespec(hr_delta); + hr_expires = timespec_to_jiffies(&tsdelta); + if (hr_expires < 3) + return hr_expires + jiffies; + } + hr_expires += jiffies; + base = &__get_cpu_var(tvec_bases); spin_lock(&base->t_base.lock); expires = base->timer_jiffies + (LONG_MAX >> 1); @@ -542,6 +554,10 @@ found: } } spin_unlock(&base->t_base.lock); + + if (time_before(hr_expires, expires)) + return hr_expires; + return expires; } #endif -- cgit v1.2.3 From 5aee405c662ca644980c184774277fc6d0769a84 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Mon, 6 Mar 2006 15:42:51 -0800 Subject: [PATCH] time: add barrier after updating jiffies_64 Add a compiler barrier so that we don't read jiffies before updating jiffies_64. Signed-off-by: Atsushi Nemoto Cc: Ralf Baechle Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/timer.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 8256f3f5ec0d..bf7c4193b936 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -941,6 +941,8 @@ static inline void update_times(void) void do_timer(struct pt_regs *regs) { jiffies_64++; + /* prevent loading jiffies before storing new jiffies_64 value. */ + barrier(); update_times(); softlockup_tick(regs); } -- cgit v1.2.3 From 81c29a857d3c8d6ea9c4f20d196c36bf0a07c615 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 7 Mar 2006 21:55:27 -0800 Subject: [PATCH] idle threads should have a sane ->timestamp value Idle threads should have a sane ->timestamp value, to avoid init kernel thread(s) from inheriting it and causing miscalculations in try_to_wake_up(). Reported-by: Mike Galbraith . Signed-off-by: Ingo Molnar Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 3454bb869fd0..e82c99f1db64 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4335,6 +4335,7 @@ void __devinit init_idle(task_t *idle, int cpu) runqueue_t *rq = cpu_rq(cpu); unsigned long flags; + idle->timestamp = sched_clock(); idle->sleep_avg = 0; idle->array = NULL; idle->prio = MAX_PRIO; -- cgit v1.2.3 From 21a1ea9eb40411d4ee29448c53b9e4c0654d6ceb Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Tue, 7 Mar 2006 21:55:33 -0800 Subject: [PATCH] rcu batch tuning This patch adds new tunables for RCU queue and finished batches. There are two types of controls - number of completed RCU updates invoked in a batch (blimit) and monitoring for high rate of incoming RCUs on a cpu (qhimark, qlowmark). By default, the per-cpu batch limit is set to a small value. If the input RCU rate exceeds the high watermark, we do two things - force quiescent state on all cpus and set the batch limit of the CPU to INTMAX. Setting batch limit to INTMAX forces all finished RCUs to be processed in one shot. If we have more than INTMAX RCUs queued up, then we have bigger problems anyway. Once the incoming queued RCUs fall below the low watermark, the batch limit is set to the default. Signed-off-by: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 76 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 0cf8146bd585..8cf15a569fcd 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -67,7 +67,43 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; /* Fake initialization required by compiler */ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; -static int maxbatch = 10000; +static int blimit = 10; +static int qhimark = 10000; +static int qlowmark = 100; +#ifdef CONFIG_SMP +static int rsinterval = 1000; +#endif + +static atomic_t rcu_barrier_cpu_count; +static struct semaphore rcu_barrier_sema; +static struct completion rcu_barrier_completion; + +#ifdef CONFIG_SMP +static void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + int cpu; + cpumask_t cpumask; + set_need_resched(); + if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) { + rdp->last_rs_qlen = rdp->qlen; + /* + * Don't send IPI to itself. With irqs disabled, + * rdp->cpu is the current cpu. + */ + cpumask = rcp->cpumask; + cpu_clear(rdp->cpu, cpumask); + for_each_cpu_mask(cpu, cpumask) + smp_send_reschedule(cpu); + } +} +#else +static inline void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + set_need_resched(); +} +#endif /** * call_rcu - Queue an RCU callback for invocation after a grace period. @@ -92,17 +128,13 @@ void fastcall call_rcu(struct rcu_head *head, rdp = &__get_cpu_var(rcu_data); *rdp->nxttail = head; rdp->nxttail = &head->next; - - if (unlikely(++rdp->count > 10000)) - set_need_resched(); - + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_ctrlblk); + } local_irq_restore(flags); } -static atomic_t rcu_barrier_cpu_count; -static struct semaphore rcu_barrier_sema; -static struct completion rcu_barrier_completion; - /** * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. @@ -131,12 +163,12 @@ void fastcall call_rcu_bh(struct rcu_head *head, rdp = &__get_cpu_var(rcu_bh_data); *rdp->nxttail = head; rdp->nxttail = &head->next; - rdp->count++; -/* - * Should we directly call rcu_do_batch() here ? - * if (unlikely(rdp->count > 10000)) - * rcu_do_batch(rdp); - */ + + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_bh_ctrlblk); + } + local_irq_restore(flags); } @@ -199,10 +231,12 @@ static void rcu_do_batch(struct rcu_data *rdp) next = rdp->donelist = list->next; list->func(list); list = next; - rdp->count--; - if (++count >= maxbatch) + rdp->qlen--; + if (++count >= rdp->blimit) break; } + if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) + rdp->blimit = blimit; if (!rdp->donelist) rdp->donetail = &rdp->donelist; else @@ -473,6 +507,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, rdp->quiescbatch = rcp->completed; rdp->qs_pending = 0; rdp->cpu = cpu; + rdp->blimit = blimit; } static void __devinit rcu_online_cpu(int cpu) @@ -567,7 +602,12 @@ void synchronize_kernel(void) synchronize_rcu(); } -module_param(maxbatch, int, 0); +module_param(blimit, int, 0); +module_param(qhimark, int, 0); +module_param(qlowmark, int, 0); +#ifdef CONFIG_SMP +module_param(rsinterval, int, 0); +#endif EXPORT_SYMBOL_GPL(rcu_batches_completed); EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ -- cgit v1.2.3 From 529bf6be5c04f2e869d07bfdb122e9fd98ade714 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Tue, 7 Mar 2006 21:55:35 -0800 Subject: [PATCH] fix file counting I have benchmarked this on an x86_64 NUMA system and see no significant performance difference on kernbench. Tested on both x86_64 and powerpc. The way we do file struct accounting is not very suitable for batched freeing. For scalability reasons, file accounting was constructor/destructor based. This meant that nr_files was decremented only when the object was removed from the slab cache. This is susceptible to slab fragmentation. With RCU based file structure, consequent batched freeing and a test program like Serge's, we just speed this up and end up with a very fragmented slab - llm22:~ # cat /proc/sys/fs/file-nr 587730 0 758844 At the same time, I see only a 2000+ objects in filp cache. The following patch I fixes this problem. This patch changes the file counting by removing the filp_count_lock. Instead we use a separate percpu counter, nr_files, for now and all accesses to it are through get_nr_files() api. In the sysctl handler for nr_files, we populate files_stat.nr_files before returning to user. Counting files as an when they are created and destroyed (as opposed to inside slab) allows us to correctly count open files with RCU. Signed-off-by: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index de2d9109194e..32b48e8ee36e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -50,6 +50,9 @@ #include #include +extern int proc_nr_files(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ @@ -943,7 +946,7 @@ static ctl_table fs_table[] = { .data = &files_stat, .maxlen = 3*sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_nr_files, }, { .ctl_name = FS_MAXFILE, -- cgit v1.2.3 From 7cd9013be6c22f3ff6f777354f766c8c0b955e17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 11 Mar 2006 03:27:18 -0800 Subject: [PATCH] remove __put_task_struct_cb export again The patch '[PATCH] RCU signal handling' [1] added an export for __put_task_struct_cb, a put_task_struct helper newly introduced in that patch. But the put_task_struct couldn't be used modular previously as __put_task_struct wasn't exported. There are not callers of it in modular code, and it shouldn't be exported because we don't want drivers to hold references to task_structs. This patch removes the export and folds __put_task_struct into __put_task_struct_cb as there's no other caller. [1] http://www2.kernel.org/git/gitweb.cgi?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=e56d090310d7625ecb43a1eeebd479f04affb48b Signed-off-by: Christoph Hellwig Acked-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 4 +++- kernel/sched.c | 7 ------- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index fbea12d7a943..a8eab86de7f1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -108,8 +108,10 @@ void free_task(struct task_struct *tsk) } EXPORT_SYMBOL(free_task); -void __put_task_struct(struct task_struct *tsk) +void __put_task_struct_cb(struct rcu_head *rhp) { + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); + WARN_ON(!(tsk->exit_state & (EXIT_DEAD | EXIT_ZOMBIE))); WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); diff --git a/kernel/sched.c b/kernel/sched.c index e82c99f1db64..4d46e90f59c3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -178,13 +178,6 @@ static unsigned int task_timeslice(task_t *p) #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \ < (long long) (sd)->cache_hot_time) -void __put_task_struct_cb(struct rcu_head *rhp) -{ - __put_task_struct(container_of(rhp, struct task_struct, rcu)); -} - -EXPORT_SYMBOL_GPL(__put_task_struct_cb); - /* * These are the runqueue data structures: */ -- cgit v1.2.3 From f9a3879abf2f1a27c39915e6074b8ff15a24cb55 Mon Sep 17 00:00:00 2001 From: GOTO Masanori Date: Mon, 13 Mar 2006 21:20:44 -0800 Subject: [PATCH] Fix sigaltstack corruption among cloned threads This patch fixes alternate signal stack corruption among cloned threads with CLONE_SIGHAND (and CLONE_VM) for linux-2.6.16-rc6. The value of alternate signal stack is currently inherited after a call of clone(... CLONE_SIGHAND | CLONE_VM). But if sigaltstack is set by a parent thread, and then if multiple cloned child threads (+ parent threads) call signal handler at the same time, some threads may be conflicted - because they share to use the same alternative signal stack region. Finally they get sigsegv. It's an undesirable race condition. Note that child threads created from NPTL pthread_create() also hit this conflict when the parent thread uses sigaltstack, without my patch. To fix this problem, this patch clears the child threads' sigaltstack information like exec(). This behavior follows the SUSv3 specification. In SUSv3, pthread_create() says "The alternate stack shall not be inherited (when new threads are initialized)". It means that sigaltstack should be cleared when sigaltstack memory space is shared by cloned threads with CLONE_SIGHAND. Note that I chose "if (clone_flags & CLONE_SIGHAND)" line because: - If clone_flags line is not existed, fork() does not inherit sigaltstack. - CLONE_VM is another choice, but vfork() does not inherit sigaltstack. - CLONE_SIGHAND implies CLONE_VM, and it looks suitable. - CLONE_THREAD is another candidate, and includes CLONE_SIGHAND + CLONE_VM, but this flag has a bit different semantics. I decided to use CLONE_SIGHAND. [ Changed to test for CLONE_VM && !CLONE_VFORK after discussion --Linus ] Signed-off-by: GOTO Masanori Cc: Roland McGrath Cc: Ingo Molnar Acked-by: Linus Torvalds Cc: Ulrich Drepper Cc: Jakub Jelinek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index a8eab86de7f1..ccdfbb16c86d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1061,6 +1061,12 @@ static task_t *copy_process(unsigned long clone_flags, */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; + /* + * sigaltstack should be cleared when sharing the same VM + */ + if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM) + p->sas_ss_sp = p->sas_ss_size = 0; + /* * Syscall tracing should be turned off in the child regardless * of CLONE_PTRACE. -- cgit v1.2.3 From e0e8eb54d8ae0c4cfd1d297f6351b08a7f635c5f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 16 Mar 2006 10:31:38 -0700 Subject: [PATCH] unshare: Use rcu_assign_pointer when setting sighand The sighand pointer only needs the rcu_read_lock on the read side. So only depending on task_lock protection when setting this pointer is not enough. We also need a memory barrier to ensure the initialization is seen first. Use rcu_assign_pointer as it does this for us, and clearly documents that we are setting an rcu readable pointer. Signed-off-by: Eric W. Biederman Acked-by: Paul E. McKenney Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index ccdfbb16c86d..46060cb24af0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1569,7 +1569,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) if (new_sigh) { sigh = current->sighand; - current->sighand = new_sigh; + rcu_assign_pointer(current->sighand, new_sigh); new_sigh = sigh; } -- cgit v1.2.3 From 67890d7084085e29c51afa2514036d42643fd3cf Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 16 Mar 2006 23:04:00 -0800 Subject: [PATCH] time_interpolator: add __read_mostly The pointer to the current time interpolator and the current list of time interpolators are typically only changed during bootup. Adding __read_mostly takes them away from possibly hot cachelines. Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index bf7c4193b936..2410c18dbeb1 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1354,8 +1354,8 @@ void __init init_timers(void) #ifdef CONFIG_TIME_INTERPOLATION -struct time_interpolator *time_interpolator; -static struct time_interpolator *time_interpolator_list; +struct time_interpolator *time_interpolator __read_mostly; +static struct time_interpolator *time_interpolator_list __read_mostly; static DEFINE_SPINLOCK(time_interpolator_lock); static inline u64 time_interpolator_get_cycles(unsigned int src) -- cgit v1.2.3 From a0a0c28c1a7109d7955815074c52cac079ab3ba5 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Thu, 16 Mar 2006 23:04:01 -0800 Subject: [PATCH] posix-timers: fix requeue accounting when signal is ignored When the posix-timer signal is ignored then the timer is rearmed by the callback function. The requeue pending accounting has to be fixed up else the state might be wrong. Signed-off-by: Roman Zippel Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/posix-timers.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 216f574b5ffb..fa895fc2ecf5 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -353,6 +353,7 @@ static int posix_timer_fn(void *data) hrtimer_forward(&timr->it.real.timer, timr->it.real.interval); ret = HRTIMER_RESTART; + ++timr->it_requeue_pending; } } -- cgit v1.2.3 From 2d61b86775a5676a8fba2ba2f0f869564e35c630 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 18 Mar 2006 20:41:10 +0300 Subject: [PATCH] disable unshare(CLONE_VM) for now sys_unshare() does mmput(new_mm). This is not enough if we have mm->core_waiters. This patch is a temporary fix for soon to be released 2.6.16. Signed-off-by: Oleg Nesterov [ Checked with Uli: "I'm not planning to use unshare(CLONE_VM). It's not needed for any functionality planned so far. What we (as in Red Hat) need unshare() for now is the filesystem side." ] Signed-off-by: Linus Torvalds --- kernel/fork.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 46060cb24af0..b373322ca497 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1478,9 +1478,7 @@ static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) if ((unshare_flags & CLONE_VM) && (mm && atomic_read(&mm->mm_users) > 1)) { - *new_mmp = dup_mm(current); - if (!*new_mmp) - return -ENOMEM; + return -EINVAL; } return 0; -- cgit v1.2.3 From afc847b7ddcf636e524cf5b0de644bd3a9419a8c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 28 Feb 2006 12:51:55 -0500 Subject: [PATCH] don't do exit_io_context() until we know we won't be doing any IO testcase: mount /dev/sdb10 /mnt touch /mnt/tmp/b umount /mnt mount /dev/sdb10 /mnt rm /mnt/tmp/b --- kernel/exit.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 531aadca5530..d1e8d500a7e1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -807,8 +807,6 @@ fastcall NORET_TYPE void do_exit(long code) panic("Attempted to kill the idle task!"); if (unlikely(tsk->pid == 1)) panic("Attempted to kill init!"); - if (tsk->io_context) - exit_io_context(); if (unlikely(current->ptrace & PT_TRACE_EXIT)) { current->ptrace_message = code; @@ -822,6 +820,8 @@ fastcall NORET_TYPE void do_exit(long code) if (unlikely(tsk->flags & PF_EXITING)) { printk(KERN_ALERT "Fixing recursive fault but reboot is needed!\n"); + if (tsk->io_context) + exit_io_context(); set_current_state(TASK_UNINTERRUPTIBLE); schedule(); } @@ -881,6 +881,9 @@ fastcall NORET_TYPE void do_exit(long code) */ mutex_debug_check_no_locks_held(tsk); + if (tsk->io_context) + exit_io_context(); + /* PF_DEAD causes final put_task_struct after we schedule. */ preempt_disable(); BUG_ON(tsk->flags & PF_DEAD); -- cgit v1.2.3 From 51107301b629640f9ab76fe23bf385e187b9ac29 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Wed, 15 Mar 2006 08:28:55 -0500 Subject: [PATCH] kobject: fix build error if CONFIG_SYSFS=n Moving uevent_seqnum and uevent_helper to kobject_uevent.c because they are used even if CONFIG_SYSFS=n while kernel/ksysfs.c is built only if CONFIG_SYSFS=y, Signed-off-by: Jun'ichi Nomura Signed-off-by: Greg Kroah-Hartman --- kernel/ksysfs.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel') diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index d5eeae0fa5bc..f2690ed74530 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -15,9 +15,6 @@ #include #include -u64 uevent_seqnum; -char uevent_helper[UEVENT_HELPER_PATH_LEN] = "/sbin/hotplug"; - #define KERNEL_ATTR_RO(_name) \ static struct subsys_attribute _name##_attr = __ATTR_RO(_name) -- cgit v1.2.3 From 3fd6805f4dfb02bcfb5634972eabad0e790f119a Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 8 Feb 2006 21:16:45 +0100 Subject: [PATCH] Clean up module.c symbol searching logic Signed-off-by: Sam Ravnborg Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 73 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 32 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 5aad477ddc79..2a892b20d68f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -135,6 +135,18 @@ extern const unsigned long __start___kcrctab_gpl[]; #define symversion(base, idx) ((base) ? ((base) + (idx)) : NULL) #endif +/* lookup symbol in given range of kernel_symbols */ +static const struct kernel_symbol *lookup_symbol(const char *name, + const struct kernel_symbol *start, + const struct kernel_symbol *stop) +{ + const struct kernel_symbol *ks = start; + for (; ks < stop; ks++) + if (strcmp(ks->name, name) == 0) + return ks; + return NULL; +} + /* Find a symbol, return value, crc and module which owns it */ static unsigned long __find_symbol(const char *name, struct module **owner, @@ -142,39 +154,41 @@ static unsigned long __find_symbol(const char *name, int gplok) { struct module *mod; - unsigned int i; + const struct kernel_symbol *ks; /* Core kernel first. */ *owner = NULL; - for (i = 0; __start___ksymtab+i < __stop___ksymtab; i++) { - if (strcmp(__start___ksymtab[i].name, name) == 0) { - *crc = symversion(__start___kcrctab, i); - return __start___ksymtab[i].value; - } + ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab); + if (ks) { + *crc = symversion(__start___kcrctab, (ks - __start___ksymtab)); + return ks->value; } if (gplok) { - for (i = 0; __start___ksymtab_gpl+i<__stop___ksymtab_gpl; i++) - if (strcmp(__start___ksymtab_gpl[i].name, name) == 0) { - *crc = symversion(__start___kcrctab_gpl, i); - return __start___ksymtab_gpl[i].value; - } + ks = lookup_symbol(name, __start___ksymtab_gpl, + __stop___ksymtab_gpl); + if (ks) { + *crc = symversion(__start___kcrctab_gpl, + (ks - __start___ksymtab_gpl)); + return ks->value; + } } /* Now try modules. */ list_for_each_entry(mod, &modules, list) { *owner = mod; - for (i = 0; i < mod->num_syms; i++) - if (strcmp(mod->syms[i].name, name) == 0) { - *crc = symversion(mod->crcs, i); - return mod->syms[i].value; - } + ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms); + if (ks) { + *crc = symversion(mod->crcs, (ks - mod->syms)); + return ks->value; + } if (gplok) { - for (i = 0; i < mod->num_gpl_syms; i++) { - if (strcmp(mod->gpl_syms[i].name, name) == 0) { - *crc = symversion(mod->gpl_crcs, i); - return mod->gpl_syms[i].value; - } + ks = lookup_symbol(name, mod->gpl_syms, + mod->gpl_syms + mod->num_gpl_syms); + if (ks) { + *crc = symversion(mod->gpl_crcs, + (ks - mod->gpl_syms)); + return ks->value; } } } @@ -1444,18 +1458,13 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, #ifdef CONFIG_KALLSYMS int is_exported(const char *name, const struct module *mod) { - unsigned int i; - - if (!mod) { - for (i = 0; __start___ksymtab+i < __stop___ksymtab; i++) - if (strcmp(__start___ksymtab[i].name, name) == 0) - return 1; - return 0; - } - for (i = 0; i < mod->num_syms; i++) - if (strcmp(mod->syms[i].name, name) == 0) + if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab)) + return 1; + else + if (lookup_symbol(name, mod->syms, mod->syms + mod->num_syms)) return 1; - return 0; + else + return 0; } /* As per nm */ -- cgit v1.2.3 From 9f28bb7e1d0188a993403ab39b774785892805e1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 Mar 2006 13:17:13 -0800 Subject: [PATCH] add EXPORT_SYMBOL_GPL_FUTURE() This patch adds the ability to mark symbols that will be changed in the future, so that kernel modules that don't include MODULE_LICENSE("GPL") and use the symbols, will be flagged and printed out to the system log. Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 2a892b20d68f..5ca99fbe9f44 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -126,8 +126,11 @@ extern const struct kernel_symbol __start___ksymtab[]; extern const struct kernel_symbol __stop___ksymtab[]; extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; +extern const struct kernel_symbol __start___ksymtab_gpl_future[]; +extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; extern const unsigned long __start___kcrctab[]; extern const unsigned long __start___kcrctab_gpl[]; +extern const unsigned long __start___kcrctab_gpl_future[]; #ifndef CONFIG_MODVERSIONS #define symversion(base, idx) NULL @@ -172,6 +175,22 @@ static unsigned long __find_symbol(const char *name, return ks->value; } } + ks = lookup_symbol(name, __start___ksymtab_gpl_future, + __stop___ksymtab_gpl_future); + if (ks) { + if (!gplok) { + printk(KERN_WARNING "Symbol %s is being used " + "by a non-GPL module, which will not " + "be allowed in the future\n", name); + printk(KERN_WARNING "Please see the file " + "Documentation/feature-removal-schedule.txt " + "in the kernel source tree for more " + "details.\n"); + } + *crc = symversion(__start___kcrctab_gpl_future, + (ks - __start___ksymtab_gpl_future)); + return ks->value; + } /* Now try modules. */ list_for_each_entry(mod, &modules, list) { @@ -191,6 +210,23 @@ static unsigned long __find_symbol(const char *name, return ks->value; } } + ks = lookup_symbol(name, mod->gpl_future_syms, + (mod->gpl_future_syms + + mod->num_gpl_future_syms)); + if (ks) { + if (!gplok) { + printk(KERN_WARNING "Symbol %s is being used " + "by a non-GPL module, which will not " + "be allowed in the future\n", name); + printk(KERN_WARNING "Please see the file " + "Documentation/feature-removal-schedule.txt " + "in the kernel source tree for more " + "details.\n"); + } + *crc = symversion(mod->gpl_future_crcs, + (ks - mod->gpl_future_syms)); + return ks->value; + } } DEBUGP("Failed to find symbol %s\n", name); return 0; @@ -1546,7 +1582,8 @@ static struct module *load_module(void __user *umod, char *secstrings, *args, *modmagic, *strtab = NULL; unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, exportindex, modindex, obsparmindex, infoindex, gplindex, - crcindex, gplcrcindex, versindex, pcpuindex; + crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex, + gplfuturecrcindex; long arglen; struct module *mod; long err = 0; @@ -1627,8 +1664,10 @@ static struct module *load_module(void __user *umod, /* Optional sections */ exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab"); gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl"); + gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future"); crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab"); gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl"); + gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future"); setupindex = find_sec(hdr, sechdrs, secstrings, "__param"); exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table"); obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); @@ -1784,10 +1823,16 @@ static struct module *load_module(void __user *umod, mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr; if (gplcrcindex) mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr; + mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size / + sizeof(*mod->gpl_future_syms); + mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr; + if (gplfuturecrcindex) + mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr; #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !crcindex) || - (mod->num_gpl_syms && !gplcrcindex)) { + (mod->num_gpl_syms && !gplcrcindex) || + (mod->num_gpl_future_syms && !gplfuturecrcindex)) { printk(KERN_WARNING "%s: No versions for exported symbols." " Tainting kernel.\n", mod->name); add_taint(TAINT_FORCED_MODULE); -- cgit v1.2.3 From 01ca70dca5c64cb774a8ac2f50bddff21d60169f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 20 Mar 2006 13:17:13 -0800 Subject: [PATCH] add EXPORT_SYMBOL_GPL_FUTURE() to RCU subsystem As the RCU symbols are going to be changed to GPL in the near future, lets warn users that this is going to happen. Cc: Paul McKenney Acked-by: Dipankar Sarma Signed-off-by: Greg Kroah-Hartman --- kernel/rcupdate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 8cf15a569fcd..fedf5e369755 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -609,7 +609,7 @@ module_param(qlowmark, int, 0); module_param(rsinterval, int, 0); #endif EXPORT_SYMBOL_GPL(rcu_batches_completed); -EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ -EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL_GPL_FUTURE(call_rcu); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL_GPL_FUTURE(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ EXPORT_SYMBOL_GPL(synchronize_rcu); -EXPORT_SYMBOL(synchronize_kernel); /* WARNING: GPL-only in April 2006. */ +EXPORT_SYMBOL_GPL_FUTURE(synchronize_kernel); /* WARNING: GPL-only in April 2006. */ -- cgit v1.2.3 From 03e88ae1b13dfdc8bbaa59b8198e1ca53aad12ac Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 Feb 2006 13:50:23 -0800 Subject: [PATCH] fix module sysfs files reference counting The module files, refcnt, version, and srcversion did not properly increment the owner's module reference count, allowing the modules to be removed while the files were open, causing oopses. This patch fixes this, and also fixes the problem that the version and srcversion files were not showing up, unless CONFIG_MODULE_UNLOAD was enabled, which is not correct. Cc: Nathan Lynch Signed-off-by: Greg Kroah-Hartman --- kernel/module.c | 77 +++++++++++++++++++++++---------------------------------- kernel/params.c | 10 -------- 2 files changed, 31 insertions(+), 56 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 5ca99fbe9f44..77764f22f021 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -429,7 +429,6 @@ static inline void percpu_modcopy(void *pcpudst, const void *src, } #endif /* CONFIG_SMP */ -#ifdef CONFIG_MODULE_UNLOAD #define MODINFO_ATTR(field) \ static void setup_modinfo_##field(struct module *mod, const char *s) \ { \ @@ -461,12 +460,7 @@ static struct module_attribute modinfo_##field = { \ MODINFO_ATTR(version); MODINFO_ATTR(srcversion); -static struct module_attribute *modinfo_attrs[] = { - &modinfo_version, - &modinfo_srcversion, - NULL, -}; - +#ifdef CONFIG_MODULE_UNLOAD /* Init the unload section of the module. */ static void module_unload_init(struct module *mod) { @@ -781,6 +775,15 @@ static inline void module_unload_init(struct module *mod) } #endif /* CONFIG_MODULE_UNLOAD */ +static struct module_attribute *modinfo_attrs[] = { + &modinfo_version, + &modinfo_srcversion, +#ifdef CONFIG_MODULE_UNLOAD + &refcnt, +#endif + NULL, +}; + #ifdef CONFIG_OBSOLETE_MODPARM /* Bounds checking done below */ static int obsparm_copy_string(const char *val, struct kernel_param *kp) @@ -1106,37 +1109,28 @@ static inline void remove_sect_attrs(struct module *mod) } #endif /* CONFIG_KALLSYMS */ - -#ifdef CONFIG_MODULE_UNLOAD -static inline int module_add_refcnt_attr(struct module *mod) -{ - return sysfs_create_file(&mod->mkobj.kobj, &refcnt.attr); -} -static void module_remove_refcnt_attr(struct module *mod) -{ - return sysfs_remove_file(&mod->mkobj.kobj, &refcnt.attr); -} -#else -static inline int module_add_refcnt_attr(struct module *mod) -{ - return 0; -} -static void module_remove_refcnt_attr(struct module *mod) -{ -} -#endif - -#ifdef CONFIG_MODULE_UNLOAD static int module_add_modinfo_attrs(struct module *mod) { struct module_attribute *attr; + struct module_attribute *temp_attr; int error = 0; int i; + mod->modinfo_attrs = kzalloc((sizeof(struct module_attribute) * + (ARRAY_SIZE(modinfo_attrs) + 1)), + GFP_KERNEL); + if (!mod->modinfo_attrs) + return -ENOMEM; + + temp_attr = mod->modinfo_attrs; for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) { if (!attr->test || - (attr->test && attr->test(mod))) - error = sysfs_create_file(&mod->mkobj.kobj,&attr->attr); + (attr->test && attr->test(mod))) { + memcpy(temp_attr, attr, sizeof(*temp_attr)); + temp_attr->attr.owner = mod; + error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); + ++temp_attr; + } } return error; } @@ -1146,12 +1140,16 @@ static void module_remove_modinfo_attrs(struct module *mod) struct module_attribute *attr; int i; - for (i = 0; (attr = modinfo_attrs[i]); i++) { + for (i = 0; (attr = &mod->modinfo_attrs[i]); i++) { + /* pick a field to test for end of list */ + if (!attr->attr.name) + break; sysfs_remove_file(&mod->mkobj.kobj,&attr->attr); - attr->free(mod); + if (attr->free) + attr->free(mod); } + kfree(mod->modinfo_attrs); } -#endif static int mod_sysfs_setup(struct module *mod, struct kernel_param *kparam, @@ -1169,19 +1167,13 @@ static int mod_sysfs_setup(struct module *mod, if (err) goto out; - err = module_add_refcnt_attr(mod); - if (err) - goto out_unreg; - err = module_param_sysfs_setup(mod, kparam, num_params); if (err) goto out_unreg; -#ifdef CONFIG_MODULE_UNLOAD err = module_add_modinfo_attrs(mod); if (err) goto out_unreg; -#endif return 0; @@ -1193,10 +1185,7 @@ out: static void mod_kobject_remove(struct module *mod) { -#ifdef CONFIG_MODULE_UNLOAD module_remove_modinfo_attrs(mod); -#endif - module_remove_refcnt_attr(mod); module_param_sysfs_remove(mod); kobject_unregister(&mod->mkobj.kobj); @@ -1474,7 +1463,6 @@ static char *get_modinfo(Elf_Shdr *sechdrs, return NULL; } -#ifdef CONFIG_MODULE_UNLOAD static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, unsigned int infoindex) { @@ -1489,7 +1477,6 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, attr->attr.name)); } } -#endif #ifdef CONFIG_KALLSYMS int is_exported(const char *name, const struct module *mod) @@ -1803,10 +1790,8 @@ static struct module *load_module(void __user *umod, if (strcmp(mod->name, "driverloader") == 0) add_taint(TAINT_PROPRIETARY_MODULE); -#ifdef CONFIG_MODULE_UNLOAD /* Set up MODINFO_ATTR fields */ setup_modinfo(mod, sechdrs, infoindex); -#endif /* Fix up syms, so that st_value is a pointer to location. */ err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex, diff --git a/kernel/params.c b/kernel/params.c index c76ad25e6a21..a29150582310 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -638,13 +638,8 @@ static ssize_t module_attr_show(struct kobject *kobj, if (!attribute->show) return -EIO; - if (!try_module_get(mk->mod)) - return -ENODEV; - ret = attribute->show(attribute, mk->mod, buf); - module_put(mk->mod); - return ret; } @@ -662,13 +657,8 @@ static ssize_t module_attr_store(struct kobject *kobj, if (!attribute->store) return -EIO; - if (!try_module_get(mk->mod)) - return -ENODEV; - ret = attribute->store(attribute, mk->mod, buf, len); - module_put(mk->mod); - return ret; } -- cgit v1.2.3 From 9430d58e34ec3861e1ca72f8e49105b227aad327 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 22 Mar 2006 00:07:33 -0800 Subject: [PATCH] sched: remove sleep_avg multiplier Remove the sleep_avg multiplier. This multiplier was necessary back when we had 10 seconds of dynamic range in sleep_avg, but now that we only have one second, it causes that one second to be compressed down to 100ms in some cases. This is particularly noticeable when compiling a kernel in a slow NFS mount, and I believe it to be a very likely candidate for other recently reported network related interactivity problems. In testing, I can detect no negative impact of this removal. Signed-off-by: Mike Galbraith Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 4d46e90f59c3..6b6e0d70eb30 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -706,12 +706,6 @@ static int recalc_task_prio(task_t *p, unsigned long long now) p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG - DEF_TIMESLICE); } else { - /* - * The lower the sleep avg a task has the more - * rapidly it will rise with sleep time. - */ - sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1; - /* * Tasks waking from uninterruptible sleep are * limited in their sleep_avg rise as they -- cgit v1.2.3 From 06f9d4f94a075285d25253edbf57f2cda07d4ff3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 22 Mar 2006 00:07:40 -0800 Subject: [PATCH] unshare: Error if passed unsupported flags A bare bones trivial patch to ensure we always get -EINVAL on the unsupported cases for sys_unshare. If this goes in before 2.6.16 it allows us to forward compatible with future applications using sys_unshare. Signed-off-by: Eric W. Biederman Cc: JANAK DESAI Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index b373322ca497..9bd7b65ee418 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1534,6 +1534,12 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) check_unshare_flags(&unshare_flags); + /* Return -EINVAL for all unsupported flags */ + err = -EINVAL; + if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| + CLONE_VM|CLONE_FILES|CLONE_SYSVSEM)) + goto bad_unshare_out; + if ((err = unshare_thread(unshare_flags))) goto bad_unshare_out; if ((err = unshare_fs(unshare_flags, &new_fs))) -- cgit v1.2.3 From 78eef01b0fae087c5fadbd85dd4fe2918c3a015f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 22 Mar 2006 00:08:16 -0800 Subject: [PATCH] on_each_cpu(): disable local interrupts When on_each_cpu() runs the callback on other CPUs, it runs with local interrupts disabled. So we should run the function with local interrupts disabled on this CPU, too. And do the same for UP, so the callback is run in the same environment on both UP and SMP. (strictly it should do preempt_disable() too, but I think local_irq_disable is sufficiently equivalent). Also uninlines on_each_cpu(). softirq.c was the most appropriate file I could find, but it doesn't seem to justify creating a new file. Oh, and fix up that comment over (under?) x86's smp_call_function(). It drives me nuts. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/softirq.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'kernel') diff --git a/kernel/softirq.c b/kernel/softirq.c index ad3295cdded5..ec8fed42a86f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -16,6 +16,7 @@ #include #include #include +#include #include /* @@ -495,3 +496,22 @@ __init int spawn_ksoftirqd(void) register_cpu_notifier(&cpu_nfb); return 0; } + +#ifdef CONFIG_SMP +/* + * Call a function on all processors + */ +int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait) +{ + int ret = 0; + + preempt_disable(); + ret = smp_call_function(func, info, retry, wait); + local_irq_disable(); + func(info); + local_irq_enable(); + preempt_enable(); + return ret; +} +EXPORT_SYMBOL(on_each_cpu); +#endif -- cgit v1.2.3