From 829b6c1ef488856c6a46a2f705f5068062d5f34c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 11 Mar 2010 14:04:30 -0800 Subject: timer stats: Fix del_timer_sync() and try_to_del_timer_sync() These functions forgot to run timer_stats_timer_clear_start_info(). It's unobvious what effect this has and whether it matters much - we won't be printing it out anyway if the timer's detached. Untested, just an Ingo trollpatch. [ Nevertheless correct - tglx ] Signed-off-by: Andrew Morton Cc: Ingo Molnar Cc: johnstul@us.ibm.com Signed-off-by: Thomas Gleixner --- kernel/timer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index c61a7949387f..fc965eae0e87 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -880,6 +880,7 @@ int try_to_del_timer_sync(struct timer_list *timer) if (base->running_timer == timer) goto out; + timer_stats_timer_clear_start_info(timer); ret = 0; if (timer_pending(timer)) { detach_timer(timer, 1); -- cgit v1.2.3 From 06f71b922ce5a05352acd706564ca4ae1f2add0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 11 Mar 2010 14:04:46 -0800 Subject: timer: Print function name for timer callbacks modifying preemption count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A function scheduled with a timer must not exit with a different preempt count than it was entered. To make helping users running into the corresponding BUG() easier also print the name of the bad function not only its address. [ tglx: Sanitized printk ] Signed-off-by: Uwe Kleine-König Cc: johnstul@us.ibm.com Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/timer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index c61a7949387f..f82f4bfe2d88 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1027,11 +1027,8 @@ static inline void __run_timers(struct tvec_base *base) lock_map_release(&lockdep_map); if (preempt_count != preempt_count()) { - printk(KERN_ERR "huh, entered %p " - "with preempt_count %08x, exited" - " with %08x?\n", - fn, preempt_count, - preempt_count()); + printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", + fn, preempt_count, preempt_count()); BUG(); } } -- cgit v1.2.3 From 576da126a6c7364d70dfd58d0bbe43d05cf5859f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Mar 2010 21:10:29 +0100 Subject: timer: Split out timer function call The ident level is starting to be annoying. More white space than actual code. Split out the timer function call into its own function. Signed-off-by: Thomas Gleixner --- kernel/timer.c | 72 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index f82f4bfe2d88..45229694dc6a 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -953,6 +953,41 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) return index; } +static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), + unsigned long data) +{ + int preempt_count = preempt_count(); + +#ifdef CONFIG_LOCKDEP + /* + * It is permissible to free the timer from inside the + * function that is called from it, this we need to take into + * account for lockdep too. To avoid bogus "held lock freed" + * warnings as well as problems when looking into + * timer->lockdep_map, make a copy and use that here. + */ + struct lockdep_map lockdep_map = timer->lockdep_map; +#endif + /* + * Couple the lock chain with the lock chain at + * del_timer_sync() by acquiring the lock_map around the fn() + * call here and in del_timer_sync(). + */ + lock_map_acquire(&lockdep_map); + + trace_timer_expire_entry(timer); + fn(data); + trace_timer_expire_exit(timer); + + lock_map_release(&lockdep_map); + + if (preempt_count != preempt_count()) { + printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", + fn, preempt_count, preempt_count()); + BUG(); + } +} + #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) /** @@ -996,42 +1031,7 @@ static inline void __run_timers(struct tvec_base *base) detach_timer(timer, 1); spin_unlock_irq(&base->lock); - { - int preempt_count = preempt_count(); - -#ifdef CONFIG_LOCKDEP - /* - * It is permissible to free the timer from - * inside the function that is called from - * it, this we need to take into account for - * lockdep too. To avoid bogus "held lock - * freed" warnings as well as problems when - * looking into timer->lockdep_map, make a - * copy and use that here. - */ - struct lockdep_map lockdep_map = - timer->lockdep_map; -#endif - /* - * Couple the lock chain with the lock chain at - * del_timer_sync() by acquiring the lock_map - * around the fn() call here and in - * del_timer_sync(). - */ - lock_map_acquire(&lockdep_map); - - trace_timer_expire_entry(timer); - fn(data); - trace_timer_expire_exit(timer); - - lock_map_release(&lockdep_map); - - if (preempt_count != preempt_count()) { - printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", - fn, preempt_count, preempt_count()); - BUG(); - } - } + call_timer_fn(timer, fn, data); spin_lock_irq(&base->lock); } } -- cgit v1.2.3 From 802702e0c2618465b813242d4dfee6a233ba0beb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 12 Mar 2010 20:13:23 +0100 Subject: timer: Try to survive timer callback preempt_count leak If a timer callback leaks preempt_count we currently assert a BUG(). That makes it unnecessarily hard to retrieve information about the problem especially on laptops and headless stations. There is a decent chance to survive the preempt_count leak by restoring the preempt_count to the value before the callback. That allows in many cases to get valuable information about the root cause of the problem. We carried that fixup in preempt-rt for years and were able to decode such wreckage quite a few times. Signed-off-by: Thomas Gleixner Cc: Linux Torvalds Cc: Andrew Morton Cc: Arjan van de Veen --- kernel/timer.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 45229694dc6a..7e12e7bc7ce6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -982,9 +982,15 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), lock_map_release(&lockdep_map); if (preempt_count != preempt_count()) { - printk(KERN_ERR "timer: %pF preempt leak: %08x -> %08x\n", - fn, preempt_count, preempt_count()); - BUG(); + WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n", + fn, preempt_count, preempt_count()); + /* + * Restore the preempt count. That gives us a decent + * chance to survive and extract information. If the + * callback kept a lock held, bad luck, but not worse + * than the BUG() we had. + */ + preempt_count() = preempt_count; } } -- cgit v1.2.3 From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- kernel/timer.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index fc965eae0e87..aeb6a54f2771 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 3bbb9ec946428b96657126768f65487a48dd090c Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 11 Mar 2010 14:04:36 -0800 Subject: timers: Introduce the concept of timer slack for legacy timers While HR timers have had the concept of timer slack for quite some time now, the legacy timers lacked this concept, and had to make do with round_jiffies() and friends. Timer slack is important for power management; grouping timers reduces the number of wakeups which in turn reduces power consumption. This patch introduces timer slack to the legacy timers using the following pieces: * A slack field in the timer struct * An api (set_timer_slack) that callers can use to set explicit timer slack * A default slack of 0.4% of the requested delay for callers that do not set any explicit slack * Rounding code that is part of mod_timer() that tries to group timers around jiffies values every 'power of two' (so quick timers will group around every 2, but longer timers will group around every 4, 8, 16, 32 etc) Signed-off-by: Arjan van de Ven Cc: johnstul@us.ibm.com Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/timer.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 7e12e7bc7ce6..49773f38c9bc 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -318,6 +318,24 @@ unsigned long round_jiffies_up_relative(unsigned long j) } EXPORT_SYMBOL_GPL(round_jiffies_up_relative); +/** + * set_timer_slack - set the allowed slack for a timer + * @slack_hz: the amount of time (in jiffies) allowed for rounding + * + * Set the amount of time, in jiffies, that a certain timer has + * in terms of slack. By setting this value, the timer subsystem + * will schedule the actual timer somewhere between + * the time mod_timer() asks for, and that time plus the slack. + * + * By setting the slack to -1, a percentage of the delay is used + * instead. + */ +void set_timer_slack(struct timer_list *timer, int slack_hz) +{ + timer->slack = slack_hz; +} +EXPORT_SYMBOL_GPL(set_timer_slack); + static inline void set_running_timer(struct tvec_base *base, struct timer_list *timer) @@ -549,6 +567,7 @@ static void __init_timer(struct timer_list *timer, { timer->entry.next = NULL; timer->base = __raw_get_cpu_var(tvec_bases); + timer->slack = -1; #ifdef CONFIG_TIMER_STATS timer->start_site = NULL; timer->start_pid = -1; @@ -714,6 +733,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) } EXPORT_SYMBOL(mod_timer_pending); +/* + * Decide where to put the timer while taking the slack into account + * + * Algorithm: + * 1) calculate the maximum (absolute) time + * 2) calculate the highest bit where the expires and new max are different + * 3) use this bit to make a mask + * 4) use the bitmask to round down the maximum time, so that all last + * bits are zeros + */ +static inline +unsigned long apply_slack(struct timer_list *timer, unsigned long expires) +{ + unsigned long expires_limit, mask; + int bit; + + expires_limit = expires + timer->slack; + + if (timer->slack < 0) /* auto slack: use 0.4% */ + expires_limit = expires + (expires - jiffies)/256; + + mask = expires ^ expires_limit; + + if (mask == 0) + return expires; + + bit = find_last_bit(&mask, BITS_PER_LONG); + + mask = (1 << bit) - 1; + + expires_limit = expires_limit & ~(mask); + + return expires_limit; +} + /** * mod_timer - modify a timer's timeout * @timer: the timer to be modified @@ -744,6 +798,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires) if (timer_pending(timer) && timer->expires == expires) return 1; + expires = apply_slack(timer, expires); + return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); } EXPORT_SYMBOL(mod_timer); -- cgit v1.2.3 From f00e047efdf9d31c8a7dd7875b411f97cfa7d8e5 Mon Sep 17 00:00:00 2001 From: Jeff Chua Date: Mon, 24 May 2010 07:16:24 +0800 Subject: timers: Fix slack calculation for expired timers commit 3bbb9ec946 (timers: Introduce the concept of timer slack for legacy timers) does not take the case into account when the timer is already expired. This broke wireless drivers. The solution is not to apply slack to already expired timers. Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven --- kernel/timer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 9199f3c52215..be394af5bc22 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -750,13 +750,14 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) unsigned long expires_limit, mask; int bit; - expires_limit = expires + timer->slack; + expires_limit = expires; - if (timer->slack < 0) /* auto slack: use 0.4% */ + if (timer->slack > -1) + expires_limit = expires + timer->slack; + else if (time_after(expires, jiffies)) /* auto slack: use 0.4% */ expires_limit = expires + (expires - jiffies)/256; mask = expires ^ expires_limit; - if (mask == 0) return expires; -- cgit v1.2.3 From 8e63d7795e30b4091e303cc8c060509bd8eea742 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 May 2010 20:43:30 +0200 Subject: timers: Fix slack calculation really commit f00e047ef (timers: Fix slack calculation for expired timers) fixed the issue of slack on expired timers only partially. Linus noticed that jiffies is volatile so it is reloaded twice, which generates bad code. But its worse. This can defeat the time_after() check if jiffies are incremented between time_after() and the slack calculation. Fix it by reading jiffies into a local variable, which prevents the compiler from loading it twice. While at it make the > -1 check into >= 0 which is easier to read. Signed-off-by: Thomas Gleixner Cc: Arjan van de Ven Cc: Linus Torvalds --- kernel/timer.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index be394af5bc22..d8decb8d46b0 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -747,16 +747,19 @@ EXPORT_SYMBOL(mod_timer_pending); static inline unsigned long apply_slack(struct timer_list *timer, unsigned long expires) { - unsigned long expires_limit, mask; + unsigned long expires_limit, mask, now; int bit; expires_limit = expires; - if (timer->slack > -1) + if (timer->slack >= 0) { expires_limit = expires + timer->slack; - else if (time_after(expires, jiffies)) /* auto slack: use 0.4% */ - expires_limit = expires + (expires - jiffies)/256; - + } else { + now = jiffies; + /* No slack, if already expired else auto slack 0.4% */ + if (time_after(expires, now)) + expires_limit = expires + (expires - now)/256; + } mask = expires ^ expires_limit; if (mask == 0) return expires; -- cgit v1.2.3 From 2abfb9e1d470f7082e5e20e4b11a271a0124211b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 May 2010 16:07:13 +0200 Subject: timers: Move local variable into else section Fix nit-picking coding style detail. Signed-off-by: Thomas Gleixner --- kernel/timer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index d8decb8d46b0..22118342a456 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -747,7 +747,7 @@ EXPORT_SYMBOL(mod_timer_pending); static inline unsigned long apply_slack(struct timer_list *timer, unsigned long expires) { - unsigned long expires_limit, mask, now; + unsigned long expires_limit, mask; int bit; expires_limit = expires; @@ -755,7 +755,8 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires) if (timer->slack >= 0) { expires_limit = expires + timer->slack; } else { - now = jiffies; + unsigned long now = jiffies; + /* No slack, if already expired else auto slack 0.4% */ if (time_after(expires, now)) expires_limit = expires + (expires - now)/256; -- cgit v1.2.3 From 80b5184cc537718122e036afe7e62d202b70d077 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 26 May 2010 14:43:32 -0700 Subject: kernel/: convert cpu notifier to return encapsulate errno value By the previous modification, the cpu notifier can return encapsulate errno value. This converts the cpu notifiers for kernel/*.c Signed-off-by: Akinobu Mita Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/timer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index be394af5bc22..e3b8c697bde4 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1680,11 +1680,14 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { long cpu = (long)hcpu; + int err; + switch(action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - if (init_timers_cpu(cpu) < 0) - return NOTIFY_BAD; + err = init_timers_cpu(cpu); + if (err < 0) + return notifier_from_errno(err); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: -- cgit v1.2.3 From 9e506f7adce8e6165a104d3d78fddd8ff0cdccf8 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 4 Jun 2010 14:15:04 -0700 Subject: kernel/: fix BUG_ON checks for cpu notifier callbacks direct call The commit 80b5184cc537718122e036afe7e62d202b70d077 ("kernel/: convert cpu notifier to return encapsulate errno value") changed the return value of cpu notifier callbacks. Those callbacks don't return NOTIFY_BAD on failures anymore. But there are a few callbacks which are called directly at init time and checking the return value. I forgot to change BUG_ON checking by the direct callers in the commit. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/timer.c') diff --git a/kernel/timer.c b/kernel/timer.c index 2454172a80d3..ee305c8d4e18 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1717,7 +1717,7 @@ void __init init_timers(void) init_timer_stats(); - BUG_ON(err == NOTIFY_BAD); + BUG_ON(err != NOTIFY_OK); register_cpu_notifier(&timers_nb); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } -- cgit v1.2.3