From ce31332d3c77532d6ea97ddcb475a2b02dd358b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Apr 2011 00:02:00 +0200 Subject: hrtimer: Initialize CLOCK_ID to HRTIMER_BASE table statically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sedat and Bruno reported RCU stalls which turned out to be caused by the following: sched_init() calls init_rt_bandwidth() which calls hrtimer_init() _BEFORE_ hrtimers_init() is called. While not entirely correct this worked because hrtimer_init() only accessed statically initialized data (hrtimer_bases.clock_base[CLOCK_MONOTONIC]) Commit e06383db9 (hrtimers: extend hrtimer base code to handle more then 2 clockids) added an indirection to the hrtimer_bases.clock_base lookup to avoid gap handling in the hot path. The table which is used for the translation from CLOCK_ID to HRTIMER_BASE index is initialized at runtime in hrtimers_init(). So the early call of the scheduler code translates CLOCK_MONOTONIC to HRTIMER_BASE_REALTIME. Thus the rt_bandwidth timer ends up on CLOCK_REALTIME. If the timer is armed and the wall clock time is set (e.g. ntpdate in the early boot process - which also gives the problem deterministic behaviour i.e. magic recovery after N hours), then the timer ends up with an expiry time far into the future. That breaks the RT throttler mechanism as rt runtime is accumulated and never cleared, so the rt throttler detects a false cpu hog condition and blocks all RT tasks until the timer finally expires. That in turn stalls the RCU thread of TINYRCU which leads to a huge amount of RCU callbacks piling up. Make the translation table statically initialized, so we are back to the status of <= 2.6.39. Reported-and-tested-by: Sedat Dilek Reported-by: Bruno Prémont Cc: John Stultz Cc: Mike Galbraith Cc: Paul E. 
McKenney Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1104282353140.3005%40ionos%3E Reviewed-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 9017478c5d4c..87fdb3f8db14 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -81,7 +81,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = } }; -static int hrtimer_clock_to_base_table[MAX_CLOCKS]; +static int hrtimer_clock_to_base_table[MAX_CLOCKS] = { + [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, + [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, + [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, +}; static inline int hrtimer_clockid_to_base(clockid_t clock_id) { @@ -1722,10 +1726,6 @@ static struct notifier_block __cpuinitdata hrtimers_nb = { void __init hrtimers_init(void) { - hrtimer_clock_to_base_table[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME; - hrtimer_clock_to_base_table[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC; - hrtimer_clock_to_base_table[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME; - hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); -- cgit v1.2.3 From 942c3c5c329274fa6de5998cb911cf3d0a42d0b1 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 2 May 2011 15:24:27 -0400 Subject: hrtimer: Make lookup table const Signed-off-by: Mike Frysinger Link: http://lkml.kernel.org/r/%3C1304364267-14489-1-git-send-email-vapier%40gentoo.org%3E Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 87fdb3f8db14..dbbbf7d43080 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -81,7 +81,7 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = } }; -static int hrtimer_clock_to_base_table[MAX_CLOCKS] = { +static 
const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, -- cgit v1.2.3 From b12a03ce4880bd13786a98db6de494a3e0123129 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 May 2011 16:48:57 +0200 Subject: hrtimers: Prepare for cancel on clock was set timers Make clock_was_set() unconditional and rename hres_timers_resume to hrtimers_resume. This is a preparatory patch for hrtimers which are cancelled when clock realtime was set. Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 125 +++++++++++++++++++++++++++---------------------------- 1 file changed, 61 insertions(+), 64 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index dbbbf7d43080..c145ed643bca 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -621,66 +621,6 @@ static int hrtimer_reprogram(struct hrtimer *timer, return res; } - -/* - * Retrigger next event is called after clock was set - * - * Called with interrupts disabled via on_each_cpu() - */ -static void retrigger_next_event(void *arg) -{ - struct hrtimer_cpu_base *base; - struct timespec realtime_offset, wtm, sleep; - - if (!hrtimer_hres_active()) - return; - - get_xtime_and_monotonic_and_sleep_offset(&realtime_offset, &wtm, - &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - base = &__get_cpu_var(hrtimer_bases); - - /* Adjust CLOCK_REALTIME offset */ - raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - - hrtimer_force_reprogram(base, 0); - raw_spin_unlock(&base->lock); -} - -/* - * Clock realtime was set - * - * Change the offset of the realtime clock vs. the monotonic - * clock. - * - * We might have to reprogram the high resolution timer interrupt. 
On - * SMP we call the architecture specific code to retrigger _all_ high - * resolution timer interrupts. On UP we just disable interrupts and - * call the high resolution interrupt code. - */ -void clock_was_set(void) -{ - /* Retrigger the CPU local events everywhere */ - on_each_cpu(retrigger_next_event, NULL, 1); -} - -/* - * During resume we might have to reprogram the high resolution timer - * interrupt (on the local CPU): - */ -void hres_timers_resume(void) -{ - WARN_ONCE(!irqs_disabled(), - KERN_INFO "hres_timers_resume() called with IRQs enabled!"); - - retrigger_next_event(NULL); -} - /* * Initialize the high resolution related parts of cpu_base */ @@ -714,12 +654,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } +static void retrigger_next_event(void *arg); + /* * Switch to high resolution mode */ static int hrtimer_switch_to_hres(void) { - int cpu = smp_processor_id(); + int i, cpu = smp_processor_id(); struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); unsigned long flags; @@ -735,9 +677,8 @@ static int hrtimer_switch_to_hres(void) return 0; } base->hres_active = 1; - base->clock_base[HRTIMER_BASE_REALTIME].resolution = KTIME_HIGH_RES; - base->clock_base[HRTIMER_BASE_MONOTONIC].resolution = KTIME_HIGH_RES; - base->clock_base[HRTIMER_BASE_BOOTTIME].resolution = KTIME_HIGH_RES; + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) + base->clock_base[i].resolution = KTIME_HIGH_RES; tick_setup_sched_timer(); @@ -764,6 +705,62 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } #endif /* CONFIG_HIGH_RES_TIMERS */ +/* + * Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() + */ +static void retrigger_next_event(void *arg) +{ + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct timespec realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + + get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, 
&sleep); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); + + /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); + base->clock_base[HRTIMER_BASE_REALTIME].offset = + timespec_to_ktime(realtime_offset); + base->clock_base[HRTIMER_BASE_BOOTTIME].offset = + timespec_to_ktime(sleep); + + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); +} + +/* + * Clock realtime was set + * + * Change the offset of the realtime clock vs. the monotonic + * clock. + * + * We might have to reprogram the high resolution timer interrupt. On + * SMP we call the architecture specific code to retrigger _all_ high + * resolution timer interrupts. On UP we just disable interrupts and + * call the high resolution interrupt code. + */ +void clock_was_set(void) +{ + /* Retrigger the CPU local events everywhere */ + on_each_cpu(retrigger_next_event, NULL, 1); +} + +/* + * During resume we might have to reprogram the high resolution timer + * interrupt (on the local CPU): + */ +void hrtimers_resume(void) +{ + WARN_ONCE(!irqs_disabled(), + KERN_INFO "hrtimers_resume() called with IRQs enabled!"); + + retrigger_next_event(NULL); +} + static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) { #ifdef CONFIG_TIMER_STATS -- cgit v1.2.3 From 99ee5315dac6211e972fa3f23bcc9a0343ff58c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Apr 2011 14:16:42 +0200 Subject: timerfd: Allow timers to be cancelled when clock was set Some applications must be aware of clock realtime being set backward. A simple example is a clock applet which arms a timer for the next minute display. If clock realtime is set backward then the applet displays a stale time for the amount of time which the clock was set backwards. Due to that applications poll the time because we don't have an interface. Extend the timerfd interface by adding a flag which puts the timer onto a different internal realtime clock. 
All timers on this clock are expired whenever the clock was set. The timerfd core records the monotonic offset when the timer is created. When the timer is armed, then the current offset is compared to the previous recorded offset. When it has changed, then timerfd_settime returns -ECANCELED. When a timer is read the offset is compared and if it changed -ECANCELED returned to user space. Periodic timers are not rearmed in the cancelation case. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Chris Friesen Tested-by: Kay Sievers Cc: "Kirill A. Shutemov" Cc: Peter Zijlstra Cc: Davide Libenzi Reviewed-by: Alexander Shishkin Link: http://lkml.kernel.org/r/%3Calpine.LFD.2.02.1104271359580.3323%40ionos%3E Signed-off-by: Thomas Gleixner --- kernel/hrtimer.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c145ed643bca..eabcbd781433 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -78,6 +78,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, + { + .index = CLOCK_REALTIME_COS, + .get_time = &ktime_get_real, + .resolution = KTIME_LOW_RES, + }, } }; @@ -85,6 +90,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, + [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -110,6 +116,7 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; + base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim; } /* @@ -479,6 +486,8 @@ static inline void 
debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } +static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base); + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -715,9 +724,14 @@ static void retrigger_next_event(void *arg) struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); struct timespec realtime_offset, xtim, wtm, sleep; - if (!hrtimer_hres_active()) + if (!hrtimer_hres_active()) { + raw_spin_lock(&base->lock); + hrtimer_expire_cancelable(base); + raw_spin_unlock(&base->lock); return; + } + /* Optimized out for !HIGH_RES */ get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); @@ -727,6 +741,10 @@ static void retrigger_next_event(void *arg) timespec_to_ktime(realtime_offset); base->clock_base[HRTIMER_BASE_BOOTTIME].offset = timespec_to_ktime(sleep); + base->clock_base[HRTIMER_BASE_REALTIME_COS].offset = + timespec_to_ktime(realtime_offset); + + hrtimer_expire_cancelable(base); hrtimer_force_reprogram(base, 0); raw_spin_unlock(&base->lock); @@ -1222,6 +1240,22 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } +static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base) +{ + struct timerqueue_node *node; + struct hrtimer_clock_base *base; + ktime_t now = ktime_get_real(); + + base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS]; + + while ((node = timerqueue_getnext(&base->active))) { + struct hrtimer *timer; + + timer = container_of(node, struct hrtimer, node); + __run_hrtimer(timer, &now); + } +} + #ifdef CONFIG_HIGH_RES_TIMERS /* -- cgit v1.2.3 From 9ec2690758a5467f24beb301cca5098078073bba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 16:18:50 +0200 Subject: timerfd: Manage cancelable timers in timerfd Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the timer interrupt. 
Yes, I did not think about it, because the solution was so elegant. I didn't like the extra list in timerfd when it was proposed some time ago, but with an RCU based list the list walk is less horrible than the original global lock, which was held over the list iteration. Requested-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- kernel/hrtimer.c | 94 +++++++++++++++++++------------------------------------- 1 file changed, 32 insertions(+), 62 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index eabcbd781433..26dd32f9f6b2 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -78,11 +78,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, - { - .index = CLOCK_REALTIME_COS, - .get_time = &ktime_get_real, - .resolution = KTIME_LOW_RES, - }, } }; @@ -90,7 +85,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, - [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS, }; static inline int hrtimer_clockid_to_base(clockid_t clock_id) @@ -116,7 +110,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base) base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; - base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim; } /* @@ -486,8 +479,6 @@ static inline void debug_deactivate(struct hrtimer *timer) trace_hrtimer_cancel(timer); } -static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base); - /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -663,7 +654,33 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } -static void retrigger_next_event(void *arg); +/* + * 
Retrigger next event is called after clock was set + * + * Called with interrupts disabled via on_each_cpu() + */ +static void retrigger_next_event(void *arg) +{ + struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); + struct timespec realtime_offset, xtim, wtm, sleep; + + if (!hrtimer_hres_active()) + return; + + /* Optimized out for !HIGH_RES */ + get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); + set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); + + /* Adjust CLOCK_REALTIME offset */ + raw_spin_lock(&base->lock); + base->clock_base[HRTIMER_BASE_REALTIME].offset = + timespec_to_ktime(realtime_offset); + base->clock_base[HRTIMER_BASE_BOOTTIME].offset = + timespec_to_ktime(sleep); + + hrtimer_force_reprogram(base, 0); + raw_spin_unlock(&base->lock); +} /* * Switch to high resolution mode @@ -711,45 +728,10 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, return 0; } static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } +static inline void retrigger_next_event(void *arg) { } #endif /* CONFIG_HIGH_RES_TIMERS */ -/* - * Retrigger next event is called after clock was set - * - * Called with interrupts disabled via on_each_cpu() - */ -static void retrigger_next_event(void *arg) -{ - struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); - struct timespec realtime_offset, xtim, wtm, sleep; - - if (!hrtimer_hres_active()) { - raw_spin_lock(&base->lock); - hrtimer_expire_cancelable(base); - raw_spin_unlock(&base->lock); - return; - } - - /* Optimized out for !HIGH_RES */ - get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep); - set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec); - - /* Adjust CLOCK_REALTIME offset */ - raw_spin_lock(&base->lock); - base->clock_base[HRTIMER_BASE_REALTIME].offset = - timespec_to_ktime(realtime_offset); - base->clock_base[HRTIMER_BASE_BOOTTIME].offset = - timespec_to_ktime(sleep); - 
base->clock_base[HRTIMER_BASE_REALTIME_COS].offset = - timespec_to_ktime(realtime_offset); - - hrtimer_expire_cancelable(base); - - hrtimer_force_reprogram(base, 0); - raw_spin_unlock(&base->lock); -} - /* * Clock realtime was set * @@ -763,8 +745,11 @@ static void retrigger_next_event(void *arg) */ void clock_was_set(void) { +#ifdef CONFIG_HIGHRES_TIMERS /* Retrigger the CPU local events everywhere */ on_each_cpu(retrigger_next_event, NULL, 1); +#endif + timerfd_clock_was_set(); } /* @@ -777,6 +762,7 @@ void hrtimers_resume(void) KERN_INFO "hrtimers_resume() called with IRQs enabled!"); retrigger_next_event(NULL); + timerfd_clock_was_set(); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) @@ -1240,22 +1226,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) timer->state &= ~HRTIMER_STATE_CALLBACK; } -static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base) -{ - struct timerqueue_node *node; - struct hrtimer_clock_base *base; - ktime_t now = ktime_get_real(); - - base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS]; - - while ((node = timerqueue_getnext(&base->active))) { - struct hrtimer *timer; - - timer = container_of(node, struct hrtimer, node); - __run_hrtimer(timer, &now); - } -} - #ifdef CONFIG_HIGH_RES_TIMERS /* -- cgit v1.2.3 From ab8177bc53e8ae3a3ba6d200ce2c2dae263f7ee5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 13:05:15 +0200 Subject: hrtimers: Avoid touching inactive timer bases Instead of iterating over all possible timer bases avoid it by marking the active bases in the cpu base. 
Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- kernel/hrtimer.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 26dd32f9f6b2..1b08f6d67f12 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -64,17 +64,20 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clock_base = { { - .index = CLOCK_REALTIME, + .index = HRTIMER_BASE_REALTIME, + .clockid = CLOCK_REALTIME, .get_time = &ktime_get_real, .resolution = KTIME_LOW_RES, }, { - .index = CLOCK_MONOTONIC, + .index = HRTIMER_BASE_MONOTONIC, + .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, .resolution = KTIME_LOW_RES, }, { - .index = CLOCK_BOOTTIME, + .index = HRTIMER_BASE_BOOTTIME, + .clockid = CLOCK_BOOTTIME, .get_time = &ktime_get_boottime, .resolution = KTIME_LOW_RES, }, @@ -196,7 +199,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, struct hrtimer_cpu_base *new_cpu_base; int this_cpu = smp_processor_id(); int cpu = hrtimer_get_target(this_cpu, pinned); - int basenum = hrtimer_clockid_to_base(base->index); + int basenum = base->index; again: new_cpu_base = &per_cpu(hrtimer_bases, cpu); @@ -857,6 +860,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, debug_activate(timer); timerqueue_add(&base->active, &timer->node); + base->cpu_base->active_bases |= 1 << base->index; /* * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the @@ -898,6 +902,8 @@ static void __remove_hrtimer(struct hrtimer *timer, #endif } timerqueue_del(&base->active, &timer->node); + if (!timerqueue_getnext(&base->active)) + base->cpu_base->active_bases &= ~(1 << base->index); out: timer->state = newstate; } @@ -1235,7 +1241,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) void hrtimer_interrupt(struct clock_event_device *dev) { struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - struct hrtimer_clock_base 
*base; ktime_t expires_next, now, entry_time, delta; int i, retries = 0; @@ -1257,12 +1262,15 @@ retry: */ cpu_base->expires_next.tv64 = KTIME_MAX; - base = cpu_base->clock_base; - for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - ktime_t basenow; + struct hrtimer_clock_base *base; struct timerqueue_node *node; + ktime_t basenow; + + if (!(cpu_base->active_bases & (1 << i))) + continue; + base = cpu_base->clock_base + i; basenow = ktime_add(now, base->offset); while ((node = timerqueue_getnext(&base->active))) { @@ -1295,7 +1303,6 @@ retry: __run_hrtimer(timer, &basenow); } - base++; } /* @@ -1526,7 +1533,7 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) struct timespec __user *rmtp; int ret = 0; - hrtimer_init_on_stack(&t.timer, restart->nanosleep.index, + hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, HRTIMER_MODE_ABS); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); @@ -1578,7 +1585,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, restart = &current_thread_info()->restart_block; restart->fn = hrtimer_nanosleep_restart; - restart->nanosleep.index = t.timer.base->index; + restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.rmtp = rmtp; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); -- cgit v1.2.3 From 68fa61c026057a39d6ccb850aa8785043afbee02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 23:14:04 +0200 Subject: hrtimers: Reorder clock bases The ordering of the clock bases is historical due to the CLOCK_REALTIME and CLOCK_MONOTONIC constants. Now the hrtimer bases have their own enumeration due to the gap between CLOCK_MONOTONIC and CLOCK_BOOTTIME. So we can be more clever as most timers end up on the CLOCK_MONOTONIC base due to the virtue of POSIX declaring that relative CLOCK_REALTIME timers are not affected by time changes. 
In desktop environments this is slowly changing as applications switch to absolute timers, but I've observed empty CLOCK_REALTIME bases often enough. There is no performance penalty or overhead when CLOCK_REALTIME timers are active, but in case they are not we don't skip over a full cache line. Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- kernel/hrtimer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 1b08f6d67f12..c541ee527ecb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -63,18 +63,18 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clock_base = { - { - .index = HRTIMER_BASE_REALTIME, - .clockid = CLOCK_REALTIME, - .get_time = &ktime_get_real, - .resolution = KTIME_LOW_RES, - }, { .index = HRTIMER_BASE_MONOTONIC, .clockid = CLOCK_MONOTONIC, .get_time = &ktime_get, .resolution = KTIME_LOW_RES, }, + { + .index = HRTIMER_BASE_REALTIME, + .clockid = CLOCK_REALTIME, + .get_time = &ktime_get_real, + .resolution = KTIME_LOW_RES, + }, { .index = HRTIMER_BASE_BOOTTIME, .clockid = CLOCK_BOOTTIME, -- cgit v1.2.3 From 90ff1f30c0f401e325d6b2747618b7e3a0addaf8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 May 2011 23:08:17 +0200 Subject: hrtimers: Fix typo causing erratic timers commit 9ec2690758a5 ("timerfd: Manage cancelable timers in timerfd") introduced a CONFIG_HIGHRES_TIMERS (should be CONFIG_HIGH_RES_TIMERS) typo, which caused applications depending on CLOCK_REALTIME timers to become sluggish due to the fact that the time base of the realtime timers was not updated when the wall clock time was set. This causes anything from 100% CPU use for some applications to odd delays and hiccups. 
Reported-bisected-and-tested-by: Anca Emanuel Tested-by: Linus Torvalds Fatfingered-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/hrtimer.c') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c541ee527ecb..a9205e32a059 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -748,7 +748,7 @@ static inline void retrigger_next_event(void *arg) { } */ void clock_was_set(void) { -#ifdef CONFIG_HIGHRES_TIMERS +#ifdef CONFIG_HIGH_RES_TIMERS /* Retrigger the CPU local events everywhere */ on_each_cpu(retrigger_next_event, NULL, 1); #endif -- cgit v1.2.3