Diffstat (limited to 'kernel/time/hrtimer.c')
-rw-r--r-- | kernel/time/hrtimer.c | 191
1 file changed, 143 insertions, 48 deletions
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index c98439fdce81..bf7fc4989e5c 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -49,6 +49,7 @@
 #include <linux/sched/deadline.h>
 #include <linux/timer.h>
 #include <linux/freezer.h>
+#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 
@@ -94,6 +95,9 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 };
 
 static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
+	/* Make sure we catch unsupported clockids */
+	[0 ... MAX_CLOCKS - 1]	= HRTIMER_MAX_CLOCK_BASES,
+
 	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,
 	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC,
 	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,
@@ -102,7 +106,9 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
 
 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
 {
-	return hrtimer_clock_to_base_table[clock_id];
+	int base = hrtimer_clock_to_base_table[clock_id];
+	BUG_ON(base == HRTIMER_MAX_CLOCK_BASES);
+	return base;
 }
 
 /*
@@ -880,7 +886,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
 
 	base->cpu_base->active_bases |= 1 << base->index;
 
-	timer->state = HRTIMER_STATE_ENQUEUED;
+	timer->state |= HRTIMER_STATE_ENQUEUED;
 
 	return timerqueue_add(&base->active, &timer->node);
 }
@@ -897,14 +903,12 @@ static int enqueue_hrtimer(struct hrtimer *timer,
  */
 static void __remove_hrtimer(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base,
-			     unsigned long newstate, int reprogram)
+			     u8 newstate, int reprogram)
 {
 	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
-	unsigned int state = timer->state;
 
-	timer->state = newstate;
-	if (!(state & HRTIMER_STATE_ENQUEUED))
-		return;
+	if (!(timer->state & HRTIMER_STATE_ENQUEUED))
+		goto out;
 
 	if (!timerqueue_del(&base->active, &timer->node))
 		cpu_base->active_bases &= ~(1 << base->index);
@@ -921,6 +925,13 @@ static void __remove_hrtimer(struct hrtimer *timer,
 	if (reprogram && timer == cpu_base->next_timer)
 		hrtimer_force_reprogram(cpu_base, 1);
 #endif
+
+out:
+	/*
+	 * We need to preserve PINNED state here, otherwise we may end up
+	 * migrating pinned hrtimers as well.
+	 */
+	timer->state = newstate | (timer->state & HRTIMER_STATE_PINNED);
 }
 
 /*
@@ -930,7 +941,7 @@ static inline int
 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
 {
 	if (hrtimer_is_queued(timer)) {
-		unsigned long state = timer->state;
+		u8 state = timer->state;
 		int reprogram;
 
 		/*
@@ -949,11 +960,28 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
 			state = HRTIMER_STATE_INACTIVE;
 
 		__remove_hrtimer(timer, base, state, reprogram);
+		timer->state &= ~HRTIMER_STATE_PINNED;
 		return 1;
 	}
 	return 0;
 }
 
+static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
+					    const enum hrtimer_mode mode)
+{
+#ifdef CONFIG_TIME_LOW_RES
+	/*
+	 * CONFIG_TIME_LOW_RES indicates that the system has no way to return
+	 * granular time values. For relative timers we add hrtimer_resolution
+	 * (i.e. one jiffie) to prevent short timeouts.
+	 */
+	timer->is_rel = mode & HRTIMER_MODE_REL;
+	if (timer->is_rel)
+		tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
+#endif
+	return tim;
+}
+
 /**
  * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
  * @timer:	the timer to be added
@@ -974,19 +1002,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	/* Remove an active timer from the queue: */
 	remove_hrtimer(timer, base, true);
 
-	if (mode & HRTIMER_MODE_REL) {
+	if (mode & HRTIMER_MODE_REL)
 		tim = ktime_add_safe(tim, base->get_time());
-		/*
-		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
-		 * to signal that they simply return xtime in
-		 * do_gettimeoffset(). In this case we want to round up by
-		 * resolution when starting a relative timer, to avoid short
-		 * timeouts. This will go away with the GTOD framework.
-		 */
-#ifdef CONFIG_TIME_LOW_RES
-		tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
-#endif
-	}
+
+	tim = hrtimer_update_lowres(timer, tim, mode);
 
 	hrtimer_set_expires_range_ns(timer, tim, delta_ns);
 
@@ -995,6 +1014,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 
 	timer_stats_hrtimer_set_start_info(timer);
 
+	/* Update pinned state */
+	timer->state &= ~HRTIMER_STATE_PINNED;
+	timer->state |= (!!(mode & HRTIMER_MODE_PINNED)) << HRTIMER_PINNED_SHIFT;
+
 	leftmost = enqueue_hrtimer(timer, new_base);
 	if (!leftmost)
 		goto unlock;
@@ -1074,19 +1097,23 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
 /**
  * hrtimer_get_remaining - get remaining time for the timer
  * @timer:	the timer to read
+ * @adjust:	adjust relative timers when CONFIG_TIME_LOW_RES=y
  */
-ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
+ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
 {
 	unsigned long flags;
 	ktime_t rem;
 
 	lock_hrtimer_base(timer, &flags);
-	rem = hrtimer_expires_remaining(timer);
+	if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
+		rem = hrtimer_expires_remaining_adjusted(timer);
+	else
+		rem = hrtimer_expires_remaining(timer);
 	unlock_hrtimer_base(timer, &flags);
 
 	return rem;
 }
-EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
+EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
 
 #ifdef CONFIG_NO_HZ_COMMON
 /**
@@ -1165,8 +1192,8 @@ bool hrtimer_active(const struct hrtimer *timer)
 		cpu_base = READ_ONCE(timer->base->cpu_base);
 		seq = raw_read_seqcount_begin(&cpu_base->seq);
 
-		if (timer->state != HRTIMER_STATE_INACTIVE ||
-		    cpu_base->running == timer)
+		if (((timer->state & ~HRTIMER_STATE_PINNED) !=
+		      HRTIMER_STATE_INACTIVE) || cpu_base->running == timer)
 			return true;
 
 	} while (read_seqcount_retry(&cpu_base->seq, seq) ||
@@ -1220,6 +1247,14 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	fn = timer->function;
 
 	/*
+	 * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
+	 * timer is restarted with a period then it becomes an absolute
+	 * timer. If its not restarted it does not matter.
+	 */
+	if (IS_ENABLED(CONFIG_TIME_LOW_RES))
+		timer->is_rel = false;
+
+	/*
 	 * Because we run timers from hardirq context, there is no chance
 	 * they get migrated to another cpu, therefore its safe to unlock
 	 * the timer base.
@@ -1595,17 +1630,43 @@ static void init_hrtimers_cpu(int cpu)
 	hrtimer_init_hres(cpu_base);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
-				 struct hrtimer_clock_base *new_base)
+#if defined(CONFIG_HOTPLUG_CPU)
+static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
+				 struct hrtimer_cpu_base *new_base,
+				 unsigned int i,
+				 bool wait,
+				 bool remove_pinned)
 {
 	struct hrtimer *timer;
 	struct timerqueue_node *node;
+	struct timerqueue_head pinned;
+	int is_pinned;
+	struct hrtimer_clock_base *old_c_base = &old_base->clock_base[i];
+	struct hrtimer_clock_base *new_c_base = &new_base->clock_base[i];
 
-	while ((node = timerqueue_getnext(&old_base->active))) {
+	timerqueue_init_head(&pinned);
+
+	while ((node = timerqueue_getnext(&old_c_base->active))) {
 		timer = container_of(node, struct hrtimer, node);
-		BUG_ON(hrtimer_callback_running(timer));
+		if (wait) {
+			/* Ensure timers are done running before continuing */
+			while (hrtimer_callback_running(timer)) {
+				raw_spin_unlock(&old_base->lock);
+				raw_spin_unlock(&new_base->lock);
+				cpu_relax();
+				/*
+				 * cpu_relax may just be a barrier. Grant the
+				 * run_hrtimer_list code some time to obtain the
+				 * spinlock.
+				 */
+				udelay(2);
+				raw_spin_lock(&new_base->lock);
+				raw_spin_lock_nested(&old_base->lock,
+						     SINGLE_DEPTH_NESTING);
+			}
+		} else {
+			BUG_ON(hrtimer_callback_running(timer));
+		}
 		debug_deactivate(timer);
 
 		/*
@@ -1613,8 +1674,15 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		 * timer could be seen as !active and just vanish away
 		 * under us on another CPU
 		 */
-		__remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
-		timer->base = new_base;
+		__remove_hrtimer(timer, old_c_base, HRTIMER_STATE_ENQUEUED, 0);
+
+		is_pinned = timer->state & HRTIMER_STATE_PINNED;
+		if (!remove_pinned && is_pinned) {
+			timerqueue_add(&pinned, &timer->node);
+			continue;
+		}
+
+		timer->base = new_c_base;
 		/*
 		 * Enqueue the timers on the new cpu. This does not
 		 * reprogram the event device in case the timer
@@ -1623,19 +1691,25 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 		 * sort out already expired timers and reprogram the
 		 * event device.
 		 */
-		enqueue_hrtimer(timer, new_base);
+		enqueue_hrtimer(timer, new_c_base);
+	}
+
+	/* Re-queue pinned timers for non-hotplug usecase */
+	while ((node = timerqueue_getnext(&pinned))) {
+		timer = container_of(node, struct hrtimer, node);
+
+		timerqueue_del(&pinned, &timer->node);
+		enqueue_hrtimer(timer, old_c_base);
 	}
 }
 
-static void migrate_hrtimers(int scpu)
+static void __migrate_hrtimers(int scpu, bool wait, bool remove_pinned)
 {
 	struct hrtimer_cpu_base *old_base, *new_base;
+	unsigned long flags;
 	int i;
 
-	BUG_ON(cpu_online(scpu));
-	tick_cancel_sched_timer(scpu);
-
-	local_irq_disable();
+	local_irq_save(flags);
 	old_base = &per_cpu(hrtimer_bases, scpu);
 	new_base = this_cpu_ptr(&hrtimer_bases);
 	/*
@@ -1646,8 +1720,8 @@ static void migrate_hrtimers(int scpu)
 	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
-		migrate_hrtimer_list(&old_base->clock_base[i],
-				     &new_base->clock_base[i]);
+		migrate_hrtimer_list(old_base, new_base, i, wait,
+				     remove_pinned);
 	}
 
 	raw_spin_unlock(&old_base->lock);
@@ -1655,7 +1729,20 @@ static void migrate_hrtimers(int scpu)
 
 	/* Check, if we got expired work to do */
 	__hrtimer_peek_ahead_timers();
-	local_irq_enable();
+	local_irq_restore(flags);
+}
+
+static void migrate_hrtimers(int scpu)
+{
+	BUG_ON(cpu_online(scpu));
+	tick_cancel_sched_timer(scpu);
+
+	__migrate_hrtimers(scpu, false, true);
+}
+
+void hrtimer_quiesce_cpu(void *cpup)
+{
+	__migrate_hrtimers(*(int *)cpup, true, false);
 }
 
 #endif /* CONFIG_HOTPLUG_CPU */
@@ -1763,15 +1850,19 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
  * You can set the task state as follows -
  *
  * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process()).
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
+ * delivered to the current task or the current task is explicitly woken
+ * up.
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
- * Returns 0 when the timer has expired otherwise -EINTR
+ * Returns 0 when the timer has expired. If the task was woken before the
+ * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
+ * by an explicit wakeup, it returns -EINTR.
 */
 int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
				     const enum hrtimer_mode mode)
@@ -1793,15 +1884,19 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process()).
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
+ * delivered to the current task or the current task is explicitly woken
+ * up.
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
- * Returns 0 when the timer has expired otherwise -EINTR
+ * Returns 0 when the timer has expired. If the task was woken before the
+ * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
+ * by an explicit wakeup, it returns -EINTR.
 */
 int __sched schedule_hrtimeout(ktime_t *expires,
			       const enum hrtimer_mode mode)
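
Note on the clockid table change above: unsupported clockids previously read a zero-initialized table entry and were silently treated as HRTIMER_BASE_MONOTONIC; with the catch-all initializer, hrtimer_clockid_to_base() now hits the BUG_ON() instead. A hypothetical misuse that would trip it (illustration only, not part of the patch):

#include <linux/hrtimer.h>
#include <linux/time.h>

static struct hrtimer bad_timer;

static void init_bad_timer(void)
{
	/*
	 * CLOCK_PROCESS_CPUTIME_ID has no hrtimer clock base. It used to map
	 * silently to entry 0 (HRTIMER_BASE_MONOTONIC); it now BUG()s inside
	 * hrtimer_clockid_to_base() at init time.
	 */
	hrtimer_init(&bad_timer, CLOCK_PROCESS_CPUTIME_ID, HRTIMER_MODE_REL);
}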
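
The pinned-state tracking works by carrying a HRTIMER_STATE_PINNED bit in timer->state: hrtimer_start_range_ns() sets it from HRTIMER_MODE_PINNED, __remove_hrtimer() preserves it across dequeue, hrtimer_active() masks it out, and migrate_hrtimer_list() uses it to leave pinned timers on their CPU when remove_pinned is false. Below is a minimal sketch of the kind of per-CPU pinned timer this is meant to protect; the names (my_poll_timer, my_poll_fn, MY_PERIOD_NS) are made up for illustration:

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/percpu.h>

#define MY_PERIOD_NS	(10 * NSEC_PER_MSEC)

static DEFINE_PER_CPU(struct hrtimer, my_poll_timer);

static enum hrtimer_restart my_poll_fn(struct hrtimer *t)
{
	/* ... per-cpu polling work ... */
	hrtimer_forward_now(t, ns_to_ktime(MY_PERIOD_NS));
	return HRTIMER_RESTART;
}

/* Called on the CPU the timer must stay on. */
static void my_start_poll_timer(void)
{
	struct hrtimer *t = this_cpu_ptr(&my_poll_timer);

	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
	t->function = my_poll_fn;
	/* With this patch, PINNED mode also records HRTIMER_STATE_PINNED in t->state. */
	hrtimer_start(t, ns_to_ktime(MY_PERIOD_NS), HRTIMER_MODE_REL_PINNED);
}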
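
For CONFIG_TIME_LOW_RES builds, hrtimer_update_lowres() pads relative timers by one hrtimer_resolution to avoid short timeouts, and __hrtimer_get_remaining(timer, true) takes that padding back out (via hrtimer_expires_remaining_adjusted()) so the value reported back, e.g. to user space, is not inflated. The matching include/linux/hrtimer.h change is outside this diffstat; presumably the old entry point survives as a thin wrapper along these lines (a sketch, not taken from this patch):

/* Expected header-side wrapper; kernel-internal callers keep the raw value. */
static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
{
	return __hrtimer_get_remaining(timer, false);
}

Paths that copy the remaining time out to user space (the posix-timers code, for instance) would then presumably call __hrtimer_get_remaining(timer, true) directly.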
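
hrtimer_quiesce_cpu() reuses the hotplug migration machinery at runtime: wait=true spins (cpu_relax() plus udelay(2), hence the new linux/delay.h include) until a running callback finishes instead of BUG()ing on it, and remove_pinned=false parks pinned timers on a temporary list and re-queues them on the source CPU, since that CPU stays online. The void *cpup signature suggests it is driven by a CPU isolation/quiesce path not shown here; a hedged sketch of such a caller (isolate_cpu_timers and its calling convention are assumptions, not part of this patch):

#include <linux/hrtimer.h>

/* Added by this patch in hrtimer.c; assumed to be declared in a header. */
void hrtimer_quiesce_cpu(void *cpup);

/*
 * Pull unpinned hrtimers off @cpu onto the current (housekeeping) CPU.
 * Must be called from a different CPU than @cpu; pinned timers are
 * deliberately left in place because @cpu remains online.
 */
static void isolate_cpu_timers(int cpu)
{
	hrtimer_quiesce_cpu(&cpu);
}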
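
The reworked kerneldoc for schedule_hrtimeout_range()/schedule_hrtimeout() makes the return contract explicit: 0 means the timeout expired, -EINTR means the sleep ended early, either through a signal (TASK_INTERRUPTIBLE only) or an explicit wake_up_process(). A minimal usage sketch of that contract; wait_quietly() and the timeout handling around it are illustrative only:

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/* Sleep up to @timeout_ns; returns 0 on expiry, -EINTR on signal or wakeup. */
static int wait_quietly(u64 timeout_ns)
{
	ktime_t t = ns_to_ktime(timeout_ns);

	set_current_state(TASK_INTERRUPTIBLE);
	return schedule_hrtimeout(&t, HRTIMER_MODE_REL);
}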
