diff options
Diffstat (limited to 'kernel/time/timer.c')
-rw-r--r-- | kernel/time/timer.c | 237 |
1 files changed, 152 insertions, 85 deletions
diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 523fe1669d4c..903705687b52 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -94,12 +94,16 @@ struct tvec_base { struct tvec tv5; } ____cacheline_aligned; +static inline void __run_timers(struct tvec_base *base); static DEFINE_PER_CPU(struct tvec_base, tvec_bases); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) unsigned int sysctl_timer_migration = 1; +struct tvec_base tvec_base_deferrable; +static atomic_t deferrable_pending; + void timers_update_migration(bool update_nohz) { bool on = sysctl_timer_migration && tick_nohz_active; @@ -135,18 +139,66 @@ int timer_migration_handler(struct ctl_table *table, int write, } static inline struct tvec_base *get_target_base(struct tvec_base *base, - int pinned) + int pinned, u32 timer_flags) { + if (!pinned && !(timer_flags & TIMER_PINNED_ON_CPU) && + (timer_flags & TIMER_DEFERRABLE)) + return &tvec_base_deferrable; if (pinned || !base->migration_enabled) return this_cpu_ptr(&tvec_bases); return per_cpu_ptr(&tvec_bases, get_nohz_timer_target()); } + +static inline void __run_deferrable_timers(void) +{ + if ((atomic_cmpxchg(&deferrable_pending, 1, 0) && + tick_do_timer_cpu == TICK_DO_TIMER_NONE) || + tick_do_timer_cpu == smp_processor_id()) { + if (time_after_eq(jiffies, + tvec_base_deferrable.timer_jiffies)) + __run_timers(&tvec_base_deferrable); + } +} + +static inline void init_timer_deferrable_global(void) +{ + tvec_base_deferrable.cpu = nr_cpu_ids; + spin_lock_init(&tvec_base_deferrable.lock); + tvec_base_deferrable.timer_jiffies = jiffies; + tvec_base_deferrable.next_timer = tvec_base_deferrable.timer_jiffies; +} + +static inline struct tvec_base *get_timer_base(u32 timer_flags) +{ + if (!(timer_flags & TIMER_PINNED_ON_CPU) && + timer_flags & TIMER_DEFERRABLE) + return &tvec_base_deferrable; + else + return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK); +} #else static inline struct tvec_base *get_target_base(struct tvec_base *base, 
- int pinned) + int pinned, u32 timer_flags) { return this_cpu_ptr(&tvec_bases); } + +static inline void __run_deferrable_timers(void) +{ +} + +static inline void init_timer_deferrable_global(void) +{ + /* + * initialize cpu unbound deferrable timer base only when CONFIG_SMP. + * UP kernel handles the timers with cpu 0 timer base. + */ +} + +static inline struct tvec_base *get_timer_base(u32 timer_flags) +{ + return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK); +} #endif static unsigned long round_jiffies_common(unsigned long j, int cpu, @@ -448,38 +500,6 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) } } -#ifdef CONFIG_TIMER_STATS -void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) -{ - if (timer->start_site) - return; - - timer->start_site = addr; - memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); - timer->start_pid = current->pid; -} - -static void timer_stats_account_timer(struct timer_list *timer) -{ - void *site; - - /* - * start_site can be concurrently reset by - * timer_stats_timer_clear_start_info() - */ - site = READ_ONCE(timer->start_site); - if (likely(!site)) - return; - - timer_stats_update_stats(timer, timer->start_pid, site, - timer->function, timer->start_comm, - timer->flags); -} - -#else -static void timer_stats_account_timer(struct timer_list *timer) {} -#endif - #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static struct debug_obj_descr timer_debug_descr; @@ -682,11 +702,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags, timer->entry.pprev = NULL; timer->flags = flags | raw_smp_processor_id(); timer->slack = -1; -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; - timer->start_pid = -1; - memset(timer->start_comm, 0, TASK_COMM_LEN); -#endif lockdep_init_map(&timer->lockdep_map, name, key, 0); } @@ -775,7 +790,7 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, tf = READ_ONCE(timer->flags); if (!(tf & TIMER_MIGRATING)) { - 
base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK); + base = get_timer_base(tf); spin_lock_irqsave(&base->lock, *flags); if (timer->flags == tf) return base; @@ -793,7 +808,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned long flags; int ret = 0; - timer_stats_timer_set_start_info(timer); BUG_ON(!timer->function); base = lock_timer_base(timer, &flags); @@ -804,7 +818,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, debug_activate(timer, expires); - new_base = get_target_base(base, pinned); + new_base = get_target_base(base, pinned, timer->flags); if (base != new_base) { /* @@ -826,6 +840,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, } } + if (pinned == TIMER_PINNED) + timer->flags |= TIMER_PINNED_ON_CPU; + else + timer->flags &= ~TIMER_PINNED_ON_CPU; timer->expires = expires; internal_add_timer(base, timer); @@ -988,7 +1006,6 @@ void add_timer_on(struct timer_list *timer, int cpu) struct tvec_base *base; unsigned long flags; - timer_stats_timer_set_start_info(timer); BUG_ON(timer_pending(timer) || !timer->function); /* @@ -1007,6 +1024,7 @@ void add_timer_on(struct timer_list *timer, int cpu) (timer->flags & ~TIMER_BASEMASK) | cpu); } + timer->flags |= TIMER_PINNED_ON_CPU; debug_activate(timer, timer->expires); internal_add_timer(base, timer); spin_unlock_irqrestore(&base->lock, flags); @@ -1032,7 +1050,6 @@ int del_timer(struct timer_list *timer) debug_assert_init(timer); - timer_stats_timer_clear_start_info(timer); if (timer_pending(timer)) { base = lock_timer_base(timer, &flags); ret = detach_if_pending(timer, base, true); @@ -1060,10 +1077,9 @@ int try_to_del_timer_sync(struct timer_list *timer) base = lock_timer_base(timer, &flags); - if (base->running_timer != timer) { - timer_stats_timer_clear_start_info(timer); + if (base->running_timer != timer) ret = detach_if_pending(timer, base, true); - } + spin_unlock_irqrestore(&base->lock, flags); return ret; @@ -1247,8 +1263,6 @@ static inline void 
__run_timers(struct tvec_base *base) data = timer->data; irqsafe = timer->flags & TIMER_IRQSAFE; - timer_stats_account_timer(timer); - base->running_timer = timer; detach_expired_timer(timer, base); @@ -1376,6 +1390,30 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC; } +#ifdef CONFIG_SMP +/* + * check_pending_deferrable_timers - Check for unbound deferrable timer expiry. + * @cpu - Current CPU + * + * The function checks whether any global deferrable pending timers + * are expired or not. This function does not check cpu bounded + * deferrable pending timers expiry. + * + * The function returns true when a cpu unbounded deferrable timer is expired. + */ +bool check_pending_deferrable_timers(int cpu) +{ + if (cpu == tick_do_timer_cpu || + tick_do_timer_cpu == TICK_DO_TIMER_NONE) { + if (time_after_eq(jiffies, tvec_base_deferrable.timer_jiffies) + && !atomic_cmpxchg(&deferrable_pending, 0, 1)) { + return true; + } + } + return false; +} +#endif + /** * get_next_timer_interrupt - return the time (clock mono) of the next timer * @basej: base time jiffies @@ -1440,6 +1478,8 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = this_cpu_ptr(&tvec_bases); + __run_deferrable_timers(); + if (time_after_eq(jiffies, base->timer_jiffies)) __run_timers(base); } @@ -1482,11 +1522,12 @@ static void process_timeout(unsigned long __data) * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to - * pass before the routine returns. The routine will return 0 + * pass before the routine returns unless the current task is explicitly + * woken up, (e.g. by wake_up_process())". * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - delivered to the current task. 
In this case the remaining time - * in jiffies will be returned, or 0 if the timer expired in time + * delivered to the current task or the current task is explicitly woken + * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. @@ -1495,7 +1536,9 @@ static void process_timeout(unsigned long __data) * the CPU away without a bound on the timeout. In this case the return * value will be %MAX_SCHEDULE_TIMEOUT. * - * In all cases the return value is guaranteed to be non-negative. + * Returns 0 when the timer has expired otherwise the remaining time in + * jiffies will be returned. In all cases the return value is guaranteed + * to be non-negative. */ signed long __sched schedule_timeout(signed long timeout) { @@ -1573,56 +1616,82 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -#ifdef CONFIG_HOTPLUG_CPU -static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head) +#if defined(CONFIG_HOTPLUG_CPU) +static void migrate_timer_list(struct tvec_base *new_base, + struct hlist_head *head, bool remove_pinned) { struct timer_list *timer; int cpu = new_base->cpu; + struct hlist_node *n; + int is_pinned; + + hlist_for_each_entry_safe(timer, n, head, entry) { + is_pinned = timer->flags & TIMER_PINNED_ON_CPU; + if (!remove_pinned && is_pinned) + continue; - while (!hlist_empty(head)) { - timer = hlist_entry(head->first, struct timer_list, entry); - /* We ignore the accounting on the dying cpu */ - detach_timer(timer, false); + detach_if_pending(timer, get_timer_base(timer->flags), false); timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; internal_add_timer(new_base, timer); } } -static void migrate_timers(int cpu) +static void __migrate_timers(int cpu, bool remove_pinned) { struct tvec_base *old_base; struct tvec_base *new_base; + unsigned long flags; int i; - BUG_ON(cpu_online(cpu)); old_base = per_cpu_ptr(&tvec_bases, cpu); new_base 
= get_cpu_ptr(&tvec_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - spin_lock_irq(&new_base->lock); + spin_lock_irqsave(&new_base->lock, flags); spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); - BUG_ON(old_base->running_timer); + /* + * If we're in the hotplug path, kill the system if there's a running + * timer. It's ok to have a running timer in the isolation case - the + * currently running or just expired timers are off of the timer wheel + * and so everything else can be migrated off. + */ + if (!cpu_online(cpu)) + BUG_ON(old_base->running_timer); for (i = 0; i < TVR_SIZE; i++) - migrate_timer_list(new_base, old_base->tv1.vec + i); + migrate_timer_list(new_base, old_base->tv1.vec + i, + remove_pinned); for (i = 0; i < TVN_SIZE; i++) { - migrate_timer_list(new_base, old_base->tv2.vec + i); - migrate_timer_list(new_base, old_base->tv3.vec + i); - migrate_timer_list(new_base, old_base->tv4.vec + i); - migrate_timer_list(new_base, old_base->tv5.vec + i); + migrate_timer_list(new_base, old_base->tv2.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv3.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv4.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv5.vec + i, + remove_pinned); } - old_base->active_timers = 0; - old_base->all_timers = 0; - spin_unlock(&old_base->lock); - spin_unlock_irq(&new_base->lock); + spin_unlock_irqrestore(&new_base->lock, flags); put_cpu_ptr(&tvec_bases); } +/* Migrate timers from 'cpu' to this_cpu */ +static void migrate_timers(int cpu) +{ + BUG_ON(cpu_online(cpu)); + __migrate_timers(cpu, true); +} + +void timer_quiesce_cpu(void *cpup) +{ + __migrate_timers(*(int *)cpup, false); +} + static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -1663,12 +1732,13 @@ static void __init init_timer_cpus(void) for_each_possible_cpu(cpu) init_timer_cpu(cpu); + + 
init_timer_deferrable_global(); } void __init init_timers(void) { init_timer_cpus(); - init_timer_stats(); timer_register_cpu_notifier(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } @@ -1702,16 +1772,6 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); -static void __sched do_usleep_range(unsigned long min, unsigned long max) -{ - ktime_t kmin; - u64 delta; - - kmin = ktime_set(0, min * NSEC_PER_USEC); - delta = (u64)(max - min) * NSEC_PER_USEC; - schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); -} - /** * usleep_range - Drop in replacement for udelay where wakeup is flexible * @min: Minimum time in usecs to sleep @@ -1719,7 +1779,14 @@ static void __sched do_usleep_range(unsigned long min, unsigned long max) */ void __sched usleep_range(unsigned long min, unsigned long max) { - __set_current_state(TASK_UNINTERRUPTIBLE); - do_usleep_range(min, max); + ktime_t exp = ktime_add_us(ktime_get(), min); + u64 delta = (u64)(max - min) * NSEC_PER_USEC; + + for (;;) { + __set_current_state(TASK_UNINTERRUPTIBLE); + /* Do not return before the requested sleep time has elapsed */ + if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) + break; + } } EXPORT_SYMBOL(usleep_range); |