summaryrefslogtreecommitdiff
path: root/kernel/time/timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time/timer.c')
-rw-r--r--kernel/time/timer.c169
1 files changed, 129 insertions, 40 deletions
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index d1798fa0c743..2bde2c2b1cb3 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -94,12 +94,15 @@ struct tvec_base {
struct tvec tv5;
} ____cacheline_aligned;
+static inline void __run_timers(struct tvec_base *base);
static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
unsigned int sysctl_timer_migration = 1;
+struct tvec_base tvec_base_deferrable;
+
void timers_update_migration(bool update_nohz)
{
bool on = sysctl_timer_migration && tick_nohz_active;
@@ -135,18 +138,62 @@ int timer_migration_handler(struct ctl_table *table, int write,
}
static inline struct tvec_base *get_target_base(struct tvec_base *base,
- int pinned)
+ int pinned, u32 timer_flags)
{
+ if (!pinned && !(timer_flags & TIMER_PINNED_ON_CPU) &&
+ (timer_flags & TIMER_DEFERRABLE))
+ return &tvec_base_deferrable;
if (pinned || !base->migration_enabled)
return this_cpu_ptr(&tvec_bases);
return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
}
+
+static inline void __run_deferrable_timers(void)
+{
+ if (smp_processor_id() == tick_do_timer_cpu &&
+ time_after_eq(jiffies, tvec_base_deferrable.timer_jiffies))
+ __run_timers(&tvec_base_deferrable);
+}
+
+static inline void init_timer_deferrable_global(void)
+{
+ tvec_base_deferrable.cpu = nr_cpu_ids;
+ spin_lock_init(&tvec_base_deferrable.lock);
+ tvec_base_deferrable.timer_jiffies = jiffies;
+ tvec_base_deferrable.next_timer = tvec_base_deferrable.timer_jiffies;
+}
+
+static inline struct tvec_base *get_timer_base(u32 timer_flags)
+{
+ if (!(timer_flags & TIMER_PINNED_ON_CPU) &&
+ timer_flags & TIMER_DEFERRABLE)
+ return &tvec_base_deferrable;
+ else
+ return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK);
+}
#else
static inline struct tvec_base *get_target_base(struct tvec_base *base,
- int pinned)
+ int pinned, u32 timer_flags)
{
return this_cpu_ptr(&tvec_bases);
}
+
+static inline void __run_deferrable_timers(void)
+{
+}
+
+static inline void init_timer_deferrable_global(void)
+{
+ /*
+ * initialize cpu unbound deferrable timer base only when CONFIG_SMP.
+ * UP kernel handles the timers with cpu 0 timer base.
+ */
+}
+
+static inline struct tvec_base *get_timer_base(u32 timer_flags)
+{
+ return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK);
+}
#endif
static unsigned long round_jiffies_common(unsigned long j, int cpu,
@@ -764,11 +811,18 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
__acquires(timer->base->lock)
{
for (;;) {
- u32 tf = timer->flags;
struct tvec_base *base;
+ u32 tf;
+
+ /*
+ * We need to use READ_ONCE() here, otherwise the compiler
+ * might re-read @tf between the check for TIMER_MIGRATING
+ * and spin_lock().
+ */
+ tf = READ_ONCE(timer->flags);
if (!(tf & TIMER_MIGRATING)) {
- base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
+ base = get_timer_base(tf);
spin_lock_irqsave(&base->lock, *flags);
if (timer->flags == tf)
return base;
@@ -797,7 +851,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
debug_activate(timer, expires);
- new_base = get_target_base(base, pinned);
+ new_base = get_target_base(base, pinned, timer->flags);
if (base != new_base) {
/*
@@ -819,6 +873,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
}
}
+ if (pinned == TIMER_PINNED)
+ timer->flags |= TIMER_PINNED_ON_CPU;
+ else
+ timer->flags &= ~TIMER_PINNED_ON_CPU;
timer->expires = expires;
internal_add_timer(base, timer);
@@ -1000,6 +1058,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
(timer->flags & ~TIMER_BASEMASK) | cpu);
}
+ timer->flags |= TIMER_PINNED_ON_CPU;
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
spin_unlock_irqrestore(&base->lock, flags);
@@ -1433,6 +1492,8 @@ static void run_timer_softirq(struct softirq_action *h)
{
struct tvec_base *base = this_cpu_ptr(&tvec_bases);
+ __run_deferrable_timers();
+
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
}
@@ -1475,11 +1536,12 @@ static void process_timeout(unsigned long __data)
* You can set the task state as follows -
*
* %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
- * pass before the routine returns. The routine will return 0
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process())".
*
* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task. In this case the remaining time
- * in jiffies will be returned, or 0 if the timer expired in time
+ * delivered to the current task or the current task is explicitly woken
+ * up.
*
* The current task state is guaranteed to be TASK_RUNNING when this
* routine returns.
@@ -1488,7 +1550,9 @@ static void process_timeout(unsigned long __data)
* the CPU away without a bound on the timeout. In this case the return
* value will be %MAX_SCHEDULE_TIMEOUT.
*
- * In all cases the return value is guaranteed to be non-negative.
+ * Returns 0 when the timer has expired otherwise the remaining time in
+ * jiffies will be returned. In all cases the return value is guaranteed
+ * to be non-negative.
*/
signed long __sched schedule_timeout(signed long timeout)
{
@@ -1566,56 +1630,82 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
-#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
+#if defined(CONFIG_HOTPLUG_CPU)
+static void migrate_timer_list(struct tvec_base *new_base,
+ struct hlist_head *head, bool remove_pinned)
{
struct timer_list *timer;
int cpu = new_base->cpu;
+ struct hlist_node *n;
+ int is_pinned;
- while (!hlist_empty(head)) {
- timer = hlist_entry(head->first, struct timer_list, entry);
- /* We ignore the accounting on the dying cpu */
- detach_timer(timer, false);
+ hlist_for_each_entry_safe(timer, n, head, entry) {
+ is_pinned = timer->flags & TIMER_PINNED_ON_CPU;
+ if (!remove_pinned && is_pinned)
+ continue;
+
+ detach_if_pending(timer, get_timer_base(timer->flags), false);
timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
internal_add_timer(new_base, timer);
}
}
-static void migrate_timers(int cpu)
+static void __migrate_timers(int cpu, bool remove_pinned)
{
struct tvec_base *old_base;
struct tvec_base *new_base;
+ unsigned long flags;
int i;
- BUG_ON(cpu_online(cpu));
old_base = per_cpu_ptr(&tvec_bases, cpu);
new_base = get_cpu_ptr(&tvec_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
*/
- spin_lock_irq(&new_base->lock);
+ spin_lock_irqsave(&new_base->lock, flags);
spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
- BUG_ON(old_base->running_timer);
+ /*
+ * If we're in the hotplug path, kill the system if there's a running
+ * timer. It's ok to have a running timer in the isolation case - the
+ * currently running or just expired timers are off of the timer wheel
+ * and so everything else can be migrated off.
+ */
+ if (!cpu_online(cpu))
+ BUG_ON(old_base->running_timer);
for (i = 0; i < TVR_SIZE; i++)
- migrate_timer_list(new_base, old_base->tv1.vec + i);
+ migrate_timer_list(new_base, old_base->tv1.vec + i,
+ remove_pinned);
for (i = 0; i < TVN_SIZE; i++) {
- migrate_timer_list(new_base, old_base->tv2.vec + i);
- migrate_timer_list(new_base, old_base->tv3.vec + i);
- migrate_timer_list(new_base, old_base->tv4.vec + i);
- migrate_timer_list(new_base, old_base->tv5.vec + i);
+ migrate_timer_list(new_base, old_base->tv2.vec + i,
+ remove_pinned);
+ migrate_timer_list(new_base, old_base->tv3.vec + i,
+ remove_pinned);
+ migrate_timer_list(new_base, old_base->tv4.vec + i,
+ remove_pinned);
+ migrate_timer_list(new_base, old_base->tv5.vec + i,
+ remove_pinned);
}
- old_base->active_timers = 0;
- old_base->all_timers = 0;
-
spin_unlock(&old_base->lock);
- spin_unlock_irq(&new_base->lock);
+ spin_unlock_irqrestore(&new_base->lock, flags);
put_cpu_ptr(&tvec_bases);
}
+/* Migrate timers from 'cpu' to this_cpu */
+static void migrate_timers(int cpu)
+{
+ BUG_ON(cpu_online(cpu));
+ __migrate_timers(cpu, true);
+}
+
+void timer_quiesce_cpu(void *cpup)
+{
+ __migrate_timers(*(int *)cpup, false);
+}
+
static int timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -1656,6 +1746,8 @@ static void __init init_timer_cpus(void)
for_each_possible_cpu(cpu)
init_timer_cpu(cpu);
+
+ init_timer_deferrable_global();
}
void __init init_timers(void)
@@ -1695,16 +1787,6 @@ unsigned long msleep_interruptible(unsigned int msecs)
EXPORT_SYMBOL(msleep_interruptible);
-static void __sched do_usleep_range(unsigned long min, unsigned long max)
-{
- ktime_t kmin;
- u64 delta;
-
- kmin = ktime_set(0, min * NSEC_PER_USEC);
- delta = (u64)(max - min) * NSEC_PER_USEC;
- schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
-}
-
/**
* usleep_range - Drop in replacement for udelay where wakeup is flexible
* @min: Minimum time in usecs to sleep
@@ -1712,7 +1794,14 @@ static void __sched do_usleep_range(unsigned long min, unsigned long max)
*/
void __sched usleep_range(unsigned long min, unsigned long max)
{
- __set_current_state(TASK_UNINTERRUPTIBLE);
- do_usleep_range(min, max);
+ ktime_t exp = ktime_add_us(ktime_get(), min);
+ u64 delta = (u64)(max - min) * NSEC_PER_USEC;
+
+ for (;;) {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ /* Do not return before the requested sleep time has elapsed */
+ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
+ break;
+ }
}
EXPORT_SYMBOL(usleep_range);