diff options
Diffstat (limited to 'kernel/time')
| -rw-r--r-- | kernel/time/Makefile | 2 | ||||
| -rw-r--r-- | kernel/time/alarmtimer.c | 270 | ||||
| -rw-r--r-- | kernel/time/hrtimer.c | 111 | ||||
| -rw-r--r-- | kernel/time/sched_clock.c | 8 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 92 | ||||
| -rw-r--r-- | kernel/time/timer.c | 128 |
6 files changed, 552 insertions, 59 deletions
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 49eca0beed32..5819ca07a22b 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -12,3 +12,5 @@ obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o obj-$(CONFIG_TIMER_STATS) += timer_stats.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o + +ccflags-y += -Idrivers/cpuidle diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 7fbba635a549..0cdc34ebd8d1 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -26,6 +26,11 @@ #include <linux/workqueue.h> #include <linux/freezer.h> +#ifdef CONFIG_MSM_PM +#include "lpm-levels.h" +#endif +#include <linux/workqueue.h> + /** * struct alarm_base - Alarm timer bases * @lock: Lock for syncrhonized access to the base @@ -46,14 +51,130 @@ static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); static struct wakeup_source *ws; +static struct delayed_work work; +static struct workqueue_struct *power_off_alarm_workqueue; #ifdef CONFIG_RTC_CLASS /* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; static DEFINE_SPINLOCK(rtcdev_lock); +static struct mutex power_on_alarm_lock; +static struct alarm init_alarm; /** + * power_on_alarm_init - Init power on alarm value + * + * Read rtc alarm value after device booting up and add this alarm + * into alarm queue. + */ +void power_on_alarm_init(void) +{ + struct rtc_wkalrm rtc_alarm; + struct rtc_time rt; + unsigned long alarm_time; + struct rtc_device *rtc; + ktime_t alarm_ktime; + + rtc = alarmtimer_get_rtcdev(); + + if (!rtc) + return; + + rtc_read_alarm(rtc, &rtc_alarm); + rt = rtc_alarm.time; + + rtc_tm_to_time(&rt, &alarm_time); + + if (alarm_time) { + alarm_ktime = ktime_set(alarm_time, 0); + alarm_init(&init_alarm, ALARM_POWEROFF_REALTIME, NULL); + alarm_start(&init_alarm, alarm_ktime); + } +} + +/** + * set_power_on_alarm - set power on alarm value into rtc register + * + * Get the soonest power off alarm timer and set the alarm value into rtc + * register. + */ +void set_power_on_alarm(void) +{ + int rc; + struct timespec wall_time, alarm_ts; + long alarm_secs = 0l; + long rtc_secs, alarm_time, alarm_delta; + struct rtc_time rtc_time; + struct rtc_wkalrm alarm; + struct rtc_device *rtc; + struct timerqueue_node *next; + unsigned long flags; + struct alarm_base *base = &alarm_bases[ALARM_POWEROFF_REALTIME]; + + rc = mutex_lock_interruptible(&power_on_alarm_lock); + if (rc != 0) + return; + + spin_lock_irqsave(&base->lock, flags); + next = timerqueue_getnext(&base->timerqueue); + spin_unlock_irqrestore(&base->lock, flags); + + if (next) { + alarm_ts = ktime_to_timespec(next->expires); + alarm_secs = alarm_ts.tv_sec; + } + + if (!alarm_secs) + goto disable_alarm; + + getnstimeofday(&wall_time); + + /* + * alarm_secs have to be bigger than "wall_time +1". + * It is to make sure that alarm time will be always + * bigger than wall time. + */ + if (alarm_secs <= wall_time.tv_sec + 1) + goto disable_alarm; + + rtc = alarmtimer_get_rtcdev(); + if (!rtc) + goto exit; + + rtc_read_time(rtc, &rtc_time); + rtc_tm_to_time(&rtc_time, &rtc_secs); + alarm_delta = wall_time.tv_sec - rtc_secs; + alarm_time = alarm_secs - alarm_delta; + + rtc_time_to_tm(alarm_time, &alarm.time); + alarm.enabled = 1; + rc = rtc_set_alarm(rtcdev, &alarm); + if (rc) + goto disable_alarm; + + mutex_unlock(&power_on_alarm_lock); + return; + +disable_alarm: + rtc_alarm_irq_enable(rtcdev, 0); +exit: + mutex_unlock(&power_on_alarm_lock); +} + +static void alarmtimer_triggered_func(void *p) +{ + struct rtc_device *rtc = rtcdev; + + if (!(rtc->irq_data & RTC_AF)) + return; + __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); +} + +static struct rtc_task alarmtimer_rtc_task = { + .func = alarmtimer_triggered_func +}; +/** * alarmtimer_get_rtcdev - Return selected rtcdevice * * This function returns the rtc device to use for wakealarms. @@ -63,7 +184,7 @@ static DEFINE_SPINLOCK(rtcdev_lock); struct rtc_device *alarmtimer_get_rtcdev(void) { unsigned long flags; - struct rtc_device *ret; + struct rtc_device *ret = NULL; spin_lock_irqsave(&rtcdev_lock, flags); ret = rtcdev; @@ -77,33 +198,48 @@ static int alarmtimer_rtc_add_device(struct device *dev, struct class_interface *class_intf) { unsigned long flags; + int err = 0; struct rtc_device *rtc = to_rtc_device(dev); - if (rtcdev) return -EBUSY; - if (!rtc->ops->set_alarm) return -1; - if (!device_may_wakeup(rtc->dev.parent)) - return -1; spin_lock_irqsave(&rtcdev_lock, flags); if (!rtcdev) { + err = rtc_irq_register(rtc, &alarmtimer_rtc_task); + if (err) + goto rtc_irq_reg_err; rtcdev = rtc; /* hold a reference so it doesn't go away */ get_device(dev); } + +rtc_irq_reg_err: spin_unlock_irqrestore(&rtcdev_lock, flags); - return 0; + return err; + +} + +static void alarmtimer_rtc_remove_device(struct device *dev, + struct class_interface *class_intf) +{ + if (rtcdev && dev == &rtcdev->dev) { + rtc_irq_unregister(rtcdev, &alarmtimer_rtc_task); + rtcdev = NULL; + } } static inline void alarmtimer_rtc_timer_init(void) { + mutex_init(&power_on_alarm_lock); + rtc_timer_init(&rtctimer, NULL, NULL); } static struct class_interface alarmtimer_rtc_interface = { .add_dev = &alarmtimer_rtc_add_device, + .remove_dev = &alarmtimer_rtc_remove_device, }; static int alarmtimer_rtc_interface_setup(void) @@ -124,8 +260,14 @@ struct rtc_device *alarmtimer_get_rtcdev(void) static inline int alarmtimer_rtc_interface_setup(void) { return 0; } static inline void alarmtimer_rtc_interface_remove(void) { } static inline void alarmtimer_rtc_timer_init(void) { } +void set_power_on_alarm(void) { } #endif +static void alarm_work_func(struct work_struct *unused) +{ + set_power_on_alarm(); +} + /** * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue * @base: pointer to the base where the timer is being run @@ -195,6 +337,10 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } spin_unlock_irqrestore(&base->lock, flags); + /* set next power off alarm */ + if (alarm->type == ALARM_POWEROFF_REALTIME) + queue_delayed_work(power_off_alarm_workqueue, &work, 0); + return ret; } @@ -217,6 +363,70 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining); * set an rtc timer to fire that far into the future, which * will wake us from suspend. */ +#if defined(CONFIG_RTC_DRV_QPNP) && defined(CONFIG_MSM_PM) +static int alarmtimer_suspend(struct device *dev) +{ + struct rtc_time tm; + ktime_t min, now; + unsigned long flags; + struct rtc_device *rtc; + int i; + int ret = 0; + + spin_lock_irqsave(&freezer_delta_lock, flags); + min = freezer_delta; + freezer_delta = ktime_set(0, 0); + spin_unlock_irqrestore(&freezer_delta_lock, flags); + + rtc = alarmtimer_get_rtcdev(); + /* If we have no rtcdev, just return */ + if (!rtc) + return 0; + + /* Find the soonest timer to expire*/ + for (i = 0; i < ALARM_NUMTYPE; i++) { + struct alarm_base *base = &alarm_bases[i]; + struct timerqueue_node *next; + ktime_t delta; + + spin_lock_irqsave(&base->lock, flags); + next = timerqueue_getnext(&base->timerqueue); + spin_unlock_irqrestore(&base->lock, flags); + if (!next) + continue; + delta = ktime_sub(next->expires, base->gettime()); + if (!min.tv64 || (delta.tv64 < min.tv64)) + min = delta; + } + if (min.tv64 == 0) + return 0; + + if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { + __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); + return -EBUSY; + } + + /* Setup a timer to fire that far in the future */ + rtc_timer_cancel(rtc, &rtctimer); + rtc_read_time(rtc, &tm); + now = rtc_tm_to_ktime(tm); + now = ktime_add(now, min); + if (poweron_alarm) { + struct rtc_time tm_val; + unsigned long secs; + + tm_val = rtc_ktime_to_tm(min); + rtc_tm_to_time(&tm_val, &secs); + lpm_suspend_wake_time(secs); + } else { + /* Set alarm, if in the past reject suspend briefly to handle */ + ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); + if (ret < 0) + __pm_wakeup_event(ws, MSEC_PER_SEC); + } + return ret; +} +#else static int alarmtimer_suspend(struct device *dev) { struct rtc_time tm; @@ -226,6 +436,8 @@ static int alarmtimer_suspend(struct device *dev) int i; int ret; + cancel_delayed_work_sync(&work); + spin_lock_irqsave(&freezer_delta_lock, flags); min = freezer_delta; freezer_delta = ktime_set(0, 0); @@ -271,11 +483,31 @@ static int alarmtimer_suspend(struct device *dev) __pm_wakeup_event(ws, MSEC_PER_SEC); return ret; } +#endif +static int alarmtimer_resume(struct device *dev) +{ + struct rtc_device *rtc; + + rtc = alarmtimer_get_rtcdev(); + /* If we have no rtcdev, just return */ + if (!rtc) + return 0; + rtc_timer_cancel(rtc, &rtctimer); + + queue_delayed_work(power_off_alarm_workqueue, &work, 0); + return 0; +} + #else static int alarmtimer_suspend(struct device *dev) { return 0; } + +static int alarmtimer_resume(struct device *dev) +{ + return 0; +} #endif static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) @@ -443,12 +675,14 @@ EXPORT_SYMBOL_GPL(alarm_forward_now); * clock2alarm - helper that converts from clockid to alarmtypes * @clockid: clockid. */ -static enum alarmtimer_type clock2alarm(clockid_t clockid) +enum alarmtimer_type clock2alarm(clockid_t clockid) { if (clockid == CLOCK_REALTIME_ALARM) return ALARM_REALTIME; if (clockid == CLOCK_BOOTTIME_ALARM) return ALARM_BOOTTIME; + if (clockid == CLOCK_POWEROFF_ALARM) + return ALARM_POWEROFF_REALTIME; return -1; } @@ -800,6 +1034,7 @@ out: /* Suspend hook structures */ static const struct dev_pm_ops alarmtimer_pm_ops = { .suspend = alarmtimer_suspend, + .resume = alarmtimer_resume, }; static struct platform_driver alarmtimer_driver = { @@ -834,10 +1069,13 @@ static int __init alarmtimer_init(void) posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); + posix_timers_register_clock(CLOCK_POWEROFF_ALARM, &alarm_clock); /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real; + alarm_bases[ALARM_POWEROFF_REALTIME].base_clockid = CLOCK_REALTIME; + alarm_bases[ALARM_POWEROFF_REALTIME].gettime = &ktime_get_real; alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME; alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime; for (i = 0; i < ALARM_NUMTYPE; i++) { @@ -859,8 +1097,24 @@ static int __init alarmtimer_init(void) goto out_drv; } ws = wakeup_source_register("alarmtimer"); - return 0; + if (!ws) { + error = -ENOMEM; + goto out_ws; + } + + INIT_DELAYED_WORK(&work, alarm_work_func); + power_off_alarm_workqueue = + create_singlethread_workqueue("power_off_alarm"); + if (!power_off_alarm_workqueue) { + error = -ENOMEM; + goto out_wq; + } + return 0; +out_wq: + wakeup_source_unregister(ws); +out_ws: + platform_device_unregister(pdev); out_drv: platform_driver_unregister(&alarmtimer_driver); out_if: diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 405536b22c0c..271d83e30d19 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -49,6 +49,7 @@ #include <linux/sched/deadline.h> #include <linux/timer.h> #include <linux/freezer.h> +#include <linux/delay.h> #include <asm/uaccess.h> @@ -885,7 +886,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, base->cpu_base->active_bases |= 1 << base->index; - timer->state = HRTIMER_STATE_ENQUEUED; + timer->state |= HRTIMER_STATE_ENQUEUED; return timerqueue_add(&base->active, &timer->node); } @@ -905,11 +906,9 @@ static void __remove_hrtimer(struct hrtimer *timer, u8 newstate, int reprogram) { struct hrtimer_cpu_base *cpu_base = base->cpu_base; - u8 state = timer->state; - timer->state = newstate; - if (!(state & HRTIMER_STATE_ENQUEUED)) - return; + if (!(timer->state & HRTIMER_STATE_ENQUEUED)) + goto out; if (!timerqueue_del(&base->active, &timer->node)) cpu_base->active_bases &= ~(1 << base->index); @@ -926,6 +925,13 @@ static void __remove_hrtimer(struct hrtimer *timer, if (reprogram && timer == cpu_base->next_timer) hrtimer_force_reprogram(cpu_base, 1); #endif + +out: + /* + * We need to preserve PINNED state here, otherwise we may end up + * migrating pinned hrtimers as well. + */ + timer->state = newstate | (timer->state & HRTIMER_STATE_PINNED); } /* @@ -954,6 +960,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest state = HRTIMER_STATE_INACTIVE; __remove_hrtimer(timer, base, state, reprogram); + timer->state &= ~HRTIMER_STATE_PINNED; return 1; } return 0; @@ -1007,6 +1014,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, timer_stats_hrtimer_set_start_info(timer); + /* Update pinned state */ + timer->state &= ~HRTIMER_STATE_PINNED; + timer->state |= (!!(mode & HRTIMER_MODE_PINNED)) << HRTIMER_PINNED_SHIFT; + leftmost = enqueue_hrtimer(timer, new_base); if (!leftmost) goto unlock; @@ -1181,8 +1192,8 @@ bool hrtimer_active(const struct hrtimer *timer) cpu_base = READ_ONCE(timer->base->cpu_base); seq = raw_read_seqcount_begin(&cpu_base->seq); - if (timer->state != HRTIMER_STATE_INACTIVE || - cpu_base->running == timer) + if (((timer->state & ~HRTIMER_STATE_PINNED) != + HRTIMER_STATE_INACTIVE) || cpu_base->running == timer) return true; } while (read_seqcount_retry(&cpu_base->seq, seq) || @@ -1619,17 +1630,43 @@ static void init_hrtimers_cpu(int cpu) hrtimer_init_hres(cpu_base); } -#ifdef CONFIG_HOTPLUG_CPU - -static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, - struct hrtimer_clock_base *new_base) +#if defined(CONFIG_HOTPLUG_CPU) +static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base, + struct hrtimer_cpu_base *new_base, + unsigned int i, + bool wait, + bool remove_pinned) { struct hrtimer *timer; struct timerqueue_node *node; + struct timerqueue_head pinned; + int is_pinned; + struct hrtimer_clock_base *old_c_base = &old_base->clock_base[i]; + struct hrtimer_clock_base *new_c_base = &new_base->clock_base[i]; + + timerqueue_init_head(&pinned); - while ((node = timerqueue_getnext(&old_base->active))) { + while ((node = timerqueue_getnext(&old_c_base->active))) { timer = container_of(node, struct hrtimer, node); - BUG_ON(hrtimer_callback_running(timer)); + if (wait) { + /* Ensure timers are done running before continuing */ + while (hrtimer_callback_running(timer)) { + raw_spin_unlock(&old_base->lock); + raw_spin_unlock(&new_base->lock); + cpu_relax(); + /* + * cpu_relax may just be a barrier. Grant the + * run_hrtimer_list code some time to obtain the + * spinlock. + */ + udelay(2); + raw_spin_lock(&new_base->lock); + raw_spin_lock_nested(&old_base->lock, + SINGLE_DEPTH_NESTING); + } + } else { + BUG_ON(hrtimer_callback_running(timer)); + } debug_deactivate(timer); /* @@ -1637,8 +1674,15 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, * timer could be seen as !active and just vanish away * under us on another CPU */ - __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0); - timer->base = new_base; + __remove_hrtimer(timer, old_c_base, HRTIMER_STATE_ENQUEUED, 0); + + is_pinned = timer->state & HRTIMER_STATE_PINNED; + if (!remove_pinned && is_pinned) { + timerqueue_add(&pinned, &timer->node); + continue; + } + + timer->base = new_c_base; /* * Enqueue the timers on the new cpu. This does not * reprogram the event device in case the timer @@ -1647,19 +1691,25 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, * sort out already expired timers and reprogram the * event device. */ - enqueue_hrtimer(timer, new_base); + enqueue_hrtimer(timer, new_c_base); + } + + /* Re-queue pinned timers for non-hotplug usecase */ + while ((node = timerqueue_getnext(&pinned))) { + timer = container_of(node, struct hrtimer, node); + + timerqueue_del(&pinned, &timer->node); + enqueue_hrtimer(timer, old_c_base); } } -static void migrate_hrtimers(int scpu) +static void __migrate_hrtimers(int scpu, bool wait, bool remove_pinned) { struct hrtimer_cpu_base *old_base, *new_base; + unsigned long flags; int i; - BUG_ON(cpu_online(scpu)); - tick_cancel_sched_timer(scpu); - - local_irq_disable(); + local_irq_save(flags); old_base = &per_cpu(hrtimer_bases, scpu); new_base = this_cpu_ptr(&hrtimer_bases); /* @@ -1670,8 +1720,8 @@ static void migrate_hrtimers(int scpu) raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i]); + migrate_hrtimer_list(old_base, new_base, i, wait, + remove_pinned); } raw_spin_unlock(&old_base->lock); @@ -1679,7 +1729,20 @@ static void migrate_hrtimers(int scpu) /* Check, if we got expired work to do */ __hrtimer_peek_ahead_timers(); - local_irq_enable(); + local_irq_restore(flags); +} + +static void migrate_hrtimers(int scpu) +{ + BUG_ON(cpu_online(scpu)); + tick_cancel_sched_timer(scpu); + + __migrate_hrtimers(scpu, false, true); +} + +void hrtimer_quiesce_cpu(void *cpup) +{ + __migrate_hrtimers(*(int *)cpup, true, false); } #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index a26036d37a38..0637823aa5a6 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -70,6 +70,7 @@ struct clock_data { static struct hrtimer sched_clock_timer; static int irqtime = -1; +static int initialized; core_param(irqtime, irqtime, int, 0400); @@ -231,6 +232,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) pr_debug("Registered %pF as sched_clock source\n", read); } +int sched_clock_initialized(void) +{ + return initialized; +} + void __init sched_clock_postinit(void) { /* @@ -249,6 +255,8 @@ void __init sched_clock_postinit(void) hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sched_clock_timer.function = sched_clock_poll; hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); + + initialized = 1; } /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 22c57e191a23..651ff1a3a306 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -24,6 +24,7 @@ #include <linux/posix-timers.h> #include <linux/perf_event.h> #include <linux/context_tracking.h> +#include <linux/rq_stats.h> #include <asm/irq_regs.h> @@ -31,6 +32,10 @@ #include <trace/events/timer.h> +struct rq_data rq_info; +struct workqueue_struct *rq_wq; +spinlock_t rq_lock; + /* * Per cpu nohz control structure */ @@ -41,6 +46,21 @@ static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); */ static ktime_t last_jiffies_update; +u64 jiffy_to_ktime_ns(u64 *now, u64 *jiffy_ktime_ns) +{ + u64 cur_jiffies; + unsigned long seq; + + do { + seq = read_seqbegin(&jiffies_lock); + *now = ktime_get_ns(); + *jiffy_ktime_ns = ktime_to_ns(last_jiffies_update); + cur_jiffies = get_jiffies_64(); + } while (read_seqretry(&jiffies_lock, seq)); + + return cur_jiffies; +} + struct tick_sched *tick_get_tick_sched(int cpu) { return &per_cpu(tick_cpu_sched, cpu); @@ -143,7 +163,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) * when we go busy again does not account too much ticks. */ if (ts->tick_stopped) { - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sched(); if (is_idle_task(current)) ts->idle_jiffies++; } @@ -430,7 +450,7 @@ static void tick_nohz_update_jiffies(ktime_t now) tick_do_update_jiffies64(now); local_irq_restore(flags); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sched(); } /* @@ -701,7 +721,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) update_cpu_load_nohz(); calc_load_exit_idle(); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sched(); /* * Cancel the scheduled timer and restore the tick */ @@ -1049,6 +1069,51 @@ void tick_irq_enter(void) * High resolution timer specific code */ #ifdef CONFIG_HIGH_RES_TIMERS +static void update_rq_stats(void) +{ + unsigned long jiffy_gap = 0; + unsigned int rq_avg = 0; + unsigned long flags = 0; + + jiffy_gap = jiffies - rq_info.rq_poll_last_jiffy; + + if (jiffy_gap >= rq_info.rq_poll_jiffies) { + + spin_lock_irqsave(&rq_lock, flags); + + if (!rq_info.rq_avg) + rq_info.rq_poll_total_jiffies = 0; + + rq_avg = nr_running() * 10; + + if (rq_info.rq_poll_total_jiffies) { + rq_avg = (rq_avg * jiffy_gap) + + (rq_info.rq_avg * + rq_info.rq_poll_total_jiffies); + do_div(rq_avg, + rq_info.rq_poll_total_jiffies + jiffy_gap); + } + + rq_info.rq_avg = rq_avg; + rq_info.rq_poll_total_jiffies += jiffy_gap; + rq_info.rq_poll_last_jiffy = jiffies; + + spin_unlock_irqrestore(&rq_lock, flags); + } +} + +static void wakeup_user(void) +{ + unsigned long jiffy_gap; + + jiffy_gap = jiffies - rq_info.def_timer_last_jiffy; + + if (jiffy_gap >= rq_info.def_timer_jiffies) { + rq_info.def_timer_last_jiffy = jiffies; + queue_work(rq_wq, &rq_info.def_timer_work); + } +} + /* * We rearm the timer until we get disabled by the idle code. * Called with interrupts disabled. @@ -1066,9 +1131,23 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) * Do not call, when we are not in irq context and have * no valid regs pointer */ - if (regs) + if (regs) { tick_sched_handle(ts, regs); + if (rq_info.init == 1 && + tick_do_timer_cpu == smp_processor_id()) { + /* + * update run queue statistics + */ + update_rq_stats(); + + /* + * wakeup user if needed + */ + wakeup_user(); + } + } + /* No need to reprogram if we are in idle or full dynticks mode */ if (unlikely(ts->tick_stopped)) return HRTIMER_NORESTART; @@ -1181,3 +1260,8 @@ int tick_check_oneshot_change(int allow_nohz) tick_nohz_switch_to_nohz(); return 0; } + +ktime_t * get_next_event_cpu(unsigned int cpu) +{ + return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event); +} diff --git a/kernel/time/timer.c b/kernel/time/timer.c index d1798fa0c743..5ebefc7cfa4f 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -94,12 +94,15 @@ struct tvec_base { struct tvec tv5; } ____cacheline_aligned; +static inline void __run_timers(struct tvec_base *base); static DEFINE_PER_CPU(struct tvec_base, tvec_bases); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) unsigned int sysctl_timer_migration = 1; +struct tvec_base tvec_base_deferrable; + void timers_update_migration(bool update_nohz) { bool on = sysctl_timer_migration && tick_nohz_active; @@ -135,18 +138,62 @@ int timer_migration_handler(struct ctl_table *table, int write, } static inline struct tvec_base *get_target_base(struct tvec_base *base, - int pinned) + int pinned, u32 timer_flags) { + if (!pinned && !(timer_flags & TIMER_PINNED_ON_CPU) && + (timer_flags & TIMER_DEFERRABLE)) + return &tvec_base_deferrable; if (pinned || !base->migration_enabled) return this_cpu_ptr(&tvec_bases); return per_cpu_ptr(&tvec_bases, get_nohz_timer_target()); } + +static inline void __run_deferrable_timers(void) +{ + if (smp_processor_id() == tick_do_timer_cpu && + time_after_eq(jiffies, tvec_base_deferrable.timer_jiffies)) + __run_timers(&tvec_base_deferrable); +} + +static inline void init_timer_deferrable_global(void) +{ + tvec_base_deferrable.cpu = nr_cpu_ids; + spin_lock_init(&tvec_base_deferrable.lock); + tvec_base_deferrable.timer_jiffies = jiffies; + tvec_base_deferrable.next_timer = tvec_base_deferrable.timer_jiffies; +} + +static inline struct tvec_base *get_timer_base(u32 timer_flags) +{ + if (!(timer_flags & TIMER_PINNED_ON_CPU) && + timer_flags & TIMER_DEFERRABLE) + return &tvec_base_deferrable; + else + return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK); +} #else static inline struct tvec_base *get_target_base(struct tvec_base *base, - int pinned) + int pinned, u32 timer_flags) { return this_cpu_ptr(&tvec_bases); } + +static inline void __run_deferrable_timers(void) +{ +} + +static inline void init_timer_deferrable_global(void) +{ + /* + * initialize cpu unbound deferrable timer base only when CONFIG_SMP. + * UP kernel handles the timers with cpu 0 timer base. + */ +} + +static inline struct tvec_base *get_timer_base(u32 timer_flags) +{ + return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK); +} #endif static unsigned long round_jiffies_common(unsigned long j, int cpu, @@ -768,7 +815,7 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, struct tvec_base *base; if (!(tf & TIMER_MIGRATING)) { - base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK); + base = get_timer_base(tf); spin_lock_irqsave(&base->lock, *flags); if (timer->flags == tf) return base; @@ -797,7 +844,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, debug_activate(timer, expires); - new_base = get_target_base(base, pinned); + new_base = get_target_base(base, pinned, timer->flags); if (base != new_base) { /* @@ -819,6 +866,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, } } + if (pinned == TIMER_PINNED) + timer->flags |= TIMER_PINNED_ON_CPU; + else + timer->flags &= ~TIMER_PINNED_ON_CPU; timer->expires = expires; internal_add_timer(base, timer); @@ -1000,6 +1051,7 @@ void add_timer_on(struct timer_list *timer, int cpu) (timer->flags & ~TIMER_BASEMASK) | cpu); } + timer->flags |= TIMER_PINNED_ON_CPU; debug_activate(timer, timer->expires); internal_add_timer(base, timer); spin_unlock_irqrestore(&base->lock, flags); @@ -1433,6 +1485,8 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = this_cpu_ptr(&tvec_bases); + __run_deferrable_timers(); + if (time_after_eq(jiffies, base->timer_jiffies)) __run_timers(base); } @@ -1566,56 +1620,82 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -#ifdef CONFIG_HOTPLUG_CPU -static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head) +#if defined(CONFIG_HOTPLUG_CPU) +static void migrate_timer_list(struct tvec_base *new_base, + struct hlist_head *head, bool remove_pinned) { struct timer_list *timer; int cpu = new_base->cpu; + struct hlist_node *n; + int is_pinned; - while (!hlist_empty(head)) { - timer = hlist_entry(head->first, struct timer_list, entry); - /* We ignore the accounting on the dying cpu */ - detach_timer(timer, false); + hlist_for_each_entry_safe(timer, n, head, entry) { + is_pinned = timer->flags & TIMER_PINNED_ON_CPU; + if (!remove_pinned && is_pinned) + continue; + + detach_if_pending(timer, get_timer_base(timer->flags), false); timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; internal_add_timer(new_base, timer); } } -static void migrate_timers(int cpu) +static void __migrate_timers(int cpu, bool remove_pinned) { struct tvec_base *old_base; struct tvec_base *new_base; + unsigned long flags; int i; - BUG_ON(cpu_online(cpu)); old_base = per_cpu_ptr(&tvec_bases, cpu); new_base = get_cpu_ptr(&tvec_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - spin_lock_irq(&new_base->lock); + spin_lock_irqsave(&new_base->lock, flags); spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); - BUG_ON(old_base->running_timer); + /* + * If we're in the hotplug path, kill the system if there's a running + * timer. It's ok to have a running timer in the isolation case - the + * currently running or just expired timers are off of the timer wheel + * and so everything else can be migrated off. + */ + if (!cpu_online(cpu)) + BUG_ON(old_base->running_timer); for (i = 0; i < TVR_SIZE; i++) - migrate_timer_list(new_base, old_base->tv1.vec + i); + migrate_timer_list(new_base, old_base->tv1.vec + i, + remove_pinned); for (i = 0; i < TVN_SIZE; i++) { - migrate_timer_list(new_base, old_base->tv2.vec + i); - migrate_timer_list(new_base, old_base->tv3.vec + i); - migrate_timer_list(new_base, old_base->tv4.vec + i); - migrate_timer_list(new_base, old_base->tv5.vec + i); + migrate_timer_list(new_base, old_base->tv2.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv3.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv4.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv5.vec + i, + remove_pinned); } - old_base->active_timers = 0; - old_base->all_timers = 0; - spin_unlock(&old_base->lock); - spin_unlock_irq(&new_base->lock); + spin_unlock_irqrestore(&new_base->lock, flags); put_cpu_ptr(&tvec_bases); } +/* Migrate timers from 'cpu' to this_cpu */ +static void migrate_timers(int cpu) +{ + BUG_ON(cpu_online(cpu)); + __migrate_timers(cpu, true); +} + +void timer_quiesce_cpu(void *cpup) +{ + __migrate_timers(*(int *)cpup, false); +} + static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -1656,6 +1736,8 @@ static void __init init_timer_cpus(void) for_each_possible_cpu(cpu) init_timer_cpu(cpu); + + init_timer_deferrable_global(); } void __init init_timers(void) |
