diff options
Diffstat (limited to 'kernel/time')
-rw-r--r-- | kernel/time/Makefile | 3 | ||||
-rw-r--r-- | kernel/time/alarmtimer.c | 268 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 48 | ||||
-rw-r--r-- | kernel/time/hrtimer.c | 134 | ||||
-rw-r--r-- | kernel/time/posix-cpu-timers.c | 2 | ||||
-rw-r--r-- | kernel/time/sched_clock.c | 19 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 98 | ||||
-rw-r--r-- | kernel/time/timer.c | 237 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 10 | ||||
-rw-r--r-- | kernel/time/timer_stats.c | 425 |
10 files changed, 635 insertions, 609 deletions
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 49eca0beed32..b9b881ebaff3 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -9,6 +9,7 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o -obj-$(CONFIG_TIMER_STATS) += timer_stats.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o + +ccflags-y += -Idrivers/cpuidle diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 6fcc367ad531..ceec77c652b5 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -26,6 +26,11 @@ #include <linux/workqueue.h> #include <linux/freezer.h> +#ifdef CONFIG_MSM_PM +#include "lpm-levels.h" +#endif +#include <linux/workqueue.h> + /** * struct alarm_base - Alarm timer bases * @lock: Lock for syncrhonized access to the base @@ -46,14 +51,130 @@ static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); static struct wakeup_source *ws; +static struct delayed_work work; +static struct workqueue_struct *power_off_alarm_workqueue; #ifdef CONFIG_RTC_CLASS /* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; static DEFINE_SPINLOCK(rtcdev_lock); +static struct mutex power_on_alarm_lock; +static struct alarm init_alarm; /** + * power_on_alarm_init - Init power on alarm value + * + * Read rtc alarm value after device booting up and add this alarm + * into alarm queue. + */ +void power_on_alarm_init(void) +{ + struct rtc_wkalrm rtc_alarm; + struct rtc_time rt; + unsigned long alarm_time; + struct rtc_device *rtc; + ktime_t alarm_ktime; + + rtc = alarmtimer_get_rtcdev(); + + if (!rtc) + return; + + rtc_read_alarm(rtc, &rtc_alarm); + rt = rtc_alarm.time; + + rtc_tm_to_time(&rt, &alarm_time); + + if (alarm_time) { + alarm_ktime = ktime_set(alarm_time, 0); + alarm_init(&init_alarm, ALARM_POWEROFF_REALTIME, NULL); + alarm_start(&init_alarm, alarm_ktime); + } +} + +/** + * set_power_on_alarm - set power on alarm value into rtc register + * + * Get the soonest power off alarm timer and set the alarm value into rtc + * register. + */ +void set_power_on_alarm(void) +{ + int rc; + struct timespec wall_time, alarm_ts; + long alarm_secs = 0l; + long rtc_secs, alarm_time, alarm_delta; + struct rtc_time rtc_time; + struct rtc_wkalrm alarm; + struct rtc_device *rtc; + struct timerqueue_node *next; + unsigned long flags; + struct alarm_base *base = &alarm_bases[ALARM_POWEROFF_REALTIME]; + + rc = mutex_lock_interruptible(&power_on_alarm_lock); + if (rc != 0) + return; + + spin_lock_irqsave(&base->lock, flags); + next = timerqueue_getnext(&base->timerqueue); + spin_unlock_irqrestore(&base->lock, flags); + + if (next) { + alarm_ts = ktime_to_timespec(next->expires); + alarm_secs = alarm_ts.tv_sec; + } + + if (!alarm_secs) + goto disable_alarm; + + getnstimeofday(&wall_time); + + /* + * alarm_secs have to be bigger than "wall_time +1". + * It is to make sure that alarm time will be always + * bigger than wall time. + */ + if (alarm_secs <= wall_time.tv_sec + 1) + goto disable_alarm; + + rtc = alarmtimer_get_rtcdev(); + if (!rtc) + goto exit; + + rtc_read_time(rtc, &rtc_time); + rtc_tm_to_time(&rtc_time, &rtc_secs); + alarm_delta = wall_time.tv_sec - rtc_secs; + alarm_time = alarm_secs - alarm_delta; + + rtc_time_to_tm(alarm_time, &alarm.time); + alarm.enabled = 1; + rc = rtc_set_alarm(rtcdev, &alarm); + if (rc) + goto disable_alarm; + + mutex_unlock(&power_on_alarm_lock); + return; + +disable_alarm: + rtc_alarm_irq_enable(rtcdev, 0); +exit: + mutex_unlock(&power_on_alarm_lock); +} + +static void alarmtimer_triggered_func(void *p) +{ + struct rtc_device *rtc = rtcdev; + + if (!(rtc->irq_data & RTC_AF)) + return; + __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); +} + +static struct rtc_task alarmtimer_rtc_task = { + .func = alarmtimer_triggered_func +}; +/** * alarmtimer_get_rtcdev - Return selected rtcdevice * * This function returns the rtc device to use for wakealarms. @@ -63,7 +184,7 @@ static DEFINE_SPINLOCK(rtcdev_lock); struct rtc_device *alarmtimer_get_rtcdev(void) { unsigned long flags; - struct rtc_device *ret; + struct rtc_device *ret = NULL; spin_lock_irqsave(&rtcdev_lock, flags); ret = rtcdev; @@ -77,33 +198,48 @@ static int alarmtimer_rtc_add_device(struct device *dev, struct class_interface *class_intf) { unsigned long flags; + int err = 0; struct rtc_device *rtc = to_rtc_device(dev); - if (rtcdev) return -EBUSY; - if (!rtc->ops->set_alarm) return -1; - if (!device_may_wakeup(rtc->dev.parent)) - return -1; spin_lock_irqsave(&rtcdev_lock, flags); if (!rtcdev) { + err = rtc_irq_register(rtc, &alarmtimer_rtc_task); + if (err) + goto rtc_irq_reg_err; rtcdev = rtc; /* hold a reference so it doesn't go away */ get_device(dev); } + +rtc_irq_reg_err: spin_unlock_irqrestore(&rtcdev_lock, flags); - return 0; + return err; + +} + +static void alarmtimer_rtc_remove_device(struct device *dev, + struct class_interface *class_intf) +{ + if (rtcdev && dev == &rtcdev->dev) { + rtc_irq_unregister(rtcdev, &alarmtimer_rtc_task); + rtcdev = NULL; + } } static inline void alarmtimer_rtc_timer_init(void) { + mutex_init(&power_on_alarm_lock); + rtc_timer_init(&rtctimer, NULL, NULL); } static struct class_interface alarmtimer_rtc_interface = { .add_dev = &alarmtimer_rtc_add_device, + .remove_dev = &alarmtimer_rtc_remove_device, }; static int alarmtimer_rtc_interface_setup(void) @@ -124,8 +260,14 @@ struct rtc_device *alarmtimer_get_rtcdev(void) static inline int alarmtimer_rtc_interface_setup(void) { return 0; } static inline void alarmtimer_rtc_interface_remove(void) { } static inline void alarmtimer_rtc_timer_init(void) { } +void set_power_on_alarm(void) { } #endif +static void alarm_work_func(struct work_struct *unused) +{ + set_power_on_alarm(); +} + /** * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue * @base: pointer to the base where the timer is being run @@ -195,6 +337,10 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) } spin_unlock_irqrestore(&base->lock, flags); + /* set next power off alarm */ + if (alarm->type == ALARM_POWEROFF_REALTIME) + queue_delayed_work(power_off_alarm_workqueue, &work, 0); + return ret; } @@ -217,6 +363,68 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining); * set an rtc timer to fire that far into the future, which * will wake us from suspend. */ +#if defined(CONFIG_RTC_DRV_QPNP) && defined(CONFIG_MSM_PM) +static int alarmtimer_suspend(struct device *dev) +{ + struct rtc_time tm; + ktime_t min, now; + unsigned long flags; + struct rtc_device *rtc; + int i; + int ret = 0; + + spin_lock_irqsave(&freezer_delta_lock, flags); + min = freezer_delta; + freezer_delta = ktime_set(0, 0); + spin_unlock_irqrestore(&freezer_delta_lock, flags); + + rtc = alarmtimer_get_rtcdev(); + /* If we have no rtcdev, just return */ + if (!rtc) + return 0; + + /* Find the soonest timer to expire*/ + for (i = 0; i < ALARM_NUMTYPE; i++) { + struct alarm_base *base = &alarm_bases[i]; + struct timerqueue_node *next; + ktime_t delta; + + spin_lock_irqsave(&base->lock, flags); + next = timerqueue_getnext(&base->timerqueue); + spin_unlock_irqrestore(&base->lock, flags); + if (!next) + continue; + delta = ktime_sub(next->expires, base->gettime()); + if (!min.tv64 || (delta.tv64 < min.tv64)) + min = delta; + } + if (min.tv64 == 0) + return 0; + + if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { + __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); + return -EBUSY; + } + + /* Setup a timer to fire that far in the future */ + rtc_timer_cancel(rtc, &rtctimer); + rtc_read_time(rtc, &tm); + now = rtc_tm_to_ktime(tm); + now = ktime_add(now, min); + if (poweron_alarm) { + uint64_t msec = 0; + + msec = ktime_to_ms(min); + lpm_suspend_wake_time(msec); + } else { + /* Set alarm, if in the past reject suspend briefly to handle */ + ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); + if (ret < 0) + __pm_wakeup_event(ws, MSEC_PER_SEC); + } + return ret; +} +#else static int alarmtimer_suspend(struct device *dev) { struct rtc_time tm; @@ -226,6 +434,8 @@ static int alarmtimer_suspend(struct device *dev) int i; int ret; + cancel_delayed_work_sync(&work); + spin_lock_irqsave(&freezer_delta_lock, flags); min = freezer_delta; freezer_delta = ktime_set(0, 0); @@ -271,11 +481,31 @@ static int alarmtimer_suspend(struct device *dev) __pm_wakeup_event(ws, MSEC_PER_SEC); return ret; } +#endif +static int alarmtimer_resume(struct device *dev) +{ + struct rtc_device *rtc; + + rtc = alarmtimer_get_rtcdev(); + /* If we have no rtcdev, just return */ + if (!rtc) + return 0; + rtc_timer_cancel(rtc, &rtctimer); + + queue_delayed_work(power_off_alarm_workqueue, &work, 0); + return 0; +} + #else static int alarmtimer_suspend(struct device *dev) { return 0; } + +static int alarmtimer_resume(struct device *dev) +{ + return 0; +} #endif static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) @@ -443,12 +673,14 @@ EXPORT_SYMBOL_GPL(alarm_forward_now); * clock2alarm - helper that converts from clockid to alarmtypes * @clockid: clockid. */ -static enum alarmtimer_type clock2alarm(clockid_t clockid) +enum alarmtimer_type clock2alarm(clockid_t clockid) { if (clockid == CLOCK_REALTIME_ALARM) return ALARM_REALTIME; if (clockid == CLOCK_BOOTTIME_ALARM) return ALARM_BOOTTIME; + if (clockid == CLOCK_POWEROFF_ALARM) + return ALARM_POWEROFF_REALTIME; return -1; } @@ -809,6 +1041,7 @@ out: /* Suspend hook structures */ static const struct dev_pm_ops alarmtimer_pm_ops = { .suspend = alarmtimer_suspend, + .resume = alarmtimer_resume, }; static struct platform_driver alarmtimer_driver = { @@ -843,10 +1076,13 @@ static int __init alarmtimer_init(void) posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); + posix_timers_register_clock(CLOCK_POWEROFF_ALARM, &alarm_clock); /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real; + alarm_bases[ALARM_POWEROFF_REALTIME].base_clockid = CLOCK_REALTIME; + alarm_bases[ALARM_POWEROFF_REALTIME].gettime = &ktime_get_real; alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME; alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime; for (i = 0; i < ALARM_NUMTYPE; i++) { @@ -868,8 +1104,24 @@ static int __init alarmtimer_init(void) goto out_drv; } ws = wakeup_source_register("alarmtimer"); - return 0; + if (!ws) { + error = -ENOMEM; + goto out_ws; + } + + INIT_DELAYED_WORK(&work, alarm_work_func); + power_off_alarm_workqueue = + create_singlethread_workqueue("power_off_alarm"); + if (!power_off_alarm_workqueue) { + error = -ENOMEM; + goto out_wq; + } + return 0; +out_wq: + wakeup_source_unregister(ws); +out_ws: + platform_device_unregister(pdev); out_drv: platform_driver_unregister(&alarmtimer_driver); out_if: diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index b98810d2f3b4..89cc82a38e4d 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -108,7 +108,7 @@ static int finished_booting; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static void clocksource_watchdog_work(struct work_struct *work); -static void clocksource_select(void); +static void clocksource_select(bool force); static LIST_HEAD(watchdog_list); static struct clocksource *watchdog; @@ -415,7 +415,7 @@ static int clocksource_watchdog_kthread(void *data) { mutex_lock(&clocksource_mutex); if (__clocksource_watchdog_kthread()) - clocksource_select(); + clocksource_select(false); mutex_unlock(&clocksource_mutex); return 0; } @@ -555,11 +555,12 @@ static inline void clocksource_update_max_deferment(struct clocksource *cs) #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET -static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur) +static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur, + bool force) { struct clocksource *cs; - if (!finished_booting || list_empty(&clocksource_list)) + if ((!finished_booting && !force) || list_empty(&clocksource_list)) return NULL; /* @@ -577,13 +578,13 @@ static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur) return NULL; } -static void __clocksource_select(bool skipcur) +static void __clocksource_select(bool skipcur, bool force) { bool oneshot = tick_oneshot_mode_active(); struct clocksource *best, *cs; /* Find the best suitable clocksource */ - best = clocksource_find_best(oneshot, skipcur); + best = clocksource_find_best(oneshot, skipcur, force); if (!best) return; @@ -623,22 +624,40 @@ static void __clocksource_select(bool skipcur) * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. */ -static void clocksource_select(void) +static void clocksource_select(bool force) { - __clocksource_select(false); + return __clocksource_select(false, force); } static void clocksource_select_fallback(void) { - __clocksource_select(true); + __clocksource_select(true, false); } #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */ -static inline void clocksource_select(void) { } + +static inline void clocksource_select(bool force) { } static inline void clocksource_select_fallback(void) { } #endif +/** + * clocksource_select_force - Force re-selection of the best clocksource + * among registered clocksources + * + * clocksource_select() can't select the best clocksource before + * calling clocksource_done_booting() and since clocksource_select() + * should be called with clocksource_mutex held, provide a new API + * can be called from other files to select best clockrouce irrespective + * of finished_booting flag. + */ +void clocksource_select_force(void) +{ + mutex_lock(&clocksource_mutex); + clocksource_select(true); + mutex_unlock(&clocksource_mutex); +} + /* * clocksource_done_booting - Called near the end of core bootup * @@ -655,7 +674,7 @@ static int __init clocksource_done_booting(void) * Run the watchdog first to eliminate unstable clock sources */ __clocksource_watchdog_kthread(); - clocksource_select(); + clocksource_select(false); mutex_unlock(&clocksource_mutex); return 0; } @@ -744,6 +763,7 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq } EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale); + /** * __clocksource_register_scale - Used to install new clocksources * @cs: clocksource to be registered @@ -765,7 +785,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); clocksource_enqueue_watchdog(cs); - clocksource_select(); + clocksource_select(false); clocksource_select_watchdog(false); mutex_unlock(&clocksource_mutex); return 0; @@ -788,7 +808,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) { mutex_lock(&clocksource_mutex); __clocksource_change_rating(cs, rating); - clocksource_select(); + clocksource_select(false); clocksource_select_watchdog(false); mutex_unlock(&clocksource_mutex); } @@ -892,7 +912,7 @@ static ssize_t sysfs_override_clocksource(struct device *dev, ret = sysfs_get_uname(buf, override_name, count); if (ret >= 0) - clocksource_select(); + clocksource_select(false); mutex_unlock(&clocksource_mutex); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d0d8717ebc3a..beafdf94b3b5 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -783,34 +783,6 @@ void hrtimers_resume(void) clock_was_set_delayed(); } -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - if (timer->start_site) - return; - timer->start_site = __builtin_return_address(0); - memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); - timer->start_pid = current->pid; -#endif -} - -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; -#endif -} - -static inline void timer_stats_account_hrtimer(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - if (likely(!timer_stats_active)) - return; - timer_stats_update_stats(timer, timer->start_pid, timer->start_site, - timer->function, timer->start_comm, 0); -#endif -} - /* * Counterpart to lock_hrtimer_base above: */ @@ -887,7 +859,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, base->cpu_base->active_bases |= 1 << base->index; - timer->state = HRTIMER_STATE_ENQUEUED; + timer->state |= HRTIMER_STATE_ENQUEUED; return timerqueue_add(&base->active, &timer->node); } @@ -907,11 +879,9 @@ static void __remove_hrtimer(struct hrtimer *timer, u8 newstate, int reprogram) { struct hrtimer_cpu_base *cpu_base = base->cpu_base; - u8 state = timer->state; - timer->state = newstate; - if (!(state & HRTIMER_STATE_ENQUEUED)) - return; + if (!(timer->state & HRTIMER_STATE_ENQUEUED)) + goto out; if (!timerqueue_del(&base->active, &timer->node)) cpu_base->active_bases &= ~(1 << base->index); @@ -928,6 +898,13 @@ static void __remove_hrtimer(struct hrtimer *timer, if (reprogram && timer == cpu_base->next_timer) hrtimer_force_reprogram(cpu_base, 1); #endif + +out: + /* + * We need to preserve PINNED state here, otherwise we may end up + * migrating pinned hrtimers as well. + */ + timer->state = newstate | (timer->state & HRTIMER_STATE_PINNED); } /* @@ -949,13 +926,13 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest * rare case and less expensive than a smp call. */ debug_deactivate(timer); - timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); if (!restart) state = HRTIMER_STATE_INACTIVE; __remove_hrtimer(timer, base, state, reprogram); + timer->state &= ~HRTIMER_STATE_PINNED; return 1; } return 0; @@ -1007,7 +984,9 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Switch the timer base, if necessary: */ new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - timer_stats_hrtimer_set_start_info(timer); + /* Update pinned state */ + timer->state &= ~HRTIMER_STATE_PINNED; + timer->state |= (!!(mode & HRTIMER_MODE_PINNED)) << HRTIMER_PINNED_SHIFT; leftmost = enqueue_hrtimer(timer, new_base); if (!leftmost) @@ -1150,12 +1129,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, base = hrtimer_clockid_to_base(clock_id); timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); - -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; - timer->start_pid = -1; - memset(timer->start_comm, 0, TASK_COMM_LEN); -#endif } /** @@ -1188,8 +1161,8 @@ bool hrtimer_active(const struct hrtimer *timer) cpu_base = READ_ONCE(timer->base->cpu_base); seq = raw_read_seqcount_begin(&cpu_base->seq); - if (timer->state != HRTIMER_STATE_INACTIVE || - cpu_base->running == timer) + if (((timer->state & ~HRTIMER_STATE_PINNED) != + HRTIMER_STATE_INACTIVE) || cpu_base->running == timer) return true; } while (read_seqcount_retry(&cpu_base->seq, seq) || @@ -1239,7 +1212,6 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, raw_write_seqcount_barrier(&cpu_base->seq); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); - timer_stats_account_hrtimer(timer); fn = timer->function; /* @@ -1627,17 +1599,23 @@ static void init_hrtimers_cpu(int cpu) hrtimer_init_hres(cpu_base); } -#ifdef CONFIG_HOTPLUG_CPU - +#if defined(CONFIG_HOTPLUG_CPU) static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, - struct hrtimer_clock_base *new_base) + struct hrtimer_clock_base *new_base, + bool remove_pinned) { struct hrtimer *timer; struct timerqueue_node *node; + struct timerqueue_head pinned; + int is_pinned; + bool is_hotplug = !cpu_online(old_base->cpu_base->cpu); + + timerqueue_init_head(&pinned); while ((node = timerqueue_getnext(&old_base->active))) { timer = container_of(node, struct hrtimer, node); - BUG_ON(hrtimer_callback_running(timer)); + if (is_hotplug) + BUG_ON(hrtimer_callback_running(timer)); debug_deactivate(timer); /* @@ -1646,6 +1624,13 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, * under us on another CPU */ __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0); + + is_pinned = timer->state & HRTIMER_STATE_PINNED; + if (!remove_pinned && is_pinned) { + timerqueue_add(&pinned, &timer->node); + continue; + } + timer->base = new_base; /* * Enqueue the timers on the new cpu. This does not @@ -1657,17 +1642,23 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, */ enqueue_hrtimer(timer, new_base); } + + /* Re-queue pinned timers for non-hotplug usecase */ + while ((node = timerqueue_getnext(&pinned))) { + timer = container_of(node, struct hrtimer, node); + + timerqueue_del(&pinned, &timer->node); + enqueue_hrtimer(timer, old_base); + } } -static void migrate_hrtimers(int scpu) +static void __migrate_hrtimers(int scpu, bool remove_pinned) { struct hrtimer_cpu_base *old_base, *new_base; + unsigned long flags; int i; - BUG_ON(cpu_online(scpu)); - tick_cancel_sched_timer(scpu); - - local_irq_disable(); + local_irq_save(flags); old_base = &per_cpu(hrtimer_bases, scpu); new_base = this_cpu_ptr(&hrtimer_bases); /* @@ -1679,7 +1670,7 @@ static void migrate_hrtimers(int scpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i]); + &new_base->clock_base[i], remove_pinned); } raw_spin_unlock(&old_base->lock); @@ -1687,7 +1678,20 @@ static void migrate_hrtimers(int scpu) /* Check, if we got expired work to do */ __hrtimer_peek_ahead_timers(); - local_irq_enable(); + local_irq_restore(flags); +} + +static void migrate_hrtimers(int scpu) +{ + BUG_ON(cpu_online(scpu)); + tick_cancel_sched_timer(scpu); + + __migrate_hrtimers(scpu, true); +} + +void hrtimer_quiesce_cpu(void *cpup) +{ + __migrate_hrtimers(*(int *)cpup, false); } #endif /* CONFIG_HOTPLUG_CPU */ @@ -1795,15 +1799,19 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to - * pass before the routine returns. + * pass before the routine returns unless the current task is explicitly + * woken up, (e.g. by wake_up_process()). * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. + * delivered to the current task or the current task is explicitly woken + * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. * - * Returns 0 when the timer has expired otherwise -EINTR + * Returns 0 when the timer has expired. If the task was woken before the + * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or + * by an explicit wakeup, it returns -EINTR. */ int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta, const enum hrtimer_mode mode) @@ -1825,15 +1833,19 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to - * pass before the routine returns. + * pass before the routine returns unless the current task is explicitly + * woken up, (e.g. by wake_up_process()). * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. + * delivered to the current task or the current task is explicitly woken + * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. * - * Returns 0 when the timer has expired otherwise -EINTR + * Returns 0 when the timer has expired. If the task was woken before the + * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or + * by an explicit wakeup, it returns -EINTR. */ int __sched schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 80016b329d94..051544aec37c 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1250,7 +1250,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval) { - unsigned long long now; + unsigned long long now = 0; WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED); cpu_timer_sample_group(clock_idx, tsk, &now); diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 382b159d8592..1986ab6e5943 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -70,6 +70,11 @@ struct clock_data { static struct hrtimer sched_clock_timer; static int irqtime = -1; +static int initialized; +static u64 suspend_ns; +static u64 suspend_cycles; +static u64 resume_cycles; + core_param(irqtime, irqtime, int, 0400); @@ -236,6 +241,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) pr_debug("Registered %pF as sched_clock source\n", read); } +int sched_clock_initialized(void) +{ + return initialized; +} + void __init sched_clock_postinit(void) { /* @@ -254,6 +264,8 @@ void __init sched_clock_postinit(void) hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sched_clock_timer.function = sched_clock_poll; hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); + + initialized = 1; } /* @@ -279,6 +291,11 @@ static int sched_clock_suspend(void) struct clock_read_data *rd = &cd.read_data[0]; update_sched_clock(); + + suspend_ns = rd->epoch_ns; + suspend_cycles = rd->epoch_cyc; + pr_info("suspend ns:%17llu suspend cycles:%17llu\n", + rd->epoch_ns, rd->epoch_cyc); hrtimer_cancel(&sched_clock_timer); rd->read_sched_clock = suspended_sched_clock_read; @@ -290,6 +307,8 @@ static void sched_clock_resume(void) struct clock_read_data *rd = &cd.read_data[0]; rd->epoch_cyc = cd.actual_read_sched_clock(); + resume_cycles = rd->epoch_cyc; + pr_info("resume cycles:%17llu\n", rd->epoch_cyc); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); rd->read_sched_clock = cd.actual_read_sched_clock; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a935cbdc55a4..6579be96e041 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -19,11 +19,13 @@ #include <linux/percpu.h> #include <linux/profile.h> #include <linux/sched.h> +#include <linux/timer.h> #include <linux/module.h> #include <linux/irq_work.h> #include <linux/posix-timers.h> #include <linux/perf_event.h> #include <linux/context_tracking.h> +#include <linux/rq_stats.h> #include <asm/irq_regs.h> @@ -31,6 +33,10 @@ #include <trace/events/timer.h> +struct rq_data rq_info; +struct workqueue_struct *rq_wq; +spinlock_t rq_lock; + /* * Per cpu nohz control structure */ @@ -41,6 +47,21 @@ static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); */ static ktime_t last_jiffies_update; +u64 jiffy_to_ktime_ns(u64 *now, u64 *jiffy_ktime_ns) +{ + u64 cur_jiffies; + unsigned long seq; + + do { + seq = read_seqbegin(&jiffies_lock); + *now = ktime_get_ns(); + *jiffy_ktime_ns = ktime_to_ns(last_jiffies_update); + cur_jiffies = get_jiffies_64(); + } while (read_seqretry(&jiffies_lock, seq)); + + return cur_jiffies; +} + struct tick_sched *tick_get_tick_sched(int cpu) { return &per_cpu(tick_cpu_sched, cpu); @@ -143,7 +164,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) * when we go busy again does not account too much ticks. */ if (ts->tick_stopped) { - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sched(); if (is_idle_task(current)) ts->idle_jiffies++; } @@ -430,7 +451,7 @@ static void tick_nohz_update_jiffies(ktime_t now) tick_do_update_jiffies64(now); local_irq_restore(flags); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sched(); } /* @@ -716,7 +737,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) update_cpu_load_nohz(); calc_load_exit_idle(); - touch_softlockup_watchdog(); + touch_softlockup_watchdog_sched(); /* * Cancel the scheduled timer and restore the tick */ @@ -804,6 +825,11 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts) now = tick_nohz_start_idle(ts); +#ifdef CONFIG_SMP + if (check_pending_deferrable_timers(cpu)) + raise_softirq_irqoff(TIMER_SOFTIRQ); +#endif + if (can_stop_idle_tick(cpu, ts)) { int was_stopped = ts->tick_stopped; @@ -1076,6 +1102,51 @@ void tick_irq_enter(void) * High resolution timer specific code */ #ifdef CONFIG_HIGH_RES_TIMERS +static void update_rq_stats(void) +{ + unsigned long jiffy_gap = 0; + unsigned int rq_avg = 0; + unsigned long flags = 0; + + jiffy_gap = jiffies - rq_info.rq_poll_last_jiffy; + + if (jiffy_gap >= rq_info.rq_poll_jiffies) { + + spin_lock_irqsave(&rq_lock, flags); + + if (!rq_info.rq_avg) + rq_info.rq_poll_total_jiffies = 0; + + rq_avg = nr_running() * 10; + + if (rq_info.rq_poll_total_jiffies) { + rq_avg = (rq_avg * jiffy_gap) + + (rq_info.rq_avg * + rq_info.rq_poll_total_jiffies); + do_div(rq_avg, + rq_info.rq_poll_total_jiffies + jiffy_gap); + } + + rq_info.rq_avg = rq_avg; + rq_info.rq_poll_total_jiffies += jiffy_gap; + rq_info.rq_poll_last_jiffy = jiffies; + + spin_unlock_irqrestore(&rq_lock, flags); + } +} + +static void wakeup_user(void) +{ + unsigned long jiffy_gap; + + jiffy_gap = jiffies - rq_info.def_timer_last_jiffy; + + if (jiffy_gap >= rq_info.def_timer_jiffies) { + rq_info.def_timer_last_jiffy = jiffies; + queue_work(rq_wq, &rq_info.def_timer_work); + } +} + /* * We rearm the timer until we get disabled by the idle code. * Called with interrupts disabled. @@ -1093,9 +1164,23 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) * Do not call, when we are not in irq context and have * no valid regs pointer */ - if (regs) + if (regs) { tick_sched_handle(ts, regs); + if (rq_info.init == 1 && + tick_do_timer_cpu == smp_processor_id()) { + /* + * update run queue statistics + */ + update_rq_stats(); + + /* + * wakeup user if needed + */ + wakeup_user(); + } + } + /* No need to reprogram if we are in idle or full dynticks mode */ if (unlikely(ts->tick_stopped)) return HRTIMER_NORESTART; @@ -1208,3 +1293,8 @@ int tick_check_oneshot_change(int allow_nohz) tick_nohz_switch_to_nohz(); return 0; } + +ktime_t * get_next_event_cpu(unsigned int cpu) +{ + return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event); +} diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 523fe1669d4c..903705687b52 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -94,12 +94,16 @@ struct tvec_base { struct tvec tv5; } ____cacheline_aligned; +static inline void __run_timers(struct tvec_base *base); static DEFINE_PER_CPU(struct tvec_base, tvec_bases); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) unsigned int sysctl_timer_migration = 1; +struct tvec_base tvec_base_deferrable; +static atomic_t deferrable_pending; + void timers_update_migration(bool update_nohz) { bool on = sysctl_timer_migration && tick_nohz_active; @@ -135,18 +139,66 @@ int timer_migration_handler(struct ctl_table *table, int write, } static inline struct tvec_base *get_target_base(struct tvec_base *base, - int pinned) + int pinned, u32 timer_flags) { + if (!pinned && !(timer_flags & TIMER_PINNED_ON_CPU) && + (timer_flags & TIMER_DEFERRABLE)) + return &tvec_base_deferrable; if (pinned || !base->migration_enabled) return this_cpu_ptr(&tvec_bases); return per_cpu_ptr(&tvec_bases, get_nohz_timer_target()); } + +static inline void __run_deferrable_timers(void) +{ + if ((atomic_cmpxchg(&deferrable_pending, 1, 0) && + tick_do_timer_cpu == TICK_DO_TIMER_NONE) || + tick_do_timer_cpu == smp_processor_id()) { + if (time_after_eq(jiffies, + tvec_base_deferrable.timer_jiffies)) + __run_timers(&tvec_base_deferrable); + } +} + +static inline void init_timer_deferrable_global(void) +{ + tvec_base_deferrable.cpu = nr_cpu_ids; + spin_lock_init(&tvec_base_deferrable.lock); + tvec_base_deferrable.timer_jiffies = jiffies; + tvec_base_deferrable.next_timer = tvec_base_deferrable.timer_jiffies; +} + +static inline struct tvec_base *get_timer_base(u32 timer_flags) +{ + if (!(timer_flags & TIMER_PINNED_ON_CPU) && + timer_flags & TIMER_DEFERRABLE) + return &tvec_base_deferrable; + else + return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK); +} #else static inline struct tvec_base *get_target_base(struct tvec_base *base, - int pinned) + int pinned, u32 timer_flags) { return this_cpu_ptr(&tvec_bases); } + +static inline void __run_deferrable_timers(void) +{ +} + +static inline void init_timer_deferrable_global(void) +{ + /* + * initialize cpu unbound deferrable timer base only when CONFIG_SMP. + * UP kernel handles the timers with cpu 0 timer base. + */ +} + +static inline struct tvec_base *get_timer_base(u32 timer_flags) +{ + return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK); +} #endif static unsigned long round_jiffies_common(unsigned long j, int cpu, @@ -448,38 +500,6 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) } } -#ifdef CONFIG_TIMER_STATS -void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) -{ - if (timer->start_site) - return; - - timer->start_site = addr; - memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); - timer->start_pid = current->pid; -} - -static void timer_stats_account_timer(struct timer_list *timer) -{ - void *site; - - /* - * start_site can be concurrently reset by - * timer_stats_timer_clear_start_info() - */ - site = READ_ONCE(timer->start_site); - if (likely(!site)) - return; - - timer_stats_update_stats(timer, timer->start_pid, site, - timer->function, timer->start_comm, - timer->flags); -} - -#else -static void timer_stats_account_timer(struct timer_list *timer) {} -#endif - #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static struct debug_obj_descr timer_debug_descr; @@ -682,11 +702,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags, timer->entry.pprev = NULL; timer->flags = flags | raw_smp_processor_id(); timer->slack = -1; -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; - timer->start_pid = -1; - memset(timer->start_comm, 0, TASK_COMM_LEN); -#endif lockdep_init_map(&timer->lockdep_map, name, key, 0); } @@ -775,7 +790,7 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, tf = READ_ONCE(timer->flags); if (!(tf & TIMER_MIGRATING)) { - base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK); + base = get_timer_base(tf); spin_lock_irqsave(&base->lock, *flags); if (timer->flags == tf) return base; @@ -793,7 +808,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned long flags; int ret = 0; - timer_stats_timer_set_start_info(timer); BUG_ON(!timer->function); base = lock_timer_base(timer, &flags); @@ -804,7 +818,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, debug_activate(timer, expires); - new_base = get_target_base(base, pinned); + new_base = get_target_base(base, pinned, timer->flags); if (base != new_base) { /* @@ -826,6 +840,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, } } + if (pinned == TIMER_PINNED) + timer->flags |= TIMER_PINNED_ON_CPU; + else + timer->flags &= ~TIMER_PINNED_ON_CPU; timer->expires = expires; internal_add_timer(base, timer); @@ -988,7 +1006,6 @@ void add_timer_on(struct timer_list *timer, int cpu) struct tvec_base *base; unsigned long flags; - timer_stats_timer_set_start_info(timer); BUG_ON(timer_pending(timer) || !timer->function); /* @@ -1007,6 +1024,7 @@ void add_timer_on(struct timer_list *timer, int cpu) (timer->flags & ~TIMER_BASEMASK) | cpu); } + timer->flags |= TIMER_PINNED_ON_CPU; debug_activate(timer, timer->expires); internal_add_timer(base, timer); spin_unlock_irqrestore(&base->lock, flags); @@ -1032,7 +1050,6 @@ int del_timer(struct timer_list *timer) debug_assert_init(timer); - timer_stats_timer_clear_start_info(timer); if (timer_pending(timer)) { base = lock_timer_base(timer, &flags); ret = detach_if_pending(timer, base, true); @@ -1060,10 +1077,9 @@ int try_to_del_timer_sync(struct timer_list *timer) base = lock_timer_base(timer, &flags); - if (base->running_timer != timer) { - timer_stats_timer_clear_start_info(timer); + if (base->running_timer != timer) ret = detach_if_pending(timer, base, true); - } + spin_unlock_irqrestore(&base->lock, flags); return ret; @@ -1247,8 +1263,6 @@ static inline void __run_timers(struct tvec_base *base) data = timer->data; irqsafe = timer->flags & TIMER_IRQSAFE; - timer_stats_account_timer(timer); - base->running_timer = timer; detach_expired_timer(timer, base); @@ -1376,6 +1390,30 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC; } +#ifdef CONFIG_SMP +/* + * check_pending_deferrable_timers - Check for unbound deferrable timer expiry. + * @cpu - Current CPU + * + * The function checks whether any global deferrable pending timers + * are exipired or not. This function does not check cpu bounded + * diferrable pending timers expiry. + * + * The function returns true when a cpu unbounded deferrable timer is expired. + */ +bool check_pending_deferrable_timers(int cpu) +{ + if (cpu == tick_do_timer_cpu || + tick_do_timer_cpu == TICK_DO_TIMER_NONE) { + if (time_after_eq(jiffies, tvec_base_deferrable.timer_jiffies) + && !atomic_cmpxchg(&deferrable_pending, 0, 1)) { + return true; + } + } + return false; +} +#endif + /** * get_next_timer_interrupt - return the time (clock mono) of the next timer * @basej: base time jiffies @@ -1440,6 +1478,8 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = this_cpu_ptr(&tvec_bases); + __run_deferrable_timers(); + if (time_after_eq(jiffies, base->timer_jiffies)) __run_timers(base); } @@ -1482,11 +1522,12 @@ static void process_timeout(unsigned long __data) * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to - * pass before the routine returns. The routine will return 0 + * pass before the routine returns unless the current task is explicitly + * woken up, (e.g. by wake_up_process())". * * %TASK_INTERRUPTIBLE - the routine may return early if a signal is - * delivered to the current task. In this case the remaining time - * in jiffies will be returned, or 0 if the timer expired in time + * delivered to the current task or the current task is explicitly woken + * up. * * The current task state is guaranteed to be TASK_RUNNING when this * routine returns. @@ -1495,7 +1536,9 @@ static void process_timeout(unsigned long __data) * the CPU away without a bound on the timeout. In this case the return * value will be %MAX_SCHEDULE_TIMEOUT. * - * In all cases the return value is guaranteed to be non-negative. + * Returns 0 when the timer has expired otherwise the remaining time in + * jiffies will be returned. In all cases the return value is guaranteed + * to be non-negative. */ signed long __sched schedule_timeout(signed long timeout) { @@ -1573,56 +1616,82 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -#ifdef CONFIG_HOTPLUG_CPU -static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head) +#if defined(CONFIG_HOTPLUG_CPU) +static void migrate_timer_list(struct tvec_base *new_base, + struct hlist_head *head, bool remove_pinned) { struct timer_list *timer; int cpu = new_base->cpu; + struct hlist_node *n; + int is_pinned; + + hlist_for_each_entry_safe(timer, n, head, entry) { + is_pinned = timer->flags & TIMER_PINNED_ON_CPU; + if (!remove_pinned && is_pinned) + continue; - while (!hlist_empty(head)) { - timer = hlist_entry(head->first, struct timer_list, entry); - /* We ignore the accounting on the dying cpu */ - detach_timer(timer, false); + detach_if_pending(timer, get_timer_base(timer->flags), false); timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; internal_add_timer(new_base, timer); } } -static void migrate_timers(int cpu) +static void __migrate_timers(int cpu, bool remove_pinned) { struct tvec_base *old_base; struct tvec_base *new_base; + unsigned long flags; int i; - BUG_ON(cpu_online(cpu)); old_base = per_cpu_ptr(&tvec_bases, cpu); new_base = get_cpu_ptr(&tvec_bases); /* * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ - spin_lock_irq(&new_base->lock); + spin_lock_irqsave(&new_base->lock, flags); spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); - BUG_ON(old_base->running_timer); + /* + * If we're in the hotplug path, kill the system if there's a running + * timer. It's ok to have a running timer in the isolation case - the + * currently running or just expired timers are off of the timer wheel + * and so everything else can be migrated off. + */ + if (!cpu_online(cpu)) + BUG_ON(old_base->running_timer); for (i = 0; i < TVR_SIZE; i++) - migrate_timer_list(new_base, old_base->tv1.vec + i); + migrate_timer_list(new_base, old_base->tv1.vec + i, + remove_pinned); for (i = 0; i < TVN_SIZE; i++) { - migrate_timer_list(new_base, old_base->tv2.vec + i); - migrate_timer_list(new_base, old_base->tv3.vec + i); - migrate_timer_list(new_base, old_base->tv4.vec + i); - migrate_timer_list(new_base, old_base->tv5.vec + i); + migrate_timer_list(new_base, old_base->tv2.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv3.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv4.vec + i, + remove_pinned); + migrate_timer_list(new_base, old_base->tv5.vec + i, + remove_pinned); } - old_base->active_timers = 0; - old_base->all_timers = 0; - spin_unlock(&old_base->lock); - spin_unlock_irq(&new_base->lock); + spin_unlock_irqrestore(&new_base->lock, flags); put_cpu_ptr(&tvec_bases); } +/* Migrate timers from 'cpu' to this_cpu */ +static void migrate_timers(int cpu) +{ + BUG_ON(cpu_online(cpu)); + __migrate_timers(cpu, true); +} + +void timer_quiesce_cpu(void *cpup) +{ + __migrate_timers(*(int *)cpup, false); +} + static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { @@ -1663,12 +1732,13 @@ static void __init init_timer_cpus(void) for_each_possible_cpu(cpu) init_timer_cpu(cpu); + + init_timer_deferrable_global(); } void __init init_timers(void) { init_timer_cpus(); - init_timer_stats(); timer_register_cpu_notifier(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } @@ -1702,16 +1772,6 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); -static void __sched do_usleep_range(unsigned long min, unsigned long max) -{ - ktime_t kmin; - u64 delta; - - kmin = ktime_set(0, min * NSEC_PER_USEC); - delta = (u64)(max - min) * NSEC_PER_USEC; - schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL); -} - /** * usleep_range - Drop in replacement for udelay where wakeup is flexible * @min: Minimum time in usecs to sleep @@ -1719,7 +1779,14 @@ static void __sched do_usleep_range(unsigned long min, unsigned long max) */ void __sched usleep_range(unsigned long min, unsigned long max) { - __set_current_state(TASK_UNINTERRUPTIBLE); - do_usleep_range(min, max); + ktime_t exp = ktime_add_us(ktime_get(), min); + u64 delta = (u64)(max - min) * NSEC_PER_USEC; + + for (;;) { + __set_current_state(TASK_UNINTERRUPTIBLE); + /* Do not return before the requested sleep time has elapsed */ + if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) + break; + } } EXPORT_SYMBOL(usleep_range); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index ef4f16e81283..998d730401b3 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -63,21 +63,11 @@ static void print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer, int idx, u64 now) { -#ifdef CONFIG_TIMER_STATS - char tmp[TASK_COMM_LEN + 1]; -#endif SEQ_printf(m, " #%d: ", idx); print_name_offset(m, taddr); SEQ_printf(m, ", "); print_name_offset(m, timer->function); SEQ_printf(m, ", S:%02x", timer->state); -#ifdef CONFIG_TIMER_STATS - SEQ_printf(m, ", "); - print_name_offset(m, timer->start_site); - memcpy(tmp, timer->start_comm, TASK_COMM_LEN); - tmp[TASK_COMM_LEN] = 0; - SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); -#endif SEQ_printf(m, "\n"); SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n", (unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)), diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c deleted file mode 100644 index 1adecb4b87c8..000000000000 --- a/kernel/time/timer_stats.c +++ /dev/null @@ -1,425 +0,0 @@ -/* - * kernel/time/timer_stats.c - * - * Collect timer usage statistics. - * - * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar - * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> - * - * timer_stats is based on timer_top, a similar functionality which was part of - * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the - * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based - * on dynamic allocation of the statistics entries and linear search based - * lookup combined with a global lock, rather than the static array, hash - * and per-CPU locking which is used by timer_stats. It was written for the - * pre hrtimer kernel code and therefore did not take hrtimers into account. - * Nevertheless it provided the base for the timer_stats implementation and - * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks - * for this effort. - * - * timer_top.c is - * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus - * Written by Daniel Petrini <d.pensator@gmail.com> - * timer_top.c was released under the GNU General Public License version 2 - * - * We export the addresses and counting of timer functions being called, - * the pid and cmdline from the owner process if applicable. - * - * Start/stop data collection: - * # echo [1|0] >/proc/timer_stats - * - * Display the information collected so far: - * # cat /proc/timer_stats - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/proc_fs.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/sched.h> -#include <linux/seq_file.h> -#include <linux/kallsyms.h> - -#include <asm/uaccess.h> - -/* - * This is our basic unit of interest: a timer expiry event identified - * by the timer, its start/expire functions and the PID of the task that - * started the timer. We count the number of times an event happens: - */ -struct entry { - /* - * Hash list: - */ - struct entry *next; - - /* - * Hash keys: - */ - void *timer; - void *start_func; - void *expire_func; - pid_t pid; - - /* - * Number of timeout events: - */ - unsigned long count; - u32 flags; - - /* - * We save the command-line string to preserve - * this information past task exit: - */ - char comm[TASK_COMM_LEN + 1]; - -} ____cacheline_aligned_in_smp; - -/* - * Spinlock protecting the tables - not taken during lookup: - */ -static DEFINE_RAW_SPINLOCK(table_lock); - -/* - * Per-CPU lookup locks for fast hash lookup: - */ -static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock); - -/* - * Mutex to serialize state changes with show-stats activities: - */ -static DEFINE_MUTEX(show_mutex); - -/* - * Collection status, active/inactive: - */ -int __read_mostly timer_stats_active; - -/* - * Beginning/end timestamps of measurement: - */ -static ktime_t time_start, time_stop; - -/* - * tstat entry structs only get allocated while collection is - * active and never freed during that time - this simplifies - * things quite a bit. - * - * They get freed when a new collection period is started. - */ -#define MAX_ENTRIES_BITS 10 -#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) - -static unsigned long nr_entries; -static struct entry entries[MAX_ENTRIES]; - -static atomic_t overflow_count; - -/* - * The entries are in a hash-table, for fast lookup: - */ -#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) -#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) -#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) - -#define __tstat_hashfn(entry) \ - (((unsigned long)(entry)->timer ^ \ - (unsigned long)(entry)->start_func ^ \ - (unsigned long)(entry)->expire_func ^ \ - (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) - -#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) - -static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; - -static void reset_entries(void) -{ - nr_entries = 0; - memset(entries, 0, sizeof(entries)); - memset(tstat_hash_table, 0, sizeof(tstat_hash_table)); - atomic_set(&overflow_count, 0); -} - -static struct entry *alloc_entry(void) -{ - if (nr_entries >= MAX_ENTRIES) - return NULL; - - return entries + nr_entries++; -} - -static int match_entries(struct entry *entry1, struct entry *entry2) -{ - return entry1->timer == entry2->timer && - entry1->start_func == entry2->start_func && - entry1->expire_func == entry2->expire_func && - entry1->pid == entry2->pid; -} - -/* - * Look up whether an entry matching this item is present - * in the hash already. Must be called with irqs off and the - * lookup lock held: - */ -static struct entry *tstat_lookup(struct entry *entry, char *comm) -{ - struct entry **head, *curr, *prev; - - head = tstat_hashentry(entry); - curr = *head; - - /* - * The fastpath is when the entry is already hashed, - * we do this with the lookup lock held, but with the - * table lock not held: - */ - while (curr) { - if (match_entries(curr, entry)) - return curr; - - curr = curr->next; - } - /* - * Slowpath: allocate, set up and link a new hash entry: - */ - prev = NULL; - curr = *head; - - raw_spin_lock(&table_lock); - /* - * Make sure we have not raced with another CPU: - */ - while (curr) { - if (match_entries(curr, entry)) - goto out_unlock; - - prev = curr; - curr = curr->next; - } - - curr = alloc_entry(); - if (curr) { - *curr = *entry; - curr->count = 0; - curr->next = NULL; - memcpy(curr->comm, comm, TASK_COMM_LEN); - - smp_mb(); /* Ensure that curr is initialized before insert */ - - if (prev) - prev->next = curr; - else - *head = curr; - } - out_unlock: - raw_spin_unlock(&table_lock); - - return curr; -} - -/** - * timer_stats_update_stats - Update the statistics for a timer. - * @timer: pointer to either a timer_list or a hrtimer - * @pid: the pid of the task which set up the timer - * @startf: pointer to the function which did the timer setup - * @timerf: pointer to the timer callback function of the timer - * @comm: name of the process which set up the timer - * @tflags: The flags field of the timer - * - * When the timer is already registered, then the event counter is - * incremented. Otherwise the timer is registered in a free slot. - */ -void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, u32 tflags) -{ - /* - * It doesn't matter which lock we take: - */ - raw_spinlock_t *lock; - struct entry *entry, input; - unsigned long flags; - - if (likely(!timer_stats_active)) - return; - - lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id()); - - input.timer = timer; - input.start_func = startf; - input.expire_func = timerf; - input.pid = pid; - input.flags = tflags; - - raw_spin_lock_irqsave(lock, flags); - if (!timer_stats_active) - goto out_unlock; - - entry = tstat_lookup(&input, comm); - if (likely(entry)) - entry->count++; - else - atomic_inc(&overflow_count); - - out_unlock: - raw_spin_unlock_irqrestore(lock, flags); -} - -static void print_name_offset(struct seq_file *m, unsigned long addr) -{ - char symname[KSYM_NAME_LEN]; - - if (lookup_symbol_name(addr, symname) < 0) - seq_printf(m, "<%p>", (void *)addr); - else - seq_printf(m, "%s", symname); -} - -static int tstats_show(struct seq_file *m, void *v) -{ - struct timespec period; - struct entry *entry; - unsigned long ms; - long events = 0; - ktime_t time; - int i; - - mutex_lock(&show_mutex); - /* - * If still active then calculate up to now: - */ - if (timer_stats_active) - time_stop = ktime_get(); - - time = ktime_sub(time_stop, time_start); - - period = ktime_to_timespec(time); - ms = period.tv_nsec / 1000000; - - seq_puts(m, "Timer Stats Version: v0.3\n"); - seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms); - if (atomic_read(&overflow_count)) - seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count)); - seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive"); - - for (i = 0; i < nr_entries; i++) { - entry = entries + i; - if (entry->flags & TIMER_DEFERRABLE) { - seq_printf(m, "%4luD, %5d %-16s ", - entry->count, entry->pid, entry->comm); - } else { - seq_printf(m, " %4lu, %5d %-16s ", - entry->count, entry->pid, entry->comm); - } - - print_name_offset(m, (unsigned long)entry->start_func); - seq_puts(m, " ("); - print_name_offset(m, (unsigned long)entry->expire_func); - seq_puts(m, ")\n"); - - events += entry->count; - } - - ms += period.tv_sec * 1000; - if (!ms) - ms = 1; - - if (events && period.tv_sec) - seq_printf(m, "%ld total events, %ld.%03ld events/sec\n", - events, events * 1000 / ms, - (events * 1000000 / ms) % 1000); - else - seq_printf(m, "%ld total events\n", events); - - mutex_unlock(&show_mutex); - - return 0; -} - -/* - * After a state change, make sure all concurrent lookup/update - * activities have stopped: - */ -static void sync_access(void) -{ - unsigned long flags; - int cpu; - - for_each_online_cpu(cpu) { - raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); - - raw_spin_lock_irqsave(lock, flags); - /* nothing */ - raw_spin_unlock_irqrestore(lock, flags); - } -} - -static ssize_t tstats_write(struct file *file, const char __user *buf, - size_t count, loff_t *offs) -{ - char ctl[2]; - - if (count != 2 || *offs) - return -EINVAL; - - if (copy_from_user(ctl, buf, count)) - return -EFAULT; - - mutex_lock(&show_mutex); - switch (ctl[0]) { - case '0': - if (timer_stats_active) { - timer_stats_active = 0; - time_stop = ktime_get(); - sync_access(); - } - break; - case '1': - if (!timer_stats_active) { - reset_entries(); - time_start = ktime_get(); - smp_mb(); - timer_stats_active = 1; - } - break; - default: - count = -EINVAL; - } - mutex_unlock(&show_mutex); - - return count; -} - -static int tstats_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, tstats_show, NULL); -} - -static const struct file_operations tstats_fops = { - .open = tstats_open, - .read = seq_read, - .write = tstats_write, - .llseek = seq_lseek, - .release = single_release, -}; - -void __init init_timer_stats(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); -} - -static int __init init_tstats_procfs(void) -{ - struct proc_dir_entry *pe; - - pe = proc_create("timer_stats", 0644, NULL, &tstats_fops); - if (!pe) - return -ENOMEM; - return 0; -} -__initcall(init_tstats_procfs); |