summaryrefslogtreecommitdiff
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/Makefile3
-rw-r--r--kernel/time/alarmtimer.c268
-rw-r--r--kernel/time/clocksource.c48
-rw-r--r--kernel/time/hrtimer.c134
-rw-r--r--kernel/time/posix-cpu-timers.c2
-rw-r--r--kernel/time/sched_clock.c19
-rw-r--r--kernel/time/tick-sched.c98
-rw-r--r--kernel/time/timer.c237
-rw-r--r--kernel/time/timer_list.c10
-rw-r--r--kernel/time/timer_stats.c425
10 files changed, 635 insertions, 609 deletions
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 49eca0beed32..b9b881ebaff3 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -9,6 +9,7 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
endif
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
-obj-$(CONFIG_TIMER_STATS) += timer_stats.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
+
+ccflags-y += -Idrivers/cpuidle
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 6fcc367ad531..ceec77c652b5 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -26,6 +26,11 @@
#include <linux/workqueue.h>
#include <linux/freezer.h>
+#ifdef CONFIG_MSM_PM
+#include "lpm-levels.h"
+#endif
+#include <linux/workqueue.h>
+
/**
* struct alarm_base - Alarm timer bases
* @lock: Lock for syncrhonized access to the base
@@ -46,14 +51,130 @@ static ktime_t freezer_delta;
static DEFINE_SPINLOCK(freezer_delta_lock);
static struct wakeup_source *ws;
+static struct delayed_work work;
+static struct workqueue_struct *power_off_alarm_workqueue;
#ifdef CONFIG_RTC_CLASS
/* rtc timer and device for setting alarm wakeups at suspend */
static struct rtc_timer rtctimer;
static struct rtc_device *rtcdev;
static DEFINE_SPINLOCK(rtcdev_lock);
+static struct mutex power_on_alarm_lock;
+static struct alarm init_alarm;
/**
+ * power_on_alarm_init - Init power on alarm value
+ *
+ * Read rtc alarm value after device booting up and add this alarm
+ * into alarm queue.
+ */
+void power_on_alarm_init(void)
+{
+ struct rtc_wkalrm rtc_alarm;
+ struct rtc_time rt;
+ unsigned long alarm_time;
+ struct rtc_device *rtc;
+ ktime_t alarm_ktime;
+
+ rtc = alarmtimer_get_rtcdev();
+
+ if (!rtc)
+ return;
+
+ rtc_read_alarm(rtc, &rtc_alarm);
+ rt = rtc_alarm.time;
+
+ rtc_tm_to_time(&rt, &alarm_time);
+
+ if (alarm_time) {
+ alarm_ktime = ktime_set(alarm_time, 0);
+ alarm_init(&init_alarm, ALARM_POWEROFF_REALTIME, NULL);
+ alarm_start(&init_alarm, alarm_ktime);
+ }
+}
+
+/**
+ * set_power_on_alarm - set power on alarm value into rtc register
+ *
+ * Get the soonest power off alarm timer and set the alarm value into rtc
+ * register.
+ */
+void set_power_on_alarm(void)
+{
+ int rc;
+ struct timespec wall_time, alarm_ts;
+ long alarm_secs = 0l;
+ long rtc_secs, alarm_time, alarm_delta;
+ struct rtc_time rtc_time;
+ struct rtc_wkalrm alarm;
+ struct rtc_device *rtc;
+ struct timerqueue_node *next;
+ unsigned long flags;
+ struct alarm_base *base = &alarm_bases[ALARM_POWEROFF_REALTIME];
+
+ rc = mutex_lock_interruptible(&power_on_alarm_lock);
+ if (rc != 0)
+ return;
+
+ spin_lock_irqsave(&base->lock, flags);
+ next = timerqueue_getnext(&base->timerqueue);
+ spin_unlock_irqrestore(&base->lock, flags);
+
+ if (next) {
+ alarm_ts = ktime_to_timespec(next->expires);
+ alarm_secs = alarm_ts.tv_sec;
+ }
+
+ if (!alarm_secs)
+ goto disable_alarm;
+
+ getnstimeofday(&wall_time);
+
+ /*
+ * alarm_secs have to be bigger than "wall_time +1".
+ * It is to make sure that alarm time will be always
+ * bigger than wall time.
+ */
+ if (alarm_secs <= wall_time.tv_sec + 1)
+ goto disable_alarm;
+
+ rtc = alarmtimer_get_rtcdev();
+ if (!rtc)
+ goto exit;
+
+ rtc_read_time(rtc, &rtc_time);
+ rtc_tm_to_time(&rtc_time, &rtc_secs);
+ alarm_delta = wall_time.tv_sec - rtc_secs;
+ alarm_time = alarm_secs - alarm_delta;
+
+ rtc_time_to_tm(alarm_time, &alarm.time);
+ alarm.enabled = 1;
+ rc = rtc_set_alarm(rtcdev, &alarm);
+ if (rc)
+ goto disable_alarm;
+
+ mutex_unlock(&power_on_alarm_lock);
+ return;
+
+disable_alarm:
+ rtc_alarm_irq_enable(rtcdev, 0);
+exit:
+ mutex_unlock(&power_on_alarm_lock);
+}
+
+static void alarmtimer_triggered_func(void *p)
+{
+ struct rtc_device *rtc = rtcdev;
+
+ if (!(rtc->irq_data & RTC_AF))
+ return;
+ __pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
+}
+
+static struct rtc_task alarmtimer_rtc_task = {
+ .func = alarmtimer_triggered_func
+};
+/**
* alarmtimer_get_rtcdev - Return selected rtcdevice
*
* This function returns the rtc device to use for wakealarms.
@@ -63,7 +184,7 @@ static DEFINE_SPINLOCK(rtcdev_lock);
struct rtc_device *alarmtimer_get_rtcdev(void)
{
unsigned long flags;
- struct rtc_device *ret;
+ struct rtc_device *ret = NULL;
spin_lock_irqsave(&rtcdev_lock, flags);
ret = rtcdev;
@@ -77,33 +198,48 @@ static int alarmtimer_rtc_add_device(struct device *dev,
struct class_interface *class_intf)
{
unsigned long flags;
+ int err = 0;
struct rtc_device *rtc = to_rtc_device(dev);
-
if (rtcdev)
return -EBUSY;
-
if (!rtc->ops->set_alarm)
return -1;
- if (!device_may_wakeup(rtc->dev.parent))
- return -1;
spin_lock_irqsave(&rtcdev_lock, flags);
if (!rtcdev) {
+ err = rtc_irq_register(rtc, &alarmtimer_rtc_task);
+ if (err)
+ goto rtc_irq_reg_err;
rtcdev = rtc;
/* hold a reference so it doesn't go away */
get_device(dev);
}
+
+rtc_irq_reg_err:
spin_unlock_irqrestore(&rtcdev_lock, flags);
- return 0;
+ return err;
+
+}
+
+static void alarmtimer_rtc_remove_device(struct device *dev,
+ struct class_interface *class_intf)
+{
+ if (rtcdev && dev == &rtcdev->dev) {
+ rtc_irq_unregister(rtcdev, &alarmtimer_rtc_task);
+ rtcdev = NULL;
+ }
}
static inline void alarmtimer_rtc_timer_init(void)
{
+ mutex_init(&power_on_alarm_lock);
+
rtc_timer_init(&rtctimer, NULL, NULL);
}
static struct class_interface alarmtimer_rtc_interface = {
.add_dev = &alarmtimer_rtc_add_device,
+ .remove_dev = &alarmtimer_rtc_remove_device,
};
static int alarmtimer_rtc_interface_setup(void)
@@ -124,8 +260,14 @@ struct rtc_device *alarmtimer_get_rtcdev(void)
static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
static inline void alarmtimer_rtc_interface_remove(void) { }
static inline void alarmtimer_rtc_timer_init(void) { }
+void set_power_on_alarm(void) { }
#endif
+static void alarm_work_func(struct work_struct *unused)
+{
+ set_power_on_alarm();
+}
+
/**
* alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue
* @base: pointer to the base where the timer is being run
@@ -195,6 +337,10 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
}
spin_unlock_irqrestore(&base->lock, flags);
+ /* set next power off alarm */
+ if (alarm->type == ALARM_POWEROFF_REALTIME)
+ queue_delayed_work(power_off_alarm_workqueue, &work, 0);
+
return ret;
}
@@ -217,6 +363,68 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
* set an rtc timer to fire that far into the future, which
* will wake us from suspend.
*/
+#if defined(CONFIG_RTC_DRV_QPNP) && defined(CONFIG_MSM_PM)
+static int alarmtimer_suspend(struct device *dev)
+{
+ struct rtc_time tm;
+ ktime_t min, now;
+ unsigned long flags;
+ struct rtc_device *rtc;
+ int i;
+ int ret = 0;
+
+ spin_lock_irqsave(&freezer_delta_lock, flags);
+ min = freezer_delta;
+ freezer_delta = ktime_set(0, 0);
+ spin_unlock_irqrestore(&freezer_delta_lock, flags);
+
+ rtc = alarmtimer_get_rtcdev();
+ /* If we have no rtcdev, just return */
+ if (!rtc)
+ return 0;
+
+ /* Find the soonest timer to expire*/
+ for (i = 0; i < ALARM_NUMTYPE; i++) {
+ struct alarm_base *base = &alarm_bases[i];
+ struct timerqueue_node *next;
+ ktime_t delta;
+
+ spin_lock_irqsave(&base->lock, flags);
+ next = timerqueue_getnext(&base->timerqueue);
+ spin_unlock_irqrestore(&base->lock, flags);
+ if (!next)
+ continue;
+ delta = ktime_sub(next->expires, base->gettime());
+ if (!min.tv64 || (delta.tv64 < min.tv64))
+ min = delta;
+ }
+ if (min.tv64 == 0)
+ return 0;
+
+ if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
+ __pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
+ return -EBUSY;
+ }
+
+ /* Setup a timer to fire that far in the future */
+ rtc_timer_cancel(rtc, &rtctimer);
+ rtc_read_time(rtc, &tm);
+ now = rtc_tm_to_ktime(tm);
+ now = ktime_add(now, min);
+ if (poweron_alarm) {
+ uint64_t msec = 0;
+
+ msec = ktime_to_ms(min);
+ lpm_suspend_wake_time(msec);
+ } else {
+ /* Set alarm, if in the past reject suspend briefly to handle */
+ ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
+ if (ret < 0)
+ __pm_wakeup_event(ws, MSEC_PER_SEC);
+ }
+ return ret;
+}
+#else
static int alarmtimer_suspend(struct device *dev)
{
struct rtc_time tm;
@@ -226,6 +434,8 @@ static int alarmtimer_suspend(struct device *dev)
int i;
int ret;
+ cancel_delayed_work_sync(&work);
+
spin_lock_irqsave(&freezer_delta_lock, flags);
min = freezer_delta;
freezer_delta = ktime_set(0, 0);
@@ -271,11 +481,31 @@ static int alarmtimer_suspend(struct device *dev)
__pm_wakeup_event(ws, MSEC_PER_SEC);
return ret;
}
+#endif
+static int alarmtimer_resume(struct device *dev)
+{
+ struct rtc_device *rtc;
+
+ rtc = alarmtimer_get_rtcdev();
+ /* If we have no rtcdev, just return */
+ if (!rtc)
+ return 0;
+ rtc_timer_cancel(rtc, &rtctimer);
+
+ queue_delayed_work(power_off_alarm_workqueue, &work, 0);
+ return 0;
+}
+
#else
static int alarmtimer_suspend(struct device *dev)
{
return 0;
}
+
+static int alarmtimer_resume(struct device *dev)
+{
+ return 0;
+}
#endif
static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
@@ -443,12 +673,14 @@ EXPORT_SYMBOL_GPL(alarm_forward_now);
* clock2alarm - helper that converts from clockid to alarmtypes
* @clockid: clockid.
*/
-static enum alarmtimer_type clock2alarm(clockid_t clockid)
+enum alarmtimer_type clock2alarm(clockid_t clockid)
{
if (clockid == CLOCK_REALTIME_ALARM)
return ALARM_REALTIME;
if (clockid == CLOCK_BOOTTIME_ALARM)
return ALARM_BOOTTIME;
+ if (clockid == CLOCK_POWEROFF_ALARM)
+ return ALARM_POWEROFF_REALTIME;
return -1;
}
@@ -809,6 +1041,7 @@ out:
/* Suspend hook structures */
static const struct dev_pm_ops alarmtimer_pm_ops = {
.suspend = alarmtimer_suspend,
+ .resume = alarmtimer_resume,
};
static struct platform_driver alarmtimer_driver = {
@@ -843,10 +1076,13 @@ static int __init alarmtimer_init(void)
posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
+ posix_timers_register_clock(CLOCK_POWEROFF_ALARM, &alarm_clock);
/* Initialize alarm bases */
alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME;
alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real;
+ alarm_bases[ALARM_POWEROFF_REALTIME].base_clockid = CLOCK_REALTIME;
+ alarm_bases[ALARM_POWEROFF_REALTIME].gettime = &ktime_get_real;
alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME;
alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime;
for (i = 0; i < ALARM_NUMTYPE; i++) {
@@ -868,8 +1104,24 @@ static int __init alarmtimer_init(void)
goto out_drv;
}
ws = wakeup_source_register("alarmtimer");
- return 0;
+ if (!ws) {
+ error = -ENOMEM;
+ goto out_ws;
+ }
+
+ INIT_DELAYED_WORK(&work, alarm_work_func);
+ power_off_alarm_workqueue =
+ create_singlethread_workqueue("power_off_alarm");
+ if (!power_off_alarm_workqueue) {
+ error = -ENOMEM;
+ goto out_wq;
+ }
+ return 0;
+out_wq:
+ wakeup_source_unregister(ws);
+out_ws:
+ platform_device_unregister(pdev);
out_drv:
platform_driver_unregister(&alarmtimer_driver);
out_if:
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index b98810d2f3b4..89cc82a38e4d 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -108,7 +108,7 @@ static int finished_booting;
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
-static void clocksource_select(void);
+static void clocksource_select(bool force);
static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
@@ -415,7 +415,7 @@ static int clocksource_watchdog_kthread(void *data)
{
mutex_lock(&clocksource_mutex);
if (__clocksource_watchdog_kthread())
- clocksource_select();
+ clocksource_select(false);
mutex_unlock(&clocksource_mutex);
return 0;
}
@@ -555,11 +555,12 @@ static inline void clocksource_update_max_deferment(struct clocksource *cs)
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
-static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
+static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur,
+ bool force)
{
struct clocksource *cs;
- if (!finished_booting || list_empty(&clocksource_list))
+ if ((!finished_booting && !force) || list_empty(&clocksource_list))
return NULL;
/*
@@ -577,13 +578,13 @@ static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
return NULL;
}
-static void __clocksource_select(bool skipcur)
+static void __clocksource_select(bool skipcur, bool force)
{
bool oneshot = tick_oneshot_mode_active();
struct clocksource *best, *cs;
/* Find the best suitable clocksource */
- best = clocksource_find_best(oneshot, skipcur);
+ best = clocksource_find_best(oneshot, skipcur, force);
if (!best)
return;
@@ -623,22 +624,40 @@ static void __clocksource_select(bool skipcur)
* Select the clocksource with the best rating, or the clocksource,
* which is selected by userspace override.
*/
-static void clocksource_select(void)
+static void clocksource_select(bool force)
{
- __clocksource_select(false);
+ return __clocksource_select(false, force);
}
static void clocksource_select_fallback(void)
{
- __clocksource_select(true);
+ __clocksource_select(true, false);
}
#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
-static inline void clocksource_select(void) { }
+
+static inline void clocksource_select(bool force) { }
static inline void clocksource_select_fallback(void) { }
#endif
+/**
+ * clocksource_select_force - Force re-selection of the best clocksource
+ * among registered clocksources
+ *
+ * clocksource_select() can't select the best clocksource before
+ * calling clocksource_done_booting() and since clocksource_select()
+ * should be called with clocksource_mutex held, provide a new API
+ * can be called from other files to select best clockrouce irrespective
+ * of finished_booting flag.
+ */
+void clocksource_select_force(void)
+{
+ mutex_lock(&clocksource_mutex);
+ clocksource_select(true);
+ mutex_unlock(&clocksource_mutex);
+}
+
/*
* clocksource_done_booting - Called near the end of core bootup
*
@@ -655,7 +674,7 @@ static int __init clocksource_done_booting(void)
* Run the watchdog first to eliminate unstable clock sources
*/
__clocksource_watchdog_kthread();
- clocksource_select();
+ clocksource_select(false);
mutex_unlock(&clocksource_mutex);
return 0;
}
@@ -744,6 +763,7 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq
}
EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
+
/**
* __clocksource_register_scale - Used to install new clocksources
* @cs: clocksource to be registered
@@ -765,7 +785,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
- clocksource_select();
+ clocksource_select(false);
clocksource_select_watchdog(false);
mutex_unlock(&clocksource_mutex);
return 0;
@@ -788,7 +808,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
{
mutex_lock(&clocksource_mutex);
__clocksource_change_rating(cs, rating);
- clocksource_select();
+ clocksource_select(false);
clocksource_select_watchdog(false);
mutex_unlock(&clocksource_mutex);
}
@@ -892,7 +912,7 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
ret = sysfs_get_uname(buf, override_name, count);
if (ret >= 0)
- clocksource_select();
+ clocksource_select(false);
mutex_unlock(&clocksource_mutex);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index d0d8717ebc3a..beafdf94b3b5 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -783,34 +783,6 @@ void hrtimers_resume(void)
clock_was_set_delayed();
}
-static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
- if (timer->start_site)
- return;
- timer->start_site = __builtin_return_address(0);
- memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
- timer->start_pid = current->pid;
-#endif
-}
-
-static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
- timer->start_site = NULL;
-#endif
-}
-
-static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
-{
-#ifdef CONFIG_TIMER_STATS
- if (likely(!timer_stats_active))
- return;
- timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
- timer->function, timer->start_comm, 0);
-#endif
-}
-
/*
* Counterpart to lock_hrtimer_base above:
*/
@@ -887,7 +859,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
base->cpu_base->active_bases |= 1 << base->index;
- timer->state = HRTIMER_STATE_ENQUEUED;
+ timer->state |= HRTIMER_STATE_ENQUEUED;
return timerqueue_add(&base->active, &timer->node);
}
@@ -907,11 +879,9 @@ static void __remove_hrtimer(struct hrtimer *timer,
u8 newstate, int reprogram)
{
struct hrtimer_cpu_base *cpu_base = base->cpu_base;
- u8 state = timer->state;
- timer->state = newstate;
- if (!(state & HRTIMER_STATE_ENQUEUED))
- return;
+ if (!(timer->state & HRTIMER_STATE_ENQUEUED))
+ goto out;
if (!timerqueue_del(&base->active, &timer->node))
cpu_base->active_bases &= ~(1 << base->index);
@@ -928,6 +898,13 @@ static void __remove_hrtimer(struct hrtimer *timer,
if (reprogram && timer == cpu_base->next_timer)
hrtimer_force_reprogram(cpu_base, 1);
#endif
+
+out:
+ /*
+ * We need to preserve PINNED state here, otherwise we may end up
+ * migrating pinned hrtimers as well.
+ */
+ timer->state = newstate | (timer->state & HRTIMER_STATE_PINNED);
}
/*
@@ -949,13 +926,13 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
* rare case and less expensive than a smp call.
*/
debug_deactivate(timer);
- timer_stats_hrtimer_clear_start_info(timer);
reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
if (!restart)
state = HRTIMER_STATE_INACTIVE;
__remove_hrtimer(timer, base, state, reprogram);
+ timer->state &= ~HRTIMER_STATE_PINNED;
return 1;
}
return 0;
@@ -1007,7 +984,9 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
/* Switch the timer base, if necessary: */
new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
- timer_stats_hrtimer_set_start_info(timer);
+ /* Update pinned state */
+ timer->state &= ~HRTIMER_STATE_PINNED;
+ timer->state |= (!!(mode & HRTIMER_MODE_PINNED)) << HRTIMER_PINNED_SHIFT;
leftmost = enqueue_hrtimer(timer, new_base);
if (!leftmost)
@@ -1150,12 +1129,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
base = hrtimer_clockid_to_base(clock_id);
timer->base = &cpu_base->clock_base[base];
timerqueue_init(&timer->node);
-
-#ifdef CONFIG_TIMER_STATS
- timer->start_site = NULL;
- timer->start_pid = -1;
- memset(timer->start_comm, 0, TASK_COMM_LEN);
-#endif
}
/**
@@ -1188,8 +1161,8 @@ bool hrtimer_active(const struct hrtimer *timer)
cpu_base = READ_ONCE(timer->base->cpu_base);
seq = raw_read_seqcount_begin(&cpu_base->seq);
- if (timer->state != HRTIMER_STATE_INACTIVE ||
- cpu_base->running == timer)
+ if (((timer->state & ~HRTIMER_STATE_PINNED) !=
+ HRTIMER_STATE_INACTIVE) || cpu_base->running == timer)
return true;
} while (read_seqcount_retry(&cpu_base->seq, seq) ||
@@ -1239,7 +1212,6 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
raw_write_seqcount_barrier(&cpu_base->seq);
__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
- timer_stats_account_hrtimer(timer);
fn = timer->function;
/*
@@ -1627,17 +1599,23 @@ static void init_hrtimers_cpu(int cpu)
hrtimer_init_hres(cpu_base);
}
-#ifdef CONFIG_HOTPLUG_CPU
-
+#if defined(CONFIG_HOTPLUG_CPU)
static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
- struct hrtimer_clock_base *new_base)
+ struct hrtimer_clock_base *new_base,
+ bool remove_pinned)
{
struct hrtimer *timer;
struct timerqueue_node *node;
+ struct timerqueue_head pinned;
+ int is_pinned;
+ bool is_hotplug = !cpu_online(old_base->cpu_base->cpu);
+
+ timerqueue_init_head(&pinned);
while ((node = timerqueue_getnext(&old_base->active))) {
timer = container_of(node, struct hrtimer, node);
- BUG_ON(hrtimer_callback_running(timer));
+ if (is_hotplug)
+ BUG_ON(hrtimer_callback_running(timer));
debug_deactivate(timer);
/*
@@ -1646,6 +1624,13 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
* under us on another CPU
*/
__remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
+
+ is_pinned = timer->state & HRTIMER_STATE_PINNED;
+ if (!remove_pinned && is_pinned) {
+ timerqueue_add(&pinned, &timer->node);
+ continue;
+ }
+
timer->base = new_base;
/*
* Enqueue the timers on the new cpu. This does not
@@ -1657,17 +1642,23 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
*/
enqueue_hrtimer(timer, new_base);
}
+
+ /* Re-queue pinned timers for non-hotplug usecase */
+ while ((node = timerqueue_getnext(&pinned))) {
+ timer = container_of(node, struct hrtimer, node);
+
+ timerqueue_del(&pinned, &timer->node);
+ enqueue_hrtimer(timer, old_base);
+ }
}
-static void migrate_hrtimers(int scpu)
+static void __migrate_hrtimers(int scpu, bool remove_pinned)
{
struct hrtimer_cpu_base *old_base, *new_base;
+ unsigned long flags;
int i;
- BUG_ON(cpu_online(scpu));
- tick_cancel_sched_timer(scpu);
-
- local_irq_disable();
+ local_irq_save(flags);
old_base = &per_cpu(hrtimer_bases, scpu);
new_base = this_cpu_ptr(&hrtimer_bases);
/*
@@ -1679,7 +1670,7 @@ static void migrate_hrtimers(int scpu)
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
migrate_hrtimer_list(&old_base->clock_base[i],
- &new_base->clock_base[i]);
+ &new_base->clock_base[i], remove_pinned);
}
raw_spin_unlock(&old_base->lock);
@@ -1687,7 +1678,20 @@ static void migrate_hrtimers(int scpu)
/* Check, if we got expired work to do */
__hrtimer_peek_ahead_timers();
- local_irq_enable();
+ local_irq_restore(flags);
+}
+
+static void migrate_hrtimers(int scpu)
+{
+ BUG_ON(cpu_online(scpu));
+ tick_cancel_sched_timer(scpu);
+
+ __migrate_hrtimers(scpu, true);
+}
+
+void hrtimer_quiesce_cpu(void *cpup)
+{
+ __migrate_hrtimers(*(int *)cpup, false);
}
#endif /* CONFIG_HOTPLUG_CPU */
@@ -1795,15 +1799,19 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
* You can set the task state as follows -
*
* %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process()).
*
* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
+ * delivered to the current task or the current task is explicitly woken
+ * up.
*
* The current task state is guaranteed to be TASK_RUNNING when this
* routine returns.
*
- * Returns 0 when the timer has expired otherwise -EINTR
+ * Returns 0 when the timer has expired. If the task was woken before the
+ * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
+ * by an explicit wakeup, it returns -EINTR.
*/
int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
const enum hrtimer_mode mode)
@@ -1825,15 +1833,19 @@ EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
* You can set the task state as follows -
*
* %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process()).
*
* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
+ * delivered to the current task or the current task is explicitly woken
+ * up.
*
* The current task state is guaranteed to be TASK_RUNNING when this
* routine returns.
*
- * Returns 0 when the timer has expired otherwise -EINTR
+ * Returns 0 when the timer has expired. If the task was woken before the
+ * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
+ * by an explicit wakeup, it returns -EINTR.
*/
int __sched schedule_hrtimeout(ktime_t *expires,
const enum hrtimer_mode mode)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 80016b329d94..051544aec37c 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1250,7 +1250,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval)
{
- unsigned long long now;
+ unsigned long long now = 0;
WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
cpu_timer_sample_group(clock_idx, tsk, &now);
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 382b159d8592..1986ab6e5943 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -70,6 +70,11 @@ struct clock_data {
static struct hrtimer sched_clock_timer;
static int irqtime = -1;
+static int initialized;
+static u64 suspend_ns;
+static u64 suspend_cycles;
+static u64 resume_cycles;
+
core_param(irqtime, irqtime, int, 0400);
@@ -236,6 +241,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
pr_debug("Registered %pF as sched_clock source\n", read);
}
+int sched_clock_initialized(void)
+{
+ return initialized;
+}
+
void __init sched_clock_postinit(void)
{
/*
@@ -254,6 +264,8 @@ void __init sched_clock_postinit(void)
hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
sched_clock_timer.function = sched_clock_poll;
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
+
+ initialized = 1;
}
/*
@@ -279,6 +291,11 @@ static int sched_clock_suspend(void)
struct clock_read_data *rd = &cd.read_data[0];
update_sched_clock();
+
+ suspend_ns = rd->epoch_ns;
+ suspend_cycles = rd->epoch_cyc;
+ pr_info("suspend ns:%17llu suspend cycles:%17llu\n",
+ rd->epoch_ns, rd->epoch_cyc);
hrtimer_cancel(&sched_clock_timer);
rd->read_sched_clock = suspended_sched_clock_read;
@@ -290,6 +307,8 @@ static void sched_clock_resume(void)
struct clock_read_data *rd = &cd.read_data[0];
rd->epoch_cyc = cd.actual_read_sched_clock();
+ resume_cycles = rd->epoch_cyc;
+ pr_info("resume cycles:%17llu\n", rd->epoch_cyc);
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
rd->read_sched_clock = cd.actual_read_sched_clock;
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a935cbdc55a4..6579be96e041 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -19,11 +19,13 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
+#include <linux/timer.h>
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
#include <linux/perf_event.h>
#include <linux/context_tracking.h>
+#include <linux/rq_stats.h>
#include <asm/irq_regs.h>
@@ -31,6 +33,10 @@
#include <trace/events/timer.h>
+struct rq_data rq_info;
+struct workqueue_struct *rq_wq;
+spinlock_t rq_lock;
+
/*
* Per cpu nohz control structure
*/
@@ -41,6 +47,21 @@ static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
*/
static ktime_t last_jiffies_update;
+u64 jiffy_to_ktime_ns(u64 *now, u64 *jiffy_ktime_ns)
+{
+ u64 cur_jiffies;
+ unsigned long seq;
+
+ do {
+ seq = read_seqbegin(&jiffies_lock);
+ *now = ktime_get_ns();
+ *jiffy_ktime_ns = ktime_to_ns(last_jiffies_update);
+ cur_jiffies = get_jiffies_64();
+ } while (read_seqretry(&jiffies_lock, seq));
+
+ return cur_jiffies;
+}
+
struct tick_sched *tick_get_tick_sched(int cpu)
{
return &per_cpu(tick_cpu_sched, cpu);
@@ -143,7 +164,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
* when we go busy again does not account too much ticks.
*/
if (ts->tick_stopped) {
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
}
@@ -430,7 +451,7 @@ static void tick_nohz_update_jiffies(ktime_t now)
tick_do_update_jiffies64(now);
local_irq_restore(flags);
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
}
/*
@@ -716,7 +737,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
update_cpu_load_nohz();
calc_load_exit_idle();
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
/*
* Cancel the scheduled timer and restore the tick
*/
@@ -804,6 +825,11 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
now = tick_nohz_start_idle(ts);
+#ifdef CONFIG_SMP
+ if (check_pending_deferrable_timers(cpu))
+ raise_softirq_irqoff(TIMER_SOFTIRQ);
+#endif
+
if (can_stop_idle_tick(cpu, ts)) {
int was_stopped = ts->tick_stopped;
@@ -1076,6 +1102,51 @@ void tick_irq_enter(void)
* High resolution timer specific code
*/
#ifdef CONFIG_HIGH_RES_TIMERS
+static void update_rq_stats(void)
+{
+ unsigned long jiffy_gap = 0;
+ unsigned int rq_avg = 0;
+ unsigned long flags = 0;
+
+ jiffy_gap = jiffies - rq_info.rq_poll_last_jiffy;
+
+ if (jiffy_gap >= rq_info.rq_poll_jiffies) {
+
+ spin_lock_irqsave(&rq_lock, flags);
+
+ if (!rq_info.rq_avg)
+ rq_info.rq_poll_total_jiffies = 0;
+
+ rq_avg = nr_running() * 10;
+
+ if (rq_info.rq_poll_total_jiffies) {
+ rq_avg = (rq_avg * jiffy_gap) +
+ (rq_info.rq_avg *
+ rq_info.rq_poll_total_jiffies);
+ do_div(rq_avg,
+ rq_info.rq_poll_total_jiffies + jiffy_gap);
+ }
+
+ rq_info.rq_avg = rq_avg;
+ rq_info.rq_poll_total_jiffies += jiffy_gap;
+ rq_info.rq_poll_last_jiffy = jiffies;
+
+ spin_unlock_irqrestore(&rq_lock, flags);
+ }
+}
+
+static void wakeup_user(void)
+{
+ unsigned long jiffy_gap;
+
+ jiffy_gap = jiffies - rq_info.def_timer_last_jiffy;
+
+ if (jiffy_gap >= rq_info.def_timer_jiffies) {
+ rq_info.def_timer_last_jiffy = jiffies;
+ queue_work(rq_wq, &rq_info.def_timer_work);
+ }
+}
+
/*
* We rearm the timer until we get disabled by the idle code.
* Called with interrupts disabled.
@@ -1093,9 +1164,23 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
* Do not call, when we are not in irq context and have
* no valid regs pointer
*/
- if (regs)
+ if (regs) {
tick_sched_handle(ts, regs);
+ if (rq_info.init == 1 &&
+ tick_do_timer_cpu == smp_processor_id()) {
+ /*
+ * update run queue statistics
+ */
+ update_rq_stats();
+
+ /*
+ * wakeup user if needed
+ */
+ wakeup_user();
+ }
+ }
+
/* No need to reprogram if we are in idle or full dynticks mode */
if (unlikely(ts->tick_stopped))
return HRTIMER_NORESTART;
@@ -1208,3 +1293,8 @@ int tick_check_oneshot_change(int allow_nohz)
tick_nohz_switch_to_nohz();
return 0;
}
+
+ktime_t * get_next_event_cpu(unsigned int cpu)
+{
+ return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event);
+}
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 523fe1669d4c..903705687b52 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -94,12 +94,16 @@ struct tvec_base {
struct tvec tv5;
} ____cacheline_aligned;
+static inline void __run_timers(struct tvec_base *base);
static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
unsigned int sysctl_timer_migration = 1;
+struct tvec_base tvec_base_deferrable;
+static atomic_t deferrable_pending;
+
void timers_update_migration(bool update_nohz)
{
bool on = sysctl_timer_migration && tick_nohz_active;
@@ -135,18 +139,66 @@ int timer_migration_handler(struct ctl_table *table, int write,
}
static inline struct tvec_base *get_target_base(struct tvec_base *base,
- int pinned)
+ int pinned, u32 timer_flags)
{
+ if (!pinned && !(timer_flags & TIMER_PINNED_ON_CPU) &&
+ (timer_flags & TIMER_DEFERRABLE))
+ return &tvec_base_deferrable;
if (pinned || !base->migration_enabled)
return this_cpu_ptr(&tvec_bases);
return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
}
+
+static inline void __run_deferrable_timers(void)
+{
+ if ((atomic_cmpxchg(&deferrable_pending, 1, 0) &&
+ tick_do_timer_cpu == TICK_DO_TIMER_NONE) ||
+ tick_do_timer_cpu == smp_processor_id()) {
+ if (time_after_eq(jiffies,
+ tvec_base_deferrable.timer_jiffies))
+ __run_timers(&tvec_base_deferrable);
+ }
+}
+
+static inline void init_timer_deferrable_global(void)
+{
+ tvec_base_deferrable.cpu = nr_cpu_ids;
+ spin_lock_init(&tvec_base_deferrable.lock);
+ tvec_base_deferrable.timer_jiffies = jiffies;
+ tvec_base_deferrable.next_timer = tvec_base_deferrable.timer_jiffies;
+}
+
+static inline struct tvec_base *get_timer_base(u32 timer_flags)
+{
+ if (!(timer_flags & TIMER_PINNED_ON_CPU) &&
+ timer_flags & TIMER_DEFERRABLE)
+ return &tvec_base_deferrable;
+ else
+ return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK);
+}
#else
static inline struct tvec_base *get_target_base(struct tvec_base *base,
- int pinned)
+ int pinned, u32 timer_flags)
{
return this_cpu_ptr(&tvec_bases);
}
+
+static inline void __run_deferrable_timers(void)
+{
+}
+
+static inline void init_timer_deferrable_global(void)
+{
+ /*
+ * initialize cpu unbound deferrable timer base only when CONFIG_SMP.
+ * UP kernel handles the timers with cpu 0 timer base.
+ */
+}
+
+static inline struct tvec_base *get_timer_base(u32 timer_flags)
+{
+ return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK);
+}
#endif
static unsigned long round_jiffies_common(unsigned long j, int cpu,
@@ -448,38 +500,6 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
}
}
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
-{
- if (timer->start_site)
- return;
-
- timer->start_site = addr;
- memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
- timer->start_pid = current->pid;
-}
-
-static void timer_stats_account_timer(struct timer_list *timer)
-{
- void *site;
-
- /*
- * start_site can be concurrently reset by
- * timer_stats_timer_clear_start_info()
- */
- site = READ_ONCE(timer->start_site);
- if (likely(!site))
- return;
-
- timer_stats_update_stats(timer, timer->start_pid, site,
- timer->function, timer->start_comm,
- timer->flags);
-}
-
-#else
-static void timer_stats_account_timer(struct timer_list *timer) {}
-#endif
-
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
static struct debug_obj_descr timer_debug_descr;
@@ -682,11 +702,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags,
timer->entry.pprev = NULL;
timer->flags = flags | raw_smp_processor_id();
timer->slack = -1;
-#ifdef CONFIG_TIMER_STATS
- timer->start_site = NULL;
- timer->start_pid = -1;
- memset(timer->start_comm, 0, TASK_COMM_LEN);
-#endif
lockdep_init_map(&timer->lockdep_map, name, key, 0);
}
@@ -775,7 +790,7 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
tf = READ_ONCE(timer->flags);
if (!(tf & TIMER_MIGRATING)) {
- base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
+ base = get_timer_base(tf);
spin_lock_irqsave(&base->lock, *flags);
if (timer->flags == tf)
return base;
@@ -793,7 +808,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
unsigned long flags;
int ret = 0;
- timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);
base = lock_timer_base(timer, &flags);
@@ -804,7 +818,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
debug_activate(timer, expires);
- new_base = get_target_base(base, pinned);
+ new_base = get_target_base(base, pinned, timer->flags);
if (base != new_base) {
/*
@@ -826,6 +840,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
}
}
+ if (pinned == TIMER_PINNED)
+ timer->flags |= TIMER_PINNED_ON_CPU;
+ else
+ timer->flags &= ~TIMER_PINNED_ON_CPU;
timer->expires = expires;
internal_add_timer(base, timer);
@@ -988,7 +1006,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
struct tvec_base *base;
unsigned long flags;
- timer_stats_timer_set_start_info(timer);
BUG_ON(timer_pending(timer) || !timer->function);
/*
@@ -1007,6 +1024,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
(timer->flags & ~TIMER_BASEMASK) | cpu);
}
+ timer->flags |= TIMER_PINNED_ON_CPU;
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
spin_unlock_irqrestore(&base->lock, flags);
@@ -1032,7 +1050,6 @@ int del_timer(struct timer_list *timer)
debug_assert_init(timer);
- timer_stats_timer_clear_start_info(timer);
if (timer_pending(timer)) {
base = lock_timer_base(timer, &flags);
ret = detach_if_pending(timer, base, true);
@@ -1060,10 +1077,9 @@ int try_to_del_timer_sync(struct timer_list *timer)
base = lock_timer_base(timer, &flags);
- if (base->running_timer != timer) {
- timer_stats_timer_clear_start_info(timer);
+ if (base->running_timer != timer)
ret = detach_if_pending(timer, base, true);
- }
+
spin_unlock_irqrestore(&base->lock, flags);
return ret;
@@ -1247,8 +1263,6 @@ static inline void __run_timers(struct tvec_base *base)
data = timer->data;
irqsafe = timer->flags & TIMER_IRQSAFE;
- timer_stats_account_timer(timer);
-
base->running_timer = timer;
detach_expired_timer(timer, base);
@@ -1376,6 +1390,30 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
}
+#ifdef CONFIG_SMP
+/*
+ * check_pending_deferrable_timers - Check for unbound deferrable timer expiry.
+ * @cpu - Current CPU
+ *
+ * The function checks whether any global deferrable pending timers
+ * are exipired or not. This function does not check cpu bounded
+ * diferrable pending timers expiry.
+ *
+ * The function returns true when a cpu unbounded deferrable timer is expired.
+ */
+bool check_pending_deferrable_timers(int cpu)
+{
+ if (cpu == tick_do_timer_cpu ||
+ tick_do_timer_cpu == TICK_DO_TIMER_NONE) {
+ if (time_after_eq(jiffies, tvec_base_deferrable.timer_jiffies)
+ && !atomic_cmpxchg(&deferrable_pending, 0, 1)) {
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
/**
* get_next_timer_interrupt - return the time (clock mono) of the next timer
* @basej: base time jiffies
@@ -1440,6 +1478,8 @@ static void run_timer_softirq(struct softirq_action *h)
{
struct tvec_base *base = this_cpu_ptr(&tvec_bases);
+ __run_deferrable_timers();
+
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
}
@@ -1482,11 +1522,12 @@ static void process_timeout(unsigned long __data)
* You can set the task state as follows -
*
* %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
- * pass before the routine returns. The routine will return 0
+ * pass before the routine returns unless the current task is explicitly
+ * woken up, (e.g. by wake_up_process())".
*
* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task. In this case the remaining time
- * in jiffies will be returned, or 0 if the timer expired in time
+ * delivered to the current task or the current task is explicitly woken
+ * up.
*
* The current task state is guaranteed to be TASK_RUNNING when this
* routine returns.
@@ -1495,7 +1536,9 @@ static void process_timeout(unsigned long __data)
* the CPU away without a bound on the timeout. In this case the return
* value will be %MAX_SCHEDULE_TIMEOUT.
*
- * In all cases the return value is guaranteed to be non-negative.
+ * Returns 0 when the timer has expired otherwise the remaining time in
+ * jiffies will be returned. In all cases the return value is guaranteed
+ * to be non-negative.
*/
signed long __sched schedule_timeout(signed long timeout)
{
@@ -1573,56 +1616,82 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
-#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
+#if defined(CONFIG_HOTPLUG_CPU)
+static void migrate_timer_list(struct tvec_base *new_base,
+ struct hlist_head *head, bool remove_pinned)
{
struct timer_list *timer;
int cpu = new_base->cpu;
+ struct hlist_node *n;
+ int is_pinned;
+
+ hlist_for_each_entry_safe(timer, n, head, entry) {
+ is_pinned = timer->flags & TIMER_PINNED_ON_CPU;
+ if (!remove_pinned && is_pinned)
+ continue;
- while (!hlist_empty(head)) {
- timer = hlist_entry(head->first, struct timer_list, entry);
- /* We ignore the accounting on the dying cpu */
- detach_timer(timer, false);
+ detach_if_pending(timer, get_timer_base(timer->flags), false);
timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
internal_add_timer(new_base, timer);
}
}
-static void migrate_timers(int cpu)
+static void __migrate_timers(int cpu, bool remove_pinned)
{
struct tvec_base *old_base;
struct tvec_base *new_base;
+ unsigned long flags;
int i;
- BUG_ON(cpu_online(cpu));
old_base = per_cpu_ptr(&tvec_bases, cpu);
new_base = get_cpu_ptr(&tvec_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
*/
- spin_lock_irq(&new_base->lock);
+ spin_lock_irqsave(&new_base->lock, flags);
spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
- BUG_ON(old_base->running_timer);
+ /*
+ * If we're in the hotplug path, kill the system if there's a running
+ * timer. It's ok to have a running timer in the isolation case - the
+ * currently running or just expired timers are off of the timer wheel
+ * and so everything else can be migrated off.
+ */
+ if (!cpu_online(cpu))
+ BUG_ON(old_base->running_timer);
for (i = 0; i < TVR_SIZE; i++)
- migrate_timer_list(new_base, old_base->tv1.vec + i);
+ migrate_timer_list(new_base, old_base->tv1.vec + i,
+ remove_pinned);
for (i = 0; i < TVN_SIZE; i++) {
- migrate_timer_list(new_base, old_base->tv2.vec + i);
- migrate_timer_list(new_base, old_base->tv3.vec + i);
- migrate_timer_list(new_base, old_base->tv4.vec + i);
- migrate_timer_list(new_base, old_base->tv5.vec + i);
+ migrate_timer_list(new_base, old_base->tv2.vec + i,
+ remove_pinned);
+ migrate_timer_list(new_base, old_base->tv3.vec + i,
+ remove_pinned);
+ migrate_timer_list(new_base, old_base->tv4.vec + i,
+ remove_pinned);
+ migrate_timer_list(new_base, old_base->tv5.vec + i,
+ remove_pinned);
}
- old_base->active_timers = 0;
- old_base->all_timers = 0;
-
spin_unlock(&old_base->lock);
- spin_unlock_irq(&new_base->lock);
+ spin_unlock_irqrestore(&new_base->lock, flags);
put_cpu_ptr(&tvec_bases);
}
+/* Migrate timers from 'cpu' to this_cpu */
+static void migrate_timers(int cpu)
+{
+ BUG_ON(cpu_online(cpu));
+ __migrate_timers(cpu, true);
+}
+
+void timer_quiesce_cpu(void *cpup)
+{
+ __migrate_timers(*(int *)cpup, false);
+}
+
static int timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -1663,12 +1732,13 @@ static void __init init_timer_cpus(void)
for_each_possible_cpu(cpu)
init_timer_cpu(cpu);
+
+ init_timer_deferrable_global();
}
void __init init_timers(void)
{
init_timer_cpus();
- init_timer_stats();
timer_register_cpu_notifier();
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
@@ -1702,16 +1772,6 @@ unsigned long msleep_interruptible(unsigned int msecs)
EXPORT_SYMBOL(msleep_interruptible);
-static void __sched do_usleep_range(unsigned long min, unsigned long max)
-{
- ktime_t kmin;
- u64 delta;
-
- kmin = ktime_set(0, min * NSEC_PER_USEC);
- delta = (u64)(max - min) * NSEC_PER_USEC;
- schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
-}
-
/**
* usleep_range - Drop in replacement for udelay where wakeup is flexible
* @min: Minimum time in usecs to sleep
@@ -1719,7 +1779,14 @@ static void __sched do_usleep_range(unsigned long min, unsigned long max)
*/
void __sched usleep_range(unsigned long min, unsigned long max)
{
- __set_current_state(TASK_UNINTERRUPTIBLE);
- do_usleep_range(min, max);
+ ktime_t exp = ktime_add_us(ktime_get(), min);
+ u64 delta = (u64)(max - min) * NSEC_PER_USEC;
+
+ for (;;) {
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ /* Do not return before the requested sleep time has elapsed */
+ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
+ break;
+ }
}
EXPORT_SYMBOL(usleep_range);
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ef4f16e81283..998d730401b3 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -63,21 +63,11 @@ static void
print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
int idx, u64 now)
{
-#ifdef CONFIG_TIMER_STATS
- char tmp[TASK_COMM_LEN + 1];
-#endif
SEQ_printf(m, " #%d: ", idx);
print_name_offset(m, taddr);
SEQ_printf(m, ", ");
print_name_offset(m, timer->function);
SEQ_printf(m, ", S:%02x", timer->state);
-#ifdef CONFIG_TIMER_STATS
- SEQ_printf(m, ", ");
- print_name_offset(m, timer->start_site);
- memcpy(tmp, timer->start_comm, TASK_COMM_LEN);
- tmp[TASK_COMM_LEN] = 0;
- SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
-#endif
SEQ_printf(m, "\n");
SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n",
(unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)),
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
deleted file mode 100644
index 1adecb4b87c8..000000000000
--- a/kernel/time/timer_stats.c
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * kernel/time/timer_stats.c
- *
- * Collect timer usage statistics.
- *
- * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
- * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
- *
- * timer_stats is based on timer_top, a similar functionality which was part of
- * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
- * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
- * on dynamic allocation of the statistics entries and linear search based
- * lookup combined with a global lock, rather than the static array, hash
- * and per-CPU locking which is used by timer_stats. It was written for the
- * pre hrtimer kernel code and therefore did not take hrtimers into account.
- * Nevertheless it provided the base for the timer_stats implementation and
- * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
- * for this effort.
- *
- * timer_top.c is
- * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
- * Written by Daniel Petrini <d.pensator@gmail.com>
- * timer_top.c was released under the GNU General Public License version 2
- *
- * We export the addresses and counting of timer functions being called,
- * the pid and cmdline from the owner process if applicable.
- *
- * Start/stop data collection:
- * # echo [1|0] >/proc/timer_stats
- *
- * Display the information collected so far:
- * # cat /proc/timer_stats
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/proc_fs.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/sched.h>
-#include <linux/seq_file.h>
-#include <linux/kallsyms.h>
-
-#include <asm/uaccess.h>
-
-/*
- * This is our basic unit of interest: a timer expiry event identified
- * by the timer, its start/expire functions and the PID of the task that
- * started the timer. We count the number of times an event happens:
- */
-struct entry {
- /*
- * Hash list:
- */
- struct entry *next;
-
- /*
- * Hash keys:
- */
- void *timer;
- void *start_func;
- void *expire_func;
- pid_t pid;
-
- /*
- * Number of timeout events:
- */
- unsigned long count;
- u32 flags;
-
- /*
- * We save the command-line string to preserve
- * this information past task exit:
- */
- char comm[TASK_COMM_LEN + 1];
-
-} ____cacheline_aligned_in_smp;
-
-/*
- * Spinlock protecting the tables - not taken during lookup:
- */
-static DEFINE_RAW_SPINLOCK(table_lock);
-
-/*
- * Per-CPU lookup locks for fast hash lookup:
- */
-static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
-
-/*
- * Mutex to serialize state changes with show-stats activities:
- */
-static DEFINE_MUTEX(show_mutex);
-
-/*
- * Collection status, active/inactive:
- */
-int __read_mostly timer_stats_active;
-
-/*
- * Beginning/end timestamps of measurement:
- */
-static ktime_t time_start, time_stop;
-
-/*
- * tstat entry structs only get allocated while collection is
- * active and never freed during that time - this simplifies
- * things quite a bit.
- *
- * They get freed when a new collection period is started.
- */
-#define MAX_ENTRIES_BITS 10
-#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS)
-
-static unsigned long nr_entries;
-static struct entry entries[MAX_ENTRIES];
-
-static atomic_t overflow_count;
-
-/*
- * The entries are in a hash-table, for fast lookup:
- */
-#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1)
-#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS)
-#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1)
-
-#define __tstat_hashfn(entry) \
- (((unsigned long)(entry)->timer ^ \
- (unsigned long)(entry)->start_func ^ \
- (unsigned long)(entry)->expire_func ^ \
- (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK)
-
-#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry))
-
-static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
-
-static void reset_entries(void)
-{
- nr_entries = 0;
- memset(entries, 0, sizeof(entries));
- memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
- atomic_set(&overflow_count, 0);
-}
-
-static struct entry *alloc_entry(void)
-{
- if (nr_entries >= MAX_ENTRIES)
- return NULL;
-
- return entries + nr_entries++;
-}
-
-static int match_entries(struct entry *entry1, struct entry *entry2)
-{
- return entry1->timer == entry2->timer &&
- entry1->start_func == entry2->start_func &&
- entry1->expire_func == entry2->expire_func &&
- entry1->pid == entry2->pid;
-}
-
-/*
- * Look up whether an entry matching this item is present
- * in the hash already. Must be called with irqs off and the
- * lookup lock held:
- */
-static struct entry *tstat_lookup(struct entry *entry, char *comm)
-{
- struct entry **head, *curr, *prev;
-
- head = tstat_hashentry(entry);
- curr = *head;
-
- /*
- * The fastpath is when the entry is already hashed,
- * we do this with the lookup lock held, but with the
- * table lock not held:
- */
- while (curr) {
- if (match_entries(curr, entry))
- return curr;
-
- curr = curr->next;
- }
- /*
- * Slowpath: allocate, set up and link a new hash entry:
- */
- prev = NULL;
- curr = *head;
-
- raw_spin_lock(&table_lock);
- /*
- * Make sure we have not raced with another CPU:
- */
- while (curr) {
- if (match_entries(curr, entry))
- goto out_unlock;
-
- prev = curr;
- curr = curr->next;
- }
-
- curr = alloc_entry();
- if (curr) {
- *curr = *entry;
- curr->count = 0;
- curr->next = NULL;
- memcpy(curr->comm, comm, TASK_COMM_LEN);
-
- smp_mb(); /* Ensure that curr is initialized before insert */
-
- if (prev)
- prev->next = curr;
- else
- *head = curr;
- }
- out_unlock:
- raw_spin_unlock(&table_lock);
-
- return curr;
-}
-
-/**
- * timer_stats_update_stats - Update the statistics for a timer.
- * @timer: pointer to either a timer_list or a hrtimer
- * @pid: the pid of the task which set up the timer
- * @startf: pointer to the function which did the timer setup
- * @timerf: pointer to the timer callback function of the timer
- * @comm: name of the process which set up the timer
- * @tflags: The flags field of the timer
- *
- * When the timer is already registered, then the event counter is
- * incremented. Otherwise the timer is registered in a free slot.
- */
-void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
- void *timerf, char *comm, u32 tflags)
-{
- /*
- * It doesn't matter which lock we take:
- */
- raw_spinlock_t *lock;
- struct entry *entry, input;
- unsigned long flags;
-
- if (likely(!timer_stats_active))
- return;
-
- lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());
-
- input.timer = timer;
- input.start_func = startf;
- input.expire_func = timerf;
- input.pid = pid;
- input.flags = tflags;
-
- raw_spin_lock_irqsave(lock, flags);
- if (!timer_stats_active)
- goto out_unlock;
-
- entry = tstat_lookup(&input, comm);
- if (likely(entry))
- entry->count++;
- else
- atomic_inc(&overflow_count);
-
- out_unlock:
- raw_spin_unlock_irqrestore(lock, flags);
-}
-
-static void print_name_offset(struct seq_file *m, unsigned long addr)
-{
- char symname[KSYM_NAME_LEN];
-
- if (lookup_symbol_name(addr, symname) < 0)
- seq_printf(m, "<%p>", (void *)addr);
- else
- seq_printf(m, "%s", symname);
-}
-
-static int tstats_show(struct seq_file *m, void *v)
-{
- struct timespec period;
- struct entry *entry;
- unsigned long ms;
- long events = 0;
- ktime_t time;
- int i;
-
- mutex_lock(&show_mutex);
- /*
- * If still active then calculate up to now:
- */
- if (timer_stats_active)
- time_stop = ktime_get();
-
- time = ktime_sub(time_stop, time_start);
-
- period = ktime_to_timespec(time);
- ms = period.tv_nsec / 1000000;
-
- seq_puts(m, "Timer Stats Version: v0.3\n");
- seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
- if (atomic_read(&overflow_count))
- seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count));
- seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive");
-
- for (i = 0; i < nr_entries; i++) {
- entry = entries + i;
- if (entry->flags & TIMER_DEFERRABLE) {
- seq_printf(m, "%4luD, %5d %-16s ",
- entry->count, entry->pid, entry->comm);
- } else {
- seq_printf(m, " %4lu, %5d %-16s ",
- entry->count, entry->pid, entry->comm);
- }
-
- print_name_offset(m, (unsigned long)entry->start_func);
- seq_puts(m, " (");
- print_name_offset(m, (unsigned long)entry->expire_func);
- seq_puts(m, ")\n");
-
- events += entry->count;
- }
-
- ms += period.tv_sec * 1000;
- if (!ms)
- ms = 1;
-
- if (events && period.tv_sec)
- seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
- events, events * 1000 / ms,
- (events * 1000000 / ms) % 1000);
- else
- seq_printf(m, "%ld total events\n", events);
-
- mutex_unlock(&show_mutex);
-
- return 0;
-}
-
-/*
- * After a state change, make sure all concurrent lookup/update
- * activities have stopped:
- */
-static void sync_access(void)
-{
- unsigned long flags;
- int cpu;
-
- for_each_online_cpu(cpu) {
- raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
-
- raw_spin_lock_irqsave(lock, flags);
- /* nothing */
- raw_spin_unlock_irqrestore(lock, flags);
- }
-}
-
-static ssize_t tstats_write(struct file *file, const char __user *buf,
- size_t count, loff_t *offs)
-{
- char ctl[2];
-
- if (count != 2 || *offs)
- return -EINVAL;
-
- if (copy_from_user(ctl, buf, count))
- return -EFAULT;
-
- mutex_lock(&show_mutex);
- switch (ctl[0]) {
- case '0':
- if (timer_stats_active) {
- timer_stats_active = 0;
- time_stop = ktime_get();
- sync_access();
- }
- break;
- case '1':
- if (!timer_stats_active) {
- reset_entries();
- time_start = ktime_get();
- smp_mb();
- timer_stats_active = 1;
- }
- break;
- default:
- count = -EINVAL;
- }
- mutex_unlock(&show_mutex);
-
- return count;
-}
-
-static int tstats_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, tstats_show, NULL);
-}
-
-static const struct file_operations tstats_fops = {
- .open = tstats_open,
- .read = seq_read,
- .write = tstats_write,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-void __init init_timer_stats(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
-}
-
-static int __init init_tstats_procfs(void)
-{
- struct proc_dir_entry *pe;
-
- pe = proc_create("timer_stats", 0644, NULL, &tstats_fops);
- if (!pe)
- return -ENOMEM;
- return 0;
-}
-__initcall(init_tstats_procfs);