summaryrefslogtreecommitdiff
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/Makefile2
-rw-r--r--kernel/time/alarmtimer.c270
-rw-r--r--kernel/time/hrtimer.c111
-rw-r--r--kernel/time/sched_clock.c8
-rw-r--r--kernel/time/tick-sched.c92
-rw-r--r--kernel/time/timer.c128
6 files changed, 552 insertions, 59 deletions
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 49eca0beed32..5819ca07a22b 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -12,3 +12,5 @@ obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
obj-$(CONFIG_TIMER_STATS) += timer_stats.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
+
+ccflags-y += -Idrivers/cpuidle
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 7fbba635a549..0cdc34ebd8d1 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -26,6 +26,11 @@
#include <linux/workqueue.h>
#include <linux/freezer.h>
+#ifdef CONFIG_MSM_PM
+#include "lpm-levels.h"
+#endif
+#include <linux/workqueue.h>
+
/**
* struct alarm_base - Alarm timer bases
* @lock: Lock for syncrhonized access to the base
@@ -46,14 +51,130 @@ static ktime_t freezer_delta;
static DEFINE_SPINLOCK(freezer_delta_lock);
static struct wakeup_source *ws;
+static struct delayed_work work;
+static struct workqueue_struct *power_off_alarm_workqueue;
#ifdef CONFIG_RTC_CLASS
/* rtc timer and device for setting alarm wakeups at suspend */
static struct rtc_timer rtctimer;
static struct rtc_device *rtcdev;
static DEFINE_SPINLOCK(rtcdev_lock);
+static struct mutex power_on_alarm_lock;
+static struct alarm init_alarm;
/**
+ * power_on_alarm_init - Init power on alarm value
+ *
+ * Read rtc alarm value after device booting up and add this alarm
+ * into alarm queue.
+ */
+void power_on_alarm_init(void)
+{
+ struct rtc_wkalrm rtc_alarm;
+ struct rtc_time rt;
+ unsigned long alarm_time;
+ struct rtc_device *rtc;
+ ktime_t alarm_ktime;
+
+ rtc = alarmtimer_get_rtcdev();
+
+ if (!rtc)
+ return;
+
+ rtc_read_alarm(rtc, &rtc_alarm);
+ rt = rtc_alarm.time;
+
+ rtc_tm_to_time(&rt, &alarm_time);
+
+ if (alarm_time) {
+ alarm_ktime = ktime_set(alarm_time, 0);
+ alarm_init(&init_alarm, ALARM_POWEROFF_REALTIME, NULL);
+ alarm_start(&init_alarm, alarm_ktime);
+ }
+}
+
+/**
+ * set_power_on_alarm - set power on alarm value into rtc register
+ *
+ * Get the soonest power off alarm timer and set the alarm value into rtc
+ * register.
+ */
+void set_power_on_alarm(void)
+{
+ int rc;
+ struct timespec wall_time, alarm_ts;
+ long alarm_secs = 0l;
+ long rtc_secs, alarm_time, alarm_delta;
+ struct rtc_time rtc_time;
+ struct rtc_wkalrm alarm;
+ struct rtc_device *rtc;
+ struct timerqueue_node *next;
+ unsigned long flags;
+ struct alarm_base *base = &alarm_bases[ALARM_POWEROFF_REALTIME];
+
+ rc = mutex_lock_interruptible(&power_on_alarm_lock);
+ if (rc != 0)
+ return;
+
+ spin_lock_irqsave(&base->lock, flags);
+ next = timerqueue_getnext(&base->timerqueue);
+ spin_unlock_irqrestore(&base->lock, flags);
+
+ if (next) {
+ alarm_ts = ktime_to_timespec(next->expires);
+ alarm_secs = alarm_ts.tv_sec;
+ }
+
+ if (!alarm_secs)
+ goto disable_alarm;
+
+ getnstimeofday(&wall_time);
+
+ /*
+ * alarm_secs have to be bigger than "wall_time +1".
+ * It is to make sure that alarm time will be always
+ * bigger than wall time.
+ */
+ if (alarm_secs <= wall_time.tv_sec + 1)
+ goto disable_alarm;
+
+ rtc = alarmtimer_get_rtcdev();
+ if (!rtc)
+ goto exit;
+
+ rtc_read_time(rtc, &rtc_time);
+ rtc_tm_to_time(&rtc_time, &rtc_secs);
+ alarm_delta = wall_time.tv_sec - rtc_secs;
+ alarm_time = alarm_secs - alarm_delta;
+
+ rtc_time_to_tm(alarm_time, &alarm.time);
+ alarm.enabled = 1;
+ rc = rtc_set_alarm(rtcdev, &alarm);
+ if (rc)
+ goto disable_alarm;
+
+ mutex_unlock(&power_on_alarm_lock);
+ return;
+
+disable_alarm:
+ rtc_alarm_irq_enable(rtcdev, 0);
+exit:
+ mutex_unlock(&power_on_alarm_lock);
+}
+
+static void alarmtimer_triggered_func(void *p)
+{
+ struct rtc_device *rtc = rtcdev;
+
+ if (!(rtc->irq_data & RTC_AF))
+ return;
+ __pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
+}
+
+static struct rtc_task alarmtimer_rtc_task = {
+ .func = alarmtimer_triggered_func
+};
+/**
* alarmtimer_get_rtcdev - Return selected rtcdevice
*
* This function returns the rtc device to use for wakealarms.
@@ -63,7 +184,7 @@ static DEFINE_SPINLOCK(rtcdev_lock);
struct rtc_device *alarmtimer_get_rtcdev(void)
{
unsigned long flags;
- struct rtc_device *ret;
+ struct rtc_device *ret = NULL;
spin_lock_irqsave(&rtcdev_lock, flags);
ret = rtcdev;
@@ -77,33 +198,48 @@ static int alarmtimer_rtc_add_device(struct device *dev,
struct class_interface *class_intf)
{
unsigned long flags;
+ int err = 0;
struct rtc_device *rtc = to_rtc_device(dev);
-
if (rtcdev)
return -EBUSY;
-
if (!rtc->ops->set_alarm)
return -1;
- if (!device_may_wakeup(rtc->dev.parent))
- return -1;
spin_lock_irqsave(&rtcdev_lock, flags);
if (!rtcdev) {
+ err = rtc_irq_register(rtc, &alarmtimer_rtc_task);
+ if (err)
+ goto rtc_irq_reg_err;
rtcdev = rtc;
/* hold a reference so it doesn't go away */
get_device(dev);
}
+
+rtc_irq_reg_err:
spin_unlock_irqrestore(&rtcdev_lock, flags);
- return 0;
+ return err;
+
+}
+
+static void alarmtimer_rtc_remove_device(struct device *dev,
+ struct class_interface *class_intf)
+{
+ if (rtcdev && dev == &rtcdev->dev) {
+ rtc_irq_unregister(rtcdev, &alarmtimer_rtc_task);
+ rtcdev = NULL;
+ }
}
static inline void alarmtimer_rtc_timer_init(void)
{
+ mutex_init(&power_on_alarm_lock);
+
rtc_timer_init(&rtctimer, NULL, NULL);
}
static struct class_interface alarmtimer_rtc_interface = {
.add_dev = &alarmtimer_rtc_add_device,
+ .remove_dev = &alarmtimer_rtc_remove_device,
};
static int alarmtimer_rtc_interface_setup(void)
@@ -124,8 +260,14 @@ struct rtc_device *alarmtimer_get_rtcdev(void)
static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
static inline void alarmtimer_rtc_interface_remove(void) { }
static inline void alarmtimer_rtc_timer_init(void) { }
+void set_power_on_alarm(void) { }
#endif
+static void alarm_work_func(struct work_struct *unused)
+{
+ set_power_on_alarm();
+}
+
/**
* alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue
* @base: pointer to the base where the timer is being run
@@ -195,6 +337,10 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
}
spin_unlock_irqrestore(&base->lock, flags);
+ /* set next power off alarm */
+ if (alarm->type == ALARM_POWEROFF_REALTIME)
+ queue_delayed_work(power_off_alarm_workqueue, &work, 0);
+
return ret;
}
@@ -217,6 +363,70 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
* set an rtc timer to fire that far into the future, which
* will wake us from suspend.
*/
+#if defined(CONFIG_RTC_DRV_QPNP) && defined(CONFIG_MSM_PM)
+static int alarmtimer_suspend(struct device *dev)
+{
+ struct rtc_time tm;
+ ktime_t min, now;
+ unsigned long flags;
+ struct rtc_device *rtc;
+ int i;
+ int ret = 0;
+
+ spin_lock_irqsave(&freezer_delta_lock, flags);
+ min = freezer_delta;
+ freezer_delta = ktime_set(0, 0);
+ spin_unlock_irqrestore(&freezer_delta_lock, flags);
+
+ rtc = alarmtimer_get_rtcdev();
+ /* If we have no rtcdev, just return */
+ if (!rtc)
+ return 0;
+
+ /* Find the soonest timer to expire*/
+ for (i = 0; i < ALARM_NUMTYPE; i++) {
+ struct alarm_base *base = &alarm_bases[i];
+ struct timerqueue_node *next;
+ ktime_t delta;
+
+ spin_lock_irqsave(&base->lock, flags);
+ next = timerqueue_getnext(&base->timerqueue);
+ spin_unlock_irqrestore(&base->lock, flags);
+ if (!next)
+ continue;
+ delta = ktime_sub(next->expires, base->gettime());
+ if (!min.tv64 || (delta.tv64 < min.tv64))
+ min = delta;
+ }
+ if (min.tv64 == 0)
+ return 0;
+
+ if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
+ __pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
+ return -EBUSY;
+ }
+
+ /* Setup a timer to fire that far in the future */
+ rtc_timer_cancel(rtc, &rtctimer);
+ rtc_read_time(rtc, &tm);
+ now = rtc_tm_to_ktime(tm);
+ now = ktime_add(now, min);
+ if (poweron_alarm) {
+ struct rtc_time tm_val;
+ unsigned long secs;
+
+ tm_val = rtc_ktime_to_tm(min);
+ rtc_tm_to_time(&tm_val, &secs);
+ lpm_suspend_wake_time(secs);
+ } else {
+ /* Set alarm, if in the past reject suspend briefly to handle */
+ ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
+ if (ret < 0)
+ __pm_wakeup_event(ws, MSEC_PER_SEC);
+ }
+ return ret;
+}
+#else
static int alarmtimer_suspend(struct device *dev)
{
struct rtc_time tm;
@@ -226,6 +436,8 @@ static int alarmtimer_suspend(struct device *dev)
int i;
int ret;
+ cancel_delayed_work_sync(&work);
+
spin_lock_irqsave(&freezer_delta_lock, flags);
min = freezer_delta;
freezer_delta = ktime_set(0, 0);
@@ -271,11 +483,31 @@ static int alarmtimer_suspend(struct device *dev)
__pm_wakeup_event(ws, MSEC_PER_SEC);
return ret;
}
+#endif
+static int alarmtimer_resume(struct device *dev)
+{
+ struct rtc_device *rtc;
+
+ rtc = alarmtimer_get_rtcdev();
+ /* If we have no rtcdev, just return */
+ if (!rtc)
+ return 0;
+ rtc_timer_cancel(rtc, &rtctimer);
+
+ queue_delayed_work(power_off_alarm_workqueue, &work, 0);
+ return 0;
+}
+
#else
static int alarmtimer_suspend(struct device *dev)
{
return 0;
}
+
+static int alarmtimer_resume(struct device *dev)
+{
+ return 0;
+}
#endif
static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
@@ -443,12 +675,14 @@ EXPORT_SYMBOL_GPL(alarm_forward_now);
* clock2alarm - helper that converts from clockid to alarmtypes
* @clockid: clockid.
*/
-static enum alarmtimer_type clock2alarm(clockid_t clockid)
+enum alarmtimer_type clock2alarm(clockid_t clockid)
{
if (clockid == CLOCK_REALTIME_ALARM)
return ALARM_REALTIME;
if (clockid == CLOCK_BOOTTIME_ALARM)
return ALARM_BOOTTIME;
+ if (clockid == CLOCK_POWEROFF_ALARM)
+ return ALARM_POWEROFF_REALTIME;
return -1;
}
@@ -800,6 +1034,7 @@ out:
/* Suspend hook structures */
static const struct dev_pm_ops alarmtimer_pm_ops = {
.suspend = alarmtimer_suspend,
+ .resume = alarmtimer_resume,
};
static struct platform_driver alarmtimer_driver = {
@@ -834,10 +1069,13 @@ static int __init alarmtimer_init(void)
posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
+ posix_timers_register_clock(CLOCK_POWEROFF_ALARM, &alarm_clock);
/* Initialize alarm bases */
alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME;
alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real;
+ alarm_bases[ALARM_POWEROFF_REALTIME].base_clockid = CLOCK_REALTIME;
+ alarm_bases[ALARM_POWEROFF_REALTIME].gettime = &ktime_get_real;
alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME;
alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime;
for (i = 0; i < ALARM_NUMTYPE; i++) {
@@ -859,8 +1097,24 @@ static int __init alarmtimer_init(void)
goto out_drv;
}
ws = wakeup_source_register("alarmtimer");
- return 0;
+ if (!ws) {
+ error = -ENOMEM;
+ goto out_ws;
+ }
+
+ INIT_DELAYED_WORK(&work, alarm_work_func);
+ power_off_alarm_workqueue =
+ create_singlethread_workqueue("power_off_alarm");
+ if (!power_off_alarm_workqueue) {
+ error = -ENOMEM;
+ goto out_wq;
+ }
+ return 0;
+out_wq:
+ wakeup_source_unregister(ws);
+out_ws:
+ platform_device_unregister(pdev);
out_drv:
platform_driver_unregister(&alarmtimer_driver);
out_if:
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 405536b22c0c..271d83e30d19 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -49,6 +49,7 @@
#include <linux/sched/deadline.h>
#include <linux/timer.h>
#include <linux/freezer.h>
+#include <linux/delay.h>
#include <asm/uaccess.h>
@@ -885,7 +886,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
base->cpu_base->active_bases |= 1 << base->index;
- timer->state = HRTIMER_STATE_ENQUEUED;
+ timer->state |= HRTIMER_STATE_ENQUEUED;
return timerqueue_add(&base->active, &timer->node);
}
@@ -905,11 +906,9 @@ static void __remove_hrtimer(struct hrtimer *timer,
u8 newstate, int reprogram)
{
struct hrtimer_cpu_base *cpu_base = base->cpu_base;
- u8 state = timer->state;
- timer->state = newstate;
- if (!(state & HRTIMER_STATE_ENQUEUED))
- return;
+ if (!(timer->state & HRTIMER_STATE_ENQUEUED))
+ goto out;
if (!timerqueue_del(&base->active, &timer->node))
cpu_base->active_bases &= ~(1 << base->index);
@@ -926,6 +925,13 @@ static void __remove_hrtimer(struct hrtimer *timer,
if (reprogram && timer == cpu_base->next_timer)
hrtimer_force_reprogram(cpu_base, 1);
#endif
+
+out:
+ /*
+ * We need to preserve PINNED state here, otherwise we may end up
+ * migrating pinned hrtimers as well.
+ */
+ timer->state = newstate | (timer->state & HRTIMER_STATE_PINNED);
}
/*
@@ -954,6 +960,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
state = HRTIMER_STATE_INACTIVE;
__remove_hrtimer(timer, base, state, reprogram);
+ timer->state &= ~HRTIMER_STATE_PINNED;
return 1;
}
return 0;
@@ -1007,6 +1014,10 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
timer_stats_hrtimer_set_start_info(timer);
+ /* Update pinned state */
+ timer->state &= ~HRTIMER_STATE_PINNED;
+ timer->state |= (!!(mode & HRTIMER_MODE_PINNED)) << HRTIMER_PINNED_SHIFT;
+
leftmost = enqueue_hrtimer(timer, new_base);
if (!leftmost)
goto unlock;
@@ -1181,8 +1192,8 @@ bool hrtimer_active(const struct hrtimer *timer)
cpu_base = READ_ONCE(timer->base->cpu_base);
seq = raw_read_seqcount_begin(&cpu_base->seq);
- if (timer->state != HRTIMER_STATE_INACTIVE ||
- cpu_base->running == timer)
+ if (((timer->state & ~HRTIMER_STATE_PINNED) !=
+ HRTIMER_STATE_INACTIVE) || cpu_base->running == timer)
return true;
} while (read_seqcount_retry(&cpu_base->seq, seq) ||
@@ -1619,17 +1630,43 @@ static void init_hrtimers_cpu(int cpu)
hrtimer_init_hres(cpu_base);
}
-#ifdef CONFIG_HOTPLUG_CPU
-
-static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
- struct hrtimer_clock_base *new_base)
+#if defined(CONFIG_HOTPLUG_CPU)
+static void migrate_hrtimer_list(struct hrtimer_cpu_base *old_base,
+ struct hrtimer_cpu_base *new_base,
+ unsigned int i,
+ bool wait,
+ bool remove_pinned)
{
struct hrtimer *timer;
struct timerqueue_node *node;
+ struct timerqueue_head pinned;
+ int is_pinned;
+ struct hrtimer_clock_base *old_c_base = &old_base->clock_base[i];
+ struct hrtimer_clock_base *new_c_base = &new_base->clock_base[i];
+
+ timerqueue_init_head(&pinned);
- while ((node = timerqueue_getnext(&old_base->active))) {
+ while ((node = timerqueue_getnext(&old_c_base->active))) {
timer = container_of(node, struct hrtimer, node);
- BUG_ON(hrtimer_callback_running(timer));
+ if (wait) {
+ /* Ensure timers are done running before continuing */
+ while (hrtimer_callback_running(timer)) {
+ raw_spin_unlock(&old_base->lock);
+ raw_spin_unlock(&new_base->lock);
+ cpu_relax();
+ /*
+ * cpu_relax may just be a barrier. Grant the
+ * run_hrtimer_list code some time to obtain the
+ * spinlock.
+ */
+ udelay(2);
+ raw_spin_lock(&new_base->lock);
+ raw_spin_lock_nested(&old_base->lock,
+ SINGLE_DEPTH_NESTING);
+ }
+ } else {
+ BUG_ON(hrtimer_callback_running(timer));
+ }
debug_deactivate(timer);
/*
@@ -1637,8 +1674,15 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
* timer could be seen as !active and just vanish away
* under us on another CPU
*/
- __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
- timer->base = new_base;
+ __remove_hrtimer(timer, old_c_base, HRTIMER_STATE_ENQUEUED, 0);
+
+ is_pinned = timer->state & HRTIMER_STATE_PINNED;
+ if (!remove_pinned && is_pinned) {
+ timerqueue_add(&pinned, &timer->node);
+ continue;
+ }
+
+ timer->base = new_c_base;
/*
* Enqueue the timers on the new cpu. This does not
* reprogram the event device in case the timer
@@ -1647,19 +1691,25 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
* sort out already expired timers and reprogram the
* event device.
*/
- enqueue_hrtimer(timer, new_base);
+ enqueue_hrtimer(timer, new_c_base);
+ }
+
+ /* Re-queue pinned timers for non-hotplug usecase */
+ while ((node = timerqueue_getnext(&pinned))) {
+ timer = container_of(node, struct hrtimer, node);
+
+ timerqueue_del(&pinned, &timer->node);
+ enqueue_hrtimer(timer, old_c_base);
}
}
-static void migrate_hrtimers(int scpu)
+static void __migrate_hrtimers(int scpu, bool wait, bool remove_pinned)
{
struct hrtimer_cpu_base *old_base, *new_base;
+ unsigned long flags;
int i;
- BUG_ON(cpu_online(scpu));
- tick_cancel_sched_timer(scpu);
-
- local_irq_disable();
+ local_irq_save(flags);
old_base = &per_cpu(hrtimer_bases, scpu);
new_base = this_cpu_ptr(&hrtimer_bases);
/*
@@ -1670,8 +1720,8 @@ static void migrate_hrtimers(int scpu)
raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
- migrate_hrtimer_list(&old_base->clock_base[i],
- &new_base->clock_base[i]);
+ migrate_hrtimer_list(old_base, new_base, i, wait,
+ remove_pinned);
}
raw_spin_unlock(&old_base->lock);
@@ -1679,7 +1729,20 @@ static void migrate_hrtimers(int scpu)
/* Check, if we got expired work to do */
__hrtimer_peek_ahead_timers();
- local_irq_enable();
+ local_irq_restore(flags);
+}
+
+static void migrate_hrtimers(int scpu)
+{
+ BUG_ON(cpu_online(scpu));
+ tick_cancel_sched_timer(scpu);
+
+ __migrate_hrtimers(scpu, false, true);
+}
+
+void hrtimer_quiesce_cpu(void *cpup)
+{
+ __migrate_hrtimers(*(int *)cpup, true, false);
}
#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index a26036d37a38..0637823aa5a6 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -70,6 +70,7 @@ struct clock_data {
static struct hrtimer sched_clock_timer;
static int irqtime = -1;
+static int initialized;
core_param(irqtime, irqtime, int, 0400);
@@ -231,6 +232,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
pr_debug("Registered %pF as sched_clock source\n", read);
}
+int sched_clock_initialized(void)
+{
+ return initialized;
+}
+
void __init sched_clock_postinit(void)
{
/*
@@ -249,6 +255,8 @@ void __init sched_clock_postinit(void)
hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
sched_clock_timer.function = sched_clock_poll;
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
+
+ initialized = 1;
}
/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 22c57e191a23..651ff1a3a306 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -24,6 +24,7 @@
#include <linux/posix-timers.h>
#include <linux/perf_event.h>
#include <linux/context_tracking.h>
+#include <linux/rq_stats.h>
#include <asm/irq_regs.h>
@@ -31,6 +32,10 @@
#include <trace/events/timer.h>
+struct rq_data rq_info;
+struct workqueue_struct *rq_wq;
+spinlock_t rq_lock;
+
/*
* Per cpu nohz control structure
*/
@@ -41,6 +46,21 @@ static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
*/
static ktime_t last_jiffies_update;
+u64 jiffy_to_ktime_ns(u64 *now, u64 *jiffy_ktime_ns)
+{
+ u64 cur_jiffies;
+ unsigned long seq;
+
+ do {
+ seq = read_seqbegin(&jiffies_lock);
+ *now = ktime_get_ns();
+ *jiffy_ktime_ns = ktime_to_ns(last_jiffies_update);
+ cur_jiffies = get_jiffies_64();
+ } while (read_seqretry(&jiffies_lock, seq));
+
+ return cur_jiffies;
+}
+
struct tick_sched *tick_get_tick_sched(int cpu)
{
return &per_cpu(tick_cpu_sched, cpu);
@@ -143,7 +163,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
* when we go busy again does not account too much ticks.
*/
if (ts->tick_stopped) {
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
}
@@ -430,7 +450,7 @@ static void tick_nohz_update_jiffies(ktime_t now)
tick_do_update_jiffies64(now);
local_irq_restore(flags);
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
}
/*
@@ -701,7 +721,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
update_cpu_load_nohz();
calc_load_exit_idle();
- touch_softlockup_watchdog();
+ touch_softlockup_watchdog_sched();
/*
* Cancel the scheduled timer and restore the tick
*/
@@ -1049,6 +1069,51 @@ void tick_irq_enter(void)
* High resolution timer specific code
*/
#ifdef CONFIG_HIGH_RES_TIMERS
+static void update_rq_stats(void)
+{
+ unsigned long jiffy_gap = 0;
+ unsigned int rq_avg = 0;
+ unsigned long flags = 0;
+
+ jiffy_gap = jiffies - rq_info.rq_poll_last_jiffy;
+
+ if (jiffy_gap >= rq_info.rq_poll_jiffies) {
+
+ spin_lock_irqsave(&rq_lock, flags);
+
+ if (!rq_info.rq_avg)
+ rq_info.rq_poll_total_jiffies = 0;
+
+ rq_avg = nr_running() * 10;
+
+ if (rq_info.rq_poll_total_jiffies) {
+ rq_avg = (rq_avg * jiffy_gap) +
+ (rq_info.rq_avg *
+ rq_info.rq_poll_total_jiffies);
+ do_div(rq_avg,
+ rq_info.rq_poll_total_jiffies + jiffy_gap);
+ }
+
+ rq_info.rq_avg = rq_avg;
+ rq_info.rq_poll_total_jiffies += jiffy_gap;
+ rq_info.rq_poll_last_jiffy = jiffies;
+
+ spin_unlock_irqrestore(&rq_lock, flags);
+ }
+}
+
+static void wakeup_user(void)
+{
+ unsigned long jiffy_gap;
+
+ jiffy_gap = jiffies - rq_info.def_timer_last_jiffy;
+
+ if (jiffy_gap >= rq_info.def_timer_jiffies) {
+ rq_info.def_timer_last_jiffy = jiffies;
+ queue_work(rq_wq, &rq_info.def_timer_work);
+ }
+}
+
/*
* We rearm the timer until we get disabled by the idle code.
* Called with interrupts disabled.
@@ -1066,9 +1131,23 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
* Do not call, when we are not in irq context and have
* no valid regs pointer
*/
- if (regs)
+ if (regs) {
tick_sched_handle(ts, regs);
+ if (rq_info.init == 1 &&
+ tick_do_timer_cpu == smp_processor_id()) {
+ /*
+ * update run queue statistics
+ */
+ update_rq_stats();
+
+ /*
+ * wakeup user if needed
+ */
+ wakeup_user();
+ }
+ }
+
/* No need to reprogram if we are in idle or full dynticks mode */
if (unlikely(ts->tick_stopped))
return HRTIMER_NORESTART;
@@ -1181,3 +1260,8 @@ int tick_check_oneshot_change(int allow_nohz)
tick_nohz_switch_to_nohz();
return 0;
}
+
+ktime_t * get_next_event_cpu(unsigned int cpu)
+{
+ return &(per_cpu(tick_cpu_device, cpu).evtdev->next_event);
+}
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index d1798fa0c743..5ebefc7cfa4f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -94,12 +94,15 @@ struct tvec_base {
struct tvec tv5;
} ____cacheline_aligned;
+static inline void __run_timers(struct tvec_base *base);
static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
unsigned int sysctl_timer_migration = 1;
+struct tvec_base tvec_base_deferrable;
+
void timers_update_migration(bool update_nohz)
{
bool on = sysctl_timer_migration && tick_nohz_active;
@@ -135,18 +138,62 @@ int timer_migration_handler(struct ctl_table *table, int write,
}
static inline struct tvec_base *get_target_base(struct tvec_base *base,
- int pinned)
+ int pinned, u32 timer_flags)
{
+ if (!pinned && !(timer_flags & TIMER_PINNED_ON_CPU) &&
+ (timer_flags & TIMER_DEFERRABLE))
+ return &tvec_base_deferrable;
if (pinned || !base->migration_enabled)
return this_cpu_ptr(&tvec_bases);
return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
}
+
+static inline void __run_deferrable_timers(void)
+{
+ if (smp_processor_id() == tick_do_timer_cpu &&
+ time_after_eq(jiffies, tvec_base_deferrable.timer_jiffies))
+ __run_timers(&tvec_base_deferrable);
+}
+
+static inline void init_timer_deferrable_global(void)
+{
+ tvec_base_deferrable.cpu = nr_cpu_ids;
+ spin_lock_init(&tvec_base_deferrable.lock);
+ tvec_base_deferrable.timer_jiffies = jiffies;
+ tvec_base_deferrable.next_timer = tvec_base_deferrable.timer_jiffies;
+}
+
+static inline struct tvec_base *get_timer_base(u32 timer_flags)
+{
+ if (!(timer_flags & TIMER_PINNED_ON_CPU) &&
+ timer_flags & TIMER_DEFERRABLE)
+ return &tvec_base_deferrable;
+ else
+ return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK);
+}
#else
static inline struct tvec_base *get_target_base(struct tvec_base *base,
- int pinned)
+ int pinned, u32 timer_flags)
{
return this_cpu_ptr(&tvec_bases);
}
+
+static inline void __run_deferrable_timers(void)
+{
+}
+
+static inline void init_timer_deferrable_global(void)
+{
+ /*
+ * initialize cpu unbound deferrable timer base only when CONFIG_SMP.
+ * UP kernel handles the timers with cpu 0 timer base.
+ */
+}
+
+static inline struct tvec_base *get_timer_base(u32 timer_flags)
+{
+ return per_cpu_ptr(&tvec_bases, timer_flags & TIMER_CPUMASK);
+}
#endif
static unsigned long round_jiffies_common(unsigned long j, int cpu,
@@ -768,7 +815,7 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
struct tvec_base *base;
if (!(tf & TIMER_MIGRATING)) {
- base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
+ base = get_timer_base(tf);
spin_lock_irqsave(&base->lock, *flags);
if (timer->flags == tf)
return base;
@@ -797,7 +844,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
debug_activate(timer, expires);
- new_base = get_target_base(base, pinned);
+ new_base = get_target_base(base, pinned, timer->flags);
if (base != new_base) {
/*
@@ -819,6 +866,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
}
}
+ if (pinned == TIMER_PINNED)
+ timer->flags |= TIMER_PINNED_ON_CPU;
+ else
+ timer->flags &= ~TIMER_PINNED_ON_CPU;
timer->expires = expires;
internal_add_timer(base, timer);
@@ -1000,6 +1051,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
(timer->flags & ~TIMER_BASEMASK) | cpu);
}
+ timer->flags |= TIMER_PINNED_ON_CPU;
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
spin_unlock_irqrestore(&base->lock, flags);
@@ -1433,6 +1485,8 @@ static void run_timer_softirq(struct softirq_action *h)
{
struct tvec_base *base = this_cpu_ptr(&tvec_bases);
+ __run_deferrable_timers();
+
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
}
@@ -1566,56 +1620,82 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
-#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(struct tvec_base *new_base, struct hlist_head *head)
+#if defined(CONFIG_HOTPLUG_CPU)
+static void migrate_timer_list(struct tvec_base *new_base,
+ struct hlist_head *head, bool remove_pinned)
{
struct timer_list *timer;
int cpu = new_base->cpu;
+ struct hlist_node *n;
+ int is_pinned;
- while (!hlist_empty(head)) {
- timer = hlist_entry(head->first, struct timer_list, entry);
- /* We ignore the accounting on the dying cpu */
- detach_timer(timer, false);
+ hlist_for_each_entry_safe(timer, n, head, entry) {
+ is_pinned = timer->flags & TIMER_PINNED_ON_CPU;
+ if (!remove_pinned && is_pinned)
+ continue;
+
+ detach_if_pending(timer, get_timer_base(timer->flags), false);
timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
internal_add_timer(new_base, timer);
}
}
-static void migrate_timers(int cpu)
+static void __migrate_timers(int cpu, bool remove_pinned)
{
struct tvec_base *old_base;
struct tvec_base *new_base;
+ unsigned long flags;
int i;
- BUG_ON(cpu_online(cpu));
old_base = per_cpu_ptr(&tvec_bases, cpu);
new_base = get_cpu_ptr(&tvec_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
*/
- spin_lock_irq(&new_base->lock);
+ spin_lock_irqsave(&new_base->lock, flags);
spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
- BUG_ON(old_base->running_timer);
+ /*
+ * If we're in the hotplug path, kill the system if there's a running
+ * timer. It's ok to have a running timer in the isolation case - the
+ * currently running or just expired timers are off of the timer wheel
+ * and so everything else can be migrated off.
+ */
+ if (!cpu_online(cpu))
+ BUG_ON(old_base->running_timer);
for (i = 0; i < TVR_SIZE; i++)
- migrate_timer_list(new_base, old_base->tv1.vec + i);
+ migrate_timer_list(new_base, old_base->tv1.vec + i,
+ remove_pinned);
for (i = 0; i < TVN_SIZE; i++) {
- migrate_timer_list(new_base, old_base->tv2.vec + i);
- migrate_timer_list(new_base, old_base->tv3.vec + i);
- migrate_timer_list(new_base, old_base->tv4.vec + i);
- migrate_timer_list(new_base, old_base->tv5.vec + i);
+ migrate_timer_list(new_base, old_base->tv2.vec + i,
+ remove_pinned);
+ migrate_timer_list(new_base, old_base->tv3.vec + i,
+ remove_pinned);
+ migrate_timer_list(new_base, old_base->tv4.vec + i,
+ remove_pinned);
+ migrate_timer_list(new_base, old_base->tv5.vec + i,
+ remove_pinned);
}
- old_base->active_timers = 0;
- old_base->all_timers = 0;
-
spin_unlock(&old_base->lock);
- spin_unlock_irq(&new_base->lock);
+ spin_unlock_irqrestore(&new_base->lock, flags);
put_cpu_ptr(&tvec_bases);
}
+/* Migrate timers from 'cpu' to this_cpu */
+static void migrate_timers(int cpu)
+{
+ BUG_ON(cpu_online(cpu));
+ __migrate_timers(cpu, true);
+}
+
+void timer_quiesce_cpu(void *cpup)
+{
+ __migrate_timers(*(int *)cpup, false);
+}
+
static int timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -1656,6 +1736,8 @@ static void __init init_timer_cpus(void)
for_each_possible_cpu(cpu)
init_timer_cpu(cpu);
+
+ init_timer_deferrable_global();
}
void __init init_timers(void)