Diffstat (limited to 'kernel/time')
-rw-r--r--  kernel/time/alarmtimer.c        |  18
-rw-r--r--  kernel/time/hrtimer.c           |  21
-rw-r--r--  kernel/time/posix-clock.c       |  34
-rw-r--r--  kernel/time/posix-cpu-timers.c  |   2
-rw-r--r--  kernel/time/posix-timers.c      |  63
-rw-r--r--  kernel/time/sched_clock.c       |   5
-rw-r--r--  kernel/time/tick-broadcast.c    |  11
-rw-r--r--  kernel/time/tick-sched.c        |  31
-rw-r--r--  kernel/time/time.c              |   6
-rw-r--r--  kernel/time/timekeeping.c       | 161
-rw-r--r--  kernel/time/timer.c             |  15
-rw-r--r--  kernel/time/timer_list.c        |   8
12 files changed, 276 insertions, 99 deletions
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 7fbba635a549..e78480b81f8d 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -339,7 +339,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- start = ktime_add(start, base->gettime());
+ start = ktime_add_safe(start, base->gettime());
alarm_start(alarm, start);
}
EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -425,7 +425,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
overrun++;
}
- alarm->node.expires = ktime_add(alarm->node.expires, interval);
+ alarm->node.expires = ktime_add_safe(alarm->node.expires, interval);
return overrun;
}
EXPORT_SYMBOL_GPL(alarm_forward);
@@ -611,13 +611,22 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
/* start the timer */
timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
+
+ /*
+ * Rate limit to the tick as a hot fix to prevent DOS. Will be
+ * mopped up later.
+ */
+ if (timr->it.alarm.interval.tv64 &&
+ ktime_to_ns(timr->it.alarm.interval) < TICK_NSEC)
+ timr->it.alarm.interval = ktime_set(0, TICK_NSEC);
+
exp = timespec_to_ktime(new_setting->it_value);
/* Convert (if necessary) to absolute time */
if (flags != TIMER_ABSTIME) {
ktime_t now;
now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
- exp = ktime_add(now, exp);
+ exp = ktime_add_safe(now, exp);
}
alarm_start(&timr->it.alarm.alarmtimer, exp);
@@ -764,7 +773,8 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
/* Convert (if necessary) to absolute time */
if (flags != TIMER_ABSTIME) {
ktime_t now = alarm_bases[type].gettime();
- exp = ktime_add(now, exp);
+
+ exp = ktime_add_safe(now, exp);
}
if (alarmtimer_do_nsleep(&alarm, exp))
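
The alarmtimer hunks above replace plain ktime_add() on user-controlled expiry values with ktime_add_safe(), which saturates instead of wrapping when the signed 64-bit nanosecond value would overflow (the hrtimer.c hunk below shows it clamping against KTIME_SEC_MAX). A minimal standalone sketch of that saturating-add idea, not the kernel's exact implementation:

    #include <stdint.h>
    #include <stdio.h>

    #define KTIME_MAX INT64_MAX          /* stand-in for the kernel's limit */

    /* Saturating add for non-negative 64-bit nanosecond values:
     * clamp to the maximum instead of wrapping to a negative number. */
    static int64_t ktime_add_safe_sketch(int64_t lhs, int64_t rhs)
    {
        uint64_t res = (uint64_t)lhs + (uint64_t)rhs;

        return res > (uint64_t)KTIME_MAX ? KTIME_MAX : (int64_t)res;
    }

    int main(void)
    {
        /* "now" plus a huge relative timeout: wraps with a plain add, clamps here */
        printf("%lld\n", (long long)ktime_add_safe_sketch(INT64_MAX - 5, 100));
        return 0;
    }
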
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 17f7bcff1e02..8d2e7074860b 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -312,7 +312,7 @@ EXPORT_SYMBOL_GPL(__ktime_divns);
*/
ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
{
- ktime_t res = ktime_add(lhs, rhs);
+ ktime_t res = ktime_add_unsafe(lhs, rhs);
/*
* We use KTIME_SEC_MAX here, the maximum timeout which we can
@@ -435,6 +435,7 @@ void destroy_hrtimer_on_stack(struct hrtimer *timer)
{
debug_object_free(timer, &hrtimer_debug_descr);
}
+EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
#else
static inline void debug_hrtimer_init(struct hrtimer *timer) { }
@@ -669,7 +670,9 @@ static void hrtimer_reprogram(struct hrtimer *timer,
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base)
{
base->expires_next.tv64 = KTIME_MAX;
+ base->hang_detected = 0;
base->hres_active = 0;
+ base->next_timer = NULL;
}
/*
@@ -984,7 +987,7 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
* relative (HRTIMER_MODE_REL)
*/
void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
- unsigned long delta_ns, const enum hrtimer_mode mode)
+ u64 delta_ns, const enum hrtimer_mode mode)
{
struct hrtimer_clock_base *base, *new_base;
unsigned long flags;
@@ -1137,7 +1140,12 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
cpu_base = raw_cpu_ptr(&hrtimer_bases);
- if (clock_id == CLOCK_REALTIME && mode != HRTIMER_MODE_ABS)
+ /*
+ * POSIX magic: Relative CLOCK_REALTIME timers are not affected by
+ * clock modifications, so they need to become CLOCK_MONOTONIC to
+ * ensure POSIX compliance.
+ */
+ if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
clock_id = CLOCK_MONOTONIC;
base = hrtimer_clockid_to_base(clock_id);
@@ -1553,7 +1561,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
struct restart_block *restart;
struct hrtimer_sleeper t;
int ret = 0;
- unsigned long slack;
+ u64 slack;
slack = current->timer_slack_ns;
if (dl_task(current) || rt_task(current))
@@ -1615,6 +1623,7 @@ static void init_hrtimers_cpu(int cpu)
timerqueue_init_head(&cpu_base->clock_base[i].active);
}
+ cpu_base->active_bases = 0;
cpu_base->cpu = cpu;
hrtimer_init_hres(cpu_base);
}
@@ -1729,7 +1738,7 @@ void __init hrtimers_init(void)
* @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
*/
int __sched
-schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
+schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
const enum hrtimer_mode mode, int clock)
{
struct hrtimer_sleeper t;
@@ -1797,7 +1806,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
*
* Returns 0 when the timer has expired otherwise -EINTR
*/
-int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
const enum hrtimer_mode mode)
{
return schedule_hrtimeout_range_clock(expires, delta, mode,
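
Several of the hrtimer hunks widen slack and range parameters from unsigned long to u64: on 32-bit architectures unsigned long is only 32 bits, so nanosecond slacks above roughly 4.29 seconds would silently truncate. A standalone illustration of the truncation the wider type avoids (values are made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t slack_ns = 5ULL * 1000000000ULL;      /* 5 seconds of slack */
        uint32_t as_32bit_ulong = (uint32_t)slack_ns;  /* what a 32-bit unsigned long keeps */

        printf("intended: %llu ns, truncated: %u ns\n",
               (unsigned long long)slack_ns, as_32bit_ulong);
        return 0;
    }
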
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 9cff0ab82b63..e24008c098c6 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -300,14 +300,17 @@ out:
static int pc_clock_gettime(clockid_t id, struct timespec *ts)
{
struct posix_clock_desc cd;
+ struct timespec64 ts64;
int err;
err = get_clock_desc(id, &cd);
if (err)
return err;
- if (cd.clk->ops.clock_gettime)
- err = cd.clk->ops.clock_gettime(cd.clk, ts);
+ if (cd.clk->ops.clock_gettime) {
+ err = cd.clk->ops.clock_gettime(cd.clk, &ts64);
+ *ts = timespec64_to_timespec(ts64);
+ }
else
err = -EOPNOTSUPP;
@@ -319,14 +322,17 @@ static int pc_clock_gettime(clockid_t id, struct timespec *ts)
static int pc_clock_getres(clockid_t id, struct timespec *ts)
{
struct posix_clock_desc cd;
+ struct timespec64 ts64;
int err;
err = get_clock_desc(id, &cd);
if (err)
return err;
- if (cd.clk->ops.clock_getres)
- err = cd.clk->ops.clock_getres(cd.clk, ts);
+ if (cd.clk->ops.clock_getres) {
+ err = cd.clk->ops.clock_getres(cd.clk, &ts64);
+ *ts = timespec64_to_timespec(ts64);
+ }
else
err = -EOPNOTSUPP;
@@ -337,6 +343,7 @@ static int pc_clock_getres(clockid_t id, struct timespec *ts)
static int pc_clock_settime(clockid_t id, const struct timespec *ts)
{
+ struct timespec64 ts64 = timespec_to_timespec64(*ts);
struct posix_clock_desc cd;
int err;
@@ -350,7 +357,7 @@ static int pc_clock_settime(clockid_t id, const struct timespec *ts)
}
if (cd.clk->ops.clock_settime)
- err = cd.clk->ops.clock_settime(cd.clk, ts);
+ err = cd.clk->ops.clock_settime(cd.clk, &ts64);
else
err = -EOPNOTSUPP;
out:
@@ -403,29 +410,36 @@ static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts)
{
clockid_t id = kit->it_clock;
struct posix_clock_desc cd;
+ struct itimerspec64 ts64;
if (get_clock_desc(id, &cd))
return;
- if (cd.clk->ops.timer_gettime)
- cd.clk->ops.timer_gettime(cd.clk, kit, ts);
-
+ if (cd.clk->ops.timer_gettime) {
+ cd.clk->ops.timer_gettime(cd.clk, kit, &ts64);
+ *ts = itimerspec64_to_itimerspec(&ts64);
+ }
put_clock_desc(&cd);
}
static int pc_timer_settime(struct k_itimer *kit, int flags,
struct itimerspec *ts, struct itimerspec *old)
{
+ struct itimerspec64 ts64 = itimerspec_to_itimerspec64(ts);
clockid_t id = kit->it_clock;
struct posix_clock_desc cd;
+ struct itimerspec64 old64;
int err;
err = get_clock_desc(id, &cd);
if (err)
return err;
- if (cd.clk->ops.timer_settime)
- err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old);
+ if (cd.clk->ops.timer_settime) {
+ err = cd.clk->ops.timer_settime(cd.clk, kit, flags, &ts64, &old64);
+ if (old)
+ *old = itimerspec64_to_itimerspec(&old64);
+ }
else
err = -EOPNOTSUPP;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 80016b329d94..8fc68e60c795 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -103,7 +103,7 @@ static void bump_cpu_timer(struct k_itimer *timer,
continue;
timer->it.cpu.expires += incr;
- timer->it_overrun += 1 << i;
+ timer->it_overrun += 1LL << i;
delta -= incr;
}
}
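
The posix-cpu-timers one-liner turns 1 << i into 1LL << i: it_overrun is a 64-bit counter, and once the doubling loop reaches i >= 31 a plain int shift is undefined behaviour and cannot represent the intended value anyway. A small standalone comparison:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int i = 32;
        int64_t wide = 1LL << i;        /* well-defined: 4294967296 */
        /* int64_t narrow = 1 << i;        undefined: shift count >= width of int */

        printf("1LL << %d = %lld\n", i, (long long)wide);
        return 0;
    }
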
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index f2826c35e918..0e6ed2e7d066 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -355,6 +355,17 @@ static __init int init_posix_timers(void)
__initcall(init_posix_timers);
+/*
+ * The siginfo si_overrun field and the return value of timer_getoverrun(2)
+ * are of type int. Clamp the overrun value to INT_MAX
+ */
+static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval)
+{
+ s64 sum = timr->it_overrun_last + (s64)baseval;
+
+ return sum > (s64)INT_MAX ? INT_MAX : (int)sum;
+}
+
static void schedule_next_timer(struct k_itimer *timr)
{
struct hrtimer *timer = &timr->it.real.timer;
@@ -362,12 +373,11 @@ static void schedule_next_timer(struct k_itimer *timr)
if (timr->it.real.interval.tv64 == 0)
return;
- timr->it_overrun += (unsigned int) hrtimer_forward(timer,
- timer->base->get_time(),
- timr->it.real.interval);
+ timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
+ timr->it.real.interval);
timr->it_overrun_last = timr->it_overrun;
- timr->it_overrun = -1;
+ timr->it_overrun = -1LL;
++timr->it_requeue_pending;
hrtimer_restart(timer);
}
@@ -396,7 +406,7 @@ void do_schedule_next_timer(struct siginfo *info)
else
schedule_next_timer(timr);
- info->si_overrun += timr->it_overrun_last;
+ info->si_overrun = timer_overrun_to_int(timr, info->si_overrun);
}
if (timr)
@@ -491,8 +501,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
now = ktime_add(now, kj);
}
#endif
- timr->it_overrun += (unsigned int)
- hrtimer_forward(timer, now,
+ timr->it_overrun += hrtimer_forward(timer, now,
timr->it.real.interval);
ret = HRTIMER_RESTART;
++timr->it_requeue_pending;
@@ -507,17 +516,22 @@ static struct pid *good_sigevent(sigevent_t * event)
{
struct task_struct *rtn = current->group_leader;
- if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
- (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
- !same_thread_group(rtn, current) ||
- (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
- return NULL;
-
- if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
- ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
+ switch (event->sigev_notify) {
+ case SIGEV_SIGNAL | SIGEV_THREAD_ID:
+ rtn = find_task_by_vpid(event->sigev_notify_thread_id);
+ if (!rtn || !same_thread_group(rtn, current))
+ return NULL;
+ /* FALLTHRU */
+ case SIGEV_SIGNAL:
+ case SIGEV_THREAD:
+ if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
+ return NULL;
+ /* FALLTHRU */
+ case SIGEV_NONE:
+ return task_pid(rtn);
+ default:
return NULL;
-
- return task_pid(rtn);
+ }
}
void posix_timers_register_clock(const clockid_t clock_id,
@@ -628,7 +642,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
it_id_set = IT_ID_SET;
new_timer->it_id = (timer_t) new_timer_id;
new_timer->it_clock = which_clock;
- new_timer->it_overrun = -1;
+ new_timer->it_overrun = -1LL;
if (timer_event_spec) {
if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
@@ -745,8 +759,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
/* interval timer ? */
if (iv.tv64)
cur_setting->it_interval = ktime_to_timespec(iv);
- else if (!hrtimer_active(timer) &&
- (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
+ else if (!hrtimer_active(timer) && timr->it_sigev_notify != SIGEV_NONE)
return;
now = timer->base->get_time();
@@ -757,8 +770,8 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
* expiry is > now.
*/
if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
- (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
- timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
+ timr->it_sigev_notify == SIGEV_NONE))
+ timr->it_overrun += hrtimer_forward(timer, now, iv);
remaining = __hrtimer_expires_remaining_adjusted(timer, now);
/* Return 0 only, when the timer is expired and not pending */
@@ -767,7 +780,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
* A single shot SIGEV_NONE timer must return 0, when
* it is expired !
*/
- if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)
+ if (timr->it_sigev_notify != SIGEV_NONE)
cur_setting->it_value.tv_nsec = 1;
} else
cur_setting->it_value = ktime_to_timespec(remaining);
@@ -820,7 +833,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
if (!timr)
return -EINVAL;
- overrun = timr->it_overrun_last;
+ overrun = timer_overrun_to_int(timr, 0);
unlock_timer(timr, flags);
return overrun;
@@ -865,7 +878,7 @@ common_timer_set(struct k_itimer *timr, int flags,
timr->it.real.interval = timespec_to_ktime(new_setting->it_interval);
/* SIGEV_NONE timers are not queued ! See common_timer_get */
- if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
+ if (timr->it_sigev_notify == SIGEV_NONE) {
/* Setup correct expiry time for relative timers */
if (mode == HRTIMER_MODE_REL) {
hrtimer_add_expires(timer, timer->base->get_time());
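
The posix-timers changes stop narrowing hrtimer_forward() results through an (unsigned int) cast and instead keep it_overrun as a 64-bit count, clamping only at the user-visible edge via timer_overrun_to_int(), since siginfo's si_overrun and timer_getoverrun(2) return an int. A standalone sketch of why the clamp beats the old cast; the helper mirrors the diff's logic but is not the kernel function itself:

    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    static int overrun_to_int(int64_t overrun_last, int baseval)
    {
        int64_t sum = overrun_last + (int64_t)baseval;

        return sum > (int64_t)INT_MAX ? INT_MAX : (int)sum;
    }

    int main(void)
    {
        int64_t overruns = 3000000000LL;          /* more than INT_MAX overruns */

        printf("old cast: %d, clamped: %d\n",
               (int)(unsigned int)overruns,       /* wraps negative on typical systems */
               overrun_to_int(overruns, 0));      /* saturates at INT_MAX */
        return 0;
    }
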
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index a26036d37a38..382b159d8592 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -205,6 +205,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
update_clock_read_data(&rd);
+ if (sched_clock_timer.function != NULL) {
+ /* update timeout for clock wrap */
+ hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
+ }
+
r = rate;
if (r >= 4000000) {
r /= 1000000;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f6aae7977824..22d7454b387b 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -610,6 +610,14 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
now = ktime_get();
/* Find all expired events */
for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
+ /*
+ * Required for !SMP because for_each_cpu() unconditionally
+ * reports CPU0 as set on UP kernels.
+ */
+ if (!IS_ENABLED(CONFIG_SMP) &&
+ cpumask_empty(tick_broadcast_oneshot_mask))
+ break;
+
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev->next_event.tv64 <= now.tv64) {
cpumask_set_cpu(cpu, tmpmask);
@@ -871,6 +879,9 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
int cpu = smp_processor_id();
+ if (!bc)
+ return;
+
/* Set it up only once ! */
if (bc->event_handler != tick_handle_oneshot_broadcast) {
int was_periodic = clockevent_state_periodic(bc);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 22c57e191a23..4cd1145ec260 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -568,6 +568,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}
+static inline bool local_timer_softirq_pending(void)
+{
+ return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
+}
+
static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ktime_t now, int cpu)
{
@@ -584,8 +589,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
} while (read_seqretry(&jiffies_lock, seq));
ts->last_jiffies = basejiff;
- if (rcu_needs_cpu(basemono, &next_rcu) ||
- arch_needs_cpu() || irq_work_needs_cpu()) {
+ /*
+ * Keep the periodic tick when RCU, architecture or irq_work
+ * requests it.
+ * Aside from that, check whether the local timer softirq is
+ * pending. If so, it's a bad idea to call get_next_timer_interrupt(),
+ * because there is an already expired timer, so it will request
+ * immediate expiry, which rearms the hardware timer with a
+ * minimal delta which brings us back to this place
+ * immediately. Lather, rinse and repeat...
+ */
+ if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
+ irq_work_needs_cpu() || local_timer_softirq_pending()) {
next_tick = basemono + TICK_NSEC;
} else {
/*
@@ -870,6 +885,18 @@ ktime_t tick_nohz_get_sleep_length(void)
return ts->sleep_length;
}
+/**
+ * tick_nohz_get_idle_calls - return the current idle calls counter value
+ *
+ * Called from the schedutil frequency scaling governor in scheduler context.
+ */
+unsigned long tick_nohz_get_idle_calls(void)
+{
+ struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+ return ts->idle_calls;
+}
+
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
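
The tick_nohz_stop_sched_tick() hunk keeps the periodic tick whenever the timer softirq is still pending, detected by testing the pending-softirq bitmask against the TIMER_SOFTIRQ bit. A trivial standalone sketch of that bit test (the softirq number here is illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    #define BIT(n)          (1UL << (n))
    #define TIMER_SOFTIRQ   1              /* illustrative softirq number */

    static bool timer_softirq_pending(unsigned long pending)
    {
        return pending & BIT(TIMER_SOFTIRQ);
    }

    int main(void)
    {
        unsigned long pending = BIT(TIMER_SOFTIRQ);  /* pretend it was just raised */

        /* When this is true, the idle code keeps the next tick one TICK_NSEC away
         * instead of asking for an already-expired timer's immediate expiry. */
        printf("keep tick: %s\n", timer_softirq_pending(pending) ? "yes" : "no");
        return 0;
    }
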
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 86751c68e08d..de70ac1f84d0 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -28,6 +28,7 @@
*/
#include <linux/export.h>
+#include <linux/kernel.h>
#include <linux/timex.h>
#include <linux/capability.h>
#include <linux/timekeeper_internal.h>
@@ -258,9 +259,10 @@ unsigned int jiffies_to_msecs(const unsigned long j)
return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
#else
# if BITS_PER_LONG == 32
- return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;
+ return (HZ_TO_MSEC_MUL32 * j + (1ULL << HZ_TO_MSEC_SHR32) - 1) >>
+ HZ_TO_MSEC_SHR32;
# else
- return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN;
+ return DIV_ROUND_UP(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN);
# endif
#endif
}
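
The jiffies_to_msecs() change switches from rounding down to rounding up, so a non-zero jiffies count can never be reported as 0 ms. A worked standalone example, assuming HZ=1024 so the HZ_TO_MSEC fraction reduces to 125/128:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        unsigned long j = 1;                 /* one jiffy at an assumed HZ=1024 */
        unsigned long num = 125, den = 128;  /* 1000/HZ with gcd(1000,1024)=8 factored out */

        printf("old: %lu ms, new: %lu ms\n",
               (j * num) / den,              /* 0 ms: a real delay disappears   */
               DIV_ROUND_UP(j * num, den));  /* 1 ms: rounds up, stays non-zero */
        return 0;
    }
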
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4ff237dbc006..7902ecbce8ec 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -70,6 +70,10 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
tk->xtime_sec++;
}
+ while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
+ tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+ tk->raw_sec++;
+ }
}
static inline struct timespec64 tk_xtime(struct timekeeper *tk)
@@ -116,6 +120,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
tk->offs_boot = ktime_add(tk->offs_boot, delta);
}
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference and the
+ * clock reference passed to the read function. This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+ struct clocksource *clock = READ_ONCE(tkr->clock);
+
+ return clock->read(clock);
+}
+
#ifdef CONFIG_DEBUG_TIMEKEEPING
#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
@@ -173,7 +197,7 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
*/
do {
seq = read_seqcount_begin(&tk_core.seq);
- now = tkr->read(tkr->clock);
+ now = tk_clock_read(tkr);
last = tkr->cycle_last;
mask = tkr->mask;
max = tkr->clock->max_cycles;
@@ -207,7 +231,7 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
cycle_t cycle_now, delta;
/* read clocksource */
- cycle_now = tkr->read(tkr->clock);
+ cycle_now = tk_clock_read(tkr);
/* calculate the delta since the last update_wall_time */
delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -235,12 +259,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
old_clock = tk->tkr_mono.clock;
tk->tkr_mono.clock = clock;
- tk->tkr_mono.read = clock->read;
tk->tkr_mono.mask = clock->mask;
- tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+ tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
tk->tkr_raw.clock = clock;
- tk->tkr_raw.read = clock->read;
tk->tkr_raw.mask = clock->mask;
tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
@@ -259,18 +281,19 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
/* Go back from cycles -> shifted ns */
tk->xtime_interval = (u64) interval * clock->mult;
tk->xtime_remainder = ntpinterval - tk->xtime_interval;
- tk->raw_interval =
- ((u64) interval * clock->mult) >> clock->shift;
+ tk->raw_interval = interval * clock->mult;
/* if changing clocks, convert xtime_nsec shift units */
if (old_clock) {
int shift_change = clock->shift - old_clock->shift;
- if (shift_change < 0)
+ if (shift_change < 0) {
tk->tkr_mono.xtime_nsec >>= -shift_change;
- else
+ tk->tkr_raw.xtime_nsec >>= -shift_change;
+ } else {
tk->tkr_mono.xtime_nsec <<= shift_change;
+ tk->tkr_raw.xtime_nsec <<= shift_change;
+ }
}
- tk->tkr_raw.xtime_nsec = 0;
tk->tkr_mono.shift = clock->shift;
tk->tkr_raw.shift = clock->shift;
@@ -298,17 +321,34 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
static inline u32 arch_gettimeoffset(void) { return 0; }
#endif
+static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
+ cycle_t delta)
+{
+ u64 nsec;
+
+ nsec = delta * tkr->mult + tkr->xtime_nsec;
+ nsec >>= tkr->shift;
+
+ /* If arch requires, add in get_arch_timeoffset() */
+ return nsec + arch_gettimeoffset();
+}
+
static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
{
cycle_t delta;
- s64 nsec;
delta = timekeeping_get_delta(tkr);
+ return timekeeping_delta_to_ns(tkr, delta);
+}
- nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift;
+static inline s64 timekeeping_cycles_to_ns(struct tk_read_base *tkr,
+ cycle_t cycles)
+{
+ cycle_t delta;
- /* If arch requires, add in get_arch_timeoffset() */
- return nsec + arch_gettimeoffset();
+ /* calculate the delta since the last update_wall_time */
+ delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+ return timekeeping_delta_to_ns(tkr, delta);
}
/**
@@ -385,8 +425,11 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base);
- now += clocksource_delta(tkr->read(tkr->clock),
- tkr->cycle_last, tkr->mask);
+ now += timekeeping_delta_to_ns(tkr,
+ clocksource_delta(
+ tk_clock_read(tkr),
+ tkr->cycle_last,
+ tkr->mask));
} while (read_seqcount_retry(&tkf->seq, seq));
return now;
@@ -404,6 +447,35 @@ u64 ktime_get_raw_fast_ns(void)
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
+/**
+ * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
+ *
+ * To keep it NMI safe since we're accessing from tracing, we're not using a
+ * separate timekeeper with updates to monotonic clock and boot offset
+ * protected with seqlocks. This has the following minor side effects:
+ *
+ * (1) It's possible that a timestamp is taken after the boot offset is updated
+ * but before the timekeeper is updated. If this happens, the new boot offset
+ * is added to the old timekeeping, making the clock appear to update slightly
+ * earlier:
+ * CPU 0 CPU 1
+ * timekeeping_inject_sleeptime64()
+ * __timekeeping_inject_sleeptime(tk, delta);
+ * timestamp();
+ * timekeeping_update(tk, TK_CLEAR_NTP...);
+ *
+ * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
+ * partially updated. Since the tk->offs_boot update is a rare event, this
+ * should be a rare occurrence which postprocessing should be able to handle.
+ */
+u64 notrace ktime_get_boot_fast_ns(void)
+{
+ struct timekeeper *tk = &tk_core.timekeeper;
+
+ return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
+}
+EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
+
/* Suspend-time cycles value for halted fast timekeeper. */
static cycle_t cycles_at_suspend;
@@ -412,6 +484,10 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
return cycles_at_suspend;
}
+static struct clocksource dummy_clock = {
+ .read = dummy_clock_read,
+};
+
/**
* halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
* @tk: Timekeeper to snapshot.
@@ -428,13 +504,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
struct tk_read_base *tkr = &tk->tkr_mono;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
- cycles_at_suspend = tkr->read(tkr->clock);
- tkr_dummy.read = dummy_clock_read;
+ cycles_at_suspend = tk_clock_read(tkr);
+ tkr_dummy.clock = &dummy_clock;
update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
tkr = &tk->tkr_raw;
memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
- tkr_dummy.read = dummy_clock_read;
+ tkr_dummy.clock = &dummy_clock;
update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
}
@@ -546,9 +622,6 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
nsec = (u32) tk->wall_to_monotonic.tv_nsec;
tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
- /* Update the monotonic raw base */
- tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
-
/*
* The sum of the nanoseconds portions of xtime and
* wall_to_monotonic can be greater/equal one second. Take
@@ -558,6 +631,9 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
if (nsec >= NSEC_PER_SEC)
seconds++;
tk->ktime_sec = seconds;
+
+ /* Update the monotonic raw base */
+ tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}
/* must hold timekeeper_lock */
@@ -598,11 +674,9 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
*/
static void timekeeping_forward_now(struct timekeeper *tk)
{
- struct clocksource *clock = tk->tkr_mono.clock;
cycle_t cycle_now, delta;
- s64 nsec;
- cycle_now = tk->tkr_mono.read(clock);
+ cycle_now = tk_clock_read(&tk->tkr_mono);
delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
@@ -612,10 +686,13 @@ static void timekeeping_forward_now(struct timekeeper *tk)
/* If arch requires, add in get_arch_timeoffset() */
tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
- tk_normalize_xtime(tk);
- nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
- timespec64_add_ns(&tk->raw_time, nsec);
+ tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
+
+ /* If arch requires, add in get_arch_timeoffset() */
+ tk->tkr_raw.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_raw.shift;
+
+ tk_normalize_xtime(tk);
}
/**
@@ -1109,19 +1186,18 @@ int timekeeping_notify(struct clocksource *clock)
void getrawmonotonic64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
- struct timespec64 ts64;
unsigned long seq;
s64 nsecs;
do {
seq = read_seqcount_begin(&tk_core.seq);
+ ts->tv_sec = tk->raw_sec;
nsecs = timekeeping_get_ns(&tk->tkr_raw);
- ts64 = tk->raw_time;
} while (read_seqcount_retry(&tk_core.seq, seq));
- timespec64_add_ns(&ts64, nsecs);
- *ts = ts64;
+ ts->tv_nsec = 0;
+ timespec64_add_ns(ts, nsecs);
}
EXPORT_SYMBOL(getrawmonotonic64);
@@ -1245,8 +1321,7 @@ void __init timekeeping_init(void)
tk_setup_internals(tk, clock);
tk_set_xtime(tk, &now);
- tk->raw_time.tv_sec = 0;
- tk->raw_time.tv_nsec = 0;
+ tk->raw_sec = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0)
boot = tk_xtime(tk);
@@ -1385,7 +1460,7 @@ void timekeeping_resume(void)
* The less preferred source will only be tried if there is no better
* usable source. The rtc part is handled separately in rtc core code.
*/
- cycle_now = tk->tkr_mono.read(clock);
+ cycle_now = tk_clock_read(&tk->tkr_mono);
if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
cycle_now > tk->tkr_mono.cycle_last) {
u64 num, max = ULLONG_MAX;
@@ -1726,7 +1801,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
unsigned int *clock_set)
{
cycle_t interval = tk->cycle_interval << shift;
- u64 raw_nsecs;
+ u64 snsec_per_sec;
/* If the offset is smaller than a shifted interval, do nothing */
if (offset < interval)
@@ -1741,14 +1816,12 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
*clock_set |= accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */
- raw_nsecs = (u64)tk->raw_interval << shift;
- raw_nsecs += tk->raw_time.tv_nsec;
- if (raw_nsecs >= NSEC_PER_SEC) {
- u64 raw_secs = raw_nsecs;
- raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
- tk->raw_time.tv_sec += raw_secs;
+ tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+ snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+ while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+ tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+ tk->raw_sec++;
}
- tk->raw_time.tv_nsec = raw_nsecs;
/* Accumulate error between NTP and clock interval */
tk->ntp_error += tk->ntp_tick << shift;
@@ -1780,7 +1853,7 @@ void update_wall_time(void)
#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
offset = real_tk->cycle_interval;
#else
- offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+ offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
#endif
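
The bulk of the timekeeping.c diff moves CLOCK_MONOTONIC_RAW from a struct timespec64 (tk->raw_time) to the same shifted-nanosecond representation the monotonic clock uses: tkr_raw.xtime_nsec holds left-shifted nanoseconds and tk->raw_sec counts whole seconds, so accumulation and normalization follow one pattern. A standalone sketch of that accumulate-then-normalize scheme, with illustrative names and values rather than the kernel's:

    #include <stdint.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    struct raw_base {
        uint64_t xtime_nsec;   /* nanoseconds left-shifted by 'shift' */
        uint64_t raw_sec;      /* whole seconds accumulated so far    */
        uint32_t mult, shift;  /* clocksource cycles -> shifted ns    */
    };

    /* Accumulate a cycle delta, then spill whole seconds out of the shifted field. */
    static void raw_accumulate(struct raw_base *b, uint64_t cycle_delta)
    {
        uint64_t snsec_per_sec = NSEC_PER_SEC << b->shift;

        b->xtime_nsec += cycle_delta * b->mult;
        while (b->xtime_nsec >= snsec_per_sec) {
            b->xtime_nsec -= snsec_per_sec;
            b->raw_sec++;
        }
    }

    int main(void)
    {
        struct raw_base b = { .mult = 1 << 8, .shift = 8 };  /* 1 ns per cycle */

        raw_accumulate(&b, 1500000000ULL);                   /* 1.5e9 cycles = 1.5 s */
        printf("%llu s + %llu ns\n",
               (unsigned long long)b.raw_sec,
               (unsigned long long)(b.xtime_nsec >> b.shift));
        return 0;
    }
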
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index bbc5d1114583..523fe1669d4c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -127,7 +127,7 @@ int timer_migration_handler(struct ctl_table *table, int write,
int ret;
mutex_lock(&mutex);
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
timers_update_migration(false);
mutex_unlock(&mutex);
@@ -764,8 +764,15 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
__acquires(timer->base->lock)
{
for (;;) {
- u32 tf = timer->flags;
struct tvec_base *base;
+ u32 tf;
+
+ /*
+ * We need to use READ_ONCE() here, otherwise the compiler
+ * might re-read @tf between the check for TIMER_MIGRATING
+ * and spin_lock().
+ */
+ tf = READ_ONCE(timer->flags);
if (!(tf & TIMER_MIGRATING)) {
base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK);
@@ -1698,10 +1705,10 @@ EXPORT_SYMBOL(msleep_interruptible);
static void __sched do_usleep_range(unsigned long min, unsigned long max)
{
ktime_t kmin;
- unsigned long delta;
+ u64 delta;
kmin = ktime_set(0, min * NSEC_PER_USEC);
- delta = (max - min) * NSEC_PER_USEC;
+ delta = (u64)(max - min) * NSEC_PER_USEC;
schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
}
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ba7d8b288bb3..1407ed20ea93 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -16,6 +16,7 @@
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
+#include <linux/nmi.h>
#include <asm/uaccess.h>
@@ -96,6 +97,9 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base,
next_one:
i = 0;
+
+ touch_nmi_watchdog();
+
raw_spin_lock_irqsave(&base->cpu_base->lock, flags);
curr = timerqueue_getnext(&base->active);
@@ -207,6 +211,8 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
{
struct clock_event_device *dev = td->evtdev;
+ touch_nmi_watchdog();
+
SEQ_printf(m, "Tick Device: mode: %d\n", td->mode);
if (cpu < 0)
SEQ_printf(m, "Broadcast device\n");
@@ -393,7 +399,7 @@ static int __init init_timer_list_procfs(void)
{
struct proc_dir_entry *pe;
- pe = proc_create("timer_list", 0444, NULL, &timer_list_fops);
+ pe = proc_create("timer_list", 0400, NULL, &timer_list_fops);
if (!pe)
return -ENOMEM;
return 0;