author     Srivatsa Vaddagiri <vatsa@codeaurora.org>    2014-01-06 16:24:48 -0800
committer  David Keitel <dkeitel@codeaurora.org>        2016-03-23 19:58:39 -0700
commit     74463329e4f23636cfbd126709b27395dbbfcaa7 (patch)
tree       e3edd3f68ee329e31000eee855e7566a7fe3c220 /kernel/sched
parent     97ae7bae2cb823be587d85f3421ade6033f6f366 (diff)
sched: window-based load stats for tasks
Provide a per-task metric that specifies how cpu-bound a task is. Task
execution is monitored over several time windows, and the fraction of
each window for which the task was found to be executing or wanting to
run is recorded as the task's demand. Windows over which the task was
sleeping are ignored. The last 5 windows are tracked for every task,
and the maximum demand seen in any of those windows (where the task had
some activity) drives the frequency demand for that task.

A per-cpu metric (rq->cumulative_runnable_avg) is also provided, which
is an aggregation of the cpu demand of all tasks currently enqueued on
that cpu. rq->cumulative_runnable_avg is useful for deciding whether
the cpu frequency needs to be changed to match task demand.
Change-Id: Ib83207b9ba8683cd3304ee8a2290695c34f08fe2
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
[rameezmustafa@codeaurora.org: Port to msm-3.18]
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
[joonwoop@codeaurora.org: fixed conflict in ttwu_do_wakeup() to
incorporate with changed trace_sched_wakeup() location.]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
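
To make the scheme described in the message concrete before reading the diff, here is a minimal user-space sketch of the history bookkeeping: the demand of the last 5 busy windows is kept per task and the maximum of those drives the task's demand. All names in the sketch (task_demand, record_window, HIST_SIZE) are illustrative only; the in-kernel implementation is update_history() operating on p->se.ravg with RAVG_HIST_SIZE, shown in the patch below.

```c
/* Illustrative sketch only -- not the kernel code from this patch. */
#include <stdio.h>

#define HIST_SIZE 5

struct task_demand {
        unsigned int hist[HIST_SIZE];   /* demand of the last HIST_SIZE busy windows */
        unsigned int demand;            /* max over the history; drives freq demand */
};

/* Record the demand of one newly completed window and refresh the maximum. */
static void record_window(struct task_demand *t, unsigned int runtime)
{
        unsigned int max = 0;
        int i;

        if (!runtime)                   /* windows with no activity are ignored */
                return;

        for (i = HIST_SIZE - 1; i > 0; i--)     /* shift history to make room */
                t->hist[i] = t->hist[i - 1];
        t->hist[0] = runtime;

        for (i = 0; i < HIST_SIZE; i++)
                if (t->hist[i] > max)
                        max = t->hist[i];
        t->demand = max;
}

int main(void)
{
        struct task_demand t = { { 0 }, 0 };
        unsigned int windows[] = { 10, 40, 0, 25, 5 }; /* per-window runtimes */
        unsigned int i;

        for (i = 0; i < sizeof(windows) / sizeof(windows[0]); i++) {
                record_window(&t, windows[i]);
                printf("window %u: runtime=%u demand=%u\n",
                       i, windows[i], t.demand);
        }
        return 0;       /* demand settles at 40: the max of recent busy windows */
}
```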
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c   | 124
-rw-r--r--  kernel/sched/sched.h  |   5

2 files changed, 129 insertions, 0 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9a53673cd810..3059a938045f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -860,6 +860,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
         sched_info_queued(rq, p);
         p->sched_class->enqueue_task(rq, p, flags);
         trace_sched_enq_deq_task(p, 1);
+        rq->cumulative_runnable_avg += p->se.ravg.demand;
 }
 
 static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -869,6 +870,8 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
         sched_info_dequeued(rq, p);
         p->sched_class->dequeue_task(rq, p, flags);
         trace_sched_enq_deq_task(p, 0);
+        rq->cumulative_runnable_avg -= p->se.ravg.demand;
+        BUG_ON((s64)rq->cumulative_runnable_avg < 0);
 }
 
 void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1745,12 +1748,118 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl
 }
 
 /*
+ * Called when new window is starting for a task, to record cpu usage over
+ * recently concluded window(s). Normally 'samples' should be 1. It can be > 1
+ * when, say, a real-time task runs without preemption for several windows at a
+ * stretch.
+ */
+static inline void
+update_history(struct rq *rq, struct task_struct *p, u32 runtime, int samples)
+{
+        u32 *hist = &p->se.ravg.sum_history[0];
+        int ridx, widx;
+        u32 max = 0;
+
+        /* Ignore windows where task had no activity */
+        if (!runtime)
+                return;
+
+        /* Push new 'runtime' value onto stack */
+        widx = RAVG_HIST_SIZE - 1;
+        ridx = widx - samples;
+        for (; ridx >= 0; --widx, --ridx) {
+                hist[widx] = hist[ridx];
+                if (hist[widx] > max)
+                        max = hist[widx];
+        }
+
+        for (widx = 0; widx < samples && widx < RAVG_HIST_SIZE; widx++) {
+                hist[widx] = runtime;
+                if (hist[widx] > max)
+                        max = hist[widx];
+        }
+
+        p->se.ravg.sum = 0;
+        if (p->on_rq) {
+                rq->cumulative_runnable_avg -= p->se.ravg.demand;
+                BUG_ON((s64)rq->cumulative_runnable_avg < 0);
+        }
+        /*
+         * Maximum demand seen over previous RAVG_HIST_SIZE windows drives
+         * frequency demand for a task. Record maximum in 'demand' attribute.
+         */
+        p->se.ravg.demand = max;
+        if (p->on_rq)
+                rq->cumulative_runnable_avg += p->se.ravg.demand;
+}
+
+/* Window size (in ns) */
+__read_mostly unsigned int sysctl_sched_ravg_window = 50000000;
+
+void update_task_ravg(struct task_struct *p, struct rq *rq, int update_sum)
+{
+        u32 window_size = sysctl_sched_ravg_window;
+        int new_window;
+        u64 wallclock = sched_clock();
+
+        do {
+                s64 delta = 0;
+                int n;
+                u64 now = wallclock;
+
+                new_window = 0;
+                delta = now - p->se.ravg.window_start;
+                BUG_ON(delta < 0);
+                if (delta > window_size) {
+                        p->se.ravg.window_start += window_size;
+                        now = p->se.ravg.window_start;
+                        new_window = 1;
+                }
+
+                if (update_sum) {
+                        delta = now - p->se.ravg.mark_start;
+                        BUG_ON(delta < 0);
+
+                        if (likely(rq->cur_freq &&
+                                   rq->cur_freq <= max_possible_freq))
+                                delta = div64_u64(delta * rq->cur_freq,
+                                                  max_possible_freq);
+                        p->se.ravg.sum += delta;
+                        WARN_ON(p->se.ravg.sum > window_size);
+                }
+
+                if (!new_window)
+                        break;
+
+                update_history(rq, p, p->se.ravg.sum, 1);
+
+                delta = wallclock - p->se.ravg.window_start;
+                BUG_ON(delta < 0);
+                n = div64_u64(delta, window_size);
+                if (n) {
+                        if (!update_sum)
+                                p->se.ravg.window_start = wallclock;
+                        else
+                                p->se.ravg.window_start += n * window_size;
+                        BUG_ON(p->se.ravg.window_start > wallclock);
+                        if (update_sum)
+                                update_history(rq, p, window_size, n);
+                }
+                p->se.ravg.mark_start = p->se.ravg.window_start;
+        } while (new_window);
+
+        p->se.ravg.mark_start = wallclock;
+}
+
+/*
  * Mark the task runnable and perform wakeup-preemption.
  */
 static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
         check_preempt_curr(rq, p, wake_flags);
+
+        update_task_ravg(p, rq, 0);
         p->state = TASK_RUNNING;
         trace_sched_wakeup(p);
@@ -2142,6 +2251,8 @@ void __dl_clear_params(struct task_struct *p)
  */
 static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 {
+        int i;
+
         p->on_rq = 0;
 
         p->se.on_rq = 0;
@@ -2150,6 +2261,13 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
         p->se.prev_sum_exec_runtime = 0;
         p->se.nr_migrations = 0;
         p->se.vruntime = 0;
+        p->se.ravg.sum = 0;
+        p->se.ravg.demand = 0;
+        p->se.ravg.window_start = 0;
+        p->se.ravg.mark_start = 0;
+        for (i = 0; i < RAVG_HIST_SIZE; ++i)
+                p->se.ravg.sum_history[i] = 0;
+
         INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_SCHEDSTATS
@@ -2416,6 +2534,7 @@ void wake_up_new_task(struct task_struct *p)
 {
         unsigned long flags;
         struct rq *rq;
+        u64 wallclock = sched_clock();
 
         raw_spin_lock_irqsave(&p->pi_lock, flags);
         /* Initialize new task's runnable average */
@@ -2431,6 +2550,8 @@ void wake_up_new_task(struct task_struct *p)
 
         rq = __task_rq_lock(p);
         activate_task(rq, p, 0);
+        p->se.ravg.window_start = wallclock;
+        p->se.ravg.mark_start = wallclock;
         p->on_rq = TASK_ON_RQ_QUEUED;
         trace_sched_wakeup_new(p);
         check_preempt_curr(rq, p, WF_FORK);
@@ -3088,6 +3209,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
                 if (unlikely(!p))
                         p = idle_sched_class.pick_next_task(rq, prev);
 
+                update_task_ravg(p, rq, 1);
                 return p;
         }
 
@@ -3097,6 +3219,7 @@ again:
                 if (p) {
                         if (unlikely(p == RETRY_TASK))
                                 goto again;
+                        update_task_ravg(p, rq, 1);
                         return p;
                 }
         }
@@ -7593,6 +7716,7 @@ void __init sched_init(void)
                 rq->cur_freq = 0;
                 rq->max_freq = 0;
                 rq->min_freq = 0;
+                rq->cumulative_runnable_avg = 0;
                 rq->max_idle_balance_cost = sysctl_sched_migration_cost;
                 rq->cstate = 0;
                 rq->wakeup_latency = 0;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4061d3f9d93d..0dbe55192ef2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -644,6 +644,7 @@ struct rq {
 #endif
 
         int cur_freq, max_freq, min_freq;
+        u64 cumulative_runnable_avg;
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
         u64 prev_irq_time;
@@ -1243,8 +1244,12 @@ struct sched_class {
 #endif
 };
 
+extern void
+update_task_ravg(struct task_struct *p, struct rq *rq, int update_sum);
+
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
+        update_task_ravg(prev, rq, 1);
         prev->sched_class->put_prev_task(rq, prev);
 }
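
One further note on the patch above: inside update_task_ravg(), runtime accumulated within a window is scaled by rq->cur_freq relative to max_possible_freq, so a task's demand is expressed as the time the same work would have taken at the highest possible frequency. The sketch below only illustrates that arithmetic under assumed kHz frequency values; scale_exec_time() is a made-up helper name for this example, not part of the patch.

```c
/* Illustrative sketch of the frequency scaling step -- names and values are assumptions. */
#include <stdint.h>
#include <stdio.h>

static uint64_t scale_exec_time(uint64_t delta_ns, uint64_t cur_freq,
                                uint64_t max_possible_freq)
{
        /* Guard against an unset or out-of-range frequency, as the kernel code does. */
        if (!cur_freq || cur_freq > max_possible_freq)
                return delta_ns;
        return delta_ns * cur_freq / max_possible_freq;
}

int main(void)
{
        /* 10 ms of runtime at 600 MHz counts as 2.5 ms against a 2.4 GHz ceiling. */
        uint64_t scaled = scale_exec_time(10000000ULL, 600000, 2400000);

        printf("scaled runtime: %llu ns\n", (unsigned long long)scaled);
        return 0;
}
```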
