-rw-r--r--   include/linux/sched.h |  1
-rw-r--r--   kernel/sched/core.c   |  8
-rw-r--r--   kernel/sched/sched.h  | 12
-rw-r--r--   kernel/sched/walt.c   | 56
4 files changed, 74 insertions, 3 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1872d19d1702..62e179f84aef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1568,6 +1568,7 @@ struct task_struct {
 	 * of this task
 	 */
 	u32 init_load_pct;
+	u64 last_sleep_ts;
 #endif
 
 #ifdef CONFIG_CGROUP_SCHED
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ca9d72fa8e66..563f316f5330 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2170,6 +2170,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
+#ifdef CONFIG_SCHED_WALT
+	p->last_sleep_ts		= 0;
+#endif
+
 	INIT_LIST_HEAD(&p->se.group_node);
 
 	walt_init_new_task_load(p);
@@ -3379,6 +3383,10 @@ static void __sched notrace __schedule(bool preempt)
 	rq->clock_skip_update = 0;
 
 	if (likely(prev != next)) {
+#ifdef CONFIG_SCHED_WALT
+		if (!prev->on_rq)
+			prev->last_sleep_ts = wallclock;
+#endif
 		rq->nr_switches++;
 		rq->curr = next;
 		++*switch_count;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8d3712107e61..deba080af845 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -690,6 +690,7 @@ struct rq {
 	u64 cur_irqload;
 	u64 avg_irqload;
 	u64 irqload_ts;
+	u64 cum_window_demand;
 #endif /* CONFIG_SCHED_WALT */
 
 
@@ -2101,6 +2102,17 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
+#ifdef CONFIG_SCHED_WALT
+
+static inline bool
+walt_task_in_cum_window_demand(struct rq *rq, struct task_struct *p)
+{
+	return cpu_of(rq) == task_cpu(p) &&
+	       (p->on_rq || p->last_sleep_ts >= rq->window_start);
+}
+
+#endif /* CONFIG_SCHED_WALT */
+
 #ifdef arch_scale_freq_capacity
 #ifndef arch_scale_freq_invariant
 #define arch_scale_freq_invariant()	(true)
diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 441cba01bc04..93f61486f989 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -70,11 +70,28 @@ static unsigned int task_load(struct task_struct *p)
 	return p->ravg.demand;
 }
 
+static inline void fixup_cum_window_demand(struct rq *rq, s64 delta)
+{
+	rq->cum_window_demand += delta;
+	if (unlikely((s64)rq->cum_window_demand < 0))
+		rq->cum_window_demand = 0;
+}
+
 void
 walt_inc_cumulative_runnable_avg(struct rq *rq,
 				 struct task_struct *p)
 {
 	rq->cumulative_runnable_avg += p->ravg.demand;
+
+	/*
+	 * Add a task's contribution to the cumulative window demand when
+	 *
+	 * (1) task is enqueued with on_rq = 1 i.e migration,
+	 *     prio/cgroup/class change.
+	 * (2) task is waking for the first time in this window.
+	 */
+	if (p->on_rq || (p->last_sleep_ts < rq->window_start))
+		fixup_cum_window_demand(rq, p->ravg.demand);
 }
 
 void
@@ -83,6 +100,14 @@ walt_dec_cumulative_runnable_avg(struct rq *rq,
 {
 	rq->cumulative_runnable_avg -= p->ravg.demand;
 	BUG_ON((s64)rq->cumulative_runnable_avg < 0);
+
+	/*
+	 * on_rq will be 1 for sleeping tasks. So check if the task
+	 * is migrating or dequeuing in RUNNING state to change the
+	 * prio/cgroup/class.
+	 */
+	if (task_on_rq_migrating(p) || p->state == TASK_RUNNING)
+		fixup_cum_window_demand(rq, -(s64)p->ravg.demand);
 }
 
 static void
@@ -95,6 +120,8 @@ fixup_cumulative_runnable_avg(struct rq *rq,
 	if ((s64)rq->cumulative_runnable_avg < 0)
 		panic("cra less than zero: tld: %lld, task_load(p) = %u\n",
 			task_load_delta, task_load(p));
+
+	fixup_cum_window_demand(rq, task_load_delta);
 }
 
 u64 walt_ktime_clock(void)
@@ -180,6 +207,8 @@ update_window_start(struct rq *rq, u64 wallclock)
 
 	nr_windows = div64_u64(delta, walt_ravg_window);
 	rq->window_start += (u64)nr_windows * (u64)walt_ravg_window;
+
+	rq->cum_window_demand = rq->cumulative_runnable_avg;
 }
 
 /*
@@ -568,10 +597,20 @@ static void update_history(struct rq *rq, struct task_struct *p,
 	 * A throttled deadline sched class task gets dequeued without
 	 * changing p->on_rq. Since the dequeue decrements hmp stats
 	 * avoid decrementing it here again.
+	 *
+	 * When window is rolled over, the cumulative window demand
+	 * is reset to the cumulative runnable average (contribution from
+	 * the tasks on the runqueue). If the current task is dequeued
+	 * already, it's demand is not included in the cumulative runnable
+	 * average. So add the task demand separately to cumulative window
+	 * demand.
 	 */
-	if (task_on_rq_queued(p) && (!task_has_dl_policy(p) ||
-						!p->dl.dl_throttled))
-		fixup_cumulative_runnable_avg(rq, p, demand);
+	if (!task_has_dl_policy(p) || !p->dl.dl_throttled) {
+		if (task_on_rq_queued(p))
+			fixup_cumulative_runnable_avg(rq, p, demand);
+		else if (rq->curr == p)
+			fixup_cum_window_demand(rq, demand);
+	}
 
 	p->ravg.demand = demand;
 
@@ -792,6 +831,17 @@ void walt_fixup_busy_time(struct task_struct *p, int new_cpu)
 
 	walt_update_task_ravg(p, task_rq(p), TASK_MIGRATE, wallclock, 0);
 
+	/*
+	 * When a task is migrating during the wakeup, adjust
+	 * the task's contribution towards cumulative window
+	 * demand.
+	 */
+	if (p->state == TASK_WAKING &&
+	    p->last_sleep_ts >= src_rq->window_start) {
+		fixup_cum_window_demand(src_rq, -(s64)p->ravg.demand);
+		fixup_cum_window_demand(dest_rq, p->ravg.demand);
+	}
+
 	if (p->ravg.curr_window) {
 		src_rq->curr_runnable_sum -= p->ravg.curr_window;
 		dest_rq->curr_runnable_sum += p->ravg.curr_window;
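
The sketch below is a minimal, self-contained userspace model of the cum_window_demand bookkeeping this patch adds, intended only to make the accounting rules in the comments above easier to follow. The type and helper names (toy_rq, toy_task, toy_enqueue, toy_dequeue_sleep, toy_roll_window) and the simplified enqueue/sleep/rollover paths are illustrative assumptions, not kernel code; the real implementation is the CONFIG_SCHED_WALT code in kernel/sched/walt.c shown in the diff.

/*
 * Toy model (not kernel code) of the cumulative window demand rules:
 *  - enqueue adds a task's demand unless it already ran and slept in
 *    the current window (last_sleep_ts >= window_start),
 *  - a dequeue for sleep leaves the demand counted until rollover,
 *  - window rollover reseeds the counter from the runnable tasks.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_rq {
	uint64_t window_start;            /* start of the current window */
	int64_t  cumulative_runnable_avg; /* demand of runnable tasks    */
	int64_t  cum_window_demand;       /* counter added by this patch */
};

struct toy_task {
	int64_t  demand;        /* stands in for p->ravg.demand   */
	uint64_t last_sleep_ts; /* stands in for p->last_sleep_ts */
	bool     on_rq;
};

static void fixup_cum_window_demand(struct toy_rq *rq, int64_t delta)
{
	rq->cum_window_demand += delta;
	if (rq->cum_window_demand < 0)	/* clamp at zero, as the patch does */
		rq->cum_window_demand = 0;
}

/* Enqueue: count the demand only on the first wakeup in this window
 * (or when the task is already on_rq, e.g. a migration-style enqueue). */
static void toy_enqueue(struct toy_rq *rq, struct toy_task *p)
{
	rq->cumulative_runnable_avg += p->demand;
	if (p->on_rq || p->last_sleep_ts < rq->window_start)
		fixup_cum_window_demand(rq, p->demand);
	p->on_rq = true;
}

/* Dequeue for sleep: the demand stays in cum_window_demand until the
 * next rollover, so a short sleep does not hide the task's load. */
static void toy_dequeue_sleep(struct toy_rq *rq, struct toy_task *p, uint64_t now)
{
	rq->cumulative_runnable_avg -= p->demand;
	p->last_sleep_ts = now;
	p->on_rq = false;
}

/* Window rollover: restart from the demand of still-runnable tasks. */
static void toy_roll_window(struct toy_rq *rq, uint64_t new_window_start)
{
	rq->window_start = new_window_start;
	rq->cum_window_demand = rq->cumulative_runnable_avg;
}

int main(void)
{
	struct toy_rq rq = { .window_start = 100 };
	struct toy_task a = { .demand = 300 }, b = { .demand = 200 };

	toy_enqueue(&rq, &a);            /* first wakeup in window: +300 */
	toy_enqueue(&rq, &b);            /* first wakeup in window: +200 */
	toy_dequeue_sleep(&rq, &a, 150); /* A sleeps inside the window   */
	toy_enqueue(&rq, &a);            /* same window: not added again */
	toy_dequeue_sleep(&rq, &a, 180); /* A sleeps again               */

	printf("before rollover: %lld\n", (long long)rq.cum_window_demand); /* 500 */

	toy_roll_window(&rq, 200);       /* only B is still runnable */
	printf("after rollover:  %lld\n", (long long)rq.cum_window_demand); /* 200 */
	return 0;
}

The point the model illustrates is the one the patch comments make: a task's demand is neither double-counted on repeated wakeups within a window nor dropped when it briefly sleeps, and the counter is reseeded from cumulative_runnable_avg when update_window_start() rolls the window over.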