From 1cb87c38cb81847938bbb4776d9c6f3afe8fd938 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Wed, 1 Nov 2017 14:11:06 +0800 Subject: sched: EAS: Fix the condition to distinguish energy before/after Before commit 5f8b3a757d65 ("sched/fair: consider task utilization in group_norm_util()"), eenv->util_delta is used to distinguish energy before and energy after in sched_group_energy(). After that commit, eenv->util_delta can not do that any more. In this commit, use trg_cpu to distinguish energy before/after in sched_group_energy(). Before apply this commit, cap_before/cap_delta is not correct: -0 [001] 147504.608920: sched_energy_diff: pid=7 comm=rcu_preempt src_cpu=1 dst_cpu=3 usage_delta=7 nrg_before=250 nrg_after=250 nrg_diff=0 cap_before=0 cap_after=528 cap_delta=1056 nrg_delta=0 nrg_payoff=0 After apply this commit, cap_before/cap_delta retrun to normal: -0 [001] 220.494011: sched_energy_diff: pid=7 comm=rcu_preempt src_cpu=1 dst_cpu=2 usage_delta=3 nrg_before=248 nrg_after=248 nrg_diff=0 cap_before=528 cap_after=528 cap_delta=0 nrg_delta=0 nrg_payoff=0 Signed-off-by: Ke Wang --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ac22d32a6255..06b814b58d20 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5534,13 +5534,13 @@ static int sched_group_energy(struct energy_env *eenv) if (sg->group_weight == 1) { /* Remove capacity of src CPU (before task move) */ - if (eenv->util_delta == 0 && + if (eenv->trg_cpu == eenv->src_cpu && cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) { eenv->cap.before = sg->sge->cap_states[cap_idx].cap; eenv->cap.delta -= eenv->cap.before; } /* Add capacity of dst CPU (after task move) */ - if (eenv->util_delta != 0 && + if (eenv->trg_cpu == eenv->dst_cpu && cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) { eenv->cap.after = sg->sge->cap_states[cap_idx].cap; eenv->cap.delta += eenv->cap.after; -- cgit v1.2.3 From 7d5a251c66be3516c14cffa80e6b076b37736971 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Mon, 30 Oct 2017 17:38:16 +0800 Subject: sched: EAS: update trg_cpu to backup_cpu if no energy saving for target_cpu If no energy saving for target_cpu in the calculation of energy_diff(), backup_cpu will be set as the new dst_cpu for the next calculation. At this point, we also need update the new trg_cpu as backup_cpu to make sure the subsequent calculation of energy_diff() is correct. Signed-off-by: Ke Wang --- kernel/sched/fair.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 06b814b58d20..3b429c5ce721 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6723,6 +6723,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync /* No energy saving for target_cpu, try backup */ target_cpu = tmp_backup; eenv.dst_cpu = target_cpu; + eenv.trg_cpu = target_cpu; if (tmp_backup < 0 || tmp_backup == prev_cpu || energy_diff(&eenv) >= 0) { -- cgit v1.2.3 From 47c87b2654376e7dda646ca5a2af067c5d368ca7 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Wed, 1 Nov 2017 16:07:38 +0800 Subject: sched: EAS: Fix the calculation of group util in group_idle_state() util_delta becomes not zero in eenv_before, which will affect the calculation of grp_util in group_idle_state(). Fix it under the new condition. 
Change-Id: Ic3853bb45876a8e388afcbe4e72d25fc42b1d7b0 Signed-off-by: Ke Wang --- kernel/sched/fair.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3b429c5ce721..5c65f3ad6da1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5429,13 +5429,6 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ state++; - /* - * Try to estimate if a deeper idle state is - * achievable when we move the task. - */ - for_each_cpu(i, sched_group_cpus(sg)) - grp_util += cpu_util(i); - src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg)); dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg)); if (src_in_grp == dst_in_grp) { @@ -5444,10 +5437,16 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) */ goto end; } - /* add or remove util as appropriate to indicate what group util - * will be (worst case - no concurrent execution) after moving the task + + /* + * Try to estimate if a deeper idle state is + * achievable when we move the task. */ - grp_util += src_in_grp ? -eenv->util_delta : eenv->util_delta; + for_each_cpu(i, sched_group_cpus(sg)) { + grp_util += cpu_util_wake(i, eenv->task); + if (unlikely(i == eenv->trg_cpu)) + grp_util += eenv->util_delta; + } if (grp_util <= ((long)sg->sgc->max_capacity * (int)sg->group_weight)) { -- cgit v1.2.3 From cd04e987d1da0eadc25c2186bd6bd93f22c3e851 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 11 Sep 2017 17:05:49 -0700 Subject: ANDROID: sched/rt: add schedtune accounting This patch adds schedtune enqueue/dequeue to RT scheduling class. Change-Id: If416e64319d62191f3aedd675d3e9a21fe2102fb Signed-off-by: Joel Fernandes --- kernel/sched/rt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 069f8982867f..88f28e996249 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -9,6 +9,7 @@ #include #include "walt.h" +#include "tune.h" int sched_rr_timeslice = RR_TIMESLICE; @@ -1321,6 +1322,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); + + schedtune_enqueue_task(p, cpu_of(rq)); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -1332,6 +1335,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) walt_dec_cumulative_runnable_avg(rq, p); dequeue_pushable_task(rq, p); + schedtune_dequeue_task(p, cpu_of(rq)); } /* -- cgit v1.2.3 From d194ba5d712f051ff6c025f3484bb72f219764e3 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 11 Sep 2017 17:10:37 -0700 Subject: ANDROID: sched/rt: schedtune: Add boost retention to RT Boosted RT tasks can be deboosted quickly, this makes boost usless for RT tasks and causes lots of glitching. Use timers to prevent de-boost too soon and wait for long enough such that next enqueue happens after a threshold. While this can be solved in the governor, there are following advantages: - The approach used is governor-independent - Reduces boost group lock contention for frequently sleepers/wakers - Works with schedfreq without any other schedfreq hacks. 
Bug: 30210506 Change-Id: I41788b235586988be446505deb7c0529758a9898 Signed-off-by: Joel Fernandes --- kernel/sched/core.c | 1 + kernel/sched/rt.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 1 + 3 files changed, 156 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0c9e332ceb3b..3030633d8900 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2200,6 +2200,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) init_dl_task_timer(&p->dl); __dl_clear_params(p); + init_rt_schedtune_timer(&p->rt); INIT_LIST_HEAD(&p->rt.run_list); #ifdef CONFIG_PREEMPT_NOTIFIERS diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 88f28e996249..f41435e7f75d 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -7,6 +7,7 @@ #include #include +#include #include "walt.h" #include "tune.h" @@ -986,6 +987,73 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) return 0; } +#define RT_SCHEDTUNE_INTERVAL 50000000ULL + +static void sched_rt_update_capacity_req(struct rq *rq); + +static enum hrtimer_restart rt_schedtune_timer(struct hrtimer *timer) +{ + struct sched_rt_entity *rt_se = container_of(timer, + struct sched_rt_entity, + schedtune_timer); + struct task_struct *p = rt_task_of(rt_se); + struct rq *rq = task_rq(p); + + raw_spin_lock(&rq->lock); + + /* + * Nothing to do if: + * - task has switched runqueues + * - task isn't RT anymore + */ + if (rq != task_rq(p) || (p->sched_class != &rt_sched_class)) + goto out; + + /* + * If task got enqueued back during callback time, it means we raced + * with the enqueue on another cpu, that's Ok, just do nothing as + * enqueue path would have tried to cancel us and we shouldn't run + * Also check the schedtune_enqueued flag as class-switch on a + * sleeping task may have already canceled the timer and done dq + */ + if (p->on_rq || !rt_se->schedtune_enqueued) + goto out; + + /* + * RT task is no longer active, cancel boost + */ + rt_se->schedtune_enqueued = false; + schedtune_dequeue_task(p, cpu_of(rq)); + sched_rt_update_capacity_req(rq); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); +out: + raw_spin_unlock(&rq->lock); + + /* + * This can free the task_struct if no more references. + */ + put_task_struct(p); + + return HRTIMER_NORESTART; +} + +void init_rt_schedtune_timer(struct sched_rt_entity *rt_se) +{ + struct hrtimer *timer = &rt_se->schedtune_timer; + + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + timer->function = rt_schedtune_timer; + rt_se->schedtune_enqueued = false; +} + +static void start_schedtune_timer(struct sched_rt_entity *rt_se) +{ + struct hrtimer *timer = &rt_se->schedtune_timer; + + hrtimer_start(timer, ns_to_ktime(RT_SCHEDTUNE_INTERVAL), + HRTIMER_MODE_REL_PINNED); +} + /* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class. @@ -1323,7 +1391,33 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); + if (!schedtune_task_boost(p)) + return; + + /* + * If schedtune timer is active, that means a boost was already + * done, just cancel the timer so that deboost doesn't happen. + * Otherwise, increase the boost. If an enqueued timer was + * cancelled, put the task reference. 
+ */ + if (hrtimer_try_to_cancel(&rt_se->schedtune_timer) == 1) + put_task_struct(p); + + /* + * schedtune_enqueued can be true in the following situation: + * enqueue_task_rt grabs rq lock before timer fires + * or before its callback acquires rq lock + * schedtune_enqueued can be false if timer callback is running + * and timer just released rq lock, or if the timer finished + * running and canceling the boost + */ + if (rt_se->schedtune_enqueued) + return; + + rt_se->schedtune_enqueued = true; schedtune_enqueue_task(p, cpu_of(rq)); + sched_rt_update_capacity_req(rq); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -1335,7 +1429,20 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) walt_dec_cumulative_runnable_avg(rq, p); dequeue_pushable_task(rq, p); + + if (!rt_se->schedtune_enqueued) + return; + + if (flags == DEQUEUE_SLEEP) { + get_task_struct(p); + start_schedtune_timer(rt_se); + return; + } + + rt_se->schedtune_enqueued = false; schedtune_dequeue_task(p, cpu_of(rq)); + sched_rt_update_capacity_req(rq); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); } /* @@ -1375,6 +1482,33 @@ static void yield_task_rt(struct rq *rq) #ifdef CONFIG_SMP static int find_lowest_rq(struct task_struct *task); +/* + * Perform a schedtune dequeue and cancelation of boost timers if needed. + * Should be called only with the rq->lock held. + */ +static void schedtune_dequeue_rt(struct rq *rq, struct task_struct *p) +{ + struct sched_rt_entity *rt_se = &p->rt; + + BUG_ON(!raw_spin_is_locked(&rq->lock)); + + if (!rt_se->schedtune_enqueued) + return; + + /* + * Incase of class change cancel any active timers. If an enqueued + * timer was cancelled, put the task ref. + */ + if (hrtimer_try_to_cancel(&rt_se->schedtune_timer) == 1) + put_task_struct(p); + + /* schedtune_enqueued is true, deboost it */ + rt_se->schedtune_enqueued = false; + schedtune_dequeue_task(p, task_cpu(p)); + sched_rt_update_capacity_req(rq); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); +} + static int select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags, int sibling_count_hint) @@ -1429,6 +1563,19 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags, rcu_read_unlock(); out: + /* + * If previous CPU was different, make sure to cancel any active + * schedtune timers and deboost. + */ + if (task_cpu(p) != cpu) { + unsigned long fl; + struct rq *prq = task_rq(p); + + raw_spin_lock_irqsave(&prq->lock, fl); + schedtune_dequeue_rt(prq, p); + raw_spin_unlock_irqrestore(&prq->lock, fl); + } + return cpu; } @@ -2205,6 +2352,13 @@ static void rq_offline_rt(struct rq *rq) */ static void switched_from_rt(struct rq *rq, struct task_struct *p) { + /* + * On class switch from rt, always cancel active schedtune timers, + * this handles the cases where we switch class for a task that is + * already rt-dequeued but has a running timer. 
+ */ + schedtune_dequeue_rt(rq, p); + /* * If there are other RT tasks then we will reschedule * and the scheduling of the other RT tasks will handle diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0238e94b0a1e..028e232103c2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1398,6 +1398,7 @@ extern void resched_cpu(int cpu); extern struct rt_bandwidth def_rt_bandwidth; extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); +extern void init_rt_schedtune_timer(struct sched_rt_entity *rt_se); extern struct dl_bandwidth def_dl_bandwidth; extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime); -- cgit v1.2.3 From df147c9e336cfcb4183db1eb9552b0429060cd0d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Nov 2017 15:13:26 +0530 Subject: cpufreq: Drop schedfreq governor We all should be using (and improving) the schedutil governor now. Get rid of the non-upstream governor. Tested on Hikey. Change-Id: Ic660756536e5da51952738c3c18b94e31f58cd57 Signed-off-by: Viresh Kumar --- kernel/sched/Makefile | 1 - kernel/sched/core.c | 86 ------- kernel/sched/cpufreq_sched.c | 525 ------------------------------------------- kernel/sched/fair.c | 89 +------- kernel/sched/rt.c | 49 +--- kernel/sched/sched.h | 75 ------- kernel/sysctl.c | 7 - 7 files changed, 4 insertions(+), 828 deletions(-) delete mode 100644 kernel/sched/cpufreq_sched.c (limited to 'kernel') diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index ca0d94096170..d7ec4f7dd0d9 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -22,5 +22,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_SCHED_TUNE) += tune.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o -obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3030633d8900..889fb1aff1e0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2983,91 +2983,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } -#ifdef CONFIG_CPU_FREQ_GOV_SCHED - -static inline -unsigned long add_capacity_margin(unsigned long cpu_capacity) -{ - cpu_capacity = cpu_capacity * capacity_margin; - cpu_capacity /= SCHED_CAPACITY_SCALE; - return cpu_capacity; -} - -static inline -unsigned long sum_capacity_reqs(unsigned long cfs_cap, - struct sched_capacity_reqs *scr) -{ - unsigned long total = add_capacity_margin(cfs_cap + scr->rt); - return total += scr->dl; -} - -unsigned long boosted_cpu_util(int cpu); -static void sched_freq_tick_pelt(int cpu) -{ - unsigned long cpu_utilization = boosted_cpu_util(cpu); - unsigned long capacity_curr = capacity_curr_of(cpu); - struct sched_capacity_reqs *scr; - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr) - return; - - /* - * To make free room for a task that is building up its "real" - * utilization and to harm its performance the least, request - * a jump to a higher OPP as soon as the margin of free capacity - * is impacted (specified by capacity_margin). - * Remember CPU utilization in sched_capacity_reqs should be normalised. 
- */ - cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, cpu_utilization); -} - -#ifdef CONFIG_SCHED_WALT -static void sched_freq_tick_walt(int cpu) -{ - unsigned long cpu_utilization = cpu_util_freq(cpu); - unsigned long capacity_curr = capacity_curr_of(cpu); - - if (walt_disabled || !sysctl_sched_use_walt_cpu_util) - return sched_freq_tick_pelt(cpu); - - /* - * Add a margin to the WALT utilization to check if we will need to - * increase frequency. - * NOTE: WALT tracks a single CPU signal for all the scheduling - * classes, thus this margin is going to be added to the DL class as - * well, which is something we do not do in sched_freq_tick_pelt case. - */ - if (add_capacity_margin(cpu_utilization) <= capacity_curr) - return; - - /* - * It is likely that the load is growing so we - * keep the added margin in our request as an - * extra boost. - * Remember CPU utilization in sched_capacity_reqs should be normalised. - */ - cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, cpu_utilization); - -} -#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu) -#else -#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu) -#endif /* CONFIG_SCHED_WALT */ - -static void sched_freq_tick(int cpu) -{ - if (!sched_freq()) - return; - - _sched_freq_tick(cpu); -} -#else -static inline void sched_freq_tick(int cpu) { } -#endif /* CONFIG_CPU_FREQ_GOV_SCHED */ - /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. @@ -3088,7 +3003,6 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); update_cpu_load_active(rq); calc_global_load_tick(rq); - sched_freq_tick(cpu); raw_spin_unlock(&rq->lock); perf_event_task_tick(); diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c deleted file mode 100644 index ec0aed7a8f96..000000000000 --- a/kernel/sched/cpufreq_sched.c +++ /dev/null @@ -1,525 +0,0 @@ -/* - * Copyright (C) 2015 Michael Turquette - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#define CREATE_TRACE_POINTS -#include - -#include "sched.h" - -#define THROTTLE_DOWN_NSEC 50000000 /* 50ms default */ -#define THROTTLE_UP_NSEC 500000 /* 500us default */ - -struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE; -static bool __read_mostly cpufreq_driver_slow; - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED -static struct cpufreq_governor cpufreq_gov_sched; -#endif - -static DEFINE_PER_CPU(unsigned long, enabled); -DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); - -struct gov_tunables { - struct gov_attr_set attr_set; - unsigned int up_throttle_nsec; - unsigned int down_throttle_nsec; -}; - -/** - * gov_data - per-policy data internal to the governor - * @up_throttle: next throttling period expiry if increasing OPP - * @down_throttle: next throttling period expiry if decreasing OPP - * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP - * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP - * @task: worker thread for dvfs transition that may block/sleep - * @irq_work: callback used to wake up worker thread - * @requested_freq: last frequency requested by the sched governor - * - * struct gov_data is the per-policy cpufreq_sched-specific data structure. A - * per-policy instance of it is created when the cpufreq_sched governor receives - * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data - * member of struct cpufreq_policy. - * - * Readers of this data must call down_read(policy->rwsem). Writers must - * call down_write(policy->rwsem). - */ -struct gov_data { - ktime_t up_throttle; - ktime_t down_throttle; - struct gov_tunables *tunables; - struct list_head tunables_hook; - struct task_struct *task; - struct irq_work irq_work; - unsigned int requested_freq; -}; - -static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy, - unsigned int freq) -{ - struct gov_data *gd = policy->governor_data; - - /* avoid race with cpufreq_sched_stop */ - if (!down_write_trylock(&policy->rwsem)) - return; - - __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); - - gd->up_throttle = ktime_add_ns(ktime_get(), - gd->tunables->up_throttle_nsec); - gd->down_throttle = ktime_add_ns(ktime_get(), - gd->tunables->down_throttle_nsec); - up_write(&policy->rwsem); -} - -static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq) -{ - ktime_t now = ktime_get(); - - ktime_t throttle = gd->requested_freq < cur_freq ? - gd->down_throttle : gd->up_throttle; - - if (ktime_after(now, throttle)) - return false; - - while (1) { - int usec_left = ktime_to_ns(ktime_sub(throttle, now)); - - usec_left /= NSEC_PER_USEC; - trace_cpufreq_sched_throttled(usec_left); - usleep_range(usec_left, usec_left + 100); - now = ktime_get(); - if (ktime_after(now, throttle)) - return true; - } -} - -/* - * we pass in struct cpufreq_policy. 
This is safe because changing out the - * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP), - * which tears down all of the data structures and __cpufreq_governor(policy, - * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the - * new policy pointer - */ -static int cpufreq_sched_thread(void *data) -{ - struct sched_param param; - struct cpufreq_policy *policy; - struct gov_data *gd; - unsigned int new_request = 0; - unsigned int last_request = 0; - int ret; - - policy = (struct cpufreq_policy *) data; - gd = policy->governor_data; - - param.sched_priority = 50; - ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, ¶m); - if (ret) { - pr_warn("%s: failed to set SCHED_FIFO\n", __func__); - do_exit(-EINVAL); - } else { - pr_debug("%s: kthread (%d) set to SCHED_FIFO\n", - __func__, gd->task->pid); - } - - do { - new_request = gd->requested_freq; - if (new_request == last_request) { - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) - break; - schedule(); - } else { - /* - * if the frequency thread sleeps while waiting to be - * unthrottled, start over to check for a newer request - */ - if (finish_last_request(gd, policy->cur)) - continue; - last_request = new_request; - cpufreq_sched_try_driver_target(policy, new_request); - } - } while (!kthread_should_stop()); - - return 0; -} - -static void cpufreq_sched_irq_work(struct irq_work *irq_work) -{ - struct gov_data *gd; - - gd = container_of(irq_work, struct gov_data, irq_work); - if (!gd) - return; - - wake_up_process(gd->task); -} - -static void update_fdomain_capacity_request(int cpu) -{ - unsigned int freq_new, index_new, cpu_tmp; - struct cpufreq_policy *policy; - struct gov_data *gd; - unsigned long capacity = 0; - - /* - * Avoid grabbing the policy if possible. A test is still - * required after locking the CPU's policy to avoid racing - * with the governor changing. - */ - if (!per_cpu(enabled, cpu)) - return; - - policy = cpufreq_cpu_get(cpu); - if (IS_ERR_OR_NULL(policy)) - return; - - if (policy->governor != &cpufreq_gov_sched || - !policy->governor_data) - goto out; - - gd = policy->governor_data; - - /* find max capacity requested by cpus in this policy */ - for_each_cpu(cpu_tmp, policy->cpus) { - struct sched_capacity_reqs *scr; - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp); - capacity = max(capacity, scr->total); - } - - /* Convert the new maximum capacity request into a cpu frequency */ - freq_new = capacity * policy->cpuinfo.max_freq >> SCHED_CAPACITY_SHIFT; - if (cpufreq_frequency_table_target(policy, policy->freq_table, - freq_new, CPUFREQ_RELATION_L, - &index_new)) - goto out; - freq_new = policy->freq_table[index_new].frequency; - - if (freq_new > policy->max) - freq_new = policy->max; - - if (freq_new < policy->min) - freq_new = policy->min; - - trace_cpufreq_sched_request_opp(cpu, capacity, freq_new, - gd->requested_freq); - if (freq_new == gd->requested_freq) - goto out; - - gd->requested_freq = freq_new; - - /* - * Throttling is not yet supported on platforms with fast cpufreq - * drivers. 
- */ - if (cpufreq_driver_slow) - irq_work_queue_on(&gd->irq_work, cpu); - else - cpufreq_sched_try_driver_target(policy, freq_new); - -out: - cpufreq_cpu_put(policy); -} - -#ifdef CONFIG_SCHED_WALT -static inline unsigned long -requested_capacity(struct sched_capacity_reqs *scr) -{ - if (!walt_disabled && sysctl_sched_use_walt_cpu_util) - return scr->cfs; - return scr->cfs + scr->rt; -} -#else -#define requested_capacity(scr) (scr->cfs + scr->rt) -#endif - -void update_cpu_capacity_request(int cpu, bool request) -{ - unsigned long new_capacity; - struct sched_capacity_reqs *scr; - - /* The rq lock serializes access to the CPU's sched_capacity_reqs. */ - lockdep_assert_held(&cpu_rq(cpu)->lock); - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - - new_capacity = requested_capacity(scr); - new_capacity = new_capacity * capacity_margin - / SCHED_CAPACITY_SCALE; - new_capacity += scr->dl; - - if (new_capacity == scr->total) - return; - - trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity); - - scr->total = new_capacity; - if (request) - update_fdomain_capacity_request(cpu); -} - -static inline void set_sched_freq(void) -{ - static_key_slow_inc(&__sched_freq); -} - -static inline void clear_sched_freq(void) -{ - static_key_slow_dec(&__sched_freq); -} - -/* Tunables */ -static struct gov_tunables *global_tunables; - -static inline struct gov_tunables *to_tunables(struct gov_attr_set *attr_set) -{ - return container_of(attr_set, struct gov_tunables, attr_set); -} - -static ssize_t up_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - - return sprintf(buf, "%u\n", tunables->up_throttle_nsec); -} - -static ssize_t up_throttle_nsec_store(struct gov_attr_set *attr_set, - const char *buf, size_t count) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - int ret; - long unsigned int val; - - ret = kstrtoul(buf, 0, &val); - if (ret < 0) - return ret; - tunables->up_throttle_nsec = val; - return count; -} - -static ssize_t down_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - - return sprintf(buf, "%u\n", tunables->down_throttle_nsec); -} - -static ssize_t down_throttle_nsec_store(struct gov_attr_set *attr_set, - const char *buf, size_t count) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - int ret; - long unsigned int val; - - ret = kstrtoul(buf, 0, &val); - if (ret < 0) - return ret; - tunables->down_throttle_nsec = val; - return count; -} - -static struct governor_attr up_throttle_nsec = __ATTR_RW(up_throttle_nsec); -static struct governor_attr down_throttle_nsec = __ATTR_RW(down_throttle_nsec); - -static struct attribute *schedfreq_attributes[] = { - &up_throttle_nsec.attr, - &down_throttle_nsec.attr, - NULL -}; - -static struct kobj_type tunables_ktype = { - .default_attrs = schedfreq_attributes, - .sysfs_ops = &governor_sysfs_ops, -}; - -static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) -{ - struct gov_data *gd; - int cpu; - int rc; - - for_each_cpu(cpu, policy->cpus) - memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0, - sizeof(struct sched_capacity_reqs)); - - gd = kzalloc(sizeof(*gd), GFP_KERNEL); - if (!gd) - return -ENOMEM; - - policy->governor_data = gd; - - if (!global_tunables) { - gd->tunables = kzalloc(sizeof(*gd->tunables), GFP_KERNEL); - if (!gd->tunables) - goto free_gd; - - gd->tunables->up_throttle_nsec = - policy->cpuinfo.transition_latency ? 
- policy->cpuinfo.transition_latency : - THROTTLE_UP_NSEC; - gd->tunables->down_throttle_nsec = - THROTTLE_DOWN_NSEC; - - rc = kobject_init_and_add(&gd->tunables->attr_set.kobj, - &tunables_ktype, - get_governor_parent_kobj(policy), - "%s", cpufreq_gov_sched.name); - if (rc) - goto free_tunables; - - gov_attr_set_init(&gd->tunables->attr_set, - &gd->tunables_hook); - - pr_debug("%s: throttle_threshold = %u [ns]\n", - __func__, gd->tunables->up_throttle_nsec); - - if (!have_governor_per_policy()) - global_tunables = gd->tunables; - } else { - gd->tunables = global_tunables; - gov_attr_set_get(&global_tunables->attr_set, - &gd->tunables_hook); - } - - policy->governor_data = gd; - if (cpufreq_driver_is_slow()) { - cpufreq_driver_slow = true; - gd->task = kthread_create(cpufreq_sched_thread, policy, - "kschedfreq:%d", - cpumask_first(policy->related_cpus)); - if (IS_ERR_OR_NULL(gd->task)) { - pr_err("%s: failed to create kschedfreq thread\n", - __func__); - goto free_tunables; - } - get_task_struct(gd->task); - kthread_bind_mask(gd->task, policy->related_cpus); - wake_up_process(gd->task); - init_irq_work(&gd->irq_work, cpufreq_sched_irq_work); - } - - set_sched_freq(); - - return 0; - -free_tunables: - kfree(gd->tunables); -free_gd: - policy->governor_data = NULL; - kfree(gd); - return -ENOMEM; -} - -static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy) -{ - unsigned int count; - struct gov_data *gd = policy->governor_data; - - clear_sched_freq(); - if (cpufreq_driver_slow) { - kthread_stop(gd->task); - put_task_struct(gd->task); - } - - count = gov_attr_set_put(&gd->tunables->attr_set, &gd->tunables_hook); - if (!count) { - if (!have_governor_per_policy()) - global_tunables = NULL; - kfree(gd->tunables); - } - - policy->governor_data = NULL; - - kfree(gd); - return 0; -} - -static int cpufreq_sched_start(struct cpufreq_policy *policy) -{ - int cpu; - - for_each_cpu(cpu, policy->cpus) - per_cpu(enabled, cpu) = 1; - - return 0; -} - -static void cpufreq_sched_limits(struct cpufreq_policy *policy) -{ - unsigned int clamp_freq; - struct gov_data *gd = policy->governor_data;; - - pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n", - policy->cpu, policy->min, policy->max, - policy->cur); - - clamp_freq = clamp(gd->requested_freq, policy->min, policy->max); - - if (policy->cur != clamp_freq) - __cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L); -} - -static int cpufreq_sched_stop(struct cpufreq_policy *policy) -{ - int cpu; - - for_each_cpu(cpu, policy->cpus) - per_cpu(enabled, cpu) = 0; - - return 0; -} - -static int cpufreq_sched_setup(struct cpufreq_policy *policy, - unsigned int event) -{ - switch (event) { - case CPUFREQ_GOV_POLICY_INIT: - return cpufreq_sched_policy_init(policy); - case CPUFREQ_GOV_POLICY_EXIT: - return cpufreq_sched_policy_exit(policy); - case CPUFREQ_GOV_START: - return cpufreq_sched_start(policy); - case CPUFREQ_GOV_STOP: - return cpufreq_sched_stop(policy); - case CPUFREQ_GOV_LIMITS: - cpufreq_sched_limits(policy); - break; - } - return 0; -} - - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED -static -#endif -struct cpufreq_governor cpufreq_gov_sched = { - .name = "sched", - .governor = cpufreq_sched_setup, - .owner = THIS_MODULE, -}; - -static int __init cpufreq_sched_init(void) -{ - int cpu; - - for_each_cpu(cpu, cpu_possible_mask) - per_cpu(enabled, cpu) = 0; - return cpufreq_register_governor(&cpufreq_gov_sched); -} - -/* Try to make this the default governor */ -fs_initcall(cpufreq_sched_init); diff --git a/kernel/sched/fair.c 
b/kernel/sched/fair.c index 5c65f3ad6da1..b5ea66e5551c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -54,7 +54,6 @@ unsigned int sysctl_sched_latency = 6000000ULL; unsigned int normalized_sysctl_sched_latency = 6000000ULL; unsigned int sysctl_sched_sync_hint_enable = 1; -unsigned int sysctl_sched_initial_task_util = 0; unsigned int sysctl_sched_cstate_aware = 1; #ifdef CONFIG_SCHED_WALT @@ -750,9 +749,7 @@ void init_entity_runnable_average(struct sched_entity *se) sa->load_sum = sa->load_avg * LOAD_AVG_MAX; /* * In previous Android versions, we used to have: - * sa->util_avg = sched_freq() ? - * sysctl_sched_initial_task_util : - * scale_load_down(SCHED_LOAD_SCALE); + * sa->util_avg = scale_load_down(SCHED_LOAD_SCALE); * sa->util_sum = sa->util_avg * LOAD_AVG_MAX; * However, that functionality has been moved to enqueue. * It is unclear if we should restore this in enqueue. @@ -4668,21 +4665,6 @@ unsigned long boosted_cpu_util(int cpu); #define boosted_cpu_util(cpu) cpu_util_freq(cpu) #endif -#ifdef CONFIG_SMP -static void update_capacity_of(int cpu) -{ - unsigned long req_cap; - - if (!sched_freq()) - return; - - /* Normalize scale-invariant capacity to cpu. */ - req_cap = boosted_cpu_util(cpu); - req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, req_cap); -} -#endif - /* * The enqueue_task method is called before nr_running is * increased. Here we update the fair scheduling stats and @@ -4695,7 +4677,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct sched_entity *se = &p->se; #ifdef CONFIG_SMP int task_new = flags & ENQUEUE_WAKEUP_NEW; - int task_wakeup = flags & ENQUEUE_WAKEUP; #endif /* @@ -4769,16 +4750,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) rq->rd->overutilized = true; trace_sched_overutilized(true); } - - /* - * We want to potentially trigger a freq switch - * request only for tasks that are waking up; this is - * because we get here also during load balancing, but - * in these cases it seems wise to trigger as single - * request after load balancing is done. - */ - if (task_new || task_wakeup) - update_capacity_of(cpu_of(rq)); } #endif /* CONFIG_SMP */ @@ -4854,25 +4825,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) */ schedtune_dequeue_task(p, cpu_of(rq)); - if (!se) { + if (!se) walt_dec_cumulative_runnable_avg(rq, p); - - /* - * We want to potentially trigger a freq switch - * request only for tasks that are going to sleep; - * this is because we get here also during load - * balancing, but in these cases it seems wise to - * trigger as single request after load balancing is - * done. - */ - if (task_sleep) { - if (rq->cfs.nr_running) - update_capacity_of(cpu_of(rq)); - else if (sched_freq()) - set_cfs_cpu_capacity(cpu_of(rq), false, 0); /* no normalization required for 0 */ - } - } - #endif /* CONFIG_SMP */ hrtick_update(rq); @@ -7709,10 +7663,6 @@ static void attach_one_task(struct rq *rq, struct task_struct *p) { raw_spin_lock(&rq->lock); attach_task(rq, p); - /* - * We want to potentially raise target_cpu's OPP. - */ - update_capacity_of(cpu_of(rq)); raw_spin_unlock(&rq->lock); } @@ -7734,11 +7684,6 @@ static void attach_tasks(struct lb_env *env) attach_task(env->dst_rq, p); } - /* - * We want to potentially raise env.dst_cpu's OPP. 
- */ - update_capacity_of(env->dst_cpu); - raw_spin_unlock(&env->dst_rq->lock); } @@ -9081,11 +9026,6 @@ more_balance: * ld_moved - cumulative load moved across iterations */ cur_ld_moved = detach_tasks(&env); - /* - * We want to potentially lower env.src_cpu's OPP. - */ - if (cur_ld_moved) - update_capacity_of(env.src_cpu); /* * We've detached some tasks from busiest_rq. Every @@ -9310,7 +9250,6 @@ static int idle_balance(struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; u64 curr_cost = 0; - long removed_util=0; idle_enter_fair(this_rq); @@ -9334,17 +9273,6 @@ static int idle_balance(struct rq *this_rq) raw_spin_unlock(&this_rq->lock); - /* - * If removed_util_avg is !0 we most probably migrated some task away - * from this_cpu. In this case we might be willing to trigger an OPP - * update, but we want to do so if we don't find anybody else to pull - * here (we will trigger an OPP update with the pulled task's enqueue - * anyway). - * - * Record removed_util before calling update_blocked_averages, and use - * it below (before returning) to see if an OPP update is required. - */ - removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg); update_blocked_averages(this_cpu); rcu_read_lock(); for_each_domain(this_cpu, sd) { @@ -9409,12 +9337,6 @@ out: if (pulled_task) { idle_exit_fair(this_rq); this_rq->idle_stamp = 0; - } else if (removed_util) { - /* - * No task pulled and someone has been migrated away. - * Good case to trigger an OPP update. - */ - update_capacity_of(this_cpu); } return pulled_task; @@ -9488,13 +9410,8 @@ static int active_load_balance_cpu_stop(void *data) update_rq_clock(busiest_rq); p = detach_one_task(&env); - if (p) { + if (p) schedstat_inc(sd, alb_pushed); - /* - * We want to potentially lower env.src_cpu's OPP. - */ - update_capacity_of(env.src_cpu); - } else schedstat_inc(sd, alb_failed); } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f41435e7f75d..ebf0d9329c86 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1636,41 +1636,6 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag #endif } -#ifdef CONFIG_SMP -static void sched_rt_update_capacity_req(struct rq *rq) -{ - u64 total, used, age_stamp, avg; - s64 delta; - - if (!sched_freq()) - return; - - sched_avg_update(rq); - /* - * Since we're reading these variables without serialization make sure - * we read them once before doing sanity checks on them. - */ - age_stamp = READ_ONCE(rq->age_stamp); - avg = READ_ONCE(rq->rt_avg); - delta = rq_clock(rq) - age_stamp; - - if (unlikely(delta < 0)) - delta = 0; - - total = sched_avg_period() + delta; - - used = div_u64(avg, total); - if (unlikely(used > SCHED_CAPACITY_SCALE)) - used = SCHED_CAPACITY_SCALE; - - set_rt_cpu_capacity(rq->cpu, 1, (unsigned long)(used)); -} -#else -static inline void sched_rt_update_capacity_req(struct rq *rq) -{ } - -#endif - static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, struct rt_rq *rt_rq) { @@ -1739,17 +1704,8 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) if (prev->sched_class == &rt_sched_class) update_curr_rt(rq); - if (!rt_rq->rt_queued) { - /* - * The next task to be picked on this rq will have a lower - * priority than rt tasks so we can spend some time to update - * the capacity used by rt tasks based on the last activity. - * This value will be the used as an estimation of the next - * activity. 
- */ - sched_rt_update_capacity_req(rq); + if (!rt_rq->rt_queued) return NULL; - } put_prev_task(rq, prev); @@ -2476,9 +2432,6 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) update_curr_rt(rq); - if (rq->rt.rt_nr_running) - sched_rt_update_capacity_req(rq); - watchdog(rq, p); /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 028e232103c2..782746140711 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1629,81 +1629,6 @@ static inline unsigned long cpu_util_freq(int cpu) #endif -#ifdef CONFIG_CPU_FREQ_GOV_SCHED -#define capacity_max SCHED_CAPACITY_SCALE -extern unsigned int capacity_margin; -extern struct static_key __sched_freq; - -static inline bool sched_freq(void) -{ - return static_key_false(&__sched_freq); -} - -/* - * sched_capacity_reqs expects capacity requests to be normalised. - * All capacities should sum to the range of 0-1024. - */ -DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); -void update_cpu_capacity_request(int cpu, bool request); - -static inline void set_cfs_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - -#ifdef CONFIG_SCHED_WALT - if (!walt_disabled && sysctl_sched_use_walt_cpu_util) { - int rtdl = scr->rt + scr->dl; - /* - * WALT tracks the utilization of a CPU considering the load - * generated by all the scheduling classes. - * Since the following call to: - * update_cpu_capacity - * is already adding the RT and DL utilizations let's remove - * these contributions from the WALT signal. - */ - if (capacity > rtdl) - capacity -= rtdl; - else - capacity = 0; - } -#endif - if (scr->cfs != capacity) { - scr->cfs = capacity; - update_cpu_capacity_request(cpu, request); - } -} - -static inline void set_rt_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) { - per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity; - update_cpu_capacity_request(cpu, request); - } -} - -static inline void set_dl_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) { - per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity; - update_cpu_capacity_request(cpu, request); - } -} -#else -static inline bool sched_freq(void) { return false; } -static inline void set_cfs_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -static inline void set_rt_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -static inline void set_dl_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -#endif - static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq)); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 55caf81a833f..4e2f98dd2052 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -342,13 +342,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "sched_initial_task_util", - .data = &sysctl_sched_initial_task_util, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "sched_cstate_aware", .data = &sysctl_sched_cstate_aware, -- cgit v1.2.3 From 3822fe484cef0ef3e37e7106bfd684639f64e77b Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 8 Nov 2017 00:43:37 +0000 Subject: Revert "ANDROID: sched/rt: schedtune: Add boost retention to RT" This reverts commit 
d194ba5d712f051ff6c025f3484bb72f219764e3. Reason for revert: Broke some builds. Will fix and resubmit. Change-Id: I4e6fa1562346eda1bbf058f1d5ace5ba6256ce07 --- kernel/sched/core.c | 1 - kernel/sched/rt.c | 154 --------------------------------------------------- kernel/sched/sched.h | 1 - 3 files changed, 156 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 889fb1aff1e0..1eb91a696069 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2200,7 +2200,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) init_dl_task_timer(&p->dl); __dl_clear_params(p); - init_rt_schedtune_timer(&p->rt); INIT_LIST_HEAD(&p->rt.run_list); #ifdef CONFIG_PREEMPT_NOTIFIERS diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index ebf0d9329c86..c8322ab130eb 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -7,7 +7,6 @@ #include #include -#include #include "walt.h" #include "tune.h" @@ -987,73 +986,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) return 0; } -#define RT_SCHEDTUNE_INTERVAL 50000000ULL - -static void sched_rt_update_capacity_req(struct rq *rq); - -static enum hrtimer_restart rt_schedtune_timer(struct hrtimer *timer) -{ - struct sched_rt_entity *rt_se = container_of(timer, - struct sched_rt_entity, - schedtune_timer); - struct task_struct *p = rt_task_of(rt_se); - struct rq *rq = task_rq(p); - - raw_spin_lock(&rq->lock); - - /* - * Nothing to do if: - * - task has switched runqueues - * - task isn't RT anymore - */ - if (rq != task_rq(p) || (p->sched_class != &rt_sched_class)) - goto out; - - /* - * If task got enqueued back during callback time, it means we raced - * with the enqueue on another cpu, that's Ok, just do nothing as - * enqueue path would have tried to cancel us and we shouldn't run - * Also check the schedtune_enqueued flag as class-switch on a - * sleeping task may have already canceled the timer and done dq - */ - if (p->on_rq || !rt_se->schedtune_enqueued) - goto out; - - /* - * RT task is no longer active, cancel boost - */ - rt_se->schedtune_enqueued = false; - schedtune_dequeue_task(p, cpu_of(rq)); - sched_rt_update_capacity_req(rq); - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); -out: - raw_spin_unlock(&rq->lock); - - /* - * This can free the task_struct if no more references. - */ - put_task_struct(p); - - return HRTIMER_NORESTART; -} - -void init_rt_schedtune_timer(struct sched_rt_entity *rt_se) -{ - struct hrtimer *timer = &rt_se->schedtune_timer; - - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - timer->function = rt_schedtune_timer; - rt_se->schedtune_enqueued = false; -} - -static void start_schedtune_timer(struct sched_rt_entity *rt_se) -{ - struct hrtimer *timer = &rt_se->schedtune_timer; - - hrtimer_start(timer, ns_to_ktime(RT_SCHEDTUNE_INTERVAL), - HRTIMER_MODE_REL_PINNED); -} - /* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class. @@ -1391,33 +1323,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); - if (!schedtune_task_boost(p)) - return; - - /* - * If schedtune timer is active, that means a boost was already - * done, just cancel the timer so that deboost doesn't happen. - * Otherwise, increase the boost. If an enqueued timer was - * cancelled, put the task reference. 
- */ - if (hrtimer_try_to_cancel(&rt_se->schedtune_timer) == 1) - put_task_struct(p); - - /* - * schedtune_enqueued can be true in the following situation: - * enqueue_task_rt grabs rq lock before timer fires - * or before its callback acquires rq lock - * schedtune_enqueued can be false if timer callback is running - * and timer just released rq lock, or if the timer finished - * running and canceling the boost - */ - if (rt_se->schedtune_enqueued) - return; - - rt_se->schedtune_enqueued = true; schedtune_enqueue_task(p, cpu_of(rq)); - sched_rt_update_capacity_req(rq); - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -1429,20 +1335,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) walt_dec_cumulative_runnable_avg(rq, p); dequeue_pushable_task(rq, p); - - if (!rt_se->schedtune_enqueued) - return; - - if (flags == DEQUEUE_SLEEP) { - get_task_struct(p); - start_schedtune_timer(rt_se); - return; - } - - rt_se->schedtune_enqueued = false; schedtune_dequeue_task(p, cpu_of(rq)); - sched_rt_update_capacity_req(rq); - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); } /* @@ -1482,33 +1375,6 @@ static void yield_task_rt(struct rq *rq) #ifdef CONFIG_SMP static int find_lowest_rq(struct task_struct *task); -/* - * Perform a schedtune dequeue and cancelation of boost timers if needed. - * Should be called only with the rq->lock held. - */ -static void schedtune_dequeue_rt(struct rq *rq, struct task_struct *p) -{ - struct sched_rt_entity *rt_se = &p->rt; - - BUG_ON(!raw_spin_is_locked(&rq->lock)); - - if (!rt_se->schedtune_enqueued) - return; - - /* - * Incase of class change cancel any active timers. If an enqueued - * timer was cancelled, put the task ref. - */ - if (hrtimer_try_to_cancel(&rt_se->schedtune_timer) == 1) - put_task_struct(p); - - /* schedtune_enqueued is true, deboost it */ - rt_se->schedtune_enqueued = false; - schedtune_dequeue_task(p, task_cpu(p)); - sched_rt_update_capacity_req(rq); - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); -} - static int select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags, int sibling_count_hint) @@ -1563,19 +1429,6 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags, rcu_read_unlock(); out: - /* - * If previous CPU was different, make sure to cancel any active - * schedtune timers and deboost. - */ - if (task_cpu(p) != cpu) { - unsigned long fl; - struct rq *prq = task_rq(p); - - raw_spin_lock_irqsave(&prq->lock, fl); - schedtune_dequeue_rt(prq, p); - raw_spin_unlock_irqrestore(&prq->lock, fl); - } - return cpu; } @@ -2308,13 +2161,6 @@ static void rq_offline_rt(struct rq *rq) */ static void switched_from_rt(struct rq *rq, struct task_struct *p) { - /* - * On class switch from rt, always cancel active schedtune timers, - * this handles the cases where we switch class for a task that is - * already rt-dequeued but has a running timer. 
- */ - schedtune_dequeue_rt(rq, p); - /* * If there are other RT tasks then we will reschedule * and the scheduling of the other RT tasks will handle diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 782746140711..203d64a0c947 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1398,7 +1398,6 @@ extern void resched_cpu(int cpu); extern struct rt_bandwidth def_rt_bandwidth; extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); -extern void init_rt_schedtune_timer(struct sched_rt_entity *rt_se); extern struct dl_bandwidth def_dl_bandwidth; extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime); -- cgit v1.2.3