author | Davide Garberi <dade.garberi@gmail.com> | 2023-08-06 15:31:30 +0200
---|---|---
committer | Davide Garberi <dade.garberi@gmail.com> | 2023-08-06 15:31:30 +0200
commit | cc57cb4ee3b7918b74d30604735d353b9a5fa23b (patch) |
tree | 0be483b86472eaf1c74f747ecbaf6300f3998a1a /kernel/sched/fair.c |
parent | 44be99a74546fb018cbf2049602a5fd2889a0089 (diff) |
parent | 7d11b1a7a11c598a07687f853ded9eca97d89043 (diff) |
Merge lineage-20 of git@github.com:LineageOS/android_kernel_qcom_msm8998.git into lineage-20
7d11b1a7a11c Revert "sched: cpufreq: Use sched_clock instead of rq_clock when updating schedutil"
daaa5da96a74 sched: Take irq_sparse lock during the isolation
217ab2d0ef91 rcu: Speed up calling of RCU tasks callbacks
997b726bc092 kernel: power: Workaround for sensor ipc message causing high power consume
b933e4d37bc0 sched/fair: Fix low cpu usage with high throttling by removing expiration of cpu-local slices
82d3f23d6dc5 sched/fair: Fix bandwidth timer clock drift condition
629bfed360f9 kernel: power: qos: remove check for core isolation while cluster LPMs
891a63210e1d sched/fair: Fix issue where frequency update not skipped
b775cb29f663 ANDROID: Move schedtune en/dequeue before schedutil update triggers
ebdb82f7b34a sched/fair: Skip frequency updates if CPU about to idle
ff383d94478a FROMLIST: sched: Make iowait_boost optional in schedutil
9539942cb065 FROMLIST: cpufreq: Make iowait boost a policy option
b65c91c9aa14 ARM: dts: msm: add HW CPU's busy-cost-data for additional freqs
72f13941085b ARM: dts: msm: fix CPU's idle-cost-data
ab88411382f7 ARM: dts: msm: fix EM to be monotonically increasing
83dcbae14782 ARM: dts: msm: Fix EAS idle-cost-data property length
33d3b17bfdfb ARM: dts: msm: Add msm8998 energy model
c0fa7577022c sched/walt: Re-add code to allow WALT to function
d5cd35f38616 FROMGIT: binder: use EINTR for interrupted wait for work
db74739c86de sched: Don't fail isolation request for an already isolated CPU
aee7a16e347b sched: WALT: increase WALT minimum window size to 20ms
4dbe44554792 sched: cpufreq: Use per_cpu_ptr instead of this_cpu_ptr when reporting load
ef3fb04c7df4 sched: cpufreq: Use sched_clock instead of rq_clock when updating schedutil
c7128748614a sched/cpupri: Exclude isolated CPUs from the lowest_mask
6adb092856e8 sched: cpufreq: Limit governor updates to WALT changes alone
0fa652ee00f5 sched: walt: Correct WALT window size initialization
41cbb7bc59fb sched: walt: fix window misalignment when HZ=300
43cbf9d6153d sched/tune: Increase the cgroup limit to 6
c71b8fffe6b3 drivers: cpuidle: lpm-levels: Fix KW issues with idle state idx < 0
938e42ca699f drivers: cpuidle: lpm-levels: Correctly check for list empty
8d8a48aecde5 sched/fair: Fix load_balance() affinity redo path
eccc8acbe705 sched/fair: Avoid unnecessary active load balance
0ffdb886996b BACKPORT: sched/core: Fix rules for running on online && !active CPUs
c9999f04236e sched/core: Allow kthreads to fall back to online && !active cpus
b9b6bc6ea3c0 sched: Allow migrating kthreads into online but inactive CPUs
a9314f9d8ad4 sched/fair: Allow load bigger task load balance when nr_running is 2
c0b317c27d44 pinctrl: qcom: Clear status bit on irq_unmask
45df1516d04a UPSTREAM: mm: fix misplaced unlock_page in do_wp_page()
899def5edcd4 UPSTREAM: mm/ksm: Remove reuse_ksm_page()
46c6fbdd185a BACKPORT: mm: do_wp_page() simplification
90dccbae4c04 UPSTREAM: mm: reuse only-pte-mapped KSM page in do_wp_page()
ebf270d24640 sched/fair: vruntime should normalize when switching from fair
cbe0b37059c9 mm: introduce arg_lock to protect arg_start|end and env_start|end in mm_struct
12d40f1995b4 msm: mdss: Fix indentation
620df03a7229 msm: mdss: Treat polling_en as the bool that it is
12af218146a6 msm: mdss: add idle state node
13e661759656 cpuset: Restore tasks affinity while moving across cpusets
602bf4096dab genirq: Honour IRQ's affinity hint during migration
9209b5556f6a power: qos: Use effective affinity mask
f31078b5825f genirq: Introduce effective affinity mask
58c453484f7e sched/cputime: Mitigate performance regression in times()/clock_gettime()
400383059868 kernel: time: Add delay after cpu_relax() in tight loops
1daa7ea39076 pinctrl: qcom: Update irq handle for GPIO pins
07f7c9961c7c power: smb-lib: Fix mutex acquisition deadlock on PD hard reset
094b738f46c8 power: qpnp-smb2: Implement battery charging_enabled node
d6038d6da57f ASoC: msm-pcm-q6-v2: Add dsp buf check
0d7a6c301af8 qcacld-3.0: Fix OOB in wma_scan_roam.c
Change-Id: Ia2e189e37daad6e99bdb359d1204d9133a7916f4
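Several of the merged commits above (aee7a16e347b, 0fa652ee00f5, 41cbb7bc59fb) adjust WALT's window-based load tracking, and a hunk in the diff below adds a WALT branch to task_util() that scales a task's measured demand into the scheduler's 0..1024 capacity range. A minimal standalone C sketch of that scaling (the helper name, window value, and example numbers are illustrative, not taken from the kernel source):

```c
#include <stdio.h>
#include <stdint.h>

#define SCHED_CAPACITY_SHIFT 10 /* scheduler capacity scale: 0..1024 */

/*
 * Illustrative model of the WALT branch of task_util() in the diff
 * below: demand is the time (ns) the task kept a CPU busy within a
 * sampling window, scaled into the 0..1024 fixed-point capacity range.
 */
static uint64_t walt_task_util(uint64_t demand_ns, uint64_t window_ns)
{
    return (demand_ns << SCHED_CAPACITY_SHIFT) / window_ns;
}

int main(void)
{
    uint64_t window_ns = 20000000ULL; /* 20 ms, the new minimum window */

    /* 5 ms busy out of a 20 ms window -> 256/1024, i.e. 25% utilization */
    printf("util = %llu\n",
           (unsigned long long)walt_task_util(5000000ULL, window_ns));
    return 0;
}
```

The << 10 shift mirrors the (demand << 10) / walt_ravg_window expression the diff adds to task_util(); the 20 ms window matches commit aee7a16e347b above. Everything else here is invented for illustration.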
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r-- | kernel/sched/fair.c | 214
1 file changed, 114 insertions, 100 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 51443a801af5..266fc95f6c0f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -55,6 +55,12 @@ unsigned int normalized_sysctl_sched_latency = 6000000ULL;
 unsigned int sysctl_sched_sync_hint_enable = 1;
 unsigned int sysctl_sched_cstate_aware = 1;
+#ifdef CONFIG_SCHED_WALT
+unsigned int sysctl_sched_use_walt_cpu_util = 1;
+unsigned int sysctl_sched_use_walt_task_util = 1;
+__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload =
+    (10 * NSEC_PER_MSEC);
+#endif
 /*
  * The initial- and re-scaling of tunables is configurable
  * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus))
@@ -4326,6 +4332,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
  */
 #define UPDATE_TG     0x1
 #define SKIP_AGE_LOAD 0x2
+#define SKIP_CPUFREQ  0x4
 
 /* Update task and its cfs_rq load average */
 static inline void update_load_avg(struct sched_entity *se, int flags)
@@ -4346,7 +4353,7 @@ static inline void update_load_avg(struct sched_entity *se, int flags)
                   cfs_rq->curr == se, NULL);
     }
 
-    decayed = update_cfs_rq_load_avg(now, cfs_rq, true);
+    decayed = update_cfs_rq_load_avg(now, cfs_rq, !(flags & SKIP_CPUFREQ));
     decayed |= propagate_entity_load_avg(se);
 
     if (decayed && (flags & UPDATE_TG))
@@ -4522,6 +4529,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 
 #define UPDATE_TG     0x0
 #define SKIP_AGE_LOAD 0x0
+#define SKIP_CPUFREQ  0x0
 
 static inline void update_load_avg(struct sched_entity *se, int not_used1){}
 static inline void
@@ -4744,6 +4752,8 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+    int update_flags;
+
     /*
      * Update run-time statistics of the 'current'.
      */
@@ -4757,7 +4767,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
      *   - For group entity, update its weight to reflect the new share
      *     of its group cfs_rq.
      */
-    update_load_avg(se, UPDATE_TG);
+    update_flags = UPDATE_TG;
+
+    if (flags & DEQUEUE_IDLE)
+        update_flags |= SKIP_CPUFREQ;
+
+    update_load_avg(se, update_flags);
     dequeue_entity_load_avg(cfs_rq, se);
 
     update_stats_dequeue(cfs_rq, se);
@@ -5052,7 +5067,6 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
 
     now = sched_clock_cpu(smp_processor_id());
     cfs_b->runtime = cfs_b->quota;
-    cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
 }
 
 static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -5074,7 +5088,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
     struct task_group *tg = cfs_rq->tg;
     struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
-    u64 amount = 0, min_amount, expires;
+    u64 amount = 0, min_amount;
 
     /* note: this is a positive sum as runtime_remaining <= 0 */
     min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
@@ -5091,61 +5105,17 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
             cfs_b->idle = 0;
         }
     }
-    expires = cfs_b->runtime_expires;
     raw_spin_unlock(&cfs_b->lock);
 
     cfs_rq->runtime_remaining += amount;
-    /*
-     * we may have advanced our local expiration to account for allowed
-     * spread between our sched_clock and the one on which runtime was
-     * issued.
-     */
-    if ((s64)(expires - cfs_rq->runtime_expires) > 0)
-        cfs_rq->runtime_expires = expires;
 
     return cfs_rq->runtime_remaining > 0;
 }
 
-/*
- * Note: This depends on the synchronization provided by sched_clock and the
- * fact that rq->clock snapshots this value.
- */
-static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
-{
-    struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
-
-    /* if the deadline is ahead of our clock, nothing to do */
-    if (likely((s64)(rq_clock(rq_of(cfs_rq)) - cfs_rq->runtime_expires) < 0))
-        return;
-
-    if (cfs_rq->runtime_remaining < 0)
-        return;
-
-    /*
-     * If the local deadline has passed we have to consider the
-     * possibility that our sched_clock is 'fast' and the global deadline
-     * has not truly expired.
-     *
-     * Fortunately we can check determine whether this the case by checking
-     * whether the global deadline has advanced. It is valid to compare
-     * cfs_b->runtime_expires without any locks since we only care about
-     * exact equality, so a partial write will still work.
-     */
-
-    if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
-        /* extend local deadline, drift is bounded above by 2 ticks */
-        cfs_rq->runtime_expires += TICK_NSEC;
-    } else {
-        /* global deadline is ahead, expiration has passed */
-        cfs_rq->runtime_remaining = 0;
-    }
-}
-
 static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
 {
     /* dock delta_exec before expiring quota (as it could span periods) */
     cfs_rq->runtime_remaining -= delta_exec;
-    expire_cfs_rq_runtime(cfs_rq);
 
     if (likely(cfs_rq->runtime_remaining > 0))
         return;
@@ -5379,8 +5349,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
                 cpu_temp(cpu_of(rq)));
 }
 
-static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
-        u64 remaining, u64 expires)
+static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
 {
     struct cfs_rq *cfs_rq;
     u64 runtime;
@@ -5401,7 +5370,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
         remaining -= runtime;
 
         cfs_rq->runtime_remaining += runtime;
-        cfs_rq->runtime_expires = expires;
 
         /* we check whether we're throttled above */
         if (cfs_rq->runtime_remaining > 0)
@@ -5426,7 +5394,7 @@ next:
  */
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
-    u64 runtime, runtime_expires;
+    u64 runtime;
     int throttled;
 
     /* no need to continue the timer with no bandwidth constraint */
@@ -5454,8 +5422,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
     /* account preceding periods in which throttling occurred */
     cfs_b->nr_throttled += overrun;
 
-    runtime_expires = cfs_b->runtime_expires;
-
     /*
      * This check is repeated as we are holding onto the new bandwidth while
      * we unthrottle. This can potentially race with an unthrottled group
@@ -5468,8 +5434,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
         cfs_b->distribute_running = 1;
         raw_spin_unlock(&cfs_b->lock);
         /* we can't nest cfs_b->lock while distributing bandwidth */
-        runtime = distribute_cfs_runtime(cfs_b, runtime,
-                runtime_expires);
+        runtime = distribute_cfs_runtime(cfs_b, runtime);
         raw_spin_lock(&cfs_b->lock);
 
         cfs_b->distribute_running = 0;
@@ -5546,8 +5511,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
         return;
 
     raw_spin_lock(&cfs_b->lock);
-    if (cfs_b->quota != RUNTIME_INF &&
-        cfs_rq->runtime_expires == cfs_b->runtime_expires) {
+    if (cfs_b->quota != RUNTIME_INF) {
         cfs_b->runtime += slack_runtime;
 
         /* we are under rq->lock, defer unthrottling using a timer */
@@ -5579,7 +5543,6 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
 {
     u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
-    u64 expires;
 
     /* confirm we're still not at a refresh boundary */
     raw_spin_lock(&cfs_b->lock);
@@ -5596,7 +5559,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
     if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
         runtime = cfs_b->runtime;
 
-    expires = cfs_b->runtime_expires;
     if (runtime)
         cfs_b->distribute_running = 1;
 
@@ -5605,11 +5567,10 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
     if (!runtime)
         return;
 
-    runtime = distribute_cfs_runtime(cfs_b, runtime, expires);
+    runtime = distribute_cfs_runtime(cfs_b, runtime);
 
     raw_spin_lock(&cfs_b->lock);
-    if (expires == cfs_b->runtime_expires)
-        cfs_b->runtime -= min(runtime, cfs_b->runtime);
+    cfs_b->runtime -= min(runtime, cfs_b->runtime);
     cfs_b->distribute_running = 0;
     raw_spin_unlock(&cfs_b->lock);
 }
@@ -5936,6 +5897,25 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
     struct sched_entity *se = &p->se;
 #ifdef CONFIG_SMP
     int task_new = flags & ENQUEUE_WAKEUP_NEW;
+
+    /*
+     * Update SchedTune accounting.
+     *
+     * We do it before updating the CPU capacity to ensure the
+     * boost value of the current task is accounted for in the
+     * selection of the OPP.
+     *
+     * We do it also in the case where we enqueue a throttled task;
+     * we could argue that a throttled task should not boost a CPU,
+     * however:
+     * a) properly implementing CPU boosting considering throttled
+     *    tasks will increase a lot the complexity of the solution
+     * b) it's not easy to quantify the benefits introduced by
+     *    such a more complex solution.
+     * Thus, for the time being we go for the simple solution and boost
+     * also for throttled RQs.
+     */
+    schedtune_enqueue_task(p, cpu_of(rq));
 #endif
 
     /*
@@ -5961,6 +5941,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
         if (cfs_rq_throttled(cfs_rq))
             break;
         cfs_rq->h_nr_running++;
+        walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
         inc_cfs_rq_hmp_stats(cfs_rq, p, 1);
 
         flags = ENQUEUE_WAKEUP;
@@ -5969,6 +5950,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
     for_each_sched_entity(se) {
         cfs_rq = cfs_rq_of(se);
         cfs_rq->h_nr_running++;
+        walt_inc_cfs_cumulative_runnable_avg(cfs_rq, p);
         inc_cfs_rq_hmp_stats(cfs_rq, p, 1);
 
         if (cfs_rq_throttled(cfs_rq))
@@ -5984,27 +5966,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
     }
 
 #ifdef CONFIG_SMP
-
-    /*
-     * Update SchedTune accounting.
-     *
-     * We do it before updating the CPU capacity to ensure the
-     * boost value of the current task is accounted for in the
-     * selection of the OPP.
-     *
-     * We do it also in the case where we enqueue a throttled task;
-     * we could argue that a throttled task should not boost a CPU,
-     * however:
-     * a) properly implementing CPU boosting considering throttled
-     *    tasks will increase a lot the complexity of the solution
-     * b) it's not easy to quantify the benefits introduced by
-     *    such a more complex solution.
-     * Thus, for the time being we go for the simple solution and boost
-     * also for throttled RQs.
-     */
-    schedtune_enqueue_task(p, cpu_of(rq));
-
     if (energy_aware() && !se) {
+        walt_inc_cumulative_runnable_avg(rq, p);
         if (!task_new && !rq->rd->overutilized &&
             cpu_overutilized(rq->cpu)) {
             rq->rd->overutilized = true;
@@ -6029,6 +5992,20 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
     struct sched_entity *se = &p->se;
     int task_sleep = flags & DEQUEUE_SLEEP;
 
+    if (task_sleep && rq->nr_running == 1)
+        flags |= DEQUEUE_IDLE;
+
+#ifdef CONFIG_SMP
+    /*
+     * Update SchedTune accounting
+     *
+     * We do it before updating the CPU capacity to ensure the
+     * boost value of the current task is accounted for in the
+     * selection of the OPP.
+     */
+    schedtune_dequeue_task(p, cpu_of(rq));
+#endif
+
     for_each_sched_entity(se) {
         cfs_rq = cfs_rq_of(se);
         dequeue_entity(cfs_rq, se, flags);
@@ -6042,6 +6019,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
         if (cfs_rq_throttled(cfs_rq))
             break;
         cfs_rq->h_nr_running--;
+        walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
         dec_cfs_rq_hmp_stats(cfs_rq, p, 1);
 
         /* Don't dequeue parent if it has other entities besides us */
@@ -6060,14 +6038,22 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
     }
 
     for_each_sched_entity(se) {
+        int update_flags;
+
         cfs_rq = cfs_rq_of(se);
         cfs_rq->h_nr_running--;
+        walt_dec_cfs_cumulative_runnable_avg(cfs_rq, p);
         dec_cfs_rq_hmp_stats(cfs_rq, p, 1);
 
         if (cfs_rq_throttled(cfs_rq))
             break;
 
-        update_load_avg(se, UPDATE_TG);
+        update_flags = UPDATE_TG;
+
+        if (flags & DEQUEUE_IDLE)
+            update_flags |= SKIP_CPUFREQ;
+
+        update_load_avg(se, update_flags);
         update_cfs_shares(se);
     }
 
@@ -6076,19 +6062,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
         dec_rq_hmp_stats(rq, p, 1);
     }
 
-#ifdef CONFIG_SMP
-
-    /*
-     * Update SchedTune accounting
-     *
-     * We do it before updating the CPU capacity to ensure the
-     * boost value of the current task is accounted for in the
-     * selection of the OPP.
-     */
-    schedtune_dequeue_task(p, cpu_of(rq));
-
-#endif /* CONFIG_SMP */
-
     hrtick_update(rq);
 }
 
@@ -7098,6 +7071,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 
 static inline unsigned long task_util(struct task_struct *p)
 {
+#ifdef CONFIG_SCHED_WALT
+    if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
+        unsigned long demand = p->ravg.demand;
+        return (demand << 10) / walt_ravg_window;
+    }
+#endif
     return p->se.avg.util_avg;
 }
 
@@ -7656,6 +7635,11 @@ static inline int find_best_target(struct task_struct *p, int *backup_cpu,
         if (new_util > capacity_orig)
             continue;
 
+#ifdef CONFIG_SCHED_WALT
+        if (walt_cpu_high_irqload(i))
+            continue;
+#endif
+
         /*
          * Case A) Latency sensitive tasks
         *
@@ -8953,7 +8937,17 @@ redo:
         if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed)
             goto next;
 
-        if ((load / 2) > env->imbalance)
+        /*
+         * p is not running task when we goes until here, so if p is one
+         * of the 2 task in src cpu rq and not the running one,
+         * that means it is the only task that can be balanced.
+         * So only when there is other tasks can be balanced or
+         * there is situation to ignore big task, it is needed
+         * to skip the task load bigger than 2*imbalance.
+         */
+        if (((cpu_rq(env->src_cpu)->nr_running > 2) ||
+            (env->flags & LBF_IGNORE_BIG_TASKS)) &&
+                ((load / 2) > env->imbalance))
             goto next;
 
         detach_task(p, env);
@@ -10446,8 +10440,10 @@ static int need_active_balance(struct lb_env *env)
      * It's worth migrating the task if the src_cpu's capacity is reduced
      * because of other sched_class or IRQs if more capacity stays
      * available on dst_cpu.
+     * Avoid pulling the CFS task if it is the only task running.
      */
     if ((env->idle != CPU_NOT_IDLE) &&
+        (env->src_rq->nr_running > 1) &&
         (env->src_rq->cfs.h_nr_running == 1)) {
         if ((check_cpu_capacity(env->src_rq, sd)) &&
             (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
@@ -10685,7 +10681,24 @@ more_balance:
         /* All tasks on this runqueue were pinned by CPU affinity */
         if (unlikely(env.flags & LBF_ALL_PINNED)) {
             cpumask_clear_cpu(cpu_of(busiest), cpus);
-            if (!cpumask_empty(cpus)) {
+            /*
+             * dst_cpu is not a valid busiest cpu in the following
+             * check since load cannot be pulled from dst_cpu to be
+             * put on dst_cpu.
+             */
+            cpumask_clear_cpu(env.dst_cpu, cpus);
+            /*
+             * Go back to "redo" iff the load-balance cpumask
+             * contains other potential busiest cpus for the
+             * current sched domain.
+             */
+            if (cpumask_intersects(cpus, sched_domain_span(env.sd))) {
+                /*
+                 * Now that the check has passed, reenable
+                 * dst_cpu so that load can be calculated on
+                 * it in the redo path.
+                 */
+                cpumask_set_cpu(env.dst_cpu, cpus);
                 env.loop = 0;
                 env.loop_break = sched_nr_migrate_break;
                 goto redo;
@@ -11772,7 +11785,8 @@ static inline bool vruntime_normalized(struct task_struct *p)
      * - A task which has been woken up by try_to_wake_up() and
      *   waiting for actually being woken up by sched_ttwu_pending().
      */
-    if (!se->sum_exec_runtime || p->state == TASK_WAKING)
+    if (!se->sum_exec_runtime ||
+        (p->state == TASK_WAKING && p->sched_class == &fair_sched_class))
        return true;
 
    return false;
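The bulk of the diff backports the removal of CFS bandwidth slice expiration (commit b933e4d37bc0 in the list above). Previously every per-cpu slice was stamped with runtime_expires and unused local runtime was zeroed once the global deadline advanced; after the change, only the global pool is refilled each period and per-cpu runtime_remaining simply carries over. A heavily simplified userspace model of the new flow follows; the struct layouts, helper names, and numbers are toy stand-ins, not the kernel's:

```c
#include <stdio.h>
#include <stdint.h>

/* Toy stand-ins for the kernel structures touched by the backport. */
struct cfs_bandwidth { int64_t quota; int64_t runtime; };
struct cfs_rq_toy    { int64_t runtime_remaining; };

/*
 * Period refill: only the global pool is topped up; there is no
 * runtime_expires stamp anymore (cf. __refill_cfs_bandwidth_runtime
 * in the diff above).
 */
static void refill(struct cfs_bandwidth *b)
{
    b->runtime = b->quota;
}

/*
 * Hand a slice from the global pool to one cfs_rq, loosely mirroring
 * the simplified assign_cfs_rq_runtime(): no expiry bookkeeping.
 */
static int assign_slice(struct cfs_bandwidth *b, struct cfs_rq_toy *rq,
                        int64_t slice)
{
    int64_t amount = slice < b->runtime ? slice : b->runtime;

    b->runtime -= amount;
    rq->runtime_remaining += amount;
    return rq->runtime_remaining > 0;
}

int main(void)
{
    struct cfs_bandwidth b = { .quota = 10, .runtime = 10 };
    struct cfs_rq_toy rq = { 0 };

    assign_slice(&b, &rq, 5);  /* period 1: cache 5 units locally */
    refill(&b);                /* period boundary: pool back to quota */
    assign_slice(&b, &rq, 5);  /* period 2: local runtime accumulates */
    printf("local runtime carried across periods: %lld\n",
           (long long)rq.runtime_remaining);
    return 0;
}
```

Under the pre-fix behavior, expire_cfs_rq_runtime() would have invalidated the locally cached slice at the period boundary, which is what produced artificially low CPU usage for groups that throttle while mostly idle.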