diff options
Diffstat (limited to 'kernel/sched')
| -rw-r--r-- | kernel/sched/core.c | 24 | ||||
| -rw-r--r-- | kernel/sched/cpufreq_sched.c | 14 | ||||
| -rw-r--r-- | kernel/sched/cpufreq_schedutil.c | 69 | ||||
| -rw-r--r-- | kernel/sched/deadline.c | 6 | ||||
| -rw-r--r-- | kernel/sched/fair.c | 48 | ||||
| -rw-r--r-- | kernel/sched/rt.c | 4 | ||||
| -rw-r--r-- | kernel/sched/sched.h | 19 | ||||
| -rw-r--r-- | kernel/sched/walt.c | 317 |
8 files changed, 140 insertions, 361 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2dbe599d34d5..c408280ddd12 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1372,7 +1372,9 @@ static void __migrate_swap_task(struct task_struct *p, int cpu) p->on_rq = TASK_ON_RQ_MIGRATING; deactivate_task(src_rq, p, 0); + p->on_rq = TASK_ON_RQ_MIGRATING; set_task_cpu(p, cpu); + p->on_rq = TASK_ON_RQ_QUEUED; activate_task(dst_rq, p, 0); p->on_rq = TASK_ON_RQ_QUEUED; check_preempt_curr(dst_rq, p, 0); @@ -3176,20 +3178,20 @@ static void sched_freq_tick_pelt(int cpu) #ifdef CONFIG_SCHED_WALT static void sched_freq_tick_walt(int cpu) { - unsigned long cpu_utilization = cpu_util(cpu); + unsigned long cpu_utilization = cpu_util_freq(cpu); unsigned long capacity_curr = capacity_curr_of(cpu); if (walt_disabled || !sysctl_sched_use_walt_cpu_util) return sched_freq_tick_pelt(cpu); /* - * Add a margin to the WALT utilization. + * Add a margin to the WALT utilization to check if we will need to + * increase frequency. * NOTE: WALT tracks a single CPU signal for all the scheduling * classes, thus this margin is going to be added to the DL class as * well, which is something we do not do in sched_freq_tick_pelt case. */ - cpu_utilization = add_capacity_margin(cpu_utilization); - if (cpu_utilization <= capacity_curr) + if (add_capacity_margin(cpu_utilization) <= capacity_curr) return; /* @@ -3207,16 +3209,9 @@ static void sched_freq_tick_walt(int cpu) static void sched_freq_tick(int cpu) { - unsigned long capacity_orig, capacity_curr; - if (!sched_freq()) return; - capacity_orig = capacity_orig_of(cpu); - capacity_curr = capacity_curr_of(cpu); - if (capacity_curr == capacity_orig) - return; - _sched_freq_tick(cpu); } #else @@ -8188,17 +8183,16 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, * operation in the resume sequence, just build a single sched * domain, ignoring cpusets. 
*/ - num_cpus_frozen--; - if (likely(num_cpus_frozen)) { - partition_sched_domains(1, NULL, NULL); + partition_sched_domains(1, NULL, NULL); + if (--num_cpus_frozen) break; - } /* * This is the last CPU online operation. So fall through and * restore the original sched domains by considering the * cpuset configurations. */ + cpuset_force_rebuild(); case CPU_ONLINE: cpuset_update_active_cpus(true); diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c index f10d9f7d6d07..6ffb23adbcef 100644 --- a/kernel/sched/cpufreq_sched.c +++ b/kernel/sched/cpufreq_sched.c @@ -235,6 +235,18 @@ out: cpufreq_cpu_put(policy); } +#ifdef CONFIG_SCHED_WALT +static inline unsigned long +requested_capacity(struct sched_capacity_reqs *scr) +{ + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) + return scr->cfs; + return scr->cfs + scr->rt; +} +#else +#define requested_capacity(scr) (scr->cfs + scr->rt) +#endif + void update_cpu_capacity_request(int cpu, bool request) { unsigned long new_capacity; @@ -245,7 +257,7 @@ void update_cpu_capacity_request(int cpu, bool request) scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - new_capacity = scr->cfs + scr->rt; + new_capacity = requested_capacity(scr); new_capacity = new_capacity * capacity_margin / SCHED_CAPACITY_SCALE; new_capacity += scr->dl; diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index e12309c1b07b..28977799017b 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -64,8 +64,9 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; - unsigned long iowait_boost; - unsigned long iowait_boost_max; + bool iowait_boost_pending; + unsigned int iowait_boost; + unsigned int iowait_boost_max; u64 last_update; /* The fields below are only needed when sharing a policy. 
*/ @@ -224,30 +225,54 @@ static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags) { if (flags & SCHED_CPUFREQ_IOWAIT) { - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + if (sg_cpu->iowait_boost_pending) + return; + + sg_cpu->iowait_boost_pending = true; + + if (sg_cpu->iowait_boost) { + sg_cpu->iowait_boost <<= 1; + if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max) + sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + } else { + sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min; + } } else if (sg_cpu->iowait_boost) { s64 delta_ns = time - sg_cpu->last_update; /* Clear iowait_boost if the CPU apprears to have been idle. */ - if (delta_ns > TICK_NSEC) + if (delta_ns > TICK_NSEC) { sg_cpu->iowait_boost = 0; + sg_cpu->iowait_boost_pending = false; + } } } static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util, unsigned long *max) { - unsigned long boost_util = sg_cpu->iowait_boost; - unsigned long boost_max = sg_cpu->iowait_boost_max; + unsigned int boost_util, boost_max; - if (!boost_util) + if (!sg_cpu->iowait_boost) return; + if (sg_cpu->iowait_boost_pending) { + sg_cpu->iowait_boost_pending = false; + } else { + sg_cpu->iowait_boost >>= 1; + if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) { + sg_cpu->iowait_boost = 0; + return; + } + } + + boost_util = sg_cpu->iowait_boost; + boost_max = sg_cpu->iowait_boost_max; + if (*util * boost_max < *max * boost_util) { *util = boost_util; *max = boost_max; } - sg_cpu->iowait_boost >>= 1; } #ifdef CONFIG_NO_HZ_COMMON @@ -297,11 +322,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, sugov_update_commit(sg_policy, time, next_f); } -static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu) +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - u64 last_freq_update_time = 
sg_policy->last_freq_update_time; unsigned long util = 0, max = 1; unsigned int j; @@ -317,9 +341,10 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu) * enough, don't take the CPU into account as it probably is * idle now (and clear iowait_boost for it). */ - delta_ns = last_freq_update_time - j_sg_cpu->last_update; + delta_ns = time - j_sg_cpu->last_update; if (delta_ns > TICK_NSEC) { j_sg_cpu->iowait_boost = 0; + j_sg_cpu->iowait_boost_pending = false; continue; } if (j_sg_cpu->flags & SCHED_CPUFREQ_DL) @@ -361,7 +386,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, if (flags & SCHED_CPUFREQ_DL) next_f = sg_policy->policy->cpuinfo.max_freq; else - next_f = sugov_next_freq_shared(sg_cpu); + next_f = sugov_next_freq_shared(sg_cpu, time); sugov_update_commit(sg_policy, time, next_f); } @@ -589,7 +614,6 @@ static int sugov_init(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy; struct sugov_tunables *tunables; - unsigned int lat; int ret = 0; /* State should be equivalent to EXIT */ @@ -628,12 +652,19 @@ static int sugov_init(struct cpufreq_policy *policy) goto stop_kthread; } - tunables->up_rate_limit_us = LATENCY_MULTIPLIER; - tunables->down_rate_limit_us = LATENCY_MULTIPLIER; - lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; - if (lat) { - tunables->up_rate_limit_us *= lat; - tunables->down_rate_limit_us *= lat; + if (policy->up_transition_delay_us && policy->down_transition_delay_us) { + tunables->up_rate_limit_us = policy->up_transition_delay_us; + tunables->down_rate_limit_us = policy->down_transition_delay_us; + } else { + unsigned int lat; + + tunables->up_rate_limit_us = LATENCY_MULTIPLIER; + tunables->down_rate_limit_us = LATENCY_MULTIPLIER; + lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; + if (lat) { + tunables->up_rate_limit_us *= lat; + tunables->down_rate_limit_us *= lat; + } } policy->governor_data = sg_policy; diff --git a/kernel/sched/deadline.c 
b/kernel/sched/deadline.c index a105e97ab6bf..167a1038cff0 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -18,6 +18,8 @@ #include <linux/slab.h> +#include "walt.h" + struct dl_bandwidth def_dl_bandwidth; static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se) @@ -1623,7 +1625,9 @@ retry: next_task->on_rq = TASK_ON_RQ_MIGRATING; deactivate_task(rq, next_task, 0); clear_average_bw(&next_task->dl, &rq->dl); + next_task->on_rq = TASK_ON_RQ_MIGRATING; set_task_cpu(next_task, later_rq->cpu); + next_task->on_rq = TASK_ON_RQ_QUEUED; add_average_bw(&next_task->dl, &later_rq->dl); activate_task(later_rq, next_task, 0); next_task->on_rq = TASK_ON_RQ_QUEUED; @@ -1715,7 +1719,9 @@ static void pull_dl_task(struct rq *this_rq) p->on_rq = TASK_ON_RQ_MIGRATING; deactivate_task(src_rq, p, 0); clear_average_bw(&p->dl, &src_rq->dl); + p->on_rq = TASK_ON_RQ_MIGRATING; set_task_cpu(p, this_cpu); + p->on_rq = TASK_ON_RQ_QUEUED; add_average_bw(&p->dl, &this_rq->dl); activate_task(this_rq, p, 0); p->on_rq = TASK_ON_RQ_QUEUED; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 853064319b0d..6e3ab49c262a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5809,10 +5809,11 @@ static inline void hrtick_update(struct rq *rq) #endif #ifdef CONFIG_SMP +static bool __cpu_overutilized(int cpu, int delta); static bool cpu_overutilized(int cpu); unsigned long boosted_cpu_util(int cpu); #else -#define boosted_cpu_util(cpu) cpu_util(cpu) +#define boosted_cpu_util(cpu) cpu_util_freq(cpu) #endif #if defined(CONFIG_SMP) && defined(CONFIG_CPU_FREQ_GOV_SCHED) @@ -6626,10 +6627,8 @@ end: */ static int sched_group_energy(struct energy_env *eenv) { - struct sched_domain *sd; - int cpu, total_energy = 0; struct cpumask visit_cpus; - struct sched_group *sg; + u64 total_energy = 0; WARN_ON(!eenv->sg_top->sge); @@ -6637,8 +6636,8 @@ static int sched_group_energy(struct energy_env *eenv) while (!cpumask_empty(&visit_cpus)) { struct sched_group 
*sg_shared_cap = NULL; - - cpu = cpumask_first(&visit_cpus); + int cpu = cpumask_first(&visit_cpus); + struct sched_domain *sd; /* * Is the group utilization affected by cpus outside this @@ -6650,7 +6649,7 @@ static int sched_group_energy(struct energy_env *eenv) sg_shared_cap = sd->parent->groups; for_each_domain(cpu, sd) { - sg = sd->groups; + struct sched_group *sg = sd->groups; /* Has this sched_domain already been visited? */ if (sd->child && group_first_cpu(sg) != cpu) @@ -6686,11 +6685,9 @@ static int sched_group_energy(struct energy_env *eenv) idle_idx = group_idle_state(eenv, sg); group_util = group_norm_util(eenv, sg); - sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power) - >> SCHED_CAPACITY_SHIFT; + sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power); sg_idle_energy = ((SCHED_LOAD_SCALE-group_util) - * sg->sge->idle_states[idle_idx].power) - >> SCHED_CAPACITY_SHIFT; + * sg->sge->idle_states[idle_idx].power); total_energy += sg_busy_energy + sg_idle_energy; @@ -6715,7 +6712,7 @@ next_cpu: continue; } - eenv->energy = total_energy; + eenv->energy = total_energy >> SCHED_CAPACITY_SHIFT; return 0; } @@ -7004,9 +7001,14 @@ static inline bool task_fits_max(struct task_struct *p, int cpu) return __task_fits(p, cpu, 0); } +static bool __cpu_overutilized(int cpu, int delta) +{ + return (capacity_of(cpu) * 1024) < ((cpu_util(cpu) + delta) * capacity_margin); +} + static bool cpu_overutilized(int cpu) { - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); + return __cpu_overutilized(cpu, 0); } #ifdef CONFIG_SCHED_TUNE @@ -7085,7 +7087,7 @@ schedtune_task_margin(struct task_struct *task) unsigned long boosted_cpu_util(int cpu) { - unsigned long util = cpu_util(cpu); + unsigned long util = cpu_util_freq(cpu); long margin = schedtune_cpu_margin(util, cpu); trace_sched_boost_cpu(cpu, util, margin); @@ -7384,9 +7386,6 @@ static int start_cpu(bool boosted) { struct root_domain *rd = cpu_rq(smp_processor_id())->rd; - 
RCU_LOCKDEP_WARN(rcu_read_lock_sched_held(), - "sched RCU must be held"); - return boosted ? rd->max_cap_orig_cpu : rd->min_cap_orig_cpu; } @@ -7729,6 +7728,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync } if (target_cpu != prev_cpu) { + int delta = 0; struct energy_env eenv = { .util_delta = task_util(p), .src_cpu = prev_cpu, @@ -7736,8 +7736,13 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync .task = p, }; + +#ifdef CONFIG_SCHED_WALT + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) + delta = task_util(p); +#endif /* Not enough spare capacity on previous cpu */ - if (cpu_overutilized(prev_cpu)) { + if (__cpu_overutilized(prev_cpu, delta)) { schedstat_inc(p, se.statistics.nr_wakeups_secb_insuff_cap); schedstat_inc(this_rq(), eas_stats.secb_insuff_cap); goto unlock; @@ -10268,6 +10273,7 @@ static int need_active_balance(struct lb_env *env) if (energy_aware() && (capacity_of(env->src_cpu) < capacity_of(env->dst_cpu)) && + ((capacity_orig_of(env->src_cpu) < capacity_orig_of(env->dst_cpu))) && env->src_rq->cfs.h_nr_running == 1 && cpu_overutilized(env->src_cpu) && !cpu_overutilized(env->dst_cpu)) { @@ -11348,8 +11354,8 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type) return true; /* Do idle load balance if there have misfit task */ - if (energy_aware() && rq->misfit_task) - return 1; + if (energy_aware()) + return rq->misfit_task; return (rq->nr_running >= 2); } @@ -11391,7 +11397,7 @@ static inline bool nohz_kick_needed(struct rq *rq, int *type) #ifndef CONFIG_SCHED_HMP rcu_read_lock(); sd = rcu_dereference(per_cpu(sd_busy, cpu)); - if (sd && !energy_aware()) { + if (sd) { sgc = sd->groups->sgc; nr_busy = atomic_read(&sgc->nr_busy_cpus); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index ee095f4e7230..23b68b051cee 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2117,7 +2117,9 @@ retry: next_task->on_rq = TASK_ON_RQ_MIGRATING; deactivate_task(rq, 
next_task, 0); + next_task->on_rq = TASK_ON_RQ_MIGRATING; set_task_cpu(next_task, lowest_rq->cpu); + next_task->on_rq = TASK_ON_RQ_QUEUED; activate_task(lowest_rq, next_task, 0); next_task->on_rq = TASK_ON_RQ_QUEUED; ret = 1; @@ -2373,7 +2375,9 @@ static void pull_rt_task(struct rq *this_rq) p->on_rq = TASK_ON_RQ_MIGRATING; deactivate_task(src_rq, p, 0); + p->on_rq = TASK_ON_RQ_MIGRATING; set_task_cpu(p, this_cpu); + p->on_rq = TASK_ON_RQ_QUEUED; activate_task(this_rq, p, 0); p->on_rq = TASK_ON_RQ_QUEUED; /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 33bf0c07e757..c53970b5a8f0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2363,6 +2363,12 @@ static inline unsigned long __cpu_util(int cpu, int delta) unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; unsigned long capacity = capacity_orig_of(cpu); +#ifdef CONFIG_SCHED_WALT + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) + util = div64_u64(cpu_rq(cpu)->cumulative_runnable_avg, + walt_ravg_window >> SCHED_LOAD_SHIFT); +#endif + delta += util; if (delta < 0) return 0; @@ -2375,6 +2381,19 @@ static inline unsigned long cpu_util(int cpu) return __cpu_util(cpu, 0); } +static inline unsigned long cpu_util_freq(int cpu) +{ + unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg; + unsigned long capacity = capacity_orig_of(cpu); + +#ifdef CONFIG_SCHED_WALT + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) + util = div64_u64(cpu_rq(cpu)->prev_runnable_sum, + walt_ravg_window >> SCHED_LOAD_SHIFT); +#endif + return (util >= capacity) ? 
capacity : util; +} + #endif #ifdef CONFIG_CPU_FREQ_GOV_SCHED diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 92c3aae8e056..441cba01bc04 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -20,7 +20,6 @@ */ #include <linux/syscore_ops.h> -#include <linux/cpufreq.h> #include <trace/events/sched.h> #include "sched.h" #include "walt.h" @@ -45,29 +44,6 @@ unsigned int sysctl_sched_walt_init_task_load_pct = 15; /* 1 -> use PELT based load stats, 0 -> use window-based load stats */ unsigned int __read_mostly walt_disabled = 0; -static unsigned int max_possible_efficiency = 1024; -static unsigned int min_possible_efficiency = 1024; - -/* - * Maximum possible frequency across all cpus. Task demand and cpu - * capacity (cpu_power) metrics are scaled in reference to it. - */ -static unsigned int max_possible_freq = 1; - -/* - * Minimum possible max_freq across all cpus. This will be same as - * max_possible_freq on homogeneous systems and could be different from - * max_possible_freq on heterogenous systems. min_max_freq is used to derive - * capacity (cpu_power) of cpus. 
- */ -static unsigned int min_max_freq = 1; - -static unsigned int max_load_scale_factor = 1024; -static unsigned int max_possible_capacity = 1024; - -/* Mask of all CPUs that have max_possible_capacity */ -static cpumask_t mpc_mask = CPU_MASK_ALL; - /* Window size (in ns) */ __read_mostly unsigned int walt_ravg_window = 20000000; @@ -111,8 +87,10 @@ walt_dec_cumulative_runnable_avg(struct rq *rq, static void fixup_cumulative_runnable_avg(struct rq *rq, - struct task_struct *p, s64 task_load_delta) + struct task_struct *p, u64 new_task_load) { + s64 task_load_delta = (s64)new_task_load - task_load(p); + rq->cumulative_runnable_avg += task_load_delta; if ((s64)rq->cumulative_runnable_avg < 0) panic("cra less than zero: tld: %lld, task_load(p) = %u\n", @@ -204,24 +182,16 @@ update_window_start(struct rq *rq, u64 wallclock) rq->window_start += (u64)nr_windows * (u64)walt_ravg_window; } +/* + * Translate absolute delta time accounted on a CPU + * to a scale where 1024 is the capacity of the most + * capable CPU running at FMAX + */ static u64 scale_exec_time(u64 delta, struct rq *rq) { - unsigned int cur_freq = rq->cur_freq; - int sf; - - if (unlikely(cur_freq > max_possible_freq)) - cur_freq = rq->max_possible_freq; - - /* round up div64 */ - delta = div64_u64(delta * cur_freq + max_possible_freq - 1, - max_possible_freq); - - sf = DIV_ROUND_UP(rq->efficiency * 1024, max_possible_efficiency); + unsigned long capcurr = capacity_curr_of(cpu_of(rq)); - delta *= sf; - delta >>= 10; - - return delta; + return (delta * capcurr) >> SCHED_CAPACITY_SHIFT; } static int cpu_is_waiting_on_io(struct rq *rq) @@ -744,33 +714,6 @@ done: p->ravg.mark_start = wallclock; } -unsigned long __weak arch_get_cpu_efficiency(int cpu) -{ - return SCHED_LOAD_SCALE; -} - -void walt_init_cpu_efficiency(void) -{ - int i, efficiency; - unsigned int max = 0, min = UINT_MAX; - - for_each_possible_cpu(i) { - efficiency = arch_get_cpu_efficiency(i); - cpu_rq(i)->efficiency = efficiency; - - if 
(efficiency > max) - max = efficiency; - if (efficiency < min) - min = efficiency; - } - - if (max) - max_possible_efficiency = max; - - if (min) - min_possible_efficiency = min; -} - static void reset_task_stats(struct task_struct *p) { u32 sum = 0; @@ -802,11 +745,11 @@ void walt_set_window_start(struct rq *rq) int cpu = cpu_of(rq); struct rq *sync_rq = cpu_rq(sync_cpu); - if (rq->window_start) + if (likely(rq->window_start)) return; if (cpu == sync_cpu) { - rq->window_start = walt_ktime_clock(); + rq->window_start = 1; } else { raw_spin_unlock(&rq->lock); double_rq_lock(rq, sync_rq); @@ -875,242 +818,6 @@ void walt_fixup_busy_time(struct task_struct *p, int new_cpu) double_rq_unlock(src_rq, dest_rq); } -/* - * Return 'capacity' of a cpu in reference to "least" efficient cpu, such that - * least efficient cpu gets capacity of 1024 - */ -static unsigned long capacity_scale_cpu_efficiency(int cpu) -{ - return (1024 * cpu_rq(cpu)->efficiency) / min_possible_efficiency; -} - -/* - * Return 'capacity' of a cpu in reference to cpu with lowest max_freq - * (min_max_freq), such that one with lowest max_freq gets capacity of 1024. - */ -static unsigned long capacity_scale_cpu_freq(int cpu) -{ - return (1024 * cpu_rq(cpu)->max_freq) / min_max_freq; -} - -/* - * Return load_scale_factor of a cpu in reference to "most" efficient cpu, so - * that "most" efficient cpu gets a load_scale_factor of 1 - */ -static unsigned long load_scale_cpu_efficiency(int cpu) -{ - return DIV_ROUND_UP(1024 * max_possible_efficiency, - cpu_rq(cpu)->efficiency); -} - -/* - * Return load_scale_factor of a cpu in reference to cpu with best max_freq - * (max_possible_freq), so that one with best max_freq gets a load_scale_factor - * of 1. 
- */ -static unsigned long load_scale_cpu_freq(int cpu) -{ - return DIV_ROUND_UP(1024 * max_possible_freq, cpu_rq(cpu)->max_freq); -} - -static int compute_capacity(int cpu) -{ - int capacity = 1024; - - capacity *= capacity_scale_cpu_efficiency(cpu); - capacity >>= 10; - - capacity *= capacity_scale_cpu_freq(cpu); - capacity >>= 10; - - return capacity; -} - -static int compute_load_scale_factor(int cpu) -{ - int load_scale = 1024; - - /* - * load_scale_factor accounts for the fact that task load - * is in reference to "best" performing cpu. Task's load will need to be - * scaled (up) by a factor to determine suitability to be placed on a - * (little) cpu. - */ - load_scale *= load_scale_cpu_efficiency(cpu); - load_scale >>= 10; - - load_scale *= load_scale_cpu_freq(cpu); - load_scale >>= 10; - - return load_scale; -} - -static int cpufreq_notifier_policy(struct notifier_block *nb, - unsigned long val, void *data) -{ - struct cpufreq_policy *policy = (struct cpufreq_policy *)data; - int i, update_max = 0; - u64 highest_mpc = 0, highest_mplsf = 0; - const struct cpumask *cpus = policy->related_cpus; - unsigned int orig_min_max_freq = min_max_freq; - unsigned int orig_max_possible_freq = max_possible_freq; - /* Initialized to policy->max in case policy->related_cpus is empty! 
*/ - unsigned int orig_max_freq = policy->max; - - if (val != CPUFREQ_NOTIFY) - return 0; - - for_each_cpu(i, policy->related_cpus) { - cpumask_copy(&cpu_rq(i)->freq_domain_cpumask, - policy->related_cpus); - orig_max_freq = cpu_rq(i)->max_freq; - cpu_rq(i)->min_freq = policy->min; - cpu_rq(i)->max_freq = policy->max; - cpu_rq(i)->cur_freq = policy->cur; - cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq; - } - - max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq); - if (min_max_freq == 1) - min_max_freq = UINT_MAX; - min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq); - BUG_ON(!min_max_freq); - BUG_ON(!policy->max); - - /* Changes to policy other than max_freq don't require any updates */ - if (orig_max_freq == policy->max) - return 0; - - /* - * A changed min_max_freq or max_possible_freq (possible during bootup) - * needs to trigger re-computation of load_scale_factor and capacity for - * all possible cpus (even those offline). It also needs to trigger - * re-computation of nr_big_task count on all online cpus. - * - * A changed rq->max_freq otoh needs to trigger re-computation of - * load_scale_factor and capacity for just the cluster of cpus involved. - * Since small task definition depends on max_load_scale_factor, a - * changed load_scale_factor of one cluster could influence - * classification of tasks in another cluster. Hence a changed - * rq->max_freq will need to trigger re-computation of nr_big_task - * count on all online cpus. - * - * While it should be sufficient for nr_big_tasks to be - * re-computed for only online cpus, we have inadequate context - * information here (in policy notifier) with regard to hotplug-safety - * context in which notification is issued. As a result, we can't use - * get_online_cpus() here, as it can lead to deadlock. Until cpufreq is - * fixed up to issue notification always in hotplug-safe context, - * re-compute nr_big_task for all possible cpus. 
- */ - - if (orig_min_max_freq != min_max_freq || - orig_max_possible_freq != max_possible_freq) { - cpus = cpu_possible_mask; - update_max = 1; - } - - /* - * Changed load_scale_factor can trigger reclassification of tasks as - * big or small. Make this change "atomic" so that tasks are accounted - * properly due to changed load_scale_factor - */ - for_each_cpu(i, cpus) { - struct rq *rq = cpu_rq(i); - - rq->capacity = compute_capacity(i); - rq->load_scale_factor = compute_load_scale_factor(i); - - if (update_max) { - u64 mpc, mplsf; - - mpc = div_u64(((u64) rq->capacity) * - rq->max_possible_freq, rq->max_freq); - rq->max_possible_capacity = (int) mpc; - - mplsf = div_u64(((u64) rq->load_scale_factor) * - rq->max_possible_freq, rq->max_freq); - - if (mpc > highest_mpc) { - highest_mpc = mpc; - cpumask_clear(&mpc_mask); - cpumask_set_cpu(i, &mpc_mask); - } else if (mpc == highest_mpc) { - cpumask_set_cpu(i, &mpc_mask); - } - - if (mplsf > highest_mplsf) - highest_mplsf = mplsf; - } - } - - if (update_max) { - max_possible_capacity = highest_mpc; - max_load_scale_factor = highest_mplsf; - } - - return 0; -} - -static int cpufreq_notifier_trans(struct notifier_block *nb, - unsigned long val, void *data) -{ - struct cpufreq_freqs *freq = (struct cpufreq_freqs *)data; - unsigned int cpu = freq->cpu, new_freq = freq->new; - unsigned long flags; - int i; - - if (val != CPUFREQ_POSTCHANGE) - return 0; - - BUG_ON(!new_freq); - - if (cpu_rq(cpu)->cur_freq == new_freq) - return 0; - - for_each_cpu(i, &cpu_rq(cpu)->freq_domain_cpumask) { - struct rq *rq = cpu_rq(i); - - raw_spin_lock_irqsave(&rq->lock, flags); - walt_update_task_ravg(rq->curr, rq, TASK_UPDATE, - walt_ktime_clock(), 0); - rq->cur_freq = new_freq; - raw_spin_unlock_irqrestore(&rq->lock, flags); - } - - return 0; -} - -static struct notifier_block notifier_policy_block = { - .notifier_call = cpufreq_notifier_policy -}; - -static struct notifier_block notifier_trans_block = { - .notifier_call = 
cpufreq_notifier_trans -}; - -static int register_sched_callback(void) -{ - int ret; - - ret = cpufreq_register_notifier(&notifier_policy_block, - CPUFREQ_POLICY_NOTIFIER); - - if (!ret) - ret = cpufreq_register_notifier(&notifier_trans_block, - CPUFREQ_TRANSITION_NOTIFIER); - - return 0; -} - -/* - * cpufreq callbacks can be registered at core_initcall or later time. - * Any registration done prior to that is "forgotten" by cpufreq. See - * initialization of variable init_cpufreq_transition_notifier_list_called - * for further information. - */ -core_initcall(register_sched_callback); - void walt_init_new_task_load(struct task_struct *p) { int i; |
