diff options
| -rw-r--r-- | include/linux/sched.h | 6 | ||||
| -rw-r--r-- | kernel/sched/core.c | 95 | ||||
| -rw-r--r-- | kernel/sched/fair.c | 11 | ||||
| -rw-r--r-- | kernel/sched/sched.h | 8 |
4 files changed, 117 insertions, 3 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index fa5ca5e90917..941930f2935f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2131,6 +2131,7 @@ extern unsigned long sched_get_busy(int cpu); extern void sched_get_cpus_busy(unsigned long *busy, const struct cpumask *query_cpus); extern void sched_set_io_is_busy(int val); +int sched_update_freq_max_load(const cpumask_t *cpumask); #else static inline int sched_set_window(u64 window_start, unsigned int window_size) { @@ -2141,6 +2142,11 @@ static inline unsigned long sched_get_busy(int cpu) return 0; } static inline void sched_set_io_is_busy(int val) {}; + +static inline int sched_update_freq_max_load(const cpumask_t *cpumask) +{ + return 0; +} #endif /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ebaeda755c91..cc3ba6ee00d8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1203,7 +1203,6 @@ __read_mostly int sysctl_sched_freq_inc_notify = 10 * 1024 * 1024; /* + 10GHz */ __read_mostly int sysctl_sched_freq_dec_notify = 10 * 1024 * 1024; /* - 10GHz */ static __read_mostly unsigned int sched_io_is_busy; - #endif /* CONFIG_SCHED_FREQ_INPUT */ /* 1 -> use PELT based load stats, 0 -> use window-based load stats */ @@ -1628,6 +1627,78 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, BUG(); } + +u32 __weak get_freq_max_load(int cpu, u32 freq) +{ + /* 100% by default */ + return 100; +} + +DEFINE_PER_CPU(struct freq_max_load *, freq_max_load); + +int sched_update_freq_max_load(const cpumask_t *cpumask) +{ + int i, cpu, ret; + unsigned int freq, max; + struct cpu_pstate_pwr *costs; + struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats(); + struct freq_max_load *max_load, *old_max_load; + + if (!per_cpu_info || !sysctl_sched_enable_power_aware) + return 0; + + mutex_lock(&policy_mutex); + for_each_cpu(cpu, cpumask) { + if (!per_cpu_info[cpu].ptable) { + ret = -EINVAL; + goto fail; + } + + old_max_load = rcu_dereference(per_cpu(freq_max_load, 
cpu)); + + /* + * allocate len + 1 and leave the last power cost as 0 for + * power_cost_at_freq() can stop iterating index when + * per_cpu_info[cpu].len > len of max_load due to race between + * cpu power stats update and get_cpu_pwr_stats(). + */ + max_load = kzalloc(sizeof(struct freq_max_load) + + sizeof(u32) * (per_cpu_info[cpu].len + 1), + GFP_ATOMIC); + if (unlikely(!max_load)) { + ret = -ENOMEM; + goto fail; + } + + i = 0; + costs = per_cpu_info[cpu].ptable; + while (costs[i].freq) { + freq = costs[i].freq; + max = get_freq_max_load(cpu, freq); + max_load->freqs[i] = div64_u64((u64)freq * max, 100); + i++; + } + + rcu_assign_pointer(per_cpu(freq_max_load, cpu), max_load); + if (old_max_load) + kfree_rcu(old_max_load, rcu); + } + + mutex_unlock(&policy_mutex); + return 0; + +fail: + for_each_cpu(cpu, cpumask) { + max_load = rcu_dereference(per_cpu(freq_max_load, cpu)); + if (max_load) { + rcu_assign_pointer(per_cpu(freq_max_load, cpu), NULL); + kfree_rcu(max_load, rcu); + } + } + + mutex_unlock(&policy_mutex); + return ret; +} #else /* CONFIG_SCHED_FREQ_INPUT */ static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq, @@ -2598,6 +2669,17 @@ static int cpufreq_notifier_trans(struct notifier_block *nb, return 0; } +static int pwr_stats_ready_notifier(struct notifier_block *nb, + unsigned long cpu, void *data) +{ + cpumask_t mask = CPU_MASK_NONE; + + cpumask_set_cpu(cpu, &mask); + sched_update_freq_max_load(&mask); + + return 0; +} + static struct notifier_block notifier_policy_block = { .notifier_call = cpufreq_notifier_policy }; @@ -2606,6 +2688,15 @@ static struct notifier_block notifier_trans_block = { .notifier_call = cpufreq_notifier_trans }; +static struct notifier_block notifier_pwr_stats_ready = { + .notifier_call = pwr_stats_ready_notifier +}; + +int __weak register_cpu_pwr_stats_ready_notifier(struct notifier_block *nb) +{ + return -EINVAL; +} + static int register_sched_callback(void) { int ret; @@ -2620,6 +2711,8 @@ static int 
register_sched_callback(void) ret = cpufreq_register_notifier(&notifier_trans_block, CPUFREQ_TRANSITION_NOTIFIER); + register_cpu_pwr_stats_ready_notifier(&notifier_pwr_stats_ready); + return 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a9f3199bdcf6..27e1a3d7bb05 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3063,6 +3063,7 @@ static unsigned int power_cost_at_freq(int cpu, unsigned int freq) int i = 0; struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats(); struct cpu_pstate_pwr *costs; + struct freq_max_load *max_load; if (!per_cpu_info || !per_cpu_info[cpu].ptable || !sysctl_sched_enable_power_aware) @@ -3075,12 +3076,18 @@ static unsigned int power_cost_at_freq(int cpu, unsigned int freq) costs = per_cpu_info[cpu].ptable; + rcu_read_lock(); + max_load = rcu_dereference(per_cpu(freq_max_load, cpu)); while (costs[i].freq != 0) { - if (costs[i].freq >= freq || - costs[i+1].freq == 0) + if (costs[i+1].freq == 0 || + (costs[i].freq >= freq && + (!max_load || max_load->freqs[i] >= freq))) { + rcu_read_unlock(); return costs[i].power; + } i++; } + rcu_read_unlock(); BUG(); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 23376b43777d..2545fe83e8cd 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -27,6 +27,14 @@ extern unsigned long calc_load_update; extern atomic_long_t calc_load_tasks; extern void calc_global_load_tick(struct rq *this_rq); + +struct freq_max_load { + struct rcu_head rcu; + u32 freqs[0]; +}; + +extern DEFINE_PER_CPU(struct freq_max_load *, freq_max_load); + extern long calc_load_fold_active(struct rq *this_rq); #ifdef CONFIG_SMP |
