summary | refs | log | tree | commit | diff
diff options (cgit form controls): context: | space: | mode:
-rw-r--r--include/linux/sched.h6
-rw-r--r--kernel/sched/core.c95
-rw-r--r--kernel/sched/fair.c11
-rw-r--r--kernel/sched/sched.h8
4 files changed, 117 insertions, 3 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fa5ca5e90917..941930f2935f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2131,6 +2131,7 @@ extern unsigned long sched_get_busy(int cpu);
extern void sched_get_cpus_busy(unsigned long *busy,
const struct cpumask *query_cpus);
extern void sched_set_io_is_busy(int val);
+int sched_update_freq_max_load(const cpumask_t *cpumask);
#else
static inline int sched_set_window(u64 window_start, unsigned int window_size)
{
@@ -2141,6 +2142,11 @@ static inline unsigned long sched_get_busy(int cpu)
return 0;
}
static inline void sched_set_io_is_busy(int val) {};
+
+static inline int sched_update_freq_max_load(const cpumask_t *cpumask)
+{
+ return 0;
+}
#endif
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ebaeda755c91..cc3ba6ee00d8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1203,7 +1203,6 @@ __read_mostly int sysctl_sched_freq_inc_notify = 10 * 1024 * 1024; /* + 10GHz */
__read_mostly int sysctl_sched_freq_dec_notify = 10 * 1024 * 1024; /* - 10GHz */
static __read_mostly unsigned int sched_io_is_busy;
-
#endif /* CONFIG_SCHED_FREQ_INPUT */
/* 1 -> use PELT based load stats, 0 -> use window-based load stats */
@@ -1628,6 +1627,78 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
BUG();
}
+
+u32 __weak get_freq_max_load(int cpu, u32 freq)
+{
+ /* 100% by default */
+ return 100;
+}
+
+DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);
+
+int sched_update_freq_max_load(const cpumask_t *cpumask)
+{
+ int i, cpu, ret;
+ unsigned int freq, max;
+ struct cpu_pstate_pwr *costs;
+ struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
+ struct freq_max_load *max_load, *old_max_load;
+
+ if (!per_cpu_info || !sysctl_sched_enable_power_aware)
+ return 0;
+
+ mutex_lock(&policy_mutex);
+ for_each_cpu(cpu, cpumask) {
+ if (!per_cpu_info[cpu].ptable) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ old_max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
+
+ /*
+ * allocate len + 1 and leave the last power cost as 0 for
+ * power_cost_at_freq() can stop iterating index when
+ * per_cpu_info[cpu].len > len of max_load due to race between
+ * cpu power stats update and get_cpu_pwr_stats().
+ */
+ max_load = kzalloc(sizeof(struct freq_max_load) +
+ sizeof(u32) * (per_cpu_info[cpu].len + 1),
+ GFP_ATOMIC);
+ if (unlikely(!max_load)) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ i = 0;
+ costs = per_cpu_info[cpu].ptable;
+ while (costs[i].freq) {
+ freq = costs[i].freq;
+ max = get_freq_max_load(cpu, freq);
+ max_load->freqs[i] = div64_u64((u64)freq * max, 100);
+ i++;
+ }
+
+ rcu_assign_pointer(per_cpu(freq_max_load, cpu), max_load);
+ if (old_max_load)
+ kfree_rcu(old_max_load, rcu);
+ }
+
+ mutex_unlock(&policy_mutex);
+ return 0;
+
+fail:
+ for_each_cpu(cpu, cpumask) {
+ max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
+ if (max_load) {
+ rcu_assign_pointer(per_cpu(freq_max_load, cpu), NULL);
+ kfree_rcu(max_load, rcu);
+ }
+ }
+
+ mutex_unlock(&policy_mutex);
+ return ret;
+}
#else /* CONFIG_SCHED_FREQ_INPUT */
static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
@@ -2598,6 +2669,17 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,
return 0;
}
+static int pwr_stats_ready_notifier(struct notifier_block *nb,
+ unsigned long cpu, void *data)
+{
+ cpumask_t mask = CPU_MASK_NONE;
+
+ cpumask_set_cpu(cpu, &mask);
+ sched_update_freq_max_load(&mask);
+
+ return 0;
+}
+
static struct notifier_block notifier_policy_block = {
.notifier_call = cpufreq_notifier_policy
};
@@ -2606,6 +2688,15 @@ static struct notifier_block notifier_trans_block = {
.notifier_call = cpufreq_notifier_trans
};
+static struct notifier_block notifier_pwr_stats_ready = {
+ .notifier_call = pwr_stats_ready_notifier
+};
+
+int __weak register_cpu_pwr_stats_ready_notifier(struct notifier_block *nb)
+{
+ return -EINVAL;
+}
+
static int register_sched_callback(void)
{
int ret;
@@ -2620,6 +2711,8 @@ static int register_sched_callback(void)
ret = cpufreq_register_notifier(&notifier_trans_block,
CPUFREQ_TRANSITION_NOTIFIER);
+ register_cpu_pwr_stats_ready_notifier(&notifier_pwr_stats_ready);
+
return 0;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a9f3199bdcf6..27e1a3d7bb05 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3063,6 +3063,7 @@ static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
int i = 0;
struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
struct cpu_pstate_pwr *costs;
+ struct freq_max_load *max_load;
if (!per_cpu_info || !per_cpu_info[cpu].ptable ||
!sysctl_sched_enable_power_aware)
@@ -3075,12 +3076,18 @@ static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
costs = per_cpu_info[cpu].ptable;
+ rcu_read_lock();
+ max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
while (costs[i].freq != 0) {
- if (costs[i].freq >= freq ||
- costs[i+1].freq == 0)
+ if (costs[i+1].freq == 0 ||
+ (costs[i].freq >= freq &&
+ (!max_load || max_load->freqs[i] >= freq))) {
+ rcu_read_unlock();
return costs[i].power;
+ }
i++;
}
+ rcu_read_unlock();
BUG();
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 23376b43777d..2545fe83e8cd 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -27,6 +27,14 @@ extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;
extern void calc_global_load_tick(struct rq *this_rq);
+
+struct freq_max_load {
+ struct rcu_head rcu;
+ u32 freqs[0];
+};
+
+extern DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);
+
extern long calc_load_fold_active(struct rq *this_rq);
#ifdef CONFIG_SMP