author	Joonwoo Park <joonwoop@codeaurora.org>	2015-07-27 16:52:12 -0700
committer	David Keitel <dkeitel@codeaurora.org>	2016-03-23 20:02:25 -0700
commit	b4627e0104c72dd25048fdcd8dd38fad78ad9782 (patch)
tree	fbea7cad871a4c227f6359dc119eab7926bb85fe /kernel/sched/core.c
parent	28f67e5a50d7c1bfc41cd7eb0f940f5daaa347c2 (diff)
sched: take into account governor's frequency max load
At present the HMP scheduler packs tasks onto a busy CPU until that CPU's load reaches 100%, to avoid waking an idle CPU as much as possible. Such aggressive packing leads to unintended CPU frequency increases, because the governor raises the busy CPU's frequency once its load exceeds the configured frequency max load, which can be less than 100%. Fix this by taking the governor's frequency max load into account and packing tasks only when the CPU's projected load is below that max load, so as to avoid unnecessary frequency increases.

Change-Id: I4447e5e0c2fa5214ae7a9128f04fd7585ed0dcac
[joonwoop@codeaurora.org: fixed minor conflict in kernel/sched/sched.h]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
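The per-cpu freq_max_load table introduced by this patch is meant to be consulted when deciding whether to pack another task onto an already-busy CPU. Below is a minimal sketch, not part of this commit, of what such a check might look like; the helper name, the load units, and the frequency-step index are assumptions for illustration only.

/*
 * Minimal sketch (not part of this commit) of a packing check that
 * consults the per-cpu freq_max_load table built below. Assumes
 * <linux/percpu.h> and <linux/rcupdate.h>; the helper name, the load
 * units and the frequency-step index are illustrative assumptions.
 */
static bool sketch_can_pack_on_cpu(int cpu, u64 projected_load, int freq_idx)
{
	struct freq_max_load *max_load;
	bool fits = false;

	rcu_read_lock();
	max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
	if (max_load)
		/* freqs[i] holds freq * governor max load% / 100 */
		fits = projected_load < max_load->freqs[freq_idx];
	rcu_read_unlock();

	return fits;
}

Reading the table under rcu_read_lock() mirrors how the patch publishes it with rcu_assign_pointer() and frees the old copy with kfree_rcu(), so readers in the scheduler hot path never take the update-side mutex.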
Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--	kernel/sched/core.c	| 95
1 file changed, 94 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ebaeda755c91..cc3ba6ee00d8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1203,7 +1203,6 @@ __read_mostly int sysctl_sched_freq_inc_notify = 10 * 1024 * 1024; /* + 10GHz */
__read_mostly int sysctl_sched_freq_dec_notify = 10 * 1024 * 1024; /* - 10GHz */
static __read_mostly unsigned int sched_io_is_busy;
-
#endif /* CONFIG_SCHED_FREQ_INPUT */
/* 1 -> use PELT based load stats, 0 -> use window-based load stats */
@@ -1628,6 +1627,78 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
BUG();
}
+
+u32 __weak get_freq_max_load(int cpu, u32 freq)
+{
+ /* 100% by default */
+ return 100;
+}
+
+DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);
+
+int sched_update_freq_max_load(const cpumask_t *cpumask)
+{
+ int i, cpu, ret;
+ unsigned int freq, max;
+ struct cpu_pstate_pwr *costs;
+ struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
+ struct freq_max_load *max_load, *old_max_load;
+
+ if (!per_cpu_info || !sysctl_sched_enable_power_aware)
+ return 0;
+
+ mutex_lock(&policy_mutex);
+ for_each_cpu(cpu, cpumask) {
+ if (!per_cpu_info[cpu].ptable) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ old_max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
+
+ /*
+ * Allocate len + 1 entries and leave the last power cost as 0 so that
+ * power_cost_at_freq() can stop iterating when per_cpu_info[cpu].len
+ * is greater than the length of max_load, due to a race between the
+ * CPU power stats update and get_cpu_pwr_stats().
+ */
+ max_load = kzalloc(sizeof(struct freq_max_load) +
+ sizeof(u32) * (per_cpu_info[cpu].len + 1),
+ GFP_ATOMIC);
+ if (unlikely(!max_load)) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ i = 0;
+ costs = per_cpu_info[cpu].ptable;
+ while (costs[i].freq) {
+ freq = costs[i].freq;
+ max = get_freq_max_load(cpu, freq);
+ max_load->freqs[i] = div64_u64((u64)freq * max, 100);
+ i++;
+ }
+
+ rcu_assign_pointer(per_cpu(freq_max_load, cpu), max_load);
+ if (old_max_load)
+ kfree_rcu(old_max_load, rcu);
+ }
+
+ mutex_unlock(&policy_mutex);
+ return 0;
+
+fail:
+ for_each_cpu(cpu, cpumask) {
+ max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
+ if (max_load) {
+ rcu_assign_pointer(per_cpu(freq_max_load, cpu), NULL);
+ kfree_rcu(max_load, rcu);
+ }
+ }
+
+ mutex_unlock(&policy_mutex);
+ return ret;
+}
#else /* CONFIG_SCHED_FREQ_INPUT */
static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
@@ -2598,6 +2669,17 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,
return 0;
}
+static int pwr_stats_ready_notifier(struct notifier_block *nb,
+ unsigned long cpu, void *data)
+{
+ cpumask_t mask = CPU_MASK_NONE;
+
+ cpumask_set_cpu(cpu, &mask);
+ sched_update_freq_max_load(&mask);
+
+ return 0;
+}
+
static struct notifier_block notifier_policy_block = {
.notifier_call = cpufreq_notifier_policy
};
@@ -2606,6 +2688,15 @@ static struct notifier_block notifier_trans_block = {
.notifier_call = cpufreq_notifier_trans
};
+static struct notifier_block notifier_pwr_stats_ready = {
+ .notifier_call = pwr_stats_ready_notifier
+};
+
+int __weak register_cpu_pwr_stats_ready_notifier(struct notifier_block *nb)
+{
+ return -EINVAL;
+}
+
static int register_sched_callback(void)
{
int ret;
@@ -2620,6 +2711,8 @@ static int register_sched_callback(void)
ret = cpufreq_register_notifier(&notifier_trans_block,
CPUFREQ_TRANSITION_NOTIFIER);
+ register_cpu_pwr_stats_ready_notifier(&notifier_pwr_stats_ready);
+
return 0;
}
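The register_cpu_pwr_stats_ready_notifier() added above is a __weak stub returning -EINVAL; a platform power-stats driver is expected to override it. A minimal sketch of such an override, using the standard kernel notifier API, might look like the following; the chain name and the cpu_pwr_stats_ready() call site are hypothetical and not part of this diff.

/*
 * Sketch (assumed, not part of this diff) of a platform-side override of
 * the __weak register_cpu_pwr_stats_ready_notifier() hook. It keeps a
 * blocking notifier chain and fires it per CPU once that CPU's power
 * table is populated, which lets pwr_stats_ready_notifier() rebuild the
 * freq_max_load entries. Assumes <linux/notifier.h>.
 */
static BLOCKING_NOTIFIER_HEAD(pwr_stats_ready_chain);

int register_cpu_pwr_stats_ready_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&pwr_stats_ready_chain, nb);
}

/* called by the power-stats driver once per_cpu_info[cpu].ptable is valid */
static void cpu_pwr_stats_ready(int cpu)
{
	blocking_notifier_call_chain(&pwr_stats_ready_chain, cpu, NULL);
}

A blocking (rather than atomic) chain fits here because sched_update_freq_max_load() takes policy_mutex and allocates memory, so the callback must be allowed to sleep.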