author	Syed Rameez Mustafa <rameezmustafa@codeaurora.org>	2015-02-20 17:09:41 -0800
committer	David Keitel <dkeitel@codeaurora.org>	2016-03-23 20:01:52 -0700
commit	f0ddb64b10d12a964eb8d70a9547e39f3106d250 (patch)
tree	2b49c6e2e2020d792ce2ce017d712060627833a3
parent	b55f87849bb1bd573410ef1a1f491f2a418ed664 (diff)
sched: Update max_capacity when an entire cluster is hotplugged
When an entire cluster is hotplugged, the scheduler's notion of max_capacity
can become outdated. This introduces the following inefficiencies in behavior:

* task_will_fit() does not return true for all tasks. Consequently, all big
  tasks go through the fallback CPU selection logic, skipping C-state and
  power checks in select_best_cpu().
* During boost, migration_needed() returns true unnecessarily, causing an
  avoidable rerun of select_best_cpu().
* An unnecessary kick is sent to all little CPUs when boost is set.
* An opportunity for an early bailout from nohz_kick_needed() is lost.

Start handling CPUFREQ_REMOVE_POLICY in the policy notifier callback, which
indicates that the last CPU in a cluster is being hotplugged out. Also modify
update_min_max_capacity() to iterate over online CPUs instead of possible
CPUs. While we cannot guarantee the integrity of the cpu_online_mask in the
notifier callback, the scheduler will fix up all state soon after any changes
to the online mask.

The change does have one side effect: early termination from the notifier
callback when min_max_freq or max_possible_freq remain unchanged is no longer
possible. When the last CPU in a cluster is hot removed, only max_capacity is
updated; min_max_freq and max_possible_freq are left untouched. Therefore,
when the first CPU in the same cluster gets hot added at a later point,
max_capacity must be recomputed even though min_max_freq and
max_possible_freq have not changed.

Change-Id: I9a1256b5c2cd6fcddd85b069faf5e2ace177e122
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
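In effect, the notifier now short-circuits on CPUFREQ_REMOVE_POLICY and simply
refreshes the capacity bounds over online CPUs. The following is a minimal
standalone sketch of that control flow, not kernel code: the cpu_capacity and
cpu_online arrays, the NR_CPUS value, and policy_notifier() are stand-ins for
struct rq, cpu_online_mask, and the real cpufreq_notifier_policy(), with
locking and the CPUFREQ_NOTIFY recomputation omitted.

	/* Standalone illustration of the new notifier flow (assumed stand-ins). */
	#include <limits.h>
	#include <stdio.h>

	#define NR_CPUS 8

	enum { CPUFREQ_NOTIFY, CPUFREQ_REMOVE_POLICY };

	/* CPUs 0-3 model a big cluster, CPUs 4-7 a little cluster. */
	static int cpu_capacity[NR_CPUS] = { 1024, 1024, 1024, 1024, 410, 410, 410, 410 };
	static int cpu_online[NR_CPUS]   = { 1, 1, 1, 1, 1, 1, 1, 1 };

	static int max_capacity = 1024;
	static int min_capacity = 1024;

	/* Track max/min capacity across *online* CPUs only, as in the patch. */
	static void update_min_max_capacity(void)
	{
		int i, max = 0, min = INT_MAX;

		for (i = 0; i < NR_CPUS; i++) {
			if (!cpu_online[i])
				continue;
			if (cpu_capacity[i] > max)
				max = cpu_capacity[i];
			if (cpu_capacity[i] < min)
				min = cpu_capacity[i];
		}
		max_capacity = max;
		min_capacity = min;
	}

	/* REMOVE_POLICY (last CPU of a cluster going down) only refreshes bounds. */
	static int policy_notifier(int val)
	{
		if (val != CPUFREQ_NOTIFY && val != CPUFREQ_REMOVE_POLICY)
			return 0;

		if (val == CPUFREQ_REMOVE_POLICY) {
			update_min_max_capacity();
			return 0;
		}

		/* CPUFREQ_NOTIFY path: recompute capacity/load_scale_factor (omitted). */
		return 0;
	}

	int main(void)
	{
		/* Hotplug out the big cluster, then deliver REMOVE_POLICY. */
		for (int i = 0; i < 4; i++)
			cpu_online[i] = 0;
		policy_notifier(CPUFREQ_REMOVE_POLICY);
		printf("max_capacity=%d min_capacity=%d\n", max_capacity, min_capacity);
		return 0;
	}

With the big cluster offline, the sketch drops max_capacity to the little
cluster's value, which is the stale-state problem the bullet list above
describes; the hotplug-invariant comparisons in fair.c instead use the new
max_possible_capacity global introduced by the diff below.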
-rw-r--r--	kernel/sched/core.c	72
-rw-r--r--	kernel/sched/fair.c	4
-rw-r--r--	kernel/sched/sched.h	1
3 files changed, 54 insertions, 23 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a3cb1b34ad48..da806ebac086 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1228,7 +1228,8 @@ unsigned int min_max_freq = 1;
unsigned int max_capacity = 1024; /* max(rq->capacity) */
unsigned int min_capacity = 1024; /* min(rq->capacity) */
-unsigned int max_load_scale_factor = 1024; /* max(rq->load_scale_factor) */
+unsigned int max_load_scale_factor = 1024; /* max possible load scale factor */
+unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */
/* Window size (in ns) */
__read_mostly unsigned int sched_ravg_window = 10000000;
@@ -2291,25 +2292,33 @@ heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event)
#endif /* CONFIG_SCHED_FREQ_INPUT */
/* Keep track of max/min capacity possible across CPUs "currently" */
-static void update_min_max_capacity(void)
+static void __update_min_max_capacity(void)
{
int i;
int max = 0, min = INT_MAX;
- int max_lsf = 0;
- for_each_possible_cpu(i) {
+ for_each_online_cpu(i) {
if (cpu_rq(i)->capacity > max)
max = cpu_rq(i)->capacity;
if (cpu_rq(i)->capacity < min)
min = cpu_rq(i)->capacity;
-
- if (cpu_rq(i)->load_scale_factor > max_lsf)
- max_lsf = cpu_rq(i)->load_scale_factor;
}
max_capacity = max;
min_capacity = min;
- max_load_scale_factor = max_lsf;
+}
+
+static void update_min_max_capacity(void)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ raw_spin_lock(&cpu_rq(i)->lock);
+
+ __update_min_max_capacity();
+
+ for_each_possible_cpu(i)
+ raw_spin_unlock(&cpu_rq(i)->lock);
}
/*
@@ -2386,15 +2395,21 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
unsigned long val, void *data)
{
struct cpufreq_policy *policy = (struct cpufreq_policy *)data;
- int i;
+ int i, update_max = 0;
+ u64 highest_mpc = 0, highest_mplsf = 0;
const struct cpumask *cpus = policy->related_cpus;
unsigned int orig_min_max_freq = min_max_freq;
unsigned int orig_max_possible_freq = max_possible_freq;
/* Initialized to policy->max in case policy->related_cpus is empty! */
unsigned int orig_max_freq = policy->max;
- if (val != CPUFREQ_NOTIFY)
+ if (val != CPUFREQ_NOTIFY && val != CPUFREQ_REMOVE_POLICY)
+ return 0;
+
+ if (val == CPUFREQ_REMOVE_POLICY) {
+ update_min_max_capacity();
return 0;
+ }
for_each_cpu(i, policy->related_cpus) {
cpumask_copy(&cpu_rq(i)->freq_domain_cpumask,
@@ -2413,11 +2428,6 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
BUG_ON(!min_max_freq);
BUG_ON(!policy->max);
- if (orig_max_possible_freq == max_possible_freq &&
- orig_min_max_freq == min_max_freq &&
- orig_max_freq == policy->max)
- return 0;
-
/*
* A changed min_max_freq or max_possible_freq (possible during bootup)
* needs to trigger re-computation of load_scale_factor and capacity for
@@ -2442,8 +2452,10 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
*/
if (orig_min_max_freq != min_max_freq ||
- orig_max_possible_freq != max_possible_freq)
+ orig_max_possible_freq != max_possible_freq) {
cpus = cpu_possible_mask;
+ update_max = 1;
+ }
/*
* Changed load_scale_factor can trigger reclassification of tasks as
@@ -2453,16 +2465,34 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
pre_big_small_task_count_change(cpu_possible_mask);
for_each_cpu(i, cpus) {
struct rq *rq = cpu_rq(i);
- u64 max_possible_capacity;
rq->capacity = compute_capacity(i);
- max_possible_capacity = div_u64(((u64) rq->capacity) *
- rq->max_possible_freq, rq->max_freq);
- rq->max_possible_capacity = (int) max_possible_capacity;
rq->load_scale_factor = compute_load_scale_factor(i);
+
+ if (update_max) {
+ u64 mpc, mplsf;
+
+ mpc = div_u64(((u64) rq->capacity) *
+ rq->max_possible_freq, rq->max_freq);
+ rq->max_possible_capacity = (int) mpc;
+
+ mplsf = div_u64(((u64) rq->load_scale_factor) *
+ rq->max_possible_freq, rq->max_freq);
+
+ if (mpc > highest_mpc)
+ highest_mpc = mpc;
+
+ if (mplsf > highest_mplsf)
+ highest_mplsf = mplsf;
+ }
+ }
+
+ if (update_max) {
+ max_possible_capacity = highest_mpc;
+ max_load_scale_factor = highest_mplsf;
}
- update_min_max_capacity();
+ __update_min_max_capacity();
post_big_small_task_count_change(cpu_possible_mask);
return 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a27fbeafe382..c939bf59ca58 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3157,7 +3157,7 @@ static int eligible_cpu(struct task_struct *p, int cpu, int sync)
if (mostly_idle_cpu_sync(cpu, sync))
return 1;
- if (rq->capacity != max_capacity)
+ if (rq->max_possible_capacity != max_possible_capacity)
return !spill_threshold_crossed(p, rq, cpu, sync);
return 0;
@@ -3708,7 +3708,7 @@ unsigned int nr_eligible_big_tasks(int cpu)
int nr = rq->nr_running;
int nr_small = rq->hmp_stats.nr_small_tasks;
- if (rq->capacity != max_capacity)
+ if (rq->max_possible_capacity != max_possible_capacity)
return nr_big;
/* Consider all (except small) tasks on max_capacity cpu as big tasks */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d897c967bb87..1d675545817e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -990,6 +990,7 @@ extern unsigned int min_possible_efficiency;
extern unsigned int max_capacity;
extern unsigned int min_capacity;
extern unsigned int max_load_scale_factor;
+extern unsigned int max_possible_capacity;
extern unsigned long capacity_scale_cpu_efficiency(int cpu);
extern unsigned long capacity_scale_cpu_freq(int cpu);
extern unsigned int sched_mostly_idle_load;