diff options
| author | Srivatsa Vaddagiri <vatsa@codeaurora.org> | 2014-09-04 14:05:28 +0530 |
|---|---|---|
| committer | David Keitel <dkeitel@codeaurora.org> | 2016-03-23 20:00:42 -0700 |
| commit | b7e40e50e9ca1d13c7d806dc5ea0557a776bde2d (patch) | |
| tree | 2d5a6a63929045b51bde2fc58b397662e293f76d /kernel | |
| parent | 38daa13114b64db8710801809903384999996762 (diff) | |
sched: fix wrong load_scale_factor/capacity/nr_big/small_tasks
A couple of bugs exist with incorrect use of cpu_online_mask in the
pre/post_big_small_task_count_change() functions, leading to potentially
incorrect computation of load_scale_factor/capacity/nr_big/small_tasks.
pre/post_big_small_task_count_change() use cpu_online_mask in an
unreliable manner. While local_irq_disable() in
pre_big_small_task_count_change() ensures a cpu won't go away in
cpu_online_mask, nothing prevents a cpu from coming online
concurrently. As a result, cpu_online_mask used in
pre_big_small_task_count_change() can be inconsistent with that used
in post_big_small_task_count_change() which can lead to an attempt to
unlock rq->lock which was not taken before.
Secondly, when either max_possible_freq or min_max_freq is changing,
it needs to trigger recomputation of load_scale_factor and capacity
for *all* cpus, even if some are offline. Otherwise, an offline cpu
could later come online with incorrect load_scale_factor/capacity.
While it should be sufficient to scan online cpus for
updating their nr_big/small_tasks in
post_big_small_task_count_change(), it is unfortunately quite
hard to provide a stable cpu_online_mask when it is called from
cpufreq_notifier_policy(). The cpufreq framework can trigger a
CPUFREQ_NOTIFY notification in multiple contexts, some in cpu-hotplug
paths, which makes it pretty hard to guess whether get_online_cpus()
can be taken without causing deadlocks or not. To work around the
insufficient information we have about the hotplug-safety context when
CPUFREQ_NOTIFY is issued, have post_big_small_task_count_change()
traverse all possible cpus in updating nr_big/small_task_count.
CRs-Fixed: 717134
Change-Id: Ife8f3f7cdfd77d5a21eee63627d7a3465930aed5
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched/core.c | 48 | ||||
| -rw-r--r-- | kernel/sched/fair.c | 22 | ||||
| -rw-r--r-- | kernel/sched/sched.h | 4 |
3 files changed, 55 insertions, 19 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7fd3c2a0ccbe..4ab1322a527b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1981,9 +1981,11 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, { struct cpufreq_policy *policy = (struct cpufreq_policy *)data; int i; - unsigned int min_max = min_max_freq; const struct cpumask *cpus = policy->related_cpus; - int orig_min_max_freq = min_max_freq; + unsigned int orig_min_max_freq = min_max_freq; + unsigned int orig_max_possible_freq = max_possible_freq; + /* Initialized to policy->max in case policy->related_cpus is empty! */ + unsigned int orig_max_freq = policy->max; if (val != CPUFREQ_NOTIFY) return 0; @@ -1991,6 +1993,7 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, for_each_cpu(i, policy->related_cpus) { cpumask_copy(&cpu_rq(i)->freq_domain_cpumask, policy->related_cpus); + orig_max_freq = cpu_rq(i)->max_freq; cpu_rq(i)->min_freq = policy->min; cpu_rq(i)->max_freq = policy->max; cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq; @@ -1998,20 +2001,49 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq); if (min_max_freq == 1) - min_max = UINT_MAX; - min_max_freq = min(min_max, policy->cpuinfo.max_freq); + min_max_freq = UINT_MAX; + min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq); BUG_ON(!min_max_freq); BUG_ON(!policy->max); - if (min_max_freq != orig_min_max_freq) - cpus = cpu_online_mask; + if (orig_max_possible_freq == max_possible_freq && + orig_min_max_freq == min_max_freq && + orig_max_freq == policy->max) + return 0; + + /* + * A changed min_max_freq or max_possible_freq (possible during bootup) + * needs to trigger re-computation of load_scale_factor and capacity for + * all possible cpus (even those offline). It also needs to trigger + * re-computation of nr_big/small_task count on all online cpus. 
+ * + * A changed rq->max_freq otoh needs to trigger re-computation of + * load_scale_factor and capacity for just the cluster of cpus involved. + * Since small task definition depends on max_load_scale_factor, a + * changed load_scale_factor of one cluster could influence small_task + * classification of tasks in another cluster. Hence a changed + * rq->max_freq will need to trigger re-computation of nr_big/small_task + * count on all online cpus. + * + * While it should be sufficient for nr_big/small_tasks to be + * re-computed for only online cpus, we have inadequate context + * information here (in policy notifier) with regard to hotplug-safety + * context in which notification is issued. As a result, we can't use + * get_online_cpus() here, as it can lead to deadlock. Until cpufreq is + * fixed up to issue notification always in hotplug-safe context, + * re-compute nr_big/small_task for all possible cpus. + */ + + if (orig_min_max_freq != min_max_freq || + orig_max_possible_freq != max_possible_freq) + cpus = cpu_possible_mask; /* * Changed load_scale_factor can trigger reclassification of tasks as * big or small. 
Make this change "atomic" so that tasks are accounted * properly due to changed load_scale_factor */ - pre_big_small_task_count_change(); + pre_big_small_task_count_change(cpu_possible_mask); for_each_cpu(i, cpus) { struct rq *rq = cpu_rq(i); @@ -2022,7 +2054,7 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, } update_min_max_capacity(); - post_big_small_task_count_change(); + post_big_small_task_count_change(cpu_possible_mask); return 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2af6e630271a..86683790fa81 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3176,13 +3176,13 @@ void fixup_nr_big_small_task(int cpu) } /* Disable interrupts and grab runqueue lock of all cpus listed in @cpus */ -void pre_big_small_task_count_change(void) +void pre_big_small_task_count_change(const struct cpumask *cpus) { int i; local_irq_disable(); - for_each_online_cpu(i) + for_each_cpu(i, cpus) raw_spin_lock(&cpu_rq(i)->lock); } @@ -3190,15 +3190,15 @@ void pre_big_small_task_count_change(void) * Reinitialize 'nr_big_tasks' and 'nr_small_tasks' counters on all affected * cpus */ -void post_big_small_task_count_change(void) +void post_big_small_task_count_change(const struct cpumask *cpus) { int i; /* Assumes local_irq_disable() keeps online cpumap stable */ - for_each_online_cpu(i) + for_each_cpu(i, cpus) fixup_nr_big_small_task(i); - for_each_online_cpu(i) + for_each_cpu(i, cpus) raw_spin_unlock(&cpu_rq(i)->lock); local_irq_enable(); @@ -3290,15 +3290,19 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write, */ if ((*data != old_val) && (data == &sysctl_sched_upmigrate_pct || - data == &sysctl_sched_small_task_pct)) - pre_big_small_task_count_change(); + data == &sysctl_sched_small_task_pct)) { + get_online_cpus(); + pre_big_small_task_count_change(cpu_online_mask); + } set_hmp_defaults(); if ((*data != old_val) && (data == &sysctl_sched_upmigrate_pct || - data == &sysctl_sched_small_task_pct)) - 
post_big_small_task_count_change(); + data == &sysctl_sched_small_task_pct)) { + post_big_small_task_count_change(cpu_online_mask); + put_online_cpus(); + } return 0; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9958361c27c1..5a804e108e32 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1068,8 +1068,8 @@ extern unsigned int sched_enable_power_aware; int mostly_idle_cpu(int cpu); extern void check_for_migration(struct rq *rq, struct task_struct *p); -extern void pre_big_small_task_count_change(void); -extern void post_big_small_task_count_change(void); +extern void pre_big_small_task_count_change(const struct cpumask *cpus); +extern void post_big_small_task_count_change(const struct cpumask *cpus); extern void inc_nr_big_small_task(struct rq *rq, struct task_struct *p); extern void dec_nr_big_small_task(struct rq *rq, struct task_struct *p); extern void set_hmp_defaults(void); |
