Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--  kernel/sched/fair.c  40
1 files changed, 38 insertions, 2 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ea3a4337dde..f962ab1eb046 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2402,7 +2402,8 @@ void task_numa_work(struct callback_head *work)
return;
- down_read(&mm->mmap_sem);
+ if (!down_read_trylock(&mm->mmap_sem))
+ return;
vma = find_vma(mm, start);
if (!vma) {
reset_ptenuma_scan(p);
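
The first hunk replaces the blocking down_read() with down_read_trylock(): if mmap_sem is contended, the NUMA scan worker simply skips this pass instead of sleeping. A minimal userspace analogue of that back-off pattern, using a pthread rwlock (all names here are illustrative, not from the patch):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t mm_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Analogue of task_numa_work(): the scan is optional background work, so
 * only run it if the reader side can be taken without blocking. */
static void background_scan(void)
{
	if (pthread_rwlock_tryrdlock(&mm_lock) != 0)
		return;	/* contended: skip this pass, like the trylock above */

	/* ... walk the protected address-space structures here ... */
	printf("scan ran\n");

	pthread_rwlock_unlock(&mm_lock);
}

int main(void)
{
	background_scan();
	return 0;
}
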
@@ -2641,6 +2642,7 @@ u32 sched_get_wake_up_idle(struct task_struct *p)
return !!enabled;
}
+EXPORT_SYMBOL(sched_get_wake_up_idle);
int sched_set_wake_up_idle(struct task_struct *p, int wake_up_idle)
{
@@ -2653,6 +2655,7 @@ int sched_set_wake_up_idle(struct task_struct *p, int wake_up_idle)
return 0;
}
+EXPORT_SYMBOL(sched_set_wake_up_idle);
static const u32 runnable_avg_yN_inv[] = {
0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6,
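
The two EXPORT_SYMBOL() lines make the wake_up_idle accessors callable from loadable modules. A hedged sketch of a module using them; only the two function names and signatures come from the hunks above, while the module name, the use of current, and the header carrying the prototypes are assumptions:

#include <linux/module.h>
#include <linux/sched.h>

static int __init wui_demo_init(void)
{
	/* Ask the scheduler to let this task's wakeups target idle CPUs,
	 * then read the flag back.  Both calls resolve against the newly
	 * exported symbols. */
	sched_set_wake_up_idle(current, 1);
	pr_info("wake_up_idle=%u\n", sched_get_wake_up_idle(current));
	return 0;
}

static void __exit wui_demo_exit(void)
{
	sched_set_wake_up_idle(current, 0);
}

module_init(wui_demo_init);
module_exit(wui_demo_exit);
MODULE_LICENSE("GPL");
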
@@ -6596,10 +6599,22 @@ static int sched_group_energy(struct energy_env *eenv)
{
struct cpumask visit_cpus;
u64 total_energy = 0;
+ int cpu_count;
WARN_ON(!eenv->sg_top->sge);
cpumask_copy(&visit_cpus, sched_group_cpus(eenv->sg_top));
+ /* If a cpu is hotplugged in while we are in this function,
+ * it does not appear in the existing visit_cpus mask
+ * which came from the sched_group pointer of the
+ * sched_domain pointed at by sd_ea for either the prev
+ * or next cpu and was dereferenced in __energy_diff.
+ * Since we will dereference sd_scs later as we iterate
+ * through the CPUs we expect to visit, new CPUs can
+ * be present which are not in the visit_cpus mask.
+ * Guard this with cpu_count.
+ */
+ cpu_count = cpumask_weight(&visit_cpus);
while (!cpumask_empty(&visit_cpus)) {
struct sched_group *sg_shared_cap = NULL;
@@ -6609,6 +6624,8 @@ static int sched_group_energy(struct energy_env *eenv)
/*
* Is the group utilization affected by cpus outside this
* sched_group?
+ * This sd may have groups with cpus which were not present
+ * when we took visit_cpus.
*/
sd = rcu_dereference(per_cpu(sd_scs, cpu));
@@ -6658,8 +6675,24 @@ static int sched_group_energy(struct energy_env *eenv)
total_energy += sg_busy_energy + sg_idle_energy;
- if (!sd->child)
+ if (!sd->child) {
+ /*
+ * cpu_count here is the number of
+ * cpus we expect to visit in this
+ * calculation. If we race against
+ * hotplug, we can have extra cpus
+ * added to the groups we are
+ * iterating which do not appear in
+ * the visit_cpus mask. In that case
+ * we are not able to calculate energy
+ * without restarting so we will bail
+ * out and use prev_cpu this time.
+ */
+ if (!cpu_count)
+ return -EINVAL;
cpumask_xor(&visit_cpus, &visit_cpus, sched_group_cpus(sg));
+ cpu_count--;
+ }
if (cpumask_equal(sched_group_cpus(sg), sched_group_cpus(eenv->sg_top)))
goto next_cpu;
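
Taken together, the cpu_count hunks bound the visit_cpus walk: the count is snapshotted with cpumask_weight() before the loop and decremented once per bottom-level group, so if a concurrently hotplugged CPU sneaks into a group (and the XOR re-adds it to visit_cpus) the walk runs out of budget and returns -EINVAL instead of spinning forever. A small self-contained analogue of that bounded walk, with a plain bitmask standing in for the cpumask and an illustrative two-CPUs-per-group topology (all names are made up):

#include <stdio.h>

/*
 * Analogue of the visit_cpus walk in sched_group_energy(): 'visit' is the
 * CPU set snapshotted at entry and 'budget' plays the role of cpu_count.
 * group_of() may report a group containing a CPU that is NOT in the
 * snapshot (the hotplug race); the XOR then adds that CPU to 'visit' and
 * the loop would never terminate without the budget check.
 */
static unsigned long group_of(int cpu)
{
	/* Illustrative topology: CPUs are paired {0,1}, {2,3}, ... */
	return 3UL << (cpu & ~1);
}

static int walk(unsigned long visit)
{
	int budget = __builtin_popcountl(visit);	/* cpu_count */

	while (visit) {					/* !cpumask_empty() */
		int cpu = __builtin_ctzl(visit);	/* first CPU in the mask */

		if (!budget)
			return -1;			/* bail out, like -EINVAL */
		visit ^= group_of(cpu);			/* cpumask_xor() with the group */
		budget--;
	}
	return 0;
}

int main(void)
{
	printf("%d\n", walk(0xfUL));	/* CPUs {0,1,2,3}: both groups visited -> 0 */
	printf("%d\n", walk(0x7UL));	/* CPUs {0,1,2}: CPU 3 "appears" in CPU 2's
					 * group, the XOR re-adds it -> -1 (bail out) */
	return 0;
}
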
@@ -6671,6 +6704,9 @@ static int sched_group_energy(struct energy_env *eenv)
* If we raced with hotplug and got an sd NULL-pointer;
* returning a wrong energy estimation is better than
* entering an infinite loop.
+ * Specifically: If a cpu is unplugged after we took
+ * the visit_cpus mask, it no longer has an sd_scs
+ * pointer, so when we dereference it, we get NULL.
*/
if (cpumask_test_cpu(cpu, &visit_cpus))
return -EINVAL;
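
This closing hunk documents the unplug side of the race: once a CPU goes away its per-cpu sd_scs entry becomes NULL, and if that CPU is still pending in visit_cpus the walk cannot make progress and must return -EINVAL. A small runnable analogue of that check-and-bail shape, with a plain pointer array standing in for the per-cpu sd_scs pointers and RCU omitted (all names are illustrative):

#include <stdio.h>

#define NR_CPUS 4

/* Hot-unplugging a CPU is modelled by clearing its entry, which is exactly
 * the situation the comment above describes. */
static struct sd { unsigned long span; } sds[NR_CPUS] = {
	{ 0x3 }, { 0x3 }, { 0xc }, { 0xc },	/* groups {0,1} and {2,3} */
};
static struct sd *sd_scs[NR_CPUS] = { &sds[0], &sds[1], &sds[2], &sds[3] };

static int walk(unsigned long visit)
{
	while (visit) {
		int cpu = __builtin_ctzl(visit);
		struct sd *sd = sd_scs[cpu];	/* rcu_dereference(per_cpu(...)) */

		if (!sd) {
			/* The unplugged CPU is still pending in the mask, so
			 * no progress is possible: bail out (-EINVAL) rather
			 * than loop forever. */
			return -1;
		}
		visit &= ~sd->span;	/* drop the group we just handled */
	}
	return 0;
}

int main(void)
{
	printf("%d\n", walk(0xfUL));	/* all sd pointers valid -> 0  */
	sd_scs[2] = NULL;		/* "unplug" CPU 2              */
	printf("%d\n", walk(0xfUL));	/* NULL sd while pending -> -1 */
	return 0;
}
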