summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- kernel/sched/core.c 10
-rw-r--r-- kernel/sched/fair.c 253
-rw-r--r-- kernel/sched/sched.h 1
3 files changed, 140 insertions, 124 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5c441a5ec617..6b30aa0ccf50 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1231,6 +1231,9 @@ unsigned int min_capacity = 1024; /* min(rq->capacity) */
unsigned int max_load_scale_factor = 1024; /* max possible load scale factor */
unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */
+/* Mask of all CPUs that have max_possible_capacity */
+cpumask_t mpc_mask = CPU_MASK_ALL;
+
/* Window size (in ns) */
__read_mostly unsigned int sched_ravg_window = 10000000;
@@ -2568,8 +2571,13 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
mplsf = div_u64(((u64) rq->load_scale_factor) *
rq->max_possible_freq, rq->max_freq);
- if (mpc > highest_mpc)
+ if (mpc > highest_mpc) {
highest_mpc = mpc;
+ cpumask_clear(&mpc_mask);
+ cpumask_set_cpu(i, &mpc_mask);
+ } else if (mpc == highest_mpc) {
+ cpumask_set_cpu(i, &mpc_mask);
+ }
if (mplsf > highest_mplsf)
highest_mplsf = mplsf;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 39f656fcc0ac..20d123712eef 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3026,11 +3026,9 @@ static inline u64 cpu_load_sync(int cpu, int sync)
}
static int
-spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu,
- int sync)
+spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq)
{
- u64 total_load = cpu_load_sync(cpu, sync) +
- scale_load_to_cpu(task_load(p), cpu);
+ u64 total_load = task_load + cpu_load;
if (total_load > sched_spill_load ||
(rq->nr_running + 1) > sysctl_sched_spill_nr_run)
@@ -3048,10 +3046,9 @@ int mostly_idle_cpu(int cpu)
&& !sched_cpu_high_irqload(cpu);
}
-static int mostly_idle_cpu_sync(int cpu, int sync)
+static int mostly_idle_cpu_sync(int cpu, u64 load, int sync)
{
struct rq *rq = cpu_rq(cpu);
- u64 load = cpu_load_sync(cpu, sync);
int nr_running;
nr_running = rq->nr_running;
@@ -3149,14 +3146,12 @@ done:
* sched_downmigrate. This will help avoid frequenty migrations for
* tasks with load close to the upmigrate threshold
*/
-static int task_will_fit(struct task_struct *p, int cpu)
+
+static int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu)
{
- u64 load;
- int prev_cpu = task_cpu(p);
- struct rq *prev_rq = cpu_rq(prev_cpu);
+ struct rq *prev_rq = cpu_rq(task_cpu(p));
struct rq *rq = cpu_rq(cpu);
- int upmigrate = sched_upmigrate;
- int nice = task_nice(p);
+ int upmigrate, nice;
if (rq->capacity == max_capacity)
return 1;
@@ -3165,33 +3160,40 @@ static int task_will_fit(struct task_struct *p, int cpu)
if (rq->capacity > prev_rq->capacity)
return 1;
} else {
+ nice = task_nice(p);
if (nice > sched_upmigrate_min_nice || upmigrate_discouraged(p))
return 1;
- load = scale_load_to_cpu(task_load(p), cpu);
-
+ upmigrate = sched_upmigrate;
if (prev_rq->capacity > rq->capacity)
upmigrate = sched_downmigrate;
- if (load < upmigrate)
+ if (task_load < upmigrate)
return 1;
}
return 0;
}
-static int eligible_cpu(struct task_struct *p, int cpu, int sync)
+static int task_will_fit(struct task_struct *p, int cpu)
+{
+ u64 tload = scale_load_to_cpu(task_load(p), cpu);
+
+ return task_load_will_fit(p, tload, cpu);
+}
+
+static int eligible_cpu(u64 task_load, u64 cpu_load, int cpu, int sync)
{
struct rq *rq = cpu_rq(cpu);
if (sched_cpu_high_irqload(cpu))
return 0;
- if (mostly_idle_cpu_sync(cpu, sync))
+ if (mostly_idle_cpu_sync(cpu, cpu_load, sync))
return 1;
if (rq->max_possible_capacity != max_possible_capacity)
- return !spill_threshold_crossed(p, rq, cpu, sync);
+ return !spill_threshold_crossed(task_load, cpu_load, rq);
return 0;
}
@@ -3246,22 +3248,23 @@ unsigned int power_cost_at_freq(int cpu, unsigned int freq)
/* Return the cost of running task p on CPU cpu. This function
* currently assumes that task p is the only task which will run on
* the CPU. */
-static unsigned int power_cost(struct task_struct *p, int cpu)
+static unsigned int power_cost(u64 task_load, int cpu)
{
+ unsigned int task_freq, cur_freq;
+ struct rq *rq = cpu_rq(cpu);
u64 demand;
- unsigned int task_freq;
- unsigned int cur_freq = cpu_rq(cpu)->cur_freq;
if (!sysctl_sched_enable_power_aware)
- return cpu_rq(cpu)->max_possible_capacity;
+ return rq->max_possible_capacity;
/* calculate % of max freq needed */
- demand = scale_load_to_cpu(task_load(p), cpu) * 100;
+ demand = task_load * 100;
demand = div64_u64(demand, max_task_load());
- task_freq = demand * cpu_rq(cpu)->max_possible_freq;
+ task_freq = demand * rq->max_possible_freq;
task_freq /= 100; /* khz needed */
+ cur_freq = rq->cur_freq;
task_freq = max(cur_freq, task_freq);
return power_cost_at_freq(cpu, task_freq);
@@ -3269,94 +3272,80 @@ static unsigned int power_cost(struct task_struct *p, int cpu)
static int best_small_task_cpu(struct task_struct *p, int sync)
{
- int best_busy_cpu = -1, best_fallback_cpu = -1;
- int best_mi_cpu = -1;
- int min_cost_cpu = -1, min_cstate_cpu = -1;
+ int best_busy_cpu = -1, fallback_cpu = -1;
+ int min_cstate_cpu = -1;
int min_cstate = INT_MAX;
- int min_fallback_cpu_cost = INT_MAX;
- int min_cost = INT_MAX;
- int i, cstate, cpu_cost;
- u64 load, min_busy_load = ULLONG_MAX;
- int cost_list[nr_cpu_ids];
- int prev_cpu = task_cpu(p);
- struct cpumask search_cpus;
+ int cpu_cost, min_cost = INT_MAX;
+ int i, cstate, prev_cpu;
+ int hmp_capable;
+ u64 tload, cpu_load, min_load = ULLONG_MAX;
+ cpumask_t mi_cpus = CPU_MASK_NONE;
+ cpumask_t temp;
- cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask);
+ cpumask_and(&temp, &mpc_mask, cpu_possible_mask);
+ hmp_capable = !cpumask_full(&temp);
- if (cpumask_empty(&search_cpus))
- return prev_cpu;
+ for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) {
+ struct rq *rq = cpu_rq(i);
- /* Take a first pass to find the lowest power cost CPU. This
- will avoid a potential O(n^2) search */
- for_each_cpu(i, &search_cpus) {
+ prev_cpu = (i == task_cpu(p));
- trace_sched_cpu_load(cpu_rq(i), idle_cpu(i),
- mostly_idle_cpu_sync(i, sync),
- sched_irqload(i), power_cost(p, i),
+ trace_sched_cpu_load(rq, idle_cpu(i),
+ mostly_idle_cpu_sync(i,
+ cpu_load_sync(i, sync), sync),
+ sched_irqload(i),
+ power_cost(scale_load_to_cpu(task_load(p),
+ i), i),
cpu_temp(i));
- cpu_cost = power_cost(p, i);
- if (cpu_cost < min_cost ||
- (cpu_cost == min_cost && i == prev_cpu)) {
- min_cost = cpu_cost;
- min_cost_cpu = i;
- }
-
- cost_list[i] = cpu_cost;
- }
-
- /*
- * Optimization to steer task towards the minimum power cost
- * CPU if it's the task's previous CPU. The tradeoff is that
- * we may have to check the same information again in pass 2.
- */
- if (!cpu_rq(min_cost_cpu)->cstate &&
- mostly_idle_cpu_sync(min_cost_cpu, sync) &&
- !sched_cpu_high_irqload(min_cost_cpu) && min_cost_cpu == prev_cpu)
- return min_cost_cpu;
-
- for_each_cpu(i, &search_cpus) {
- struct rq *rq = cpu_rq(i);
- cstate = rq->cstate;
-
- if (power_delta_exceeded(cost_list[i], min_cost)) {
- if (cost_list[i] < min_fallback_cpu_cost ||
- (cost_list[i] == min_fallback_cpu_cost &&
- i == prev_cpu)) {
- best_fallback_cpu = i;
- min_fallback_cpu_cost = cost_list[i];
+ if (rq->max_possible_capacity == max_possible_capacity &&
+ hmp_capable) {
+ tload = scale_load_to_cpu(task_load(p), i);
+ cpu_cost = power_cost(tload, i);
+ if (cpu_cost < min_cost ||
+ (prev_cpu && cpu_cost == min_cost)) {
+ fallback_cpu = i;
+ min_cost = cpu_cost;
}
continue;
}
- if (idle_cpu(i) && cstate && !sched_cpu_high_irqload(i)) {
+ if (sched_cpu_high_irqload(i))
+ continue;
+
+ /* Todo this can be optimized to avoid checking c-state
+ * and moving cstate assignment statement inside the if */
+ cstate = rq->cstate;
+ if (idle_cpu(i) && cstate) {
if (cstate < min_cstate ||
- (cstate == min_cstate && i == prev_cpu)) {
+ (prev_cpu && cstate == min_cstate)) {
min_cstate_cpu = i;
min_cstate = cstate;
}
continue;
}
- if (mostly_idle_cpu_sync(i, sync) &&
- !sched_cpu_high_irqload(i)) {
- if (best_mi_cpu == -1 || i == prev_cpu)
- best_mi_cpu = i;
+ cpu_load = cpu_load_sync(i, sync);
+ if (mostly_idle_cpu_sync(i, cpu_load, sync)) {
+ if (prev_cpu)
+ return task_cpu(p);
+
+ cpumask_set_cpu(i, &mi_cpus);
continue;
}
- load = cpu_load_sync(i, sync);
- if (!spill_threshold_crossed(p, rq, i, sync)) {
- if (load < min_busy_load ||
- (load == min_busy_load && i == prev_cpu)) {
- min_busy_load = load;
+ tload = scale_load_to_cpu(task_load(p), i);
+ if (!spill_threshold_crossed(tload, cpu_load, rq)) {
+ if (cpu_load < min_load ||
+ (prev_cpu && cpu_load == min_load)) {
+ min_load = cpu_load;
best_busy_cpu = i;
}
}
}
- if (best_mi_cpu != -1)
- return best_mi_cpu;
+ if (!cpumask_empty(&mi_cpus))
+ return cpumask_first(&mi_cpus);
if (min_cstate_cpu != -1)
return min_cstate_cpu;
@@ -3364,7 +3353,7 @@ static int best_small_task_cpu(struct task_struct *p, int sync)
if (best_busy_cpu != -1)
return best_busy_cpu;
- return best_fallback_cpu;
+ return fallback_cpu;
}
#define UP_MIGRATION 1
@@ -3372,11 +3361,11 @@ static int best_small_task_cpu(struct task_struct *p, int sync)
#define EA_MIGRATION 3
#define IRQLOAD_MIGRATION 4
-static int skip_cpu(struct task_struct *p, int cpu, int reason)
+static int skip_cpu(struct task_struct *p, u64 task_load, int cpu, int reason)
{
struct rq *rq = cpu_rq(cpu);
struct rq *task_rq = task_rq(p);
- int skip = 0;
+ int skip;
if (!reason)
return 0;
@@ -3395,14 +3384,15 @@ static int skip_cpu(struct task_struct *p, int cpu, int reason)
case EA_MIGRATION:
skip = rq->capacity < task_rq->capacity ||
- power_cost(p, cpu) > power_cost(p, task_cpu(p));
+ power_cost(task_load, cpu) >
+ power_cost(task_load, task_cpu(p));
break;
case IRQLOAD_MIGRATION:
/* Purposely fall through */
default:
- skip = (cpu == task_cpu(p));
+ skip = (rq == task_rq);
break;
}
@@ -3433,7 +3423,7 @@ static int select_packing_target(struct task_struct *p, int best_cpu)
/* Pick the first lowest power cpu as target */
for_each_cpu(i, &search_cpus) {
- int cost = power_cost(p, i);
+ int cost = power_cost(scale_load_to_cpu(task_load(p), i), i);
if (cost < min_cost && !sched_cpu_high_irqload(i)) {
target = i;
@@ -3464,10 +3454,11 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
int sync)
{
int i, best_cpu = -1, fallback_idle_cpu = -1, min_cstate_cpu = -1;
- int prev_cpu = task_cpu(p);
+ int prev_cpu;
int cpu_cost, min_cost = INT_MAX;
int min_idle_cost = INT_MAX, min_busy_cost = INT_MAX;
- u64 load, min_load = ULLONG_MAX, min_fallback_load = ULLONG_MAX;
+ u64 tload, cpu_load;
+ u64 min_load = ULLONG_MAX, min_fallback_load = ULLONG_MAX;
int small_task = is_small_task(p);
int boost = sched_boost();
int cstate, min_cstate = INT_MAX;
@@ -3500,12 +3491,17 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
/* Todo : Optimize this loop */
for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) {
+ tload = scale_load_to_cpu(task_load(p), i);
+ cpu_load = cpu_load_sync(i, sync);
+ prev_cpu = (i == task_cpu(p));
+
trace_sched_cpu_load(cpu_rq(i), idle_cpu(i),
- mostly_idle_cpu_sync(i, sync),
- sched_irqload(i), power_cost(p, i),
+ mostly_idle_cpu_sync(i,
+ cpu_load_sync(i, sync), sync),
+ sched_irqload(i), power_cost(tload, i),
cpu_temp(i));
- if (skip_cpu(p, i, reason))
+ if (skip_cpu(p, tload, i, reason))
continue;
/*
@@ -3513,14 +3509,13 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
* won't fit is our fallback if we can't find a CPU
* where the task will fit.
*/
- if (!task_will_fit(p, i)) {
- if (mostly_idle_cpu_sync(i, sync) &&
+ if (!task_load_will_fit(p, tload, i)) {
+ if (mostly_idle_cpu_sync(i, cpu_load, sync) &&
!sched_cpu_high_irqload(i)) {
- load = cpu_load_sync(i, sync);
- if (load < min_fallback_load ||
- (load == min_fallback_load &&
- i == prev_cpu)) {
- min_fallback_load = load;
+ if (cpu_load < min_fallback_load ||
+ (cpu_load == min_fallback_load &&
+ prev_cpu)) {
+ min_fallback_load = cpu_load;
fallback_idle_cpu = i;
}
}
@@ -3531,7 +3526,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
if (prefer_idle == -1)
prefer_idle = cpu_rq(i)->prefer_idle;
- if (!eligible_cpu(p, i, sync))
+ if (!eligible_cpu(tload, cpu_load, i, sync))
continue;
/*
@@ -3540,9 +3535,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
* spill.
*/
- load = cpu_load_sync(i, sync);
- cpu_cost = power_cost(p, i);
- cstate = cpu_rq(i)->cstate;
+ cpu_cost = power_cost(tload, i);
/*
* If the task fits in a CPU in a lower power band, that
@@ -3572,6 +3565,8 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
*/
if (idle_cpu(i) || (sync && i == curr_cpu && prefer_idle &&
cpu_rq(i)->nr_running == 1)) {
+ cstate = cpu_rq(i)->cstate;
+
if (cstate > min_cstate)
continue;
@@ -3583,7 +3578,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
}
if (cpu_cost < min_idle_cost ||
- (cpu_cost == min_idle_cost && i == prev_cpu)) {
+ (prev_cpu && cpu_cost == min_idle_cost)) {
min_idle_cost = cpu_cost;
min_cstate_cpu = i;
}
@@ -3595,11 +3590,11 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
* For CPUs that are not completely idle, pick one with the
* lowest load and break ties with power cost
*/
- if (load > min_load)
+ if (cpu_load > min_load)
continue;
- if (load < min_load) {
- min_load = load;
+ if (cpu_load < min_load) {
+ min_load = cpu_load;
min_busy_cost = cpu_cost;
best_cpu = i;
continue;
@@ -3611,7 +3606,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
* more power efficient CPU option.
*/
if (cpu_cost < min_busy_cost ||
- (cpu_cost == min_busy_cost && i == prev_cpu)) {
+ (prev_cpu && cpu_cost == min_busy_cost)) {
min_busy_cost = cpu_cost;
best_cpu = i;
}
@@ -3622,7 +3617,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
* best_cpu cannot have high irq load.
*/
if (min_cstate_cpu >= 0 && (prefer_idle > 0 || best_cpu < 0 ||
- !mostly_idle_cpu_sync(best_cpu, sync)))
+ !mostly_idle_cpu_sync(best_cpu, min_load, sync)))
best_cpu = min_cstate_cpu;
done:
if (best_cpu < 0) {
@@ -3632,7 +3627,7 @@ done:
* prev_cpu. We may just benefit from having
* a hot cache.
*/
- best_cpu = prev_cpu;
+ best_cpu = task_cpu(p);
else
best_cpu = fallback_idle_cpu;
}
@@ -4133,15 +4128,27 @@ static int lower_power_cpu_available(struct task_struct *p, int cpu)
{
int i;
int lowest_power_cpu = task_cpu(p);
- int lowest_power = power_cost(p, task_cpu(p));
+ int lowest_power = power_cost(scale_load_to_cpu(task_load(p),
+ lowest_power_cpu), lowest_power_cpu);
+ struct cpumask search_cpus;
+ struct rq *rq = cpu_rq(cpu);
+
+ /*
+ * This function should be called only when task 'p' fits in the current
+ * CPU which can be ensured by task_will_fit() prior to this.
+ */
+ cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask);
+ cpumask_and(&search_cpus, &search_cpus, &rq->freq_domain_cpumask);
+ cpumask_clear_cpu(lowest_power_cpu, &search_cpus);
/* Is a lower-powered idle CPU available which will fit this task? */
- for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) {
- if (idle_cpu(i) && task_will_fit(p, i)) {
- int idle_power_cost = power_cost(p, i);
- if (idle_power_cost < lowest_power) {
+ for_each_cpu(i, &search_cpus) {
+ if (idle_cpu(i)) {
+ int cost =
+ power_cost(scale_load_to_cpu(task_load(p), i), i);
+ if (cost < lowest_power) {
lowest_power_cpu = i;
- lowest_power = idle_power_cost;
+ lowest_power = cost;
}
}
}
@@ -4305,13 +4312,13 @@ static inline int find_new_hmp_ilb(int type)
return 0;
}
-static inline int power_cost(struct task_struct *p, int cpu)
+static inline int power_cost(u64 task_load, int cpu)
{
return SCHED_CAPACITY_SCALE;
}
static inline int
-spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu, int sync)
+spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq)
{
return 0;
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 568c3427ca0e..44233c6adca0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -992,6 +992,7 @@ extern unsigned int max_capacity;
extern unsigned int min_capacity;
extern unsigned int max_load_scale_factor;
extern unsigned int max_possible_capacity;
+extern cpumask_t mpc_mask;
extern unsigned long capacity_scale_cpu_efficiency(int cpu);
extern unsigned long capacity_scale_cpu_freq(int cpu);
extern unsigned int sched_mostly_idle_load;