diff options
| -rw-r--r-- | kernel/sched/core.c | 10 | ||||
| -rw-r--r-- | kernel/sched/fair.c | 253 | ||||
| -rw-r--r-- | kernel/sched/sched.h | 1 |
3 files changed, 140 insertions, 124 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5c441a5ec617..6b30aa0ccf50 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1231,6 +1231,9 @@ unsigned int min_capacity = 1024; /* min(rq->capacity) */ unsigned int max_load_scale_factor = 1024; /* max possible load scale factor */ unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */ +/* Mask of all CPUs that have max_possible_capacity */ +cpumask_t mpc_mask = CPU_MASK_ALL; + /* Window size (in ns) */ __read_mostly unsigned int sched_ravg_window = 10000000; @@ -2568,8 +2571,13 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, mplsf = div_u64(((u64) rq->load_scale_factor) * rq->max_possible_freq, rq->max_freq); - if (mpc > highest_mpc) + if (mpc > highest_mpc) { highest_mpc = mpc; + cpumask_clear(&mpc_mask); + cpumask_set_cpu(i, &mpc_mask); + } else if (mpc == highest_mpc) { + cpumask_set_cpu(i, &mpc_mask); + } if (mplsf > highest_mplsf) highest_mplsf = mplsf; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 39f656fcc0ac..20d123712eef 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3026,11 +3026,9 @@ static inline u64 cpu_load_sync(int cpu, int sync) } static int -spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu, - int sync) +spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq) { - u64 total_load = cpu_load_sync(cpu, sync) + - scale_load_to_cpu(task_load(p), cpu); + u64 total_load = task_load + cpu_load; if (total_load > sched_spill_load || (rq->nr_running + 1) > sysctl_sched_spill_nr_run) @@ -3048,10 +3046,9 @@ int mostly_idle_cpu(int cpu) && !sched_cpu_high_irqload(cpu); } -static int mostly_idle_cpu_sync(int cpu, int sync) +static int mostly_idle_cpu_sync(int cpu, u64 load, int sync) { struct rq *rq = cpu_rq(cpu); - u64 load = cpu_load_sync(cpu, sync); int nr_running; nr_running = rq->nr_running; @@ -3149,14 +3146,12 @@ done: * sched_downmigrate. This will help avoid frequenty migrations for * tasks with load close to the upmigrate threshold */ -static int task_will_fit(struct task_struct *p, int cpu) + +static int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu) { - u64 load; - int prev_cpu = task_cpu(p); - struct rq *prev_rq = cpu_rq(prev_cpu); + struct rq *prev_rq = cpu_rq(task_cpu(p)); struct rq *rq = cpu_rq(cpu); - int upmigrate = sched_upmigrate; - int nice = task_nice(p); + int upmigrate, nice; if (rq->capacity == max_capacity) return 1; @@ -3165,33 +3160,40 @@ static int task_will_fit(struct task_struct *p, int cpu) if (rq->capacity > prev_rq->capacity) return 1; } else { + nice = task_nice(p); if (nice > sched_upmigrate_min_nice || upmigrate_discouraged(p)) return 1; - load = scale_load_to_cpu(task_load(p), cpu); - + upmigrate = sched_upmigrate; if (prev_rq->capacity > rq->capacity) upmigrate = sched_downmigrate; - if (load < upmigrate) + if (task_load < upmigrate) return 1; } return 0; } -static int eligible_cpu(struct task_struct *p, int cpu, int sync) +static int task_will_fit(struct task_struct *p, int cpu) +{ + u64 tload = scale_load_to_cpu(task_load(p), cpu); + + return task_load_will_fit(p, tload, cpu); +} + +static int eligible_cpu(u64 task_load, u64 cpu_load, int cpu, int sync) { struct rq *rq = cpu_rq(cpu); if (sched_cpu_high_irqload(cpu)) return 0; - if (mostly_idle_cpu_sync(cpu, sync)) + if (mostly_idle_cpu_sync(cpu, cpu_load, sync)) return 1; if (rq->max_possible_capacity != max_possible_capacity) - return !spill_threshold_crossed(p, rq, cpu, sync); + return !spill_threshold_crossed(task_load, cpu_load, rq); return 0; } @@ -3246,22 +3248,23 @@ unsigned int power_cost_at_freq(int cpu, unsigned int freq) /* Return the cost of running task p on CPU cpu. This function * currently assumes that task p is the only task which will run on * the CPU. */ -static unsigned int power_cost(struct task_struct *p, int cpu) +static unsigned int power_cost(u64 task_load, int cpu) { + unsigned int task_freq, cur_freq; + struct rq *rq = cpu_rq(cpu); u64 demand; - unsigned int task_freq; - unsigned int cur_freq = cpu_rq(cpu)->cur_freq; if (!sysctl_sched_enable_power_aware) - return cpu_rq(cpu)->max_possible_capacity; + return rq->max_possible_capacity; /* calculate % of max freq needed */ - demand = scale_load_to_cpu(task_load(p), cpu) * 100; + demand = task_load * 100; demand = div64_u64(demand, max_task_load()); - task_freq = demand * cpu_rq(cpu)->max_possible_freq; + task_freq = demand * rq->max_possible_freq; task_freq /= 100; /* khz needed */ + cur_freq = rq->cur_freq; task_freq = max(cur_freq, task_freq); return power_cost_at_freq(cpu, task_freq); @@ -3269,94 +3272,80 @@ static unsigned int power_cost(struct task_struct *p, int cpu) static int best_small_task_cpu(struct task_struct *p, int sync) { - int best_busy_cpu = -1, best_fallback_cpu = -1; - int best_mi_cpu = -1; - int min_cost_cpu = -1, min_cstate_cpu = -1; + int best_busy_cpu = -1, fallback_cpu = -1; + int min_cstate_cpu = -1; int min_cstate = INT_MAX; - int min_fallback_cpu_cost = INT_MAX; - int min_cost = INT_MAX; - int i, cstate, cpu_cost; - u64 load, min_busy_load = ULLONG_MAX; - int cost_list[nr_cpu_ids]; - int prev_cpu = task_cpu(p); - struct cpumask search_cpus; + int cpu_cost, min_cost = INT_MAX; + int i, cstate, prev_cpu; + int hmp_capable; + u64 tload, cpu_load, min_load = ULLONG_MAX; + cpumask_t mi_cpus = CPU_MASK_NONE; + cpumask_t temp; - cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask); + cpumask_and(&temp, &mpc_mask, cpu_possible_mask); + hmp_capable = !cpumask_full(&temp); - if (cpumask_empty(&search_cpus)) - return prev_cpu; + for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) { + struct rq *rq = cpu_rq(i); - /* Take a first pass to find the lowest power cost CPU. This - will avoid a potential O(n^2) search */ - for_each_cpu(i, &search_cpus) { + prev_cpu = (i == task_cpu(p)); - trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), - mostly_idle_cpu_sync(i, sync), - sched_irqload(i), power_cost(p, i), + trace_sched_cpu_load(rq, idle_cpu(i), + mostly_idle_cpu_sync(i, + cpu_load_sync(i, sync), sync), + sched_irqload(i), + power_cost(scale_load_to_cpu(task_load(p), + i), i), cpu_temp(i)); - cpu_cost = power_cost(p, i); - if (cpu_cost < min_cost || - (cpu_cost == min_cost && i == prev_cpu)) { - min_cost = cpu_cost; - min_cost_cpu = i; - } - - cost_list[i] = cpu_cost; - } - - /* - * Optimization to steer task towards the minimum power cost - * CPU if it's the task's previous CPU. The tradeoff is that - * we may have to check the same information again in pass 2. - */ - if (!cpu_rq(min_cost_cpu)->cstate && - mostly_idle_cpu_sync(min_cost_cpu, sync) && - !sched_cpu_high_irqload(min_cost_cpu) && min_cost_cpu == prev_cpu) - return min_cost_cpu; - - for_each_cpu(i, &search_cpus) { - struct rq *rq = cpu_rq(i); - cstate = rq->cstate; - - if (power_delta_exceeded(cost_list[i], min_cost)) { - if (cost_list[i] < min_fallback_cpu_cost || - (cost_list[i] == min_fallback_cpu_cost && - i == prev_cpu)) { - best_fallback_cpu = i; - min_fallback_cpu_cost = cost_list[i]; + if (rq->max_possible_capacity == max_possible_capacity && + hmp_capable) { + tload = scale_load_to_cpu(task_load(p), i); + cpu_cost = power_cost(tload, i); + if (cpu_cost < min_cost || + (prev_cpu && cpu_cost == min_cost)) { + fallback_cpu = i; + min_cost = cpu_cost; } continue; } - if (idle_cpu(i) && cstate && !sched_cpu_high_irqload(i)) { + if (sched_cpu_high_irqload(i)) + continue; + + /* Todo this can be optimized to avoid checking c-state + * and moving cstate assignment statement inside the if */ + cstate = rq->cstate; + if (idle_cpu(i) && cstate) { if (cstate < min_cstate || - (cstate == min_cstate && i == prev_cpu)) { + (prev_cpu && cstate == min_cstate)) { min_cstate_cpu = i; min_cstate = cstate; } continue; } - if (mostly_idle_cpu_sync(i, sync) && - !sched_cpu_high_irqload(i)) { - if (best_mi_cpu == -1 || i == prev_cpu) - best_mi_cpu = i; + cpu_load = cpu_load_sync(i, sync); + if (mostly_idle_cpu_sync(i, cpu_load, sync)) { + if (prev_cpu) + return task_cpu(p); + + cpumask_set_cpu(i, &mi_cpus); continue; } - load = cpu_load_sync(i, sync); - if (!spill_threshold_crossed(p, rq, i, sync)) { - if (load < min_busy_load || - (load == min_busy_load && i == prev_cpu)) { - min_busy_load = load; + tload = scale_load_to_cpu(task_load(p), i); + if (!spill_threshold_crossed(tload, cpu_load, rq)) { + if (cpu_load < min_load || + (prev_cpu && cpu_load == min_load)) { + min_load = cpu_load; best_busy_cpu = i; } } } - if (best_mi_cpu != -1) - return best_mi_cpu; + if (!cpumask_empty(&mi_cpus)) + return cpumask_first(&mi_cpus); if (min_cstate_cpu != -1) return min_cstate_cpu; @@ -3364,7 +3353,7 @@ static int best_small_task_cpu(struct task_struct *p, int sync) if (best_busy_cpu != -1) return best_busy_cpu; - return best_fallback_cpu; + return fallback_cpu; } #define UP_MIGRATION 1 @@ -3372,11 +3361,11 @@ static int best_small_task_cpu(struct task_struct *p, int sync) #define EA_MIGRATION 3 #define IRQLOAD_MIGRATION 4 -static int skip_cpu(struct task_struct *p, int cpu, int reason) +static int skip_cpu(struct task_struct *p, u64 task_load, int cpu, int reason) { struct rq *rq = cpu_rq(cpu); struct rq *task_rq = task_rq(p); - int skip = 0; + int skip; if (!reason) return 0; @@ -3395,14 +3384,15 @@ static int skip_cpu(struct task_struct *p, int cpu, int reason) case EA_MIGRATION: skip = rq->capacity < task_rq->capacity || - power_cost(p, cpu) > power_cost(p, task_cpu(p)); + power_cost(task_load, cpu) > + power_cost(task_load, task_cpu(p)); break; case IRQLOAD_MIGRATION: /* Purposely fall through */ default: - skip = (cpu == task_cpu(p)); + skip = (rq == task_rq); break; } @@ -3433,7 +3423,7 @@ static int select_packing_target(struct task_struct *p, int best_cpu) /* Pick the first lowest power cpu as target */ for_each_cpu(i, &search_cpus) { - int cost = power_cost(p, i); + int cost = power_cost(scale_load_to_cpu(task_load(p), i), i); if (cost < min_cost && !sched_cpu_high_irqload(i)) { target = i; @@ -3464,10 +3454,11 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, int sync) { int i, best_cpu = -1, fallback_idle_cpu = -1, min_cstate_cpu = -1; - int prev_cpu = task_cpu(p); + int prev_cpu; int cpu_cost, min_cost = INT_MAX; int min_idle_cost = INT_MAX, min_busy_cost = INT_MAX; - u64 load, min_load = ULLONG_MAX, min_fallback_load = ULLONG_MAX; + u64 tload, cpu_load; + u64 min_load = ULLONG_MAX, min_fallback_load = ULLONG_MAX; int small_task = is_small_task(p); int boost = sched_boost(); int cstate, min_cstate = INT_MAX; @@ -3500,12 +3491,17 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, /* Todo : Optimize this loop */ for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) { + tload = scale_load_to_cpu(task_load(p), i); + cpu_load = cpu_load_sync(i, sync); + prev_cpu = (i == task_cpu(p)); + trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), - mostly_idle_cpu_sync(i, sync), - sched_irqload(i), power_cost(p, i), + mostly_idle_cpu_sync(i, + cpu_load_sync(i, sync), sync), + sched_irqload(i), power_cost(tload, i), cpu_temp(i)); - if (skip_cpu(p, i, reason)) + if (skip_cpu(p, tload, i, reason)) continue; /* @@ -3513,14 +3509,13 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * won't fit is our fallback if we can't find a CPU * where the task will fit. */ - if (!task_will_fit(p, i)) { - if (mostly_idle_cpu_sync(i, sync) && + if (!task_load_will_fit(p, tload, i)) { + if (mostly_idle_cpu_sync(i, cpu_load, sync) && !sched_cpu_high_irqload(i)) { - load = cpu_load_sync(i, sync); - if (load < min_fallback_load || - (load == min_fallback_load && - i == prev_cpu)) { - min_fallback_load = load; + if (cpu_load < min_fallback_load || + (cpu_load == min_fallback_load && + prev_cpu)) { + min_fallback_load = cpu_load; fallback_idle_cpu = i; } } @@ -3531,7 +3526,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, if (prefer_idle == -1) prefer_idle = cpu_rq(i)->prefer_idle; - if (!eligible_cpu(p, i, sync)) + if (!eligible_cpu(tload, cpu_load, i, sync)) continue; /* @@ -3540,9 +3535,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * spill. */ - load = cpu_load_sync(i, sync); - cpu_cost = power_cost(p, i); - cstate = cpu_rq(i)->cstate; + cpu_cost = power_cost(tload, i); /* * If the task fits in a CPU in a lower power band, that @@ -3572,6 +3565,8 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, */ if (idle_cpu(i) || (sync && i == curr_cpu && prefer_idle && cpu_rq(i)->nr_running == 1)) { + cstate = cpu_rq(i)->cstate; + if (cstate > min_cstate) continue; @@ -3583,7 +3578,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, } if (cpu_cost < min_idle_cost || - (cpu_cost == min_idle_cost && i == prev_cpu)) { + (prev_cpu && cpu_cost == min_idle_cost)) { min_idle_cost = cpu_cost; min_cstate_cpu = i; } @@ -3595,11 +3590,11 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * For CPUs that are not completely idle, pick one with the * lowest load and break ties with power cost */ - if (load > min_load) + if (cpu_load > min_load) continue; - if (load < min_load) { - min_load = load; + if (cpu_load < min_load) { + min_load = cpu_load; min_busy_cost = cpu_cost; best_cpu = i; continue; @@ -3611,7 +3606,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * more power efficient CPU option. */ if (cpu_cost < min_busy_cost || - (cpu_cost == min_busy_cost && i == prev_cpu)) { + (prev_cpu && cpu_cost == min_busy_cost)) { min_busy_cost = cpu_cost; best_cpu = i; } @@ -3622,7 +3617,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, * best_cpu cannot have high irq load. */ if (min_cstate_cpu >= 0 && (prefer_idle > 0 || best_cpu < 0 || - !mostly_idle_cpu_sync(best_cpu, sync))) + !mostly_idle_cpu_sync(best_cpu, min_load, sync))) best_cpu = min_cstate_cpu; done: if (best_cpu < 0) { @@ -3632,7 +3627,7 @@ done: * prev_cpu. We may just benefit from having * a hot cache. */ - best_cpu = prev_cpu; + best_cpu = task_cpu(p); else best_cpu = fallback_idle_cpu; } @@ -4133,15 +4128,27 @@ static int lower_power_cpu_available(struct task_struct *p, int cpu) { int i; int lowest_power_cpu = task_cpu(p); - int lowest_power = power_cost(p, task_cpu(p)); + int lowest_power = power_cost(scale_load_to_cpu(task_load(p), + lowest_power_cpu), lowest_power_cpu); + struct cpumask search_cpus; + struct rq *rq = cpu_rq(cpu); + + /* + * This function should be called only when task 'p' fits in the current + * CPU which can be ensured by task_will_fit() prior to this. + */ + cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask); + cpumask_and(&search_cpus, &search_cpus, &rq->freq_domain_cpumask); + cpumask_clear_cpu(lowest_power_cpu, &search_cpus); /* Is a lower-powered idle CPU available which will fit this task? */ - for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) { - if (idle_cpu(i) && task_will_fit(p, i)) { - int idle_power_cost = power_cost(p, i); - if (idle_power_cost < lowest_power) { + for_each_cpu(i, &search_cpus) { + if (idle_cpu(i)) { + int cost = + power_cost(scale_load_to_cpu(task_load(p), i), i); + if (cost < lowest_power) { lowest_power_cpu = i; - lowest_power = idle_power_cost; + lowest_power = cost; } } } @@ -4305,13 +4312,13 @@ static inline int find_new_hmp_ilb(int type) return 0; } -static inline int power_cost(struct task_struct *p, int cpu) +static inline int power_cost(u64 task_load, int cpu) { return SCHED_CAPACITY_SCALE; } static inline int -spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu, int sync) +spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq) { return 0; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 568c3427ca0e..44233c6adca0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -992,6 +992,7 @@ extern unsigned int max_capacity; extern unsigned int min_capacity; extern unsigned int max_load_scale_factor; extern unsigned int max_possible_capacity; +extern cpumask_t mpc_mask; extern unsigned long capacity_scale_cpu_efficiency(int cpu); extern unsigned long capacity_scale_cpu_freq(int cpu); extern unsigned int sched_mostly_idle_load; |
