author    Srivatsa Vaddagiri <vatsa@codeaurora.org>    2015-04-22 17:12:09 +0530
committer David Keitel <dkeitel@codeaurora.org>        2016-03-23 21:25:11 -0700
commit    df6bfcaf70e9552bc7076fa0749f4f5c632897eb (patch)
tree      bfc5cf1fe5596c8aa10168027359882bba6914a8 /kernel
parent    cb1bb6a8f4a881b2c6c80160a94975dd93175558 (diff)
sched: Update fair and rt placement logic to use scheduler clusters
Make use of clusters in the fair and rt scheduling classes. This is
needed as the freq domain mask can no longer be used to do correct
task placement. The freq domain mask was being used to demarcate
clusters.

Change-Id: I57f74147c7006f22d6760256926c10fd0bf50cbd
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
[joonwoop@codeaurora.org: fixed merge conflicts due to omitted changes
 for CONFIG_SCHED_QHMP.]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
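For context, the core idea is that CPUs are grouped into explicit sched_cluster
objects and placement code iterates those, instead of inferring groupings from
the frequency-domain cpumask. A standalone toy model of that iteration, in the
spirit of the for_each_sched_cluster() macro this patch moves into sched.h; the
toy_cluster type, capacities and CPU masks below are made up for illustration
and are not the kernel code:

#include <stdio.h>

/* Toy stand-in for struct sched_cluster: a capacity plus a CPU bitmask. */
struct toy_cluster {
	int id;
	int capacity;            /* relative compute capacity */
	unsigned long cpu_mask;  /* bit i set => CPU i belongs to this cluster */
};

/* Two clusters standing in for a big.LITTLE system. */
static const struct toy_cluster clusters[] = {
	{ .id = 0, .capacity = 512,  .cpu_mask = 0x0f },  /* CPUs 0-3 */
	{ .id = 1, .capacity = 1024, .cpu_mask = 0xf0 },  /* CPUs 4-7 */
};

#define for_each_toy_cluster(c) \
	for ((c) = clusters; (c) < clusters + sizeof(clusters)/sizeof(clusters[0]); (c)++)

int main(void)
{
	const struct toy_cluster *c;

	/* Placement walks clusters explicitly instead of deducing them
	 * from a frequency-domain mask. */
	for_each_toy_cluster(c) {
		for (int cpu = 0; cpu < 8; cpu++)
			if (c->cpu_mask & (1UL << cpu))
				printf("cluster %d (cap %d): cpu %d\n",
				       c->id, c->capacity, cpu);
	}
	return 0;
}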
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/core.c  |   5
-rw-r--r--  kernel/sched/fair.c  | 516
-rw-r--r--  kernel/sched/rt.c    |  50
-rw-r--r--  kernel/sched/sched.h |  12
-rw-r--r--  kernel/sysctl.c      |   7
5 files changed, 355 insertions, 235 deletions
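The bulk of the change is the rework of select_best_cpu() in fair.c, which now
searches cluster by cluster: pick the least-power cluster that can fit the
task, then pick the cheapest acceptable CPU inside it, falling back to backup
clusters otherwise. A compressed, standalone sketch of that flow follows;
every name and number in it is illustrative (a toy model assuming two clusters
and made-up cost/load tables), and it leaves out the boost, need_idle,
IRQ-load, spill-threshold and sibling-CPU handling that the real code performs:

#include <limits.h>
#include <stdio.h>

struct cluster { int id, capacity, ncpus, first_cpu; };

/* Toy topology plus per-CPU cost/load tables; indices are CPU ids. */
static const struct cluster clusters[] = {
	{ 0, 512, 4, 0 }, { 1, 1024, 4, 4 },
};
static const int power_cost[8] = { 10, 10, 10, 10, 40, 40, 40, 40 };
static const int cpu_load[8]   = { 70, 20, 90, 30, 10, 15, 5, 60 };

/* Toy fit rule: does the task's load, scaled to this cluster, stay under 80%? */
static int task_fits(int task_load, const struct cluster *c)
{
	return task_load * 1024 / c->capacity <= 80;
}

static int select_best_cpu_toy(int task_load)
{
	int best_cpu = -1, min_cost = INT_MAX;
	long min_load = LONG_MAX;

	for (unsigned i = 0; i < sizeof(clusters)/sizeof(clusters[0]); i++) {
		const struct cluster *c = &clusters[i];

		if (!task_fits(task_load, c))
			continue;	/* would go on a backup list in the real code */

		/* Within the chosen cluster, prefer the cheapest CPU,
		 * breaking ties by lower load. */
		for (int cpu = c->first_cpu; cpu < c->first_cpu + c->ncpus; cpu++) {
			if (power_cost[cpu] < min_cost ||
			    (power_cost[cpu] == min_cost && cpu_load[cpu] < min_load)) {
				min_cost = power_cost[cpu];
				min_load = cpu_load[cpu];
				best_cpu = cpu;
			}
		}
		if (best_cpu >= 0)
			break;		/* least-power cluster that fits wins */
	}
	return best_cpu;
}

int main(void)
{
	printf("small task -> cpu %d\n", select_best_cpu_toy(30));
	printf("big task   -> cpu %d\n", select_best_cpu_toy(60));
	return 0;
}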
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8bd6fbde7efe..8cdd373a8980 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1287,7 +1287,7 @@ static int compute_load_scale_factor(struct sched_cluster *cluster)
return load_scale;
}
-static struct list_head cluster_head;
+struct list_head cluster_head;
static DEFINE_MUTEX(cluster_lock);
static cpumask_t all_cluster_cpus = CPU_MASK_NONE;
DECLARE_BITMAP(all_cluster_ids, NR_CPUS);
@@ -1311,9 +1311,6 @@ static struct sched_cluster init_cluster = {
.dstate_wakeup_latency = 0,
};
-#define for_each_sched_cluster(cluster) \
- list_for_each_entry_rcu(cluster, &cluster_head, list)
-
void update_all_clusters_stats(void)
{
struct sched_cluster *cluster;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index be7b44f9a85f..950ab9229cfc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2702,15 +2702,9 @@ unsigned int __read_mostly sysctl_sched_spill_nr_run = 10;
*/
unsigned int __read_mostly sysctl_sched_enable_power_aware = 0;
-/*
- * This specifies the maximum percent power difference between 2
- * CPUs for them to be considered identical in terms of their
- * power characteristics (i.e. they are in the same power band).
- */
-unsigned int __read_mostly sysctl_sched_powerband_limit_pct;
-
unsigned int __read_mostly sysctl_sched_lowspill_freq;
unsigned int __read_mostly sysctl_sched_pack_freq = UINT_MAX;
+
/*
* CPUs with load greater than the sched_spill_load_threshold are not
* eligible for task placement. When all CPUs in a cluster achieve a
@@ -2894,18 +2888,6 @@ static inline u64 cpu_load_sync(int cpu, int sync)
return scale_load_to_cpu(cpu_cravg_sync(cpu, sync), cpu);
}
-static int
-spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq)
-{
- u64 total_load = task_load + cpu_load;
-
- if (total_load > sched_spill_load ||
- (rq->nr_running + 1) > sysctl_sched_spill_nr_run)
- return 1;
-
- return 0;
-}
-
static int boost_refcount;
static DEFINE_SPINLOCK(boost_lock);
static DEFINE_MUTEX(boost_mutex);
@@ -3010,33 +2992,11 @@ static int task_will_fit(struct task_struct *p, int cpu)
return task_load_will_fit(p, tload, cpu);
}
-static int eligible_cpu(u64 task_load, u64 cpu_load, int cpu, int sync)
-{
- if (sched_cpu_high_irqload(cpu))
- return 0;
-
- return !spill_threshold_crossed(task_load, cpu_load, cpu_rq(cpu));
-}
-
struct cpu_pwr_stats __weak *get_cpu_pwr_stats(void)
{
return NULL;
}
-int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost)
-{
- int delta, cost_limit;
-
- if (!base_cost || cpu_cost == base_cost ||
- !sysctl_sched_powerband_limit_pct)
- return 0;
-
- delta = cpu_cost - base_cost;
- cost_limit = div64_u64((u64)sysctl_sched_powerband_limit_pct *
- (u64)base_cost, 100);
- return abs(delta) > cost_limit;
-}
-
/*
* Return the cost of running task p on CPU cpu. This function
* currently assumes that task p is the only task which will run on
@@ -3108,59 +3068,270 @@ unlock:
}
+struct cpu_select_env {
+ struct task_struct *p;
+ u8 reason;
+ u8 need_idle:1;
+ u8 boost:1;
+ u8 sync:1;
+ u8 ignore_prev_cpu:1;
+ int prev_cpu;
+ DECLARE_BITMAP(candidate_list, NR_CPUS);
+ DECLARE_BITMAP(backup_list, NR_CPUS);
+ u64 task_load;
+ u64 cpu_load;
+};
+
+struct cluster_cpu_stats {
+ int best_idle_cpu, best_capacity_cpu, best_cpu, best_sibling_cpu;
+ int min_cost, best_sibling_cpu_cost;
+ u64 min_load, best_sibling_cpu_load;
+ s64 highest_spare_capacity;
+};
+
#define UP_MIGRATION 1
#define DOWN_MIGRATION 2
-#define IRQLOAD_MIGRATION 4
+#define IRQLOAD_MIGRATION 3
-static int skip_cluster(int tcpu, int cpu, int reason)
+static int
+spill_threshold_crossed(struct cpu_select_env *env, struct rq *rq)
{
- int skip;
+ u64 total_load;
- if (!reason)
+ total_load = env->task_load + env->cpu_load;
+
+ if (total_load > sched_spill_load ||
+ (rq->nr_running + 1) > sysctl_sched_spill_nr_run)
+ return 1;
+
+ return 0;
+}
+
+static int skip_cpu(int cpu, struct cpu_select_env *env)
+{
+ int tcpu = task_cpu(env->p);
+ int skip = 0;
+
+ if (!env->reason)
return 0;
- switch (reason) {
- case UP_MIGRATION:
- skip = (cpu_capacity(cpu) <= cpu_capacity(tcpu));
- break;
+ if (is_reserved(cpu))
+ return 1;
- case DOWN_MIGRATION:
- skip = (cpu_capacity(cpu) >= cpu_capacity(tcpu));
+ switch (env->reason) {
+ case UP_MIGRATION:
+ skip = !idle_cpu(cpu);
break;
-
case IRQLOAD_MIGRATION:
/* Purposely fall through */
-
default:
- return 0;
+ skip = (cpu == tcpu);
+ break;
}
return skip;
}
-static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu, int reason)
+static inline int
+acceptable_capacity(struct sched_cluster *cluster, struct cpu_select_env *env)
{
- int skip;
+ int tcpu;
- if (!reason)
- return 0;
-
- if (is_reserved(cpu))
+ if (!env->reason)
return 1;
- switch (reason) {
+ tcpu = task_cpu(env->p);
+ switch (env->reason) {
case UP_MIGRATION:
- skip = !idle_cpu(cpu);
- break;
+ return cluster->capacity > cpu_capacity(tcpu);
- case IRQLOAD_MIGRATION:
- /* Purposely fall through */
+ case DOWN_MIGRATION:
+ return cluster->capacity < cpu_capacity(tcpu);
default:
- skip = (rq == task_rq);
+ break;
}
- return skip;
+ return 1;
+}
+
+static int
+skip_cluster(struct sched_cluster *cluster, struct cpu_select_env *env)
+{
+ if (!acceptable_capacity(cluster, env)) {
+ __clear_bit(cluster->id, env->candidate_list);
+ return 1;
+ }
+
+ return 0;
+}
+
+static struct sched_cluster *
+select_least_power_cluster(struct cpu_select_env *env)
+{
+ struct sched_cluster *cluster;
+
+ for_each_sched_cluster(cluster) {
+ if (!skip_cluster(cluster, env)) {
+ int cpu = cluster_first_cpu(cluster);
+
+ env->task_load = scale_load_to_cpu(task_load(env->p),
+ cpu);
+ if (task_load_will_fit(env->p, env->task_load, cpu))
+ return cluster;
+
+ __set_bit(cluster->id, env->backup_list);
+ __clear_bit(cluster->id, env->candidate_list);
+ }
+ }
+
+ return NULL;
+}
+
+static struct sched_cluster *
+next_candidate(const unsigned long *list, int start, int end)
+{
+ int cluster_id;
+
+ cluster_id = find_next_bit(list, end, start - 1 + 1);
+ if (cluster_id >= end)
+ return NULL;
+
+ return sched_cluster[cluster_id];
+}
+
+static void update_spare_capacity(
+struct cluster_cpu_stats *stats, int cpu, int capacity, u64 cpu_load)
+{
+ s64 spare_capacity = sched_ravg_window - cpu_load;
+
+ if (spare_capacity > 0 &&
+ (spare_capacity > stats->highest_spare_capacity ||
+ (spare_capacity == stats->highest_spare_capacity &&
+ capacity > cpu_capacity(stats->best_capacity_cpu)))) {
+ stats->highest_spare_capacity = spare_capacity;
+ stats->best_capacity_cpu = cpu;
+ }
+}
+
+static inline void find_backup_cluster(
+struct cpu_select_env *env, struct cluster_cpu_stats *stats)
+{
+ struct sched_cluster *next = NULL;
+ int i;
+
+ while (!bitmap_empty(env->backup_list, num_clusters)) {
+ next = next_candidate(env->backup_list, 0, num_clusters);
+ __clear_bit(next->id, env->backup_list);
+ for_each_cpu_and(i, &env->p->cpus_allowed, &next->cpus) {
+ trace_sched_cpu_load_wakeup(cpu_rq(i), idle_cpu(i),
+ sched_irqload(i), power_cost(i, task_load(env->p) +
+ cpu_cravg_sync(i, env->sync)), 0);
+
+ update_spare_capacity(stats, i, next->capacity,
+ cpu_load_sync(i, env->sync));
+ }
+ }
+}
+
+struct sched_cluster *
+next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env)
+{
+ struct sched_cluster *next = NULL;
+
+ __clear_bit(cluster->id, env->candidate_list);
+
+ do {
+ if (bitmap_empty(env->candidate_list, num_clusters))
+ return NULL;
+
+ next = next_candidate(env->candidate_list, 0, num_clusters);
+ if (next)
+ if (skip_cluster(next, env))
+ next = NULL;
+ } while (!next);
+
+ env->task_load = scale_load_to_cpu(task_load(env->p),
+ cluster_first_cpu(next));
+ return next;
+}
+
+static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
+ struct cpu_select_env *env)
+{
+ int cpu_cost;
+ int prev_cpu = env->prev_cpu;
+
+ cpu_cost = power_cost(cpu, task_load(env->p) +
+ cpu_cravg_sync(cpu, env->sync));
+ if (cpu_cost > stats->min_cost)
+ return;
+
+ if (cpu != prev_cpu && cpus_share_cache(prev_cpu, cpu)) {
+ if (stats->best_sibling_cpu_cost > cpu_cost ||
+ (stats->best_sibling_cpu_cost == cpu_cost &&
+ stats->best_sibling_cpu_load > env->cpu_load)) {
+
+ stats->best_sibling_cpu_cost = cpu_cost;
+ stats->best_sibling_cpu_load = env->cpu_load;
+ stats->best_sibling_cpu = cpu;
+ }
+ }
+
+ if ((cpu_cost < stats->min_cost) ||
+ ((stats->best_cpu != prev_cpu && stats->min_load > env->cpu_load) ||
+ cpu == prev_cpu)) {
+ if (env->need_idle) {
+ if (idle_cpu(cpu)) {
+ stats->min_cost = cpu_cost;
+ stats->best_idle_cpu = cpu;
+ }
+ } else {
+ stats->min_cost = cpu_cost;
+ stats->min_load = env->cpu_load;
+ stats->best_cpu = cpu;
+ }
+ }
+}
+
+static void find_best_cpu_in_cluster(struct sched_cluster *c,
+ struct cpu_select_env *env, struct cluster_cpu_stats *stats)
+{
+ int i;
+ struct cpumask search_cpus;
+
+ cpumask_and(&search_cpus, tsk_cpus_allowed(env->p), &c->cpus);
+ if (env->ignore_prev_cpu)
+ cpumask_clear_cpu(env->prev_cpu, &search_cpus);
+
+ for_each_cpu(i, &search_cpus) {
+ env->cpu_load = cpu_load_sync(i, env->sync);
+
+ trace_sched_cpu_load_wakeup(cpu_rq(i), idle_cpu(i),
+ sched_irqload(i),
+ power_cost(i, task_load(env->p) +
+ cpu_cravg_sync(i, env->sync)), 0);
+
+ if (unlikely(!cpu_active(i)) || skip_cpu(i, env))
+ continue;
+
+ update_spare_capacity(stats, i, c->capacity, env->cpu_load);
+
+ if (env->boost || sched_cpu_high_irqload(i) ||
+ spill_threshold_crossed(env, cpu_rq(i)))
+ continue;
+
+ update_cluster_stats(i, stats, env);
+ }
+}
+
+static inline void init_cluster_cpu_stats(struct cluster_cpu_stats *stats)
+{
+ stats->best_cpu = stats->best_idle_cpu = -1;
+ stats->best_capacity_cpu = stats->best_sibling_cpu = -1;
+ stats->min_cost = stats->best_sibling_cpu_cost = INT_MAX;
+ stats->min_load = stats->best_sibling_cpu_load = ULLONG_MAX;
+ stats->highest_spare_capacity = 0;
}
/*
@@ -3175,163 +3346,118 @@ static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu, int reason)
static inline int wake_to_idle(struct task_struct *p)
{
return (current->flags & PF_WAKE_UP_IDLE) ||
- (p->flags & PF_WAKE_UP_IDLE);
+ (p->flags & PF_WAKE_UP_IDLE);
}
-static inline bool short_sleep_task_waking(struct task_struct *p, int prev_cpu,
- const cpumask_t *search_cpus)
+static inline bool
+bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
{
+ int prev_cpu;
+ struct task_struct *task = env->p;
+ struct sched_cluster *cluster;
+
+ if (env->boost || env->reason || env->need_idle ||
+ !sched_short_sleep_task_threshold)
+ return false;
+
+ prev_cpu = env->prev_cpu;
+ if (!cpumask_test_cpu(prev_cpu, tsk_cpus_allowed(task)) ||
+ unlikely(!cpu_active(prev_cpu)))
+ return false;
+
/*
* This function should be used by task wake up path only as it's
* assuming p->last_switch_out_ts as last sleep time.
* p->last_switch_out_ts can denote last preemption time as well as
* last sleep time.
*/
- return (sched_short_sleep_task_threshold &&
- (p->ravg.mark_start - p->last_switch_out_ts <
- sched_short_sleep_task_threshold) &&
- cpumask_test_cpu(prev_cpu, search_cpus));
+ if (task->ravg.mark_start - task->last_switch_out_ts >=
+ sched_short_sleep_task_threshold)
+ return false;
+
+ env->task_load = scale_load_to_cpu(task_load(task), prev_cpu);
+ cluster = cpu_rq(prev_cpu)->cluster;
+
+ if (!task_load_will_fit(task, env->task_load, prev_cpu)) {
+
+ __set_bit(cluster->id, env->backup_list);
+ __clear_bit(cluster->id, env->candidate_list);
+ return false;
+ }
+
+ env->cpu_load = cpu_load_sync(prev_cpu, env->sync);
+ if (sched_cpu_high_irqload(prev_cpu) ||
+ spill_threshold_crossed(env, cpu_rq(prev_cpu))) {
+ update_spare_capacity(stats, prev_cpu,
+ cluster->capacity, env->cpu_load);
+ env->ignore_prev_cpu = 1;
+ return false;
+ }
+
+ return true;
}
/* return cheapest cpu that can fit this task */
static int select_best_cpu(struct task_struct *p, int target, int reason,
int sync)
{
- int i, best_cpu = -1, best_idle_cpu = -1, best_capacity_cpu = -1;
- int prev_cpu = task_cpu(p), best_sibling_cpu = -1;
- int cpu_cost, min_cost = INT_MAX, best_sibling_cpu_cost = INT_MAX;
- u64 tload, cpu_load, best_sibling_cpu_load = ULLONG_MAX;
- u64 min_load = ULLONG_MAX;
- s64 spare_capacity, highest_spare_capacity = 0;
- int boost = sched_boost();
- int need_idle = wake_to_idle(p);
+ struct sched_cluster *cluster;
+ struct cluster_cpu_stats stats;
bool fast_path = false;
- cpumask_t search_cpus;
- struct rq *trq;
-
- cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask);
-
- if (!boost && !reason && !need_idle &&
- short_sleep_task_waking(p, prev_cpu, &search_cpus)) {
- cpu_load = cpu_load_sync(prev_cpu, sync);
- tload = scale_load_to_cpu(task_load(p), prev_cpu);
- if (eligible_cpu(tload, cpu_load, prev_cpu, sync) &&
- task_load_will_fit(p, tload, prev_cpu)) {
- fast_path = true;
- best_cpu = prev_cpu;
- goto done;
- }
-
- spare_capacity = sched_ravg_window - cpu_load;
- if (spare_capacity > 0) {
- highest_spare_capacity = spare_capacity;
- best_capacity_cpu = prev_cpu;
- }
- cpumask_clear_cpu(prev_cpu, &search_cpus);
- }
-
- trq = task_rq(p);
- for_each_cpu(i, &search_cpus) {
- struct rq *rq = cpu_rq(i);
-
- trace_sched_cpu_load_wakeup(cpu_rq(i), idle_cpu(i),
- sched_irqload(i),
- power_cost(i, task_load(p) + cpu_cravg_sync(i, sync)),
- cpu_temp(i));
-
- if (skip_cluster(task_cpu(p), i, reason)) {
- cpumask_andnot(&search_cpus, &search_cpus,
- &rq->cluster->cpus);
- continue;
- }
- if (skip_cpu(task_rq(p), rq, i, reason))
- continue;
-
- cpu_load = cpu_load_sync(i, sync);
- spare_capacity = sched_ravg_window - cpu_load;
+ struct cpu_select_env env = {
+ .p = p,
+ .reason = reason,
+ .need_idle = wake_to_idle(p),
+ .boost = sched_boost(),
+ .sync = sync,
+ .prev_cpu = target,
+ .ignore_prev_cpu = 0,
+ };
- /* Note the highest spare capacity CPU in the system */
- if (spare_capacity > 0 &&
- (spare_capacity > highest_spare_capacity ||
- (spare_capacity == highest_spare_capacity &&
- cpu_capacity(i) > cpu_capacity(best_capacity_cpu)))) {
- highest_spare_capacity = spare_capacity;
- best_capacity_cpu = i;
- }
+ bitmap_copy(env.candidate_list, all_cluster_ids, NR_CPUS);
+ bitmap_zero(env.backup_list, NR_CPUS);
- if (boost)
- continue;
+ init_cluster_cpu_stats(&stats);
- tload = scale_load_to_cpu(task_load(p), i);
- if (!eligible_cpu(tload, cpu_load, i, sync) ||
- !task_load_will_fit(p, tload, i))
- continue;
+ if (bias_to_prev_cpu(&env, &stats)) {
+ fast_path = true;
+ goto out;
+ }
- /*
- * The task will fit on this CPU and the CPU can accommodate it
- * under spill.
- */
+ rcu_read_lock();
+ cluster = select_least_power_cluster(&env);
- cpu_cost = power_cost(i, task_load(p) +
- cpu_cravg_sync(i, sync));
+ if (!cluster) {
+ rcu_read_unlock();
+ goto out;
+ }
- if (cpu_cost > min_cost)
- continue;
+ do {
+ find_best_cpu_in_cluster(cluster, &env, &stats);
- /*
- * If the task fits in a CPU in a lower power band, that
- * overrides all other considerations.
- */
- if (power_delta_exceeded(cpu_cost, min_cost)) {
- min_cost = cpu_cost;
- min_load = ULLONG_MAX;
- best_cpu = -1;
- }
+ } while ((cluster = next_best_cluster(cluster, &env)));
- if (i != prev_cpu && cpus_share_cache(prev_cpu, i)) {
- if (best_sibling_cpu_cost > cpu_cost ||
- (best_sibling_cpu_cost == cpu_cost &&
- best_sibling_cpu_load > cpu_load)) {
- best_sibling_cpu_cost = cpu_cost;
- best_sibling_cpu_load = cpu_load;
- best_sibling_cpu = i;
- }
- }
+ rcu_read_unlock();
- if ((cpu_cost < min_cost) ||
- ((best_cpu != prev_cpu && min_load > cpu_load) ||
- i == prev_cpu)) {
- if (need_idle) {
- if (idle_cpu(i)) {
- min_cost = cpu_cost;
- best_idle_cpu = i;
- }
- } else {
- min_cost = cpu_cost;
- min_load = cpu_load;
- best_cpu = i;
- }
- }
- }
+ if (stats.best_idle_cpu >= 0) {
+ target = stats.best_idle_cpu;
+ } else if (stats.best_cpu >= 0) {
+ if (stats.best_cpu != task_cpu(p) &&
+ stats.min_cost == stats.best_sibling_cpu_cost)
+ stats.best_cpu = stats.best_sibling_cpu;
- if (best_idle_cpu >= 0) {
- best_cpu = best_idle_cpu;
- } else if (best_cpu < 0 || boost) {
- if (unlikely(best_capacity_cpu < 0))
- best_cpu = prev_cpu;
- else
- best_cpu = best_capacity_cpu;
+ target = stats.best_cpu;
} else {
- if (best_cpu != prev_cpu && min_cost == best_sibling_cpu_cost)
- best_cpu = best_sibling_cpu;
+ find_backup_cluster(&env, &stats);
+ if (stats.best_capacity_cpu >= 0)
+ target = stats.best_capacity_cpu;
}
-done:
- trace_sched_task_load(p, boost, reason, sync, need_idle, fast_path,
- best_cpu);
-
- return best_cpu;
+out:
+ trace_sched_task_load(p, sched_boost(), env.reason, env.sync,
+ env.need_idle, fast_path, target);
+ return target;
}
static void
@@ -3920,7 +4046,7 @@ unsigned int power_cost(int cpu, u64 demand)
}
static inline int
-spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq)
+spill_threshold_crossed(struct cpu_select_env *env, struct rq *rq)
{
return 0;
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 1fd22539a334..e698cc3438c7 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1661,13 +1661,15 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
#ifdef CONFIG_SCHED_HMP
+
static int find_lowest_rq_hmp(struct task_struct *task)
{
struct cpumask *lowest_mask = *this_cpu_ptr(&local_cpu_mask);
- int cpu_cost, min_cost = INT_MAX;
- u64 cpu_load, min_load = ULLONG_MAX;
+ struct cpumask candidate_mask = CPU_MASK_NONE;
+ struct sched_cluster *cluster;
int best_cpu = -1;
int prev_cpu = task_cpu(task);
+ u64 cpu_load, min_load = ULLONG_MAX;
int i;
/* Make sure the mask is initialized first */
@@ -1686,36 +1688,26 @@ static int find_lowest_rq_hmp(struct task_struct *task)
* the best one based on our affinity and topology.
*/
- /* Skip performance considerations and optimize for power.
- * Worst case we'll be iterating over all CPUs here. CPU
- * online mask should be taken care of when constructing
- * the lowest_mask.
- */
- for_each_cpu(i, lowest_mask) {
- cpu_load = scale_load_to_cpu(
- cpu_rq(i)->hmp_stats.cumulative_runnable_avg, i);
- cpu_cost = power_cost(i, cpu_cravg_sync(i, 0));
- trace_sched_cpu_load_wakeup(cpu_rq(i), idle_cpu(i),
- sched_irqload(i), cpu_cost, cpu_temp(i));
-
- if (power_delta_exceeded(cpu_cost, min_cost)) {
- if (cpu_cost > min_cost)
- continue;
+ for_each_sched_cluster(cluster) {
+ cpumask_and(&candidate_mask, &cluster->cpus, lowest_mask);
- min_cost = cpu_cost;
- min_load = ULLONG_MAX;
- best_cpu = -1;
- }
-
- if (sched_cpu_high_irqload(i))
+ if (cpumask_empty(&candidate_mask))
continue;
- if (cpu_load < min_load ||
- (cpu_load == min_load &&
- (i == prev_cpu || (best_cpu != prev_cpu &&
- cpus_share_cache(prev_cpu, i))))) {
- min_load = cpu_load;
- best_cpu = i;
+ for_each_cpu(i, &candidate_mask) {
+ if (sched_cpu_high_irqload(i))
+ continue;
+
+ cpu_load = scale_load_to_cpu(
+ cpu_rq(i)->hmp_stats.cumulative_runnable_avg, i);
+
+ if (cpu_load < min_load ||
+ (cpu_load == min_load &&
+ (i == prev_cpu || (best_cpu != prev_cpu &&
+ cpus_share_cache(prev_cpu, i))))) {
+ min_load = cpu_load;
+ best_cpu = i;
+ }
}
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 35a13974f34a..9e4f0887136c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -395,6 +395,13 @@ static inline int cluster_first_cpu(struct sched_cluster *cluster)
return cpumask_first(&cluster->cpus);
}
+extern struct list_head cluster_head;
+extern int num_clusters;
+extern struct sched_cluster *sched_cluster[NR_CPUS];
+
+#define for_each_sched_cluster(cluster) \
+ list_for_each_entry_rcu(cluster, &cluster_head, list)
+
#endif
/* CFS-related fields in a runqueue */
@@ -1031,6 +1038,11 @@ unsigned int cpu_temp(int cpu);
extern unsigned int nr_eligible_big_tasks(int cpu);
extern void update_up_down_migrate(void);
+static inline struct sched_cluster *cpu_cluster(int cpu)
+{
+ return cpu_rq(cpu)->cluster;
+}
+
static inline int cpu_capacity(int cpu)
{
return cpu_rq(cpu)->cluster->capacity;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index dcb852652bc8..1f2afa6eefaf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -411,13 +411,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = sched_hmp_proc_update_handler,
},
{
- .procname = "sched_power_band_limit",
- .data = &sysctl_sched_powerband_limit_pct,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = sched_hmp_proc_update_handler,
- },
- {
.procname = "sched_lowspill_freq",
.data = &sysctl_sched_lowspill_freq,
.maxlen = sizeof(unsigned int),