Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/core.c     5
-rw-r--r--  kernel/sched/fair.c   516
-rw-r--r--  kernel/sched/rt.c      50
-rw-r--r--  kernel/sched/sched.h   12
-rw-r--r--  kernel/sysctl.c         7
5 files changed, 355 insertions, 235 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8bd6fbde7efe..8cdd373a8980 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1287,7 +1287,7 @@ static int compute_load_scale_factor(struct sched_cluster *cluster)
return load_scale;
}
-static struct list_head cluster_head;
+struct list_head cluster_head;
static DEFINE_MUTEX(cluster_lock);
static cpumask_t all_cluster_cpus = CPU_MASK_NONE;
DECLARE_BITMAP(all_cluster_ids, NR_CPUS);
@@ -1311,9 +1311,6 @@ static struct sched_cluster init_cluster = {
.dstate_wakeup_latency = 0,
};
-#define for_each_sched_cluster(cluster) \
- list_for_each_entry_rcu(cluster, &cluster_head, list)
-
void update_all_clusters_stats(void)
{
struct sched_cluster *cluster;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index be7b44f9a85f..950ab9229cfc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2702,15 +2702,9 @@ unsigned int __read_mostly sysctl_sched_spill_nr_run = 10;
*/
unsigned int __read_mostly sysctl_sched_enable_power_aware = 0;
-/*
- * This specifies the maximum percent power difference between 2
- * CPUs for them to be considered identical in terms of their
- * power characteristics (i.e. they are in the same power band).
- */
-unsigned int __read_mostly sysctl_sched_powerband_limit_pct;
-
unsigned int __read_mostly sysctl_sched_lowspill_freq;
unsigned int __read_mostly sysctl_sched_pack_freq = UINT_MAX;
+
/*
* CPUs with load greater than the sched_spill_load_threshold are not
* eligible for task placement. When all CPUs in a cluster achieve a
@@ -2894,18 +2888,6 @@ static inline u64 cpu_load_sync(int cpu, int sync)
return scale_load_to_cpu(cpu_cravg_sync(cpu, sync), cpu);
}
-static int
-spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq)
-{
- u64 total_load = task_load + cpu_load;
-
- if (total_load > sched_spill_load ||
- (rq->nr_running + 1) > sysctl_sched_spill_nr_run)
- return 1;
-
- return 0;
-}
-
static int boost_refcount;
static DEFINE_SPINLOCK(boost_lock);
static DEFINE_MUTEX(boost_mutex);
@@ -3010,33 +2992,11 @@ static int task_will_fit(struct task_struct *p, int cpu)
return task_load_will_fit(p, tload, cpu);
}
-static int eligible_cpu(u64 task_load, u64 cpu_load, int cpu, int sync)
-{
- if (sched_cpu_high_irqload(cpu))
- return 0;
-
- return !spill_threshold_crossed(task_load, cpu_load, cpu_rq(cpu));
-}
-
struct cpu_pwr_stats __weak *get_cpu_pwr_stats(void)
{
return NULL;
}
-int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost)
-{
- int delta, cost_limit;
-
- if (!base_cost || cpu_cost == base_cost ||
- !sysctl_sched_powerband_limit_pct)
- return 0;
-
- delta = cpu_cost - base_cost;
- cost_limit = div64_u64((u64)sysctl_sched_powerband_limit_pct *
- (u64)base_cost, 100);
- return abs(delta) > cost_limit;
-}
-
/*
* Return the cost of running task p on CPU cpu. This function
* currently assumes that task p is the only task which will run on
@@ -3108,59 +3068,270 @@ unlock:
}
+struct cpu_select_env {
+ struct task_struct *p;
+ u8 reason;
+ u8 need_idle:1;
+ u8 boost:1;
+ u8 sync:1;
+ u8 ignore_prev_cpu:1;
+ int prev_cpu;
+ DECLARE_BITMAP(candidate_list, NR_CPUS);
+ DECLARE_BITMAP(backup_list, NR_CPUS);
+ u64 task_load;
+ u64 cpu_load;
+};
+
+struct cluster_cpu_stats {
+ int best_idle_cpu, best_capacity_cpu, best_cpu, best_sibling_cpu;
+ int min_cost, best_sibling_cpu_cost;
+ u64 min_load, best_sibling_cpu_load;
+ s64 highest_spare_capacity;
+};
+
#define UP_MIGRATION 1
#define DOWN_MIGRATION 2
-#define IRQLOAD_MIGRATION 4
+#define IRQLOAD_MIGRATION 3
-static int skip_cluster(int tcpu, int cpu, int reason)
+static int
+spill_threshold_crossed(struct cpu_select_env *env, struct rq *rq)
{
- int skip;
+ u64 total_load;
- if (!reason)
+ total_load = env->task_load + env->cpu_load;
+
+ if (total_load > sched_spill_load ||
+ (rq->nr_running + 1) > sysctl_sched_spill_nr_run)
+ return 1;
+
+ return 0;
+}
+
+static int skip_cpu(int cpu, struct cpu_select_env *env)
+{
+ int tcpu = task_cpu(env->p);
+ int skip = 0;
+
+ if (!env->reason)
return 0;
- switch (reason) {
- case UP_MIGRATION:
- skip = (cpu_capacity(cpu) <= cpu_capacity(tcpu));
- break;
+ if (is_reserved(cpu))
+ return 1;
- case DOWN_MIGRATION:
- skip = (cpu_capacity(cpu) >= cpu_capacity(tcpu));
+ switch (env->reason) {
+ case UP_MIGRATION:
+ skip = !idle_cpu(cpu);
break;
-
case IRQLOAD_MIGRATION:
/* Purposely fall through */
-
default:
- return 0;
+ skip = (cpu == tcpu);
+ break;
}
return skip;
}
-static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu, int reason)
+static inline int
+acceptable_capacity(struct sched_cluster *cluster, struct cpu_select_env *env)
{
- int skip;
+ int tcpu;
- if (!reason)
- return 0;
-
- if (is_reserved(cpu))
+ if (!env->reason)
return 1;
- switch (reason) {
+ tcpu = task_cpu(env->p);
+ switch (env->reason) {
case UP_MIGRATION:
- skip = !idle_cpu(cpu);
- break;
+ return cluster->capacity > cpu_capacity(tcpu);
- case IRQLOAD_MIGRATION:
- /* Purposely fall through */
+ case DOWN_MIGRATION:
+ return cluster->capacity < cpu_capacity(tcpu);
default:
- skip = (rq == task_rq);
+ break;
}
- return skip;
+ return 1;
+}
+
+static int
+skip_cluster(struct sched_cluster *cluster, struct cpu_select_env *env)
+{
+ if (!acceptable_capacity(cluster, env)) {
+ __clear_bit(cluster->id, env->candidate_list);
+ return 1;
+ }
+
+ return 0;
+}
+
+static struct sched_cluster *
+select_least_power_cluster(struct cpu_select_env *env)
+{
+ struct sched_cluster *cluster;
+
+ for_each_sched_cluster(cluster) {
+ if (!skip_cluster(cluster, env)) {
+ int cpu = cluster_first_cpu(cluster);
+
+ env->task_load = scale_load_to_cpu(task_load(env->p),
+ cpu);
+ if (task_load_will_fit(env->p, env->task_load, cpu))
+ return cluster;
+
+ __set_bit(cluster->id, env->backup_list);
+ __clear_bit(cluster->id, env->candidate_list);
+ }
+ }
+
+ return NULL;
+}
+
+static struct sched_cluster *
+next_candidate(const unsigned long *list, int start, int end)
+{
+ int cluster_id;
+
+ cluster_id = find_next_bit(list, end, start - 1 + 1);
+ if (cluster_id >= end)
+ return NULL;
+
+ return sched_cluster[cluster_id];
+}
+
+static void update_spare_capacity(
+struct cluster_cpu_stats *stats, int cpu, int capacity, u64 cpu_load)
+{
+ s64 spare_capacity = sched_ravg_window - cpu_load;
+
+ if (spare_capacity > 0 &&
+ (spare_capacity > stats->highest_spare_capacity ||
+ (spare_capacity == stats->highest_spare_capacity &&
+ capacity > cpu_capacity(stats->best_capacity_cpu)))) {
+ stats->highest_spare_capacity = spare_capacity;
+ stats->best_capacity_cpu = cpu;
+ }
+}
+
+static inline void find_backup_cluster(
+struct cpu_select_env *env, struct cluster_cpu_stats *stats)
+{
+ struct sched_cluster *next = NULL;
+ int i;
+
+ while (!bitmap_empty(env->backup_list, num_clusters)) {
+ next = next_candidate(env->backup_list, 0, num_clusters);
+ __clear_bit(next->id, env->backup_list);
+ for_each_cpu_and(i, &env->p->cpus_allowed, &next->cpus) {
+ trace_sched_cpu_load_wakeup(cpu_rq(i), idle_cpu(i),
+ sched_irqload(i), power_cost(i, task_load(env->p) +
+ cpu_cravg_sync(i, env->sync)), 0);
+
+ update_spare_capacity(stats, i, next->capacity,
+ cpu_load_sync(i, env->sync));
+ }
+ }
+}
+
+struct sched_cluster *
+next_best_cluster(struct sched_cluster *cluster, struct cpu_select_env *env)
+{
+ struct sched_cluster *next = NULL;
+
+ __clear_bit(cluster->id, env->candidate_list);
+
+ do {
+ if (bitmap_empty(env->candidate_list, num_clusters))
+ return NULL;
+
+ next = next_candidate(env->candidate_list, 0, num_clusters);
+ if (next)
+ if (skip_cluster(next, env))
+ next = NULL;
+ } while (!next);
+
+ env->task_load = scale_load_to_cpu(task_load(env->p),
+ cluster_first_cpu(next));
+ return next;
+}
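
The cluster walk above is driven by two per-wakeup bitmaps: clusters the task does not fit in are demoted from candidate_list to backup_list, and next_candidate()/next_best_cluster() keep handing back the lowest-numbered remaining candidate. A minimal user-space sketch of that bookkeeping (illustrative names only, assuming sched_cluster[] is ordered from least to most power; not the kernel code):

/*
 * Model: visit clusters in ascending power order; a cluster the task
 * does not fit in is moved from the candidate set to the backup set
 * and only revisited by the backup (spare-capacity) pass.
 */
#include <stdbool.h>
#include <stdio.h>

#define NUM_CLUSTERS 3

static int pick_cluster(const bool task_fits[], unsigned int *candidates,
                        unsigned int *backups)
{
        for (int id = 0; id < NUM_CLUSTERS; id++) {
                if (!(*candidates & (1u << id)))
                        continue;
                if (task_fits[id])
                        return id;              /* least-power fitting cluster */
                *backups |= 1u << id;           /* remember as spill target */
                *candidates &= ~(1u << id);
        }
        return -1;                              /* caller falls back to backups */
}

int main(void)
{
        bool fits[NUM_CLUSTERS] = { false, true, true }; /* e.g. a big task */
        unsigned int candidates = (1u << NUM_CLUSTERS) - 1, backups = 0;

        printf("chosen cluster: %d, backups: 0x%x\n",
               pick_cluster(fits, &candidates, &backups), backups);
        return 0;
}
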
+
+static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
+ struct cpu_select_env *env)
+{
+ int cpu_cost;
+ int prev_cpu = env->prev_cpu;
+
+ cpu_cost = power_cost(cpu, task_load(env->p) +
+ cpu_cravg_sync(cpu, env->sync));
+ if (cpu_cost > stats->min_cost)
+ return;
+
+ if (cpu != prev_cpu && cpus_share_cache(prev_cpu, cpu)) {
+ if (stats->best_sibling_cpu_cost > cpu_cost ||
+ (stats->best_sibling_cpu_cost == cpu_cost &&
+ stats->best_sibling_cpu_load > env->cpu_load)) {
+
+ stats->best_sibling_cpu_cost = cpu_cost;
+ stats->best_sibling_cpu_load = env->cpu_load;
+ stats->best_sibling_cpu = cpu;
+ }
+ }
+
+ if ((cpu_cost < stats->min_cost) ||
+ ((stats->best_cpu != prev_cpu && stats->min_load > env->cpu_load) ||
+ cpu == prev_cpu)) {
+ if (env->need_idle) {
+ if (idle_cpu(cpu)) {
+ stats->min_cost = cpu_cost;
+ stats->best_idle_cpu = cpu;
+ }
+ } else {
+ stats->min_cost = cpu_cost;
+ stats->min_load = env->cpu_load;
+ stats->best_cpu = cpu;
+ }
+ }
+}
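
update_cluster_stats() orders CPUs primarily by power cost and secondarily by busy load, with the task's previous CPU favoured on ties; the sibling and need_idle bookkeeping is layered on top of that. A simplified, stand-alone model of the core ordering (hypothetical names and values, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

struct cpu_sample { int cpu; int cost; unsigned long long load; };

/* cost first, then load, then stick with prev_cpu on a full tie */
static bool better(const struct cpu_sample *a, const struct cpu_sample *b,
                   int prev_cpu)
{
        if (a->cost != b->cost)
                return a->cost < b->cost;
        if (a->load != b->load)
                return a->load < b->load;
        return a->cpu == prev_cpu;
}

int main(void)
{
        struct cpu_sample cpus[] = {
                { .cpu = 0, .cost = 120, .load = 300 },
                { .cpu = 1, .cost = 100, .load = 500 },
                { .cpu = 2, .cost = 100, .load = 400 },
        };
        int best = 0, prev_cpu = 0;

        for (int i = 1; i < 3; i++)
                if (better(&cpus[i], &cpus[best], prev_cpu))
                        best = i;
        printf("best cpu: %d\n", cpus[best].cpu);       /* prints 2 */
        return 0;
}
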
+
+static void find_best_cpu_in_cluster(struct sched_cluster *c,
+ struct cpu_select_env *env, struct cluster_cpu_stats *stats)
+{
+ int i;
+ struct cpumask search_cpus;
+
+ cpumask_and(&search_cpus, tsk_cpus_allowed(env->p), &c->cpus);
+ if (env->ignore_prev_cpu)
+ cpumask_clear_cpu(env->prev_cpu, &search_cpus);
+
+ for_each_cpu(i, &search_cpus) {
+ env->cpu_load = cpu_load_sync(i, env->sync);
+
+ trace_sched_cpu_load_wakeup(cpu_rq(i), idle_cpu(i),
+ sched_irqload(i),
+ power_cost(i, task_load(env->p) +
+ cpu_cravg_sync(i, env->sync)), 0);
+
+ if (unlikely(!cpu_active(i)) || skip_cpu(i, env))
+ continue;
+
+ update_spare_capacity(stats, i, c->capacity, env->cpu_load);
+
+ if (env->boost || sched_cpu_high_irqload(i) ||
+ spill_threshold_crossed(env, cpu_rq(i)))
+ continue;
+
+ update_cluster_stats(i, stats, env);
+ }
+}
+
+static inline void init_cluster_cpu_stats(struct cluster_cpu_stats *stats)
+{
+ stats->best_cpu = stats->best_idle_cpu = -1;
+ stats->best_capacity_cpu = stats->best_sibling_cpu = -1;
+ stats->min_cost = stats->best_sibling_cpu_cost = INT_MAX;
+ stats->min_load = stats->best_sibling_cpu_load = ULLONG_MAX;
+ stats->highest_spare_capacity = 0;
}
/*
@@ -3175,163 +3346,118 @@ static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu, int reason)
static inline int wake_to_idle(struct task_struct *p)
{
return (current->flags & PF_WAKE_UP_IDLE) ||
- (p->flags & PF_WAKE_UP_IDLE);
+ (p->flags & PF_WAKE_UP_IDLE);
}
-static inline bool short_sleep_task_waking(struct task_struct *p, int prev_cpu,
- const cpumask_t *search_cpus)
+static inline bool
+bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
{
+ int prev_cpu;
+ struct task_struct *task = env->p;
+ struct sched_cluster *cluster;
+
+ if (env->boost || env->reason || env->need_idle ||
+ !sched_short_sleep_task_threshold)
+ return false;
+
+ prev_cpu = env->prev_cpu;
+ if (!cpumask_test_cpu(prev_cpu, tsk_cpus_allowed(task)) ||
+ unlikely(!cpu_active(prev_cpu)))
+ return false;
+
/*
* This function should be used by task wake up path only as it's
* assuming p->last_switch_out_ts as last sleep time.
* p->last_switch_out_ts can denote last preemption time as well as
* last sleep time.
*/
- return (sched_short_sleep_task_threshold &&
- (p->ravg.mark_start - p->last_switch_out_ts <
- sched_short_sleep_task_threshold) &&
- cpumask_test_cpu(prev_cpu, search_cpus));
+ if (task->ravg.mark_start - task->last_switch_out_ts >=
+ sched_short_sleep_task_threshold)
+ return false;
+
+ env->task_load = scale_load_to_cpu(task_load(task), prev_cpu);
+ cluster = cpu_rq(prev_cpu)->cluster;
+
+ if (!task_load_will_fit(task, env->task_load, prev_cpu)) {
+
+ __set_bit(cluster->id, env->backup_list);
+ __clear_bit(cluster->id, env->candidate_list);
+ return false;
+ }
+
+ env->cpu_load = cpu_load_sync(prev_cpu, env->sync);
+ if (sched_cpu_high_irqload(prev_cpu) ||
+ spill_threshold_crossed(env, cpu_rq(prev_cpu))) {
+ update_spare_capacity(stats, prev_cpu,
+ cluster->capacity, env->cpu_load);
+ env->ignore_prev_cpu = 1;
+ return false;
+ }
+
+ return true;
}
/* return cheapest cpu that can fit this task */
static int select_best_cpu(struct task_struct *p, int target, int reason,
int sync)
{
- int i, best_cpu = -1, best_idle_cpu = -1, best_capacity_cpu = -1;
- int prev_cpu = task_cpu(p), best_sibling_cpu = -1;
- int cpu_cost, min_cost = INT_MAX, best_sibling_cpu_cost = INT_MAX;
- u64 tload, cpu_load, best_sibling_cpu_load = ULLONG_MAX;
- u64 min_load = ULLONG_MAX;
- s64 spare_capacity, highest_spare_capacity = 0;
- int boost = sched_boost();
- int need_idle = wake_to_idle(p);
+ struct sched_cluster *cluster;
+ struct cluster_cpu_stats stats;
bool fast_path = false;
- cpumask_t search_cpus;
- struct rq *trq;
-
- cpumask_and(&search_cpus, tsk_cpus_allowed(p), cpu_online_mask);
-
- if (!boost && !reason && !need_idle &&
- short_sleep_task_waking(p, prev_cpu, &search_cpus)) {
- cpu_load = cpu_load_sync(prev_cpu, sync);
- tload = scale_load_to_cpu(task_load(p), prev_cpu);
- if (eligible_cpu(tload, cpu_load, prev_cpu, sync) &&
- task_load_will_fit(p, tload, prev_cpu)) {
- fast_path = true;
- best_cpu = prev_cpu;
- goto done;
- }
-
- spare_capacity = sched_ravg_window - cpu_load;
- if (spare_capacity > 0) {
- highest_spare_capacity = spare_capacity;
- best_capacity_cpu = prev_cpu;
- }
- cpumask_clear_cpu(prev_cpu, &search_cpus);
- }
-
- trq = task_rq(p);
- for_each_cpu(i, &search_cpus) {
- struct rq *rq = cpu_rq(i);
-
- trace_sched_cpu_load_wakeup(cpu_rq(i), idle_cpu(i),
- sched_irqload(i),
- power_cost(i, task_load(p) + cpu_cravg_sync(i, sync)),
- cpu_temp(i));
-
- if (skip_cluster(task_cpu(p), i, reason)) {
- cpumask_andnot(&search_cpus, &search_cpus,
- &rq->cluster->cpus);
- continue;
- }
- if (skip_cpu(task_rq(p), rq, i, reason))
- continue;
-
- cpu_load = cpu_load_sync(i, sync);
- spare_capacity = sched_ravg_window - cpu_load;
+ struct cpu_select_env env = {
+ .p = p,
+ .reason = reason,
+ .need_idle = wake_to_idle(p),
+ .boost = sched_boost(),
+ .sync = sync,
+ .prev_cpu = target,
+ .ignore_prev_cpu = 0,
+ };
- /* Note the highest spare capacity CPU in the system */
- if (spare_capacity > 0 &&
- (spare_capacity > highest_spare_capacity ||
- (spare_capacity == highest_spare_capacity &&
- cpu_capacity(i) > cpu_capacity(best_capacity_cpu)))) {
- highest_spare_capacity = spare_capacity;
- best_capacity_cpu = i;
- }
+ bitmap_copy(env.candidate_list, all_cluster_ids, NR_CPUS);
+ bitmap_zero(env.backup_list, NR_CPUS);
- if (boost)
- continue;
+ init_cluster_cpu_stats(&stats);
- tload = scale_load_to_cpu(task_load(p), i);
- if (!eligible_cpu(tload, cpu_load, i, sync) ||
- !task_load_will_fit(p, tload, i))
- continue;
+ if (bias_to_prev_cpu(&env, &stats)) {
+ fast_path = true;
+ goto out;
+ }
- /*
- * The task will fit on this CPU and the CPU can accommodate it
- * under spill.
- */
+ rcu_read_lock();
+ cluster = select_least_power_cluster(&env);
- cpu_cost = power_cost(i, task_load(p) +
- cpu_cravg_sync(i, sync));
+ if (!cluster) {
+ rcu_read_unlock();
+ goto out;
+ }
- if (cpu_cost > min_cost)
- continue;
+ do {
+ find_best_cpu_in_cluster(cluster, &env, &stats);
- /*
- * If the task fits in a CPU in a lower power band, that
- * overrides all other considerations.
- */
- if (power_delta_exceeded(cpu_cost, min_cost)) {
- min_cost = cpu_cost;
- min_load = ULLONG_MAX;
- best_cpu = -1;
- }
+ } while ((cluster = next_best_cluster(cluster, &env)));
- if (i != prev_cpu && cpus_share_cache(prev_cpu, i)) {
- if (best_sibling_cpu_cost > cpu_cost ||
- (best_sibling_cpu_cost == cpu_cost &&
- best_sibling_cpu_load > cpu_load)) {
- best_sibling_cpu_cost = cpu_cost;
- best_sibling_cpu_load = cpu_load;
- best_sibling_cpu = i;
- }
- }
+ rcu_read_unlock();
- if ((cpu_cost < min_cost) ||
- ((best_cpu != prev_cpu && min_load > cpu_load) ||
- i == prev_cpu)) {
- if (need_idle) {
- if (idle_cpu(i)) {
- min_cost = cpu_cost;
- best_idle_cpu = i;
- }
- } else {
- min_cost = cpu_cost;
- min_load = cpu_load;
- best_cpu = i;
- }
- }
- }
+ if (stats.best_idle_cpu >= 0) {
+ target = stats.best_idle_cpu;
+ } else if (stats.best_cpu >= 0) {
+ if (stats.best_cpu != task_cpu(p) &&
+ stats.min_cost == stats.best_sibling_cpu_cost)
+ stats.best_cpu = stats.best_sibling_cpu;
- if (best_idle_cpu >= 0) {
- best_cpu = best_idle_cpu;
- } else if (best_cpu < 0 || boost) {
- if (unlikely(best_capacity_cpu < 0))
- best_cpu = prev_cpu;
- else
- best_cpu = best_capacity_cpu;
+ target = stats.best_cpu;
} else {
- if (best_cpu != prev_cpu && min_cost == best_sibling_cpu_cost)
- best_cpu = best_sibling_cpu;
+ find_backup_cluster(&env, &stats);
+ if (stats.best_capacity_cpu >= 0)
+ target = stats.best_capacity_cpu;
}
-done:
- trace_sched_task_load(p, boost, reason, sync, need_idle, fast_path,
- best_cpu);
-
- return best_cpu;
+out:
+ trace_sched_task_load(p, sched_boost(), env.reason, env.sync,
+ env.need_idle, fast_path, target);
+ return target;
}
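
After the cluster walk, the final target is resolved in a fixed order: an idle CPU found for a wake-to-idle request, otherwise the cheapest fitting CPU (swapped for an equal-cost cache-sharing sibling of the previous CPU when one was recorded), otherwise the highest-spare-capacity CPU found in the backup clusters, and finally the caller's original target. A compressed sketch of that fallback chain (illustrative only, sibling swap omitted):

#include <stdio.h>

static int resolve_target(int best_idle_cpu, int best_cpu,
                          int best_capacity_cpu, int orig_target)
{
        if (best_idle_cpu >= 0)
                return best_idle_cpu;
        if (best_cpu >= 0)
                return best_cpu;
        if (best_capacity_cpu >= 0)
                return best_capacity_cpu;
        return orig_target;
}

int main(void)
{
        /* no idle or fitting CPU found; fall back to a backup-cluster CPU */
        printf("target: %d\n", resolve_target(-1, -1, 4, 0)); /* prints 4 */
        return 0;
}
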
static void
@@ -3920,7 +4046,7 @@ unsigned int power_cost(int cpu, u64 demand)
}
static inline int
-spill_threshold_crossed(u64 task_load, u64 cpu_load, struct rq *rq)
+spill_threshold_crossed(struct cpu_select_env *env, struct rq *rq)
{
return 0;
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 1fd22539a334..e698cc3438c7 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1661,13 +1661,15 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
#ifdef CONFIG_SCHED_HMP
+
static int find_lowest_rq_hmp(struct task_struct *task)
{
struct cpumask *lowest_mask = *this_cpu_ptr(&local_cpu_mask);
- int cpu_cost, min_cost = INT_MAX;
- u64 cpu_load, min_load = ULLONG_MAX;
+ struct cpumask candidate_mask = CPU_MASK_NONE;
+ struct sched_cluster *cluster;
int best_cpu = -1;
int prev_cpu = task_cpu(task);
+ u64 cpu_load, min_load = ULLONG_MAX;
int i;
/* Make sure the mask is initialized first */
@@ -1686,36 +1688,26 @@ static int find_lowest_rq_hmp(struct task_struct *task)
* the best one based on our affinity and topology.
*/
- /* Skip performance considerations and optimize for power.
- * Worst case we'll be iterating over all CPUs here. CPU
- * online mask should be taken care of when constructing
- * the lowest_mask.
- */
- for_each_cpu(i, lowest_mask) {
- cpu_load = scale_load_to_cpu(
- cpu_rq(i)->hmp_stats.cumulative_runnable_avg, i);
- cpu_cost = power_cost(i, cpu_cravg_sync(i, 0));
- trace_sched_cpu_load_wakeup(cpu_rq(i), idle_cpu(i),
- sched_irqload(i), cpu_cost, cpu_temp(i));
-
- if (power_delta_exceeded(cpu_cost, min_cost)) {
- if (cpu_cost > min_cost)
- continue;
+ for_each_sched_cluster(cluster) {
+ cpumask_and(&candidate_mask, &cluster->cpus, lowest_mask);
- min_cost = cpu_cost;
- min_load = ULLONG_MAX;
- best_cpu = -1;
- }
-
- if (sched_cpu_high_irqload(i))
+ if (cpumask_empty(&candidate_mask))
continue;
- if (cpu_load < min_load ||
- (cpu_load == min_load &&
- (i == prev_cpu || (best_cpu != prev_cpu &&
- cpus_share_cache(prev_cpu, i))))) {
- min_load = cpu_load;
- best_cpu = i;
+ for_each_cpu(i, &candidate_mask) {
+ if (sched_cpu_high_irqload(i))
+ continue;
+
+ cpu_load = scale_load_to_cpu(
+ cpu_rq(i)->hmp_stats.cumulative_runnable_avg, i);
+
+ if (cpu_load < min_load ||
+ (cpu_load == min_load &&
+ (i == prev_cpu || (best_cpu != prev_cpu &&
+ cpus_share_cache(prev_cpu, i))))) {
+ min_load = cpu_load;
+ best_cpu = i;
+ }
}
}
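
find_lowest_rq_hmp() now intersects the RT lowest_mask with each cluster's CPU mask and tracks the least-loaded, low-IRQ candidate across the clusters it visits. A small user-space model of the masking and minimum-load scan (illustrative values and names, not the kernel code):

#include <stdio.h>

#define NR_MODEL_CPUS 8

int main(void)
{
        /* two clusters of four CPUs each; lowest_mask allows CPUs 2, 5, 6 */
        unsigned int cluster_cpus[] = { 0x0f, 0xf0 };
        unsigned int lowest_mask = (1u << 2) | (1u << 5) | (1u << 6);
        unsigned long long load[NR_MODEL_CPUS] = { 9, 9, 7, 9, 9, 3, 5, 9 };
        unsigned long long min_load = ~0ULL;
        int best_cpu = -1;

        for (int c = 0; c < 2; c++) {
                unsigned int candidates = cluster_cpus[c] & lowest_mask;

                if (!candidates)
                        continue;       /* no eligible CPU in this cluster */

                for (int i = 0; i < NR_MODEL_CPUS; i++) {
                        if (!(candidates & (1u << i)))
                                continue;
                        if (load[i] < min_load) {
                                min_load = load[i];
                                best_cpu = i;
                        }
                }
        }
        printf("best cpu: %d (load %llu)\n", best_cpu, min_load); /* 5, 3 */
        return 0;
}
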
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 35a13974f34a..9e4f0887136c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -395,6 +395,13 @@ static inline int cluster_first_cpu(struct sched_cluster *cluster)
return cpumask_first(&cluster->cpus);
}
+extern struct list_head cluster_head;
+extern int num_clusters;
+extern struct sched_cluster *sched_cluster[NR_CPUS];
+
+#define for_each_sched_cluster(cluster) \
+ list_for_each_entry_rcu(cluster, &cluster_head, list)
+
#endif
/* CFS-related fields in a runqueue */
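
for_each_sched_cluster() expands to an RCU list walk over cluster_head, so callers outside an existing RCU or cluster_lock section are expected to bracket the iteration with rcu_read_lock()/rcu_read_unlock(), as select_best_cpu() does in fair.c. A usage sketch (not part of this patch; the helper name is hypothetical):

static int count_cluster_cpus_sketch(void)
{
        struct sched_cluster *cluster;
        int cpus = 0;

        rcu_read_lock();
        for_each_sched_cluster(cluster)
                cpus += cpumask_weight(&cluster->cpus);
        rcu_read_unlock();

        return cpus;
}
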
@@ -1031,6 +1038,11 @@ unsigned int cpu_temp(int cpu);
extern unsigned int nr_eligible_big_tasks(int cpu);
extern void update_up_down_migrate(void);
+static inline struct sched_cluster *cpu_cluster(int cpu)
+{
+ return cpu_rq(cpu)->cluster;
+}
+
static inline int cpu_capacity(int cpu)
{
return cpu_rq(cpu)->cluster->capacity;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index dcb852652bc8..1f2afa6eefaf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -411,13 +411,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = sched_hmp_proc_update_handler,
},
{
- .procname = "sched_power_band_limit",
- .data = &sysctl_sched_powerband_limit_pct,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = sched_hmp_proc_update_handler,
- },
- {
.procname = "sched_lowspill_freq",
.data = &sysctl_sched_lowspill_freq,
.maxlen = sizeof(unsigned int),