 Documentation/scheduler/sched-hmp.txt | 18 +++++++++++++
 kernel/sched/core.c                   | 22 ++++++++++++++++
 kernel/sched/fair.c                   | 43 ++++++++++++++++++-----
 kernel/sched/sched.h                  |  2 ++
 4 files changed, 80 insertions(+), 5 deletions(-)
diff --git a/Documentation/scheduler/sched-hmp.txt b/Documentation/scheduler/sched-hmp.txt
index 97072a8bee02..5af8b6df1cb4 100644
--- a/Documentation/scheduler/sched-hmp.txt
+++ b/Documentation/scheduler/sched-hmp.txt
@@ -565,6 +565,24 @@ both tasks and CPUs to aid in the placement of tasks.
whose upmigrate_discourage flag is set) will never be classified as big tasks
despite their high demand.
+ Since the load scale factor is calculated against the current fmax, it gets
+ boosted when a lower capacity CPU is restricted to run at a lower fmax. Task
+ demand is inflated in this scenario and tasks upmigrate early to the
+ maximum capacity CPU. Hence the upmigrate threshold is auto-adjusted by a
+ factor equal to max_possible_frequency/current_frequency of the lower
+ capacity CPU. This adjustment happens only while the lower capacity CPU's
+ frequency is restricted. The same adjustment is applied to the downmigrate
+ threshold as well.
+
+ When the frequency restriction is relaxed, the previous values are restored.
+ The sched_up_down_migrate_auto_update macro, defined in kernel/sched/core.c,
+ controls this auto-adjustment behavior and is enabled by default.
+
+ If the adjusted upmigrate threshold exceeds the window size, it is clipped to
+ the window size. If the adjusted downmigrate threshold would shrink the gap
+ between the upmigrate and downmigrate thresholds below its original size, it
+ is clipped so that the adjusted thresholds keep the original gap between them.
+
- mostly_idle
The "mostly_idle" classification applies to CPUs. This
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b670bc883f91..e877e761fac5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2410,6 +2410,27 @@ static int compute_load_scale_factor(int cpu)
	return load_scale;
}
+#define sched_up_down_migrate_auto_update 1
+static void check_for_up_down_migrate_update(const struct cpumask *cpus)
+{
+	int i = cpumask_first(cpus);
+	struct rq *rq = cpu_rq(i);
+
+	if (!sched_up_down_migrate_auto_update)
+		return;
+
+	if (rq->max_possible_capacity == max_possible_capacity)
+		return;
+
+	if (rq->max_possible_freq == rq->max_freq)
+		up_down_migrate_scale_factor = 1024;
+	else
+		up_down_migrate_scale_factor =
+				(1024 * rq->max_possible_freq) / rq->max_freq;
+
+	update_up_down_migrate();
+}
+
static int cpufreq_notifier_policy(struct notifier_block *nb,
				unsigned long val, void *data)
{
@@ -2512,6 +2533,7 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
	}
	__update_min_max_capacity();
+	check_for_up_down_migrate_update(policy->related_cpus);
	post_big_small_task_count_change(cpu_possible_mask);
	return 0;
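
For context, check_for_up_down_migrate_update() above is reached through the
cpufreq policy notifier. A minimal sketch of how such a notifier is
registered; the init function name here is hypothetical, and the registration
site itself is not part of this patch:

	static struct notifier_block notifier_policy_block = {
		.notifier_call = cpufreq_notifier_policy
	};

	static int __init register_sched_cpufreq_notifier(void)
	{
		/* cpufreq calls back whenever a policy's limits change */
		return cpufreq_register_notifier(&notifier_policy_block,
						 CPUFREQ_POLICY_NOTIFIER);
	}
	core_initcall(register_sched_cpufreq_notifier);
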
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 06639a3def40..85831d2f47ed 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2777,6 +2777,14 @@ static int __read_mostly sched_upmigrate_min_nice = 15;
int __read_mostly sysctl_sched_upmigrate_min_nice = 15;
/*
+ * The load scale factor of a CPU gets boosted when its max frequency
+ * is restricted, causing tasks to migrate to higher capacity CPUs
+ * early. To compensate, the sched_upmigrate threshold is scaled up by
+ * rq->max_possible_freq/rq->max_freq of the lower capacity CPU.
+ */
+unsigned int up_down_migrate_scale_factor = 1024;
+
+/*
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
* with higher capacity than those where a task would have normally
* ended up with their load characteristics. Any entity enabling
@@ -2791,6 +2799,35 @@ static inline int available_cpu_capacity(int cpu)
	return rq->capacity;
}
+void update_up_down_migrate(void)
+{
+	unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct);
+	unsigned int down_migrate = pct_to_real(sysctl_sched_downmigrate_pct);
+	unsigned int delta;
+
+	if (up_down_migrate_scale_factor == 1024)
+		goto done;
+
+	delta = up_migrate - down_migrate;
+
+	up_migrate /= NSEC_PER_USEC;
+	up_migrate *= up_down_migrate_scale_factor;
+	up_migrate >>= 10;
+	up_migrate *= NSEC_PER_USEC;
+
+	up_migrate = min(up_migrate, sched_ravg_window);
+
+	down_migrate /= NSEC_PER_USEC;
+	down_migrate *= up_down_migrate_scale_factor;
+	down_migrate >>= 10;
+	down_migrate *= NSEC_PER_USEC;
+
+	down_migrate = min(down_migrate, up_migrate - delta);
+done:
+	sched_upmigrate = up_migrate;
+	sched_downmigrate = down_migrate;
+}
+
void set_hmp_defaults(void)
{
	sched_spill_load =
@@ -2799,11 +2836,7 @@ void set_hmp_defaults(void)
	sched_small_task =
		pct_to_real(sysctl_sched_small_task_pct);
-	sched_upmigrate =
-		pct_to_real(sysctl_sched_upmigrate_pct);
-
-	sched_downmigrate =
-		pct_to_real(sysctl_sched_downmigrate_pct);
+	update_up_down_migrate();
#ifdef CONFIG_SCHED_FREQ_INPUT
	sched_heavy_task =
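
Note that update_up_down_migrate() above drops the thresholds from ns to us
before applying the scale factor, presumably to keep the 32-bit intermediate
product from overflowing. A hypothetical illustration with an 80% threshold
on a 10 ms window:

	#include <stdio.h>

	int main(void)
	{
		unsigned int up_ns = 8000000;	/* 80% of a 10 ms window, in ns */
		unsigned int scale = 1280;	/* 1.25x in 1024-based fixed point */

		/* naive order: 8000000 * 1280 needs ~34 bits and wraps at 32 */
		unsigned int naive = (up_ns * scale) >> 10;

		/* the patch's order: scale in us, then convert back to ns */
		unsigned int safe = (((up_ns / 1000) * scale) >> 10) * 1000;

		printf("naive=%u safe=%u\n", naive, safe);	/* 1611392 vs 10000000 */
		return 0;
	}
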
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e485b120ff00..3865b31996b9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1001,6 +1001,7 @@ extern unsigned int sched_init_task_load_pelt;
extern unsigned int sched_init_task_load_windows;
extern u64 scale_load_to_cpu(u64 load, int cpu);
extern unsigned int sched_heavy_task;
+extern unsigned int up_down_migrate_scale_factor;
extern void reset_cpu_hmp_stats(int cpu, int reset_cra);
extern void fixup_nr_big_small_task(int cpu, int reset_stats);
extern unsigned int task_load(struct task_struct *p);
@@ -1009,6 +1010,7 @@ extern void sched_account_irqtime(int cpu, struct task_struct *curr,
				  u64 delta, u64 wallclock);
unsigned int cpu_temp(int cpu);
extern unsigned int nr_eligible_big_tasks(int cpu);
+extern void update_up_down_migrate(void);
static inline int capacity(struct rq *rq)
{