 Documentation/scheduler/sched-hmp.txt | 18 +++++++++++++
 kernel/sched/core.c                   | 22 ++++++++++++++++
 kernel/sched/fair.c                   | 43 ++++++++++++++++++-----
 kernel/sched/sched.h                  |  2 ++
 4 files changed, 80 insertions(+), 5 deletions(-)
diff --git a/Documentation/scheduler/sched-hmp.txt b/Documentation/scheduler/sched-hmp.txt
index 97072a8bee02..5af8b6df1cb4 100644
--- a/Documentation/scheduler/sched-hmp.txt
+++ b/Documentation/scheduler/sched-hmp.txt
@@ -565,6 +565,24 @@ both tasks and CPUs to aid in the placement of tasks.
whose upmigrate_discourage flag is set) will never be classified as big tasks
despite their high demand.
+ Since the load scale factor is calculated against the current fmax, it gets
+ boosted when a lower capacity CPU is restricted to run at a lower fmax. Task
+ demand is inflated in this scenario and tasks upmigrate early to the
+ maximum capacity CPU. Hence the upmigrate threshold is auto-adjusted by a
+ factor equal to max_possible_frequency/current_frequency of the lower
+ capacity CPU. This adjustment happens only while the lower capacity CPU's
+ frequency is restricted. The same adjustment is applied to the downmigrate
+ threshold as well.
+
+ When the frequency restriction is relaxed, the previous values are restored.
+ The sched_up_down_migrate_auto_update macro, defined in kernel/sched/core.c,
+ controls this auto-adjustment behavior and is enabled by default.
+
+ If the adjusted upmigrate threshold exceeds the window size, it is clipped to
+ the window size. If the adjusted downmigrate threshold would shrink the gap
+ between the upmigrate and downmigrate thresholds below its original size, it
+ is clipped so that the adjusted thresholds keep the original gap between them.
+
- mostly_idle
The "mostly_idle" classification applies to CPUs. This
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b670bc883f91..e877e761fac5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2410,6 +2410,27 @@ static int compute_load_scale_factor(int cpu)
	return load_scale;
}
+#define sched_up_down_migrate_auto_update 1
+static void check_for_up_down_migrate_update(const struct cpumask *cpus)
+{
+	int i = cpumask_first(cpus);
+	struct rq *rq = cpu_rq(i);
+
+	if (!sched_up_down_migrate_auto_update)
+		return;
+
+	if (rq->max_possible_capacity == max_possible_capacity)
+		return;
+
+	if (rq->max_possible_freq == rq->max_freq)
+		up_down_migrate_scale_factor = 1024;
+	else
+		up_down_migrate_scale_factor =
+				(1024 * rq->max_possible_freq) / rq->max_freq;
+
+	update_up_down_migrate();
+}
+
static int cpufreq_notifier_policy(struct notifier_block *nb,
				unsigned long val, void *data)
{
@@ -2512,6 +2533,7 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
	}
	__update_min_max_capacity();
+	check_for_up_down_migrate_update(policy->related_cpus);
	post_big_small_task_count_change(cpu_possible_mask);
	return 0;
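
For context, check_for_up_down_migrate_update() above is reached through the
cpufreq policy notifier. A minimal sketch of how such a notifier is
registered; the init function name here is hypothetical, and the registration
site itself is not part of this patch:

	static struct notifier_block notifier_policy_block = {
		.notifier_call = cpufreq_notifier_policy
	};

	static int __init register_sched_cpufreq_notifier(void)
	{
		/* cpufreq calls back whenever a policy's limits change */
		return cpufreq_register_notifier(&notifier_policy_block,
						 CPUFREQ_POLICY_NOTIFIER);
	}
	core_initcall(register_sched_cpufreq_notifier);
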
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 06639a3def40..85831d2f47ed 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2777,6 +2777,14 @@ static int __read_mostly sched_upmigrate_min_nice = 15;
int __read_mostly sysctl_sched_upmigrate_min_nice = 15;
/*
+ * The load scale factor of a CPU gets boosted when its max frequency
+ * is restricted, causing tasks to migrate to higher capacity CPUs
+ * early. To compensate, the sched_upmigrate threshold is scaled up by
+ * rq->max_possible_freq/rq->max_freq of the lower capacity CPU.
+ */
+unsigned int up_down_migrate_scale_factor = 1024;
+
+/*
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
* with higher capacity than those where a task would have normally
* ended up with their load characteristics. Any entity enabling
@@ -2791,6 +2799,35 @@ static inline int available_cpu_capacity(int cpu)
	return rq->capacity;
}
+void update_up_down_migrate(void)
+{
+	unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct);
+	unsigned int down_migrate = pct_to_real(sysctl_sched_downmigrate_pct);
+	unsigned int delta;
+
+	if (up_down_migrate_scale_factor == 1024)
+		goto done;
+
+	delta = up_migrate - down_migrate;
+
+	up_migrate /= NSEC_PER_USEC;
+	up_migrate *= up_down_migrate_scale_factor;
+	up_migrate >>= 10;
+	up_migrate *= NSEC_PER_USEC;
+
+	up_migrate = min(up_migrate, sched_ravg_window);
+
+	down_migrate /= NSEC_PER_USEC;
+	down_migrate *= up_down_migrate_scale_factor;
+	down_migrate >>= 10;
+	down_migrate *= NSEC_PER_USEC;
+
+	down_migrate = min(down_migrate, up_migrate - delta);
+done:
+	sched_upmigrate = up_migrate;
+	sched_downmigrate = down_migrate;
+}
+
void set_hmp_defaults(void)
{
	sched_spill_load =
@@ -2799,11 +2836,7 @@ void set_hmp_defaults(void)
	sched_small_task =
		pct_to_real(sysctl_sched_small_task_pct);
-	sched_upmigrate =
-		pct_to_real(sysctl_sched_upmigrate_pct);
-
-	sched_downmigrate =
-		pct_to_real(sysctl_sched_downmigrate_pct);
+	update_up_down_migrate();
#ifdef CONFIG_SCHED_FREQ_INPUT
	sched_heavy_task =
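
Note that update_up_down_migrate() above drops the thresholds from ns to us
before applying the scale factor, presumably to keep the 32-bit intermediate
product from overflowing. A hypothetical illustration with an 80% threshold
on a 10 ms window:

	#include <stdio.h>

	int main(void)
	{
		unsigned int up_ns = 8000000;	/* 80% of a 10 ms window, in ns */
		unsigned int scale = 1280;	/* 1.25x in 1024-based fixed point */

		/* naive order: 8000000 * 1280 needs ~34 bits and wraps at 32 */
		unsigned int naive = (up_ns * scale) >> 10;

		/* the patch's order: scale in us, then convert back to ns */
		unsigned int safe = (((up_ns / 1000) * scale) >> 10) * 1000;

		printf("naive=%u safe=%u\n", naive, safe);	/* 1611392 vs 10000000 */
		return 0;
	}
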
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e485b120ff00..3865b31996b9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1001,6 +1001,7 @@ extern unsigned int sched_init_task_load_pelt;
extern unsigned int sched_init_task_load_windows;
extern u64 scale_load_to_cpu(u64 load, int cpu);
extern unsigned int sched_heavy_task;
+extern unsigned int up_down_migrate_scale_factor;
extern void reset_cpu_hmp_stats(int cpu, int reset_cra);
extern void fixup_nr_big_small_task(int cpu, int reset_stats);
extern unsigned int task_load(struct task_struct *p);
@@ -1009,6 +1010,7 @@ extern void sched_account_irqtime(int cpu, struct task_struct *curr,
				  u64 delta, u64 wallclock);
unsigned int cpu_temp(int cpu);
extern unsigned int nr_eligible_big_tasks(int cpu);
+extern void update_up_down_migrate(void);
static inline int capacity(struct rq *rq)
{