From 1ed33cf95476666656d510fab15d87ee04e2327d Mon Sep 17 00:00:00 2001
From: Joel Fernandes
Date: Mon, 28 Aug 2017 09:56:27 -0700
Subject: UPSTREAM: cpufreq: schedutil: Make iowait boost more energy efficient

Currently the iowait_boost feature in schedutil makes the frequency go to
max on iowait wakeups. This feature was added to handle a case that Peter
described, where the throughput of operations involving continuous I/O
requests [1] is reduced by running at a lower frequency; the lower
throughput itself keeps utilization low, which in turn keeps the frequency
low, so the system is "stuck".

Instead of going straight to max, it's also possible to achieve the same
effect by ramping up to max when repeated in_iowait wakeups happen. This
patch does that. We start from a lower frequency (policy->min) and double
the boost for every consecutive iowait update until we reach the maximum
iowait boost frequency (iowait_boost_max).

I ran a synthetic test (continuous O_DIRECT writes in a loop) on an x86
machine with intel_pstate in passive mode using schedutil. In this test
the iowait_boost value ramped from 800MHz to 4GHz in 60ms. The patch
achieves the same improved throughput as the existing behavior.

[1] https://patchwork.kernel.org/patch/9735885/

Change-Id: I4a018434a50f4ca29ec15b03465f6dc212e54423
Cc: Srinivas Pandruvada
Cc: Len Brown
Cc: Rafael J. Wysocki
Cc: Viresh Kumar
Cc: Ingo Molnar
Cc: Peter Zijlstra
Suggested-by: Peter Zijlstra
Suggested-by: Viresh Kumar
Signed-off-by: Joel Fernandes
---
 kernel/sched/cpufreq_schedutil.c | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e12309c1b07b..f6b95ca15023 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -64,6 +64,7 @@ struct sugov_cpu {
 	struct update_util_data update_util;
 	struct sugov_policy *sg_policy;
 
+	bool iowait_boost_pending;
 	unsigned long iowait_boost;
 	unsigned long iowait_boost_max;
 	u64 last_update;
@@ -224,30 +225,54 @@ static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
 			unsigned int flags)
 {
 	if (flags & SCHED_CPUFREQ_IOWAIT) {
-		sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+		if (sg_cpu->iowait_boost_pending)
+			return;
+
+		sg_cpu->iowait_boost_pending = true;
+
+		if (sg_cpu->iowait_boost) {
+			sg_cpu->iowait_boost <<= 1;
+			if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
+				sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+		} else {
+			sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+		}
 	} else if (sg_cpu->iowait_boost) {
 		s64 delta_ns = time - sg_cpu->last_update;
 
 		/* Clear iowait_boost if the CPU apprears to have been idle. */
-		if (delta_ns > TICK_NSEC)
+		if (delta_ns > TICK_NSEC) {
 			sg_cpu->iowait_boost = 0;
+			sg_cpu->iowait_boost_pending = false;
+		}
 	}
 }
 
 static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 			unsigned long *max)
 {
-	unsigned long boost_util = sg_cpu->iowait_boost;
-	unsigned long boost_max = sg_cpu->iowait_boost_max;
+	unsigned long boost_util, boost_max;
 
-	if (!boost_util)
+	if (!sg_cpu->iowait_boost)
 		return;
 
+	if (sg_cpu->iowait_boost_pending) {
+		sg_cpu->iowait_boost_pending = false;
+	} else {
+		sg_cpu->iowait_boost >>= 1;
+		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+			sg_cpu->iowait_boost = 0;
+			return;
+		}
+	}
+
+	boost_util = sg_cpu->iowait_boost;
+	boost_max = sg_cpu->iowait_boost_max;
+
 	if (*util * boost_max < *max * boost_util) {
 		*util = boost_util;
 		*max = boost_max;
 	}
-	sg_cpu->iowait_boost >>= 1;
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -320,6 +345,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
 		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
 		if (delta_ns > TICK_NSEC) {
 			j_sg_cpu->iowait_boost = 0;
+			j_sg_cpu->iowait_boost_pending = false;
 			continue;
 		}
 		if (j_sg_cpu->flags & SCHED_CPUFREQ_DL)
--
cgit v1.2.3

From 7842de4545c71363ef599173b66ab5e1933c43e1 Mon Sep 17 00:00:00 2001
From: Joel Fernandes
Date: Sun, 23 Jul 2017 08:54:26 -0700
Subject: UPSTREAM: cpufreq: schedutil: Use unsigned int for iowait boost

Make iowait_boost and iowait_boost_max unsigned int, since their unit is
kHz, which is consistent with struct cpufreq_policy. Also change the
local variables in sugov_iowait_boost() to match.

Change-Id: I6c67ed94c57c4bdb24bada4b97045593fcb95d2e
Cc: Srinivas Pandruvada
Cc: Len Brown
Cc: Rafael J. Wysocki
Cc: Viresh Kumar
Cc: Ingo Molnar
Cc: Peter Zijlstra
Signed-off-by: Joel Fernandes
---
 kernel/sched/cpufreq_schedutil.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index f6b95ca15023..a0bcf488fb81 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -65,8 +65,8 @@ struct sugov_cpu {
 	struct sugov_policy *sg_policy;
 
 	bool iowait_boost_pending;
-	unsigned long iowait_boost;
-	unsigned long iowait_boost_max;
+	unsigned int iowait_boost;
+	unsigned int iowait_boost_max;
 	u64 last_update;
 
 	/* The fields below are only needed when sharing a policy. */
@@ -251,7 +251,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 			unsigned long *max)
 {
-	unsigned long boost_util, boost_max;
+	unsigned int boost_util, boost_max;
 
 	if (!sg_cpu->iowait_boost)
 		return;
--
cgit v1.2.3
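The ramp-and-decay policy the two patches above implement can be seen in
isolation with a small stand-alone C sketch (user-space, not kernel code;
the 800 MHz min / 4 GHz max values mirror the test setup described in the
first commit message and are otherwise assumptions):

    #include <stdbool.h>
    #include <stdio.h>

    /* Mirrors the sugov_cpu fields used by the patch; values in kHz. */
    static unsigned int iowait_boost;
    static bool iowait_boost_pending;
    static const unsigned int policy_min = 800000, iowait_boost_max = 4000000;

    /* On an iowait wakeup: start at policy min, then double per wakeup. */
    static void set_iowait_boost(void)
    {
        if (iowait_boost_pending)
            return;
        iowait_boost_pending = true;
        if (iowait_boost) {
            iowait_boost <<= 1;
            if (iowait_boost > iowait_boost_max)
                iowait_boost = iowait_boost_max;
        } else {
            iowait_boost = policy_min;
        }
    }

    /* On a utilization update: consume the pending boost, else halve it. */
    static void decay_iowait_boost(void)
    {
        if (!iowait_boost)
            return;
        if (iowait_boost_pending)
            iowait_boost_pending = false;
        else if ((iowait_boost >>= 1) < policy_min)
            iowait_boost = 0;
    }

    int main(void)
    {
        for (int i = 0; i < 5; i++) {   /* five consecutive iowait wakeups */
            set_iowait_boost();
            decay_iowait_boost();
            printf("boost = %u kHz\n", iowait_boost);
        }
        return 0;   /* prints 800000, 1600000, 3200000, 4000000, 4000000 */
    }

With consecutive wakeups the boost doubles each update until it caps at
iowait_boost_max; once the wakeups stop, it halves each update and drops
to zero below policy min, which is the energy saving over jumping straight
to max.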
From 26b37261ea25c6928c6a69e77a8f7d39ee3267c9 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Fri, 26 May 2017 11:11:47 -0700
Subject: sched: deadline: WALT: account cumulative runnable avg

Account the cumulative runnable average for WALT CPU utilization
accounting.

Change-Id: I56934894e626dec183740eeaf89a57d2ef638143
Signed-off-by: Joonwoo Park
---
 kernel/sched/deadline.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9ec7f19b5020..2aae8b8b68e8 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -18,6 +18,8 @@
 #include
 
+#include "walt.h"
+
 struct dl_bandwidth def_dl_bandwidth;
 
 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
@@ -882,6 +884,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 	WARN_ON(!dl_prio(prio));
 	dl_rq->dl_nr_running++;
 	add_nr_running(rq_of_dl_rq(dl_rq), 1);
+	walt_inc_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
 
 	inc_dl_deadline(dl_rq, deadline);
 	inc_dl_migration(dl_se, dl_rq);
@@ -896,6 +899,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
 	WARN_ON(!dl_rq->dl_nr_running);
 	dl_rq->dl_nr_running--;
 	sub_nr_running(rq_of_dl_rq(dl_rq), 1);
+	walt_dec_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se));
 
 	dec_dl_deadline(dl_rq, dl_se->deadline);
 	dec_dl_migration(dl_se, dl_rq);
--
cgit v1.2.3

From 48f67ea85de468a9b3e47e723e7681cf7771dea6 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Fri, 26 May 2017 11:19:36 -0700
Subject: sched: WALT: fix broken cumulative runnable average accounting

When a running task's ravg.demand changes, update_history() adjusts
rq->cumulative_runnable_avg to reflect the change in CPU load. Currently
this fixup is broken: it accumulates the task's new demand without
subtracting the task's old demand. Fix the fixup logic to subtract the
task's old demand.

Change-Id: I61beb32a4850879ccb39b733f5564251e465bfeb
Signed-off-by: Joonwoo Park
---
 kernel/sched/walt.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 92c3aae8e056..166641ed1f39 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -111,8 +111,10 @@ walt_dec_cumulative_runnable_avg(struct rq *rq,
 
 static void
 fixup_cumulative_runnable_avg(struct rq *rq,
-			struct task_struct *p, s64 task_load_delta)
+			struct task_struct *p, u64 new_task_load)
 {
+	s64 task_load_delta = (s64)new_task_load - task_load(p);
+
 	rq->cumulative_runnable_avg += task_load_delta;
 	if ((s64)rq->cumulative_runnable_avg < 0)
 		panic("cra less than zero: tld: %lld, task_load(p) = %u\n",
--
cgit v1.2.3
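The arithmetic of the fixup fix above, reduced to a stand-alone sketch
(the demand numbers are invented for illustration):

    #include <stdio.h>

    static long long cumulative_runnable_avg;   /* the rq-wide sum */

    /* Fixed fixup: fold in (new demand - old demand), not just new demand. */
    static void fixup_cumulative_runnable_avg(unsigned int old_demand,
                                              unsigned int new_demand)
    {
        cumulative_runnable_avg += (long long)new_demand - old_demand;
    }

    int main(void)
    {
        cumulative_runnable_avg = 300;           /* task enqueued, demand 300 */
        fixup_cumulative_runnable_avg(300, 500); /* demand re-estimated to 500 */
        /* prints 500; the broken version added the full 500 and got 800 */
        printf("%lld\n", cumulative_runnable_avg);
        return 0;
    }

The broken variant kept adding the new demand on top of the old one, so
the rq sum drifted upward every time a task's demand was re-estimated.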
From ee4cebd75ed7b77132c39c0093923f9ff1bcafaa Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Thu, 8 Dec 2016 16:12:12 -0800
Subject: sched: EAS/WALT: use cr_avg instead of prev_runnable_sum

WALT accounts two major statistics: CPU load and cumulative task demand.
CPU load, which accumulates each CPU's absolute execution time, is for
CPU frequency guidance. Cumulative task demand, which reflects each CPU's
instantaneous load at a given time, is for task placement decisions.

Use cumulative task demand in cpu_util() for task placement, and
introduce cpu_util_freq() for frequency guidance.

Change-Id: Id928f01dbc8cb2a617cdadc584c1f658022565c5
Signed-off-by: Joonwoo Park
---
 kernel/sched/core.c  |  2 +-
 kernel/sched/fair.c  |  4 ++--
 kernel/sched/sched.h | 16 +++++++++++++++-
 3 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9307827cc7b1..4f97de8e0b18 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2992,7 +2992,7 @@ static void sched_freq_tick_pelt(int cpu)
 #ifdef CONFIG_SCHED_WALT
 static void sched_freq_tick_walt(int cpu)
 {
-	unsigned long cpu_utilization = cpu_util(cpu);
+	unsigned long cpu_utilization = cpu_util_freq(cpu);
 	unsigned long capacity_curr = capacity_curr_of(cpu);
 
 	if (walt_disabled || !sysctl_sched_use_walt_cpu_util)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c193e9b1c38f..3641dad3d4cc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4659,7 +4659,7 @@ static inline void hrtick_update(struct rq *rq)
 static bool cpu_overutilized(int cpu);
 unsigned long boosted_cpu_util(int cpu);
 #else
-#define boosted_cpu_util(cpu) cpu_util(cpu)
+#define boosted_cpu_util(cpu) cpu_util_freq(cpu)
 #endif
 
 #ifdef CONFIG_SMP
@@ -5937,7 +5937,7 @@ schedtune_task_margin(struct task_struct *task)
 unsigned long
 boosted_cpu_util(int cpu)
 {
-	unsigned long util = cpu_util(cpu);
+	unsigned long util = cpu_util_freq(cpu);
 	long margin = schedtune_cpu_margin(util, cpu);
 
 	trace_sched_boost_cpu(cpu, util, margin);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 029cf2bbeda2..73077f535e95 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1592,7 +1592,7 @@ static inline unsigned long __cpu_util(int cpu, int delta)
 #ifdef CONFIG_SCHED_WALT
 	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
-		util = cpu_rq(cpu)->prev_runnable_sum << SCHED_LOAD_SHIFT;
+		util = cpu_rq(cpu)->cumulative_runnable_avg << SCHED_LOAD_SHIFT;
 		do_div(util, walt_ravg_window);
 	}
 #endif
@@ -1608,6 +1608,20 @@ static inline unsigned long cpu_util(int cpu)
 	return __cpu_util(cpu, 0);
 }
 
+static inline unsigned long cpu_util_freq(int cpu)
+{
+	unsigned long util = cpu_rq(cpu)->cfs.avg.util_avg;
+	unsigned long capacity = capacity_orig_of(cpu);
+
+#ifdef CONFIG_SCHED_WALT
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
+		util = cpu_rq(cpu)->prev_runnable_sum << SCHED_LOAD_SHIFT;
+		do_div(util, walt_ravg_window);
+	}
+#endif
+	return (util >= capacity) ? capacity : util;
+}
+
 #endif
 
 #ifdef CONFIG_CPU_FREQ_GOV_SCHED
--
cgit v1.2.3
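Both helpers above convert a WALT window sum into capacity units the same
way: scale the accumulated time by SCHED_LOAD_SCALE and divide by the
window length. A sketch with assumed numbers (a 20 ms window and
SCHED_LOAD_SHIFT of 10; both are illustrative, not taken from this tree):

    #include <stdint.h>
    #include <stdio.h>

    #define SCHED_LOAD_SHIFT 10     /* assumed; SCHED_LOAD_SCALE == 1024 */

    int main(void)
    {
        uint64_t walt_ravg_window = 20000000;   /* 20 ms window, in ns */
        uint64_t prev_runnable_sum = 10000000;  /* CPU was busy 10 ms of it */

        /* util = busy fraction of the window, scaled to 0..1024 */
        uint64_t util = (prev_runnable_sum << SCHED_LOAD_SHIFT) / walt_ravg_window;
        printf("util = %llu\n", (unsigned long long)util);  /* 512 */
        return 0;
    }

The only difference between the two helpers is which window statistic
feeds the formula: cumulative_runnable_avg (instantaneous demand) for
placement, prev_runnable_sum (executed time) for frequency.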
From 2d7da09705d6560c61676376be3ae05a5019600a Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Mon, 16 Jan 2017 18:09:20 -0800
Subject: sched: EAS: schedfreq: fix CPU util over estimation

WALT CPU utilization reports the CPU load of all scheduler classes.
Therefore, adding the RT class's load on top of it causes frequency
overshoot. Fix this by not accounting the RT class load when requesting
capacity.

Change-Id: I29600d7af7ca8c00e0d2ff1e13872024ccaa72bf
Signed-off-by: Joonwoo Park
---
 kernel/sched/cpufreq_sched.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c
index f10d9f7d6d07..6ffb23adbcef 100644
--- a/kernel/sched/cpufreq_sched.c
+++ b/kernel/sched/cpufreq_sched.c
@@ -235,6 +235,18 @@ out:
 	cpufreq_cpu_put(policy);
 }
 
+#ifdef CONFIG_SCHED_WALT
+static inline unsigned long
+requested_capacity(struct sched_capacity_reqs *scr)
+{
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+		return scr->cfs;
+	return scr->cfs + scr->rt;
+}
+#else
+#define requested_capacity(scr) (scr->cfs + scr->rt)
+#endif
+
 void update_cpu_capacity_request(int cpu, bool request)
 {
 	unsigned long new_capacity;
@@ -245,7 +257,7 @@ void update_cpu_capacity_request(int cpu, bool request)
 
 	scr = &per_cpu(cpu_sched_capacity_reqs, cpu);
 
-	new_capacity = scr->cfs + scr->rt;
+	new_capacity = requested_capacity(scr);
 	new_capacity = new_capacity * capacity_margin / SCHED_CAPACITY_SCALE;
 	new_capacity += scr->dl;
--
cgit v1.2.3

From c8b8c92bbc8954ae18cba7eb0c1f3a06a244129c Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Fri, 20 Jan 2017 11:10:15 -0800
Subject: sched: WALT: fix potential overflow

Task demand and CPU util are u64 values, so shifting them left by
SCHED_LOAD_SHIFT before dividing can overflow. Divide by the pre-shifted
window size (walt_ravg_window >> SCHED_LOAD_SHIFT) instead.

Change-Id: If7ec1623e723026d3346201122aab0303a6d2ba2
Signed-off-by: Joonwoo Park
---
 kernel/sched/sched.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 73077f535e95..d4613c5be81d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1591,10 +1591,9 @@ static inline unsigned long __cpu_util(int cpu, int delta)
 	unsigned long capacity = capacity_orig_of(cpu);
 
 #ifdef CONFIG_SCHED_WALT
-	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
-		util = cpu_rq(cpu)->cumulative_runnable_avg << SCHED_LOAD_SHIFT;
-		do_div(util, walt_ravg_window);
-	}
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+		util = div64_u64(cpu_rq(cpu)->cumulative_runnable_avg,
+				walt_ravg_window >> SCHED_LOAD_SHIFT);
 #endif
 	delta += util;
 	if (delta < 0)
@@ -1614,10 +1613,9 @@ static inline unsigned long cpu_util_freq(int cpu)
 	unsigned long capacity = capacity_orig_of(cpu);
 
 #ifdef CONFIG_SCHED_WALT
-	if (!walt_disabled && sysctl_sched_use_walt_cpu_util) {
-		util = cpu_rq(cpu)->prev_runnable_sum << SCHED_LOAD_SHIFT;
-		do_div(util, walt_ravg_window);
-	}
+	if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+		util = div64_u64(cpu_rq(cpu)->prev_runnable_sum,
+				walt_ravg_window >> SCHED_LOAD_SHIFT);
 #endif
 	return (util >= capacity) ? capacity : util;
 }
--
cgit v1.2.3
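The overflow fixed above can be reproduced directly: a u64 sum shifted
left by SCHED_LOAD_SHIFT can wrap before the division, while dividing by
the pre-shifted window cannot. A sketch (the sum is exaggerated purely to
force the wrap):

    #include <stdint.h>
    #include <stdio.h>

    #define SCHED_LOAD_SHIFT 10

    int main(void)
    {
        uint64_t window = 20000000;         /* 20 ms in ns */
        uint64_t sum = UINT64_MAX / 512;    /* exaggerated to force a wrap */

        uint64_t shifted = (sum << SCHED_LOAD_SHIFT) / window;  /* wrapped */
        uint64_t divided = sum / (window >> SCHED_LOAD_SHIFT);  /* no wrap */

        printf("%llu vs %llu\n", (unsigned long long)shifted,
               (unsigned long long)divided);
        return 0;
    }

Pre-shifting the divisor trades a little precision in the window length
for immunity to intermediate overflow, which is the trade the patch makes.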
From f94958ffa75d4f18d6d6838629d71edee41103c5 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Wed, 25 Jan 2017 17:07:19 -0800
Subject: cpufreq: sched: WALT: don't apply capacity margin twice

With WALT, all scheduler classes' load is accounted in scr->cfs, and
update_cpu_capacity_request() adds the capacity margin. At present the
scheduler also adds the capacity margin in the tick path, so the margin
is applied twice. Fix this by using the margin-applied CPU utilization
only to check whether a frequency increase is needed.

Change-Id: Id7d8cc73b2e4eec70b274ca66e09bb0b16bf6f09
Signed-off-by: Joonwoo Park
(trivial rebase conflict)
Signed-off-by: Chris Redpath
---
 kernel/sched/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4f97de8e0b18..0b14b4634b8c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2999,13 +2999,13 @@ static void sched_freq_tick_walt(int cpu)
 		return sched_freq_tick_pelt(cpu);
 
 	/*
-	 * Add a margin to the WALT utilization.
+	 * Add a margin to the WALT utilization to check if we will need to
+	 * increase frequency.
 	 * NOTE: WALT tracks a single CPU signal for all the scheduling
 	 * classes, thus this margin is going to be added to the DL class as
 	 * well, which is something we do not do in sched_freq_tick_pelt case.
 	 */
-	cpu_utilization = add_capacity_margin(cpu_utilization);
-	if (cpu_utilization <= capacity_curr)
+	if (add_capacity_margin(cpu_utilization) <= capacity_curr)
 		return;
 
 	/*
--
cgit v1.2.3

From 94e5c965075b55a5dfd1c4cce580e2dfb0c7ffc3 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Wed, 25 Jan 2017 17:45:56 -0800
Subject: sched: EAS/WALT: take into account of waking task's load

WALT's cpu_util(cpu) reports the CPU's load without taking the waking
task's load into account. Thus cpu_overutilized() currently
underestimates the load on the waking task's previous CPU.

Take the task's load into account when determining whether the previous
CPU is overutilized, so that we can bail out early without running the
expensive energy_diff().

Change-Id: I30f146984a880ad2cc1b8a4ce35bd239a8c9a607
Signed-off-by: Joonwoo Park
(minor rebase conflicts)
Signed-off-by: Chris Redpath
---
 kernel/sched/fair.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3641dad3d4cc..94bb5786d8a7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4656,6 +4656,7 @@ static inline void hrtick_update(struct rq *rq)
 #endif
 
 #ifdef CONFIG_SMP
+static bool __cpu_overutilized(int cpu, int delta);
 static bool cpu_overutilized(int cpu);
 unsigned long boosted_cpu_util(int cpu);
 #else
@@ -5856,9 +5857,14 @@ static inline bool task_fits_max(struct task_struct *p, int cpu)
 	return __task_fits(p, cpu, 0);
 }
 
+static bool __cpu_overutilized(int cpu, int delta)
+{
+	return (capacity_of(cpu) * 1024) < ((cpu_util(cpu) + delta) * capacity_margin);
+}
+
 static bool cpu_overutilized(int cpu)
 {
-	return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
+	return __cpu_overutilized(cpu, 0);
 }
 
 #ifdef CONFIG_SCHED_TUNE
@@ -6577,6 +6583,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync
 	}
 
 	if (target_cpu != prev_cpu) {
+		int delta = 0;
 		struct energy_env eenv = {
 			.util_delta = task_util(p),
 			.src_cpu = prev_cpu,
 			.dst_cpu = target_cpu,
 			.task = p,
 		};
 
+#ifdef CONFIG_SCHED_WALT
+		if (!walt_disabled && sysctl_sched_use_walt_cpu_util)
+			delta = task_util(p);
+#endif
 		/* Not enough spare capacity on previous cpu */
-		if (cpu_overutilized(prev_cpu)) {
+		if (__cpu_overutilized(prev_cpu, delta)) {
 			schedstat_inc(p, se.statistics.nr_wakeups_secb_insuff_cap);
 			schedstat_inc(this_rq(), eas_stats.secb_insuff_cap);
 			goto unlock;
--
cgit v1.2.3
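The check __cpu_overutilized() performs above is a fixed-point comparison;
with an assumed capacity_margin of 1280 (roughly a 25% headroom, used here
only for illustration), the waking task's demand can tip a previously-fine
CPU over the threshold:

    #include <stdbool.h>
    #include <stdio.h>

    static const unsigned long capacity_margin = 1280;  /* assumed, ~1.25x */

    static bool cpu_overutilized(unsigned long capacity, unsigned long util,
                                 unsigned long delta)
    {
        /* same shape as the kernel check: cap * 1024 < (util + delta) * margin */
        return (capacity * 1024) < ((util + delta) * capacity_margin);
    }

    int main(void)
    {
        /* capacity 1024, util 700: fine on its own... */
        printf("%d\n", cpu_overutilized(1024, 700, 0));    /* 0 */
        /* ...but overutilized once a waking task of demand 200 is counted */
        printf("%d\n", cpu_overutilized(1024, 700, 200));  /* 1 */
        return 0;
    }

This is exactly the case the patch targets: without delta, the second call
would also return 0 and the expensive energy_diff() would run anyway.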
From 11b618a0b2fc8598474a518add7037d4005d83e8 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Mon, 24 Jul 2017 13:23:05 +0100
Subject: sched: EAS: fix incorrect energy delta calculation due to rounding
 error

To calculate the energy difference, we currently iterate over the CPUs
under the same sched domain to accumulate the total energy cost and then
compare before and after:

  for_each_domain(cpu)
	total_energy_before += (cpu_util * power) >> SCHED_CAPACITY_SHIFT;

  for_each_domain(cpu)
	total_energy_after += (cpu_util * power) >> SCHED_CAPACITY_SHIFT;

Doing so can incorrectly calculate and report abs(delta) > 0 when there
is actually no energy delta between before and after, because the same
accumulated total cpu_util of all the CPUs can be distributed differently
before and after, which causes different amounts of rounding error. Fix
this incorrectness by shifting just once, on the accumulated
total_energy.

Change-Id: I82f1e2e358367058960938b4ef81714f57e921cf
Signed-off-by: Joonwoo Park
(moved part to another commit)
Signed-off-by: Chris Redpath
---
 kernel/sched/fair.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 94bb5786d8a7..61c8952c7aaf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5473,10 +5473,8 @@ end:
  */
 static int sched_group_energy(struct energy_env *eenv)
 {
-	struct sched_domain *sd;
-	int cpu, total_energy = 0;
 	struct cpumask visit_cpus;
-	struct sched_group *sg;
+	u64 total_energy = 0;
 
 	WARN_ON(!eenv->sg_top->sge);
 
@@ -5484,8 +5482,8 @@ static int sched_group_energy(struct energy_env *eenv)
 	while (!cpumask_empty(&visit_cpus)) {
 		struct sched_group *sg_shared_cap = NULL;
-
-		cpu = cpumask_first(&visit_cpus);
+		int cpu = cpumask_first(&visit_cpus);
+		struct sched_domain *sd;
 
 		/*
 		 * Is the group utilization affected by cpus outside this
@@ -5497,7 +5495,7 @@ static int sched_group_energy(struct energy_env *eenv)
 			sg_shared_cap = sd->parent->groups;
 
 		for_each_domain(cpu, sd) {
-			sg = sd->groups;
+			struct sched_group *sg = sd->groups;
 
 			/* Has this sched_domain already been visited? */
 			if (sd->child && group_first_cpu(sg) != cpu)
@@ -5533,11 +5531,9 @@ static int sched_group_energy(struct energy_env *eenv)
 				idle_idx = group_idle_state(eenv, sg);
 				group_util = group_norm_util(eenv, sg);
 
-				sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power)
-						>> SCHED_CAPACITY_SHIFT;
+				sg_busy_energy = (group_util * sg->sge->cap_states[cap_idx].power);
 				sg_idle_energy = ((SCHED_LOAD_SCALE-group_util)
-						* sg->sge->idle_states[idle_idx].power)
-						>> SCHED_CAPACITY_SHIFT;
+						* sg->sge->idle_states[idle_idx].power);
 
 				total_energy += sg_busy_energy + sg_idle_energy;
 
@@ -5562,7 +5558,7 @@ next_cpu:
 			continue;
 		}
 
-		eenv->energy = total_energy;
+		eenv->energy = total_energy >> SCHED_CAPACITY_SHIFT;
 		return 0;
 	}
--
cgit v1.2.3
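The rounding error described above reduces to simple integer arithmetic:
shifting each group separately can drop almost one unit per group, so the
same total distributed differently yields different results, while
shifting the accumulated total once does not. A minimal demonstration
(the util values are invented):

    #include <stdint.h>
    #include <stdio.h>

    #define SCHED_CAPACITY_SHIFT 10

    int main(void)
    {
        /* the same total (1500 units of util * power) split two ways */
        uint64_t split_a = (1023 >> SCHED_CAPACITY_SHIFT)
                         + (477 >> SCHED_CAPACITY_SHIFT);   /* 0 + 0 = 0 */
        uint64_t split_b = (1024 >> SCHED_CAPACITY_SHIFT)
                         + (476 >> SCHED_CAPACITY_SHIFT);   /* 1 + 0 = 1 */
        uint64_t once = (1023 + 477) >> SCHED_CAPACITY_SHIFT; /* always 1 */

        /* a spurious delta of 1 between equal totals, versus none */
        printf("%llu %llu %llu\n", (unsigned long long)split_a,
               (unsigned long long)split_b, (unsigned long long)once);
        return 0;
    }

Widening total_energy to u64 in the patch is what makes the deferred
single shift safe: the unshifted products no longer risk overflowing int.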
From 3989a247e2744f437233a36a0183edf3b3dfc8f1 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Tue, 7 Mar 2017 18:08:24 -0800
Subject: sched: EAS: kill incorrect nohz idle cpu kick

EAS doesn't allow the NOHZ idle balancer until a CPU is overutilized.
However, nohz_kick_needed() can still return true, which wakes up an
idle CPU for nothing.

Change-Id: I6e548442e29e4f85cda695e4c7101dd591b12fe6
Signed-off-by: Joonwoo Park
---
 kernel/sched/fair.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 61c8952c7aaf..0f43ece69e8c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9768,12 +9768,12 @@ static inline bool nohz_kick_needed(struct rq *rq)
 		return true;
 
 	/* Do idle load balance if there have misfit task */
-	if (energy_aware() && rq->misfit_task)
-		return true;
+	if (energy_aware())
+		return rq->misfit_task;
 
 	rcu_read_lock();
 	sd = rcu_dereference(per_cpu(sd_busy, cpu));
-	if (sd && !energy_aware()) {
+	if (sd) {
 		sgc = sd->groups->sgc;
 		nr_busy = atomic_read(&sgc->nr_busy_cpus);
--
cgit v1.2.3

From 0caf1df0c520325c9352149795266709e989b222 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Tue, 16 May 2017 11:13:00 -0700
Subject: sched: WALT: fix window mis-alignment

The initial window start needs to be close to ktime ns = 0 so that it is
aligned with the scheduler tick.

Change-Id: Ia91f74efce2f910106622a054a6fcd507e763ca5
Signed-off-by: Joonwoo Park
---
 kernel/sched/walt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index 166641ed1f39..28e999554463 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -804,11 +804,11 @@ void walt_set_window_start(struct rq *rq)
 	int cpu = cpu_of(rq);
 	struct rq *sync_rq = cpu_rq(sync_cpu);
 
-	if (rq->window_start)
+	if (likely(rq->window_start))
 		return;
 
 	if (cpu == sync_cpu) {
-		rq->window_start = walt_ktime_clock();
+		rq->window_start = 1;
 	} else {
 		raw_spin_unlock(&rq->lock);
 		double_rq_lock(rq, sync_rq);
--
cgit v1.2.3

From 2f3e97a814c8f906d3334c85271aacb7a8783490 Mon Sep 17 00:00:00 2001
From: Florian Meier
Date: Thu, 14 Jul 2016 12:07:26 -0700
Subject: gcov: add support for gcc version >= 6

commit d02038f972538b93011d78c068f44514fbde0a8c upstream.

Link: http://lkml.kernel.org/r/20160701130914.GA23225@styxhp
Signed-off-by: Florian Meier
Reviewed-by: Peter Oberparleiter
Tested-by: Peter Oberparleiter
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
Signed-off-by: Greg Kroah-Hartman
---
 kernel/gcov/gcc_4_7.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index e25e92fb44fa..6a5c239c7669 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,7 @@
 #include
 #include "gcov.h"
 
-#if __GNUC__ == 5 && __GNUC_MINOR__ >= 1
+#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
 #define GCOV_COUNTERS			10
 #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
 #define GCOV_COUNTERS			9
--
cgit v1.2.3
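The compiler-version ladder the gcov patches maintain can be checked
outside the kernel. This sketch mirrors the cascade in
kernel/gcov/gcc_4_7.c as it stands after the gcc >= 6 patch above; the
#else fallback of 8 is the file's pre-existing value, which is not shown
in the hunk and is assumed here. The next commit prepends a GCC 7 branch
that drops the count back to 9:

    #include <stdio.h>

    /* Same cascade as kernel/gcov/gcc_4_7.c after the patch above. */
    #if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
    #define GCOV_COUNTERS 10
    #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
    #define GCOV_COUNTERS 9
    #else
    #define GCOV_COUNTERS 8
    #endif

    int main(void)
    {
        printf("gcc %d.%d -> GCOV_COUNTERS = %d\n",
               __GNUC__, __GNUC_MINOR__, GCOV_COUNTERS);
        return 0;
    }

Compiling this with different GCC versions shows which counter layout the
kernel's gcov core would assume for objects built by that compiler.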
From d255fffdb532caf103369a5894be942e528c3151 Mon Sep 17 00:00:00 2001
From: Martin Liska
Date: Fri, 12 May 2017 15:46:35 -0700
Subject: gcov: support GCC 7.1

commit 05384213436ab690c46d9dfec706b80ef8d671ab upstream.

Starting from GCC 7.1, __gcov_exit is a new symbol expected to be
implemented in a profiling runtime.

[akpm@linux-foundation.org: coding-style fixes]
[mliska@suse.cz: v2]
Link: http://lkml.kernel.org/r/e63a3c59-0149-c97e-4084-20ca8f146b26@suse.cz
Link: http://lkml.kernel.org/r/8c4084fa-3885-29fe-5fc4-0d4ca199c785@suse.cz
Signed-off-by: Martin Liska
Acked-by: Peter Oberparleiter
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
Signed-off-by: Greg Kroah-Hartman
---
 kernel/gcov/base.c    | 6 ++++++
 kernel/gcov/gcc_4_7.c | 4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c
index 7080ae1eb6c1..f850e906564b 100644
--- a/kernel/gcov/base.c
+++ b/kernel/gcov/base.c
@@ -98,6 +98,12 @@ void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters)
 }
 EXPORT_SYMBOL(__gcov_merge_icall_topn);
 
+void __gcov_exit(void)
+{
+	/* Unused. */
+}
+EXPORT_SYMBOL(__gcov_exit);
+
 /**
  * gcov_enable_events - enable event reporting through gcov_event()
  *
diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c
index 6a5c239c7669..46a18e72bce6 100644
--- a/kernel/gcov/gcc_4_7.c
+++ b/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
 #include
 #include "gcov.h"
 
-#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
+#if (__GNUC__ >= 7)
+#define GCOV_COUNTERS			9
+#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
 #define GCOV_COUNTERS			10
 #elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
 #define GCOV_COUNTERS			9
--
cgit v1.2.3

From 3482bbea6b3e9ed685ce42a77927dfeaec0be122 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Tue, 11 Apr 2017 00:20:41 +0200
Subject: BACKPORT: cpufreq: schedutil: Use policy-dependent transition delays

Make the schedutil governor take the initial (default) value of the
rate_limit_us sysfs attribute from the (new) transition_delay_us policy
parameter (to be set by the scaling driver). That will allow scaling
drivers to make schedutil use smaller default values of rate_limit_us
and reduce the default average time interval between consecutive
frequency changes.

Make intel_pstate set transition_delay_us to 500.

BACKPORT: Modified to support the separate up_rate_limit_us and
down_rate_limit_us (upstream just has a single rate_limit_us). Also
dropped the changes for intel_pstate as there's a merge conflict.

Change-Id: I62a8543879a4d8582cdcb31ebd55607705d1c8b1
Signed-off-by: Rafael J. Wysocki
Acked-by: Viresh Kumar
(cherry picked from commit 1b72e7fd304639f1cd49d1e11955c4974936d88c)
Signed-off-by: Brendan Jackman
---
 kernel/sched/cpufreq_schedutil.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index a0bcf488fb81..b90f7434e13b 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -615,7 +615,6 @@ static int sugov_init(struct cpufreq_policy *policy)
 {
 	struct sugov_policy *sg_policy;
 	struct sugov_tunables *tunables;
-	unsigned int lat;
 	int ret = 0;
 
 	/* State should be equivalent to EXIT */
@@ -654,12 +653,19 @@ static int sugov_init(struct cpufreq_policy *policy)
 		goto stop_kthread;
 	}
 
-	tunables->up_rate_limit_us = LATENCY_MULTIPLIER;
-	tunables->down_rate_limit_us = LATENCY_MULTIPLIER;
-	lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
-	if (lat) {
-		tunables->up_rate_limit_us *= lat;
-		tunables->down_rate_limit_us *= lat;
+	if (policy->up_transition_delay_us && policy->down_transition_delay_us) {
+		tunables->up_rate_limit_us = policy->up_transition_delay_us;
+		tunables->down_rate_limit_us = policy->down_transition_delay_us;
+	} else {
+		unsigned int lat;
+
+		tunables->up_rate_limit_us = LATENCY_MULTIPLIER;
+		tunables->down_rate_limit_us = LATENCY_MULTIPLIER;
+		lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
+		if (lat) {
+			tunables->up_rate_limit_us *= lat;
+			tunables->down_rate_limit_us *= lat;
+		}
 	}
 
 	policy->governor_data = sg_policy;
--
cgit v1.2.3

From ed48d9230e303aaff037d60fd866088c114184e5 Mon Sep 17 00:00:00 2001
From: Waiman Long
Date: Thu, 24 Aug 2017 12:04:29 -0400
Subject: cpuset: Fix incorrect memory_pressure control file mapping

commit 1c08c22c874ac88799cab1f78c40f46110274915 upstream.

The memory_pressure control file was incorrectly set up without a
private value (0, by default). As a result, this control file was
treated like memory_migrate on read. By adding back the
FILE_MEMORY_PRESSURE private value, the correct memory pressure value
will be returned.

Signed-off-by: Waiman Long
Signed-off-by: Tejun Heo
Fixes: 7dbdb199d3bf ("cgroup: replace cftype->mode with CFTYPE_WORLD_WRITABLE")
Signed-off-by: Greg Kroah-Hartman
---
 kernel/cpuset.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8ccd66a97c8b..2924b6faa469 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1910,6 +1910,7 @@ static struct cftype files[] = {
 	{
 		.name = "memory_pressure",
 		.read_u64 = cpuset_read_u64,
+		.private = FILE_MEMORY_PRESSURE,
 	},
 
 	{
--
cgit v1.2.3
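The class of bug fixed in the cpuset patch above is worth spelling out:
when several control files share one read callback that dispatches on
cft->private, an entry that omits .private silently gets 0 and is served
as whichever file maps to 0. A reduced, self-contained sketch
(hypothetical names and values, not the actual cgroup structures):

    #include <stdio.h>

    enum { FILE_MEMORY_MIGRATE, FILE_MEMORY_PRESSURE };   /* 0 and 1 */

    struct cftype {
        const char *name;
        int private;    /* selects behaviour in the shared callback */
    };

    static unsigned long read_u64(const struct cftype *cft)
    {
        switch (cft->private) {
        case FILE_MEMORY_MIGRATE:   return 11;  /* stand-in values */
        case FILE_MEMORY_PRESSURE:  return 22;
        default:                    return 0;
        }
    }

    int main(void)
    {
        /* .private left out: defaults to 0 == FILE_MEMORY_MIGRATE */
        struct cftype broken = { .name = "memory_pressure" };
        struct cftype fixed  = { .name = "memory_pressure",
                                 .private = FILE_MEMORY_PRESSURE };

        printf("broken reads %lu, fixed reads %lu\n",
               read_u64(&broken), read_u64(&fixed));    /* 11 vs 22 */
        return 0;
    }

The one-line kernel fix is exactly the second initializer: restoring the
explicit .private value so the shared callback dispatches correctly.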