From 1cb87c38cb81847938bbb4776d9c6f3afe8fd938 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Wed, 1 Nov 2017 14:11:06 +0800 Subject: sched: EAS: Fix the condition to distinguish energy before/after Before commit 5f8b3a757d65 ("sched/fair: consider task utilization in group_norm_util()"), eenv->util_delta is used to distinguish energy before and energy after in sched_group_energy(). After that commit, eenv->util_delta can not do that any more. In this commit, use trg_cpu to distinguish energy before/after in sched_group_energy(). Before apply this commit, cap_before/cap_delta is not correct: -0 [001] 147504.608920: sched_energy_diff: pid=7 comm=rcu_preempt src_cpu=1 dst_cpu=3 usage_delta=7 nrg_before=250 nrg_after=250 nrg_diff=0 cap_before=0 cap_after=528 cap_delta=1056 nrg_delta=0 nrg_payoff=0 After apply this commit, cap_before/cap_delta retrun to normal: -0 [001] 220.494011: sched_energy_diff: pid=7 comm=rcu_preempt src_cpu=1 dst_cpu=2 usage_delta=3 nrg_before=248 nrg_after=248 nrg_diff=0 cap_before=528 cap_after=528 cap_delta=0 nrg_delta=0 nrg_payoff=0 Signed-off-by: Ke Wang --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ac22d32a6255..06b814b58d20 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5534,13 +5534,13 @@ static int sched_group_energy(struct energy_env *eenv) if (sg->group_weight == 1) { /* Remove capacity of src CPU (before task move) */ - if (eenv->util_delta == 0 && + if (eenv->trg_cpu == eenv->src_cpu && cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg))) { eenv->cap.before = sg->sge->cap_states[cap_idx].cap; eenv->cap.delta -= eenv->cap.before; } /* Add capacity of dst CPU (after task move) */ - if (eenv->util_delta != 0 && + if (eenv->trg_cpu == eenv->dst_cpu && cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg))) { eenv->cap.after = sg->sge->cap_states[cap_idx].cap; eenv->cap.delta += eenv->cap.after; -- cgit v1.2.3 From 7d5a251c66be3516c14cffa80e6b076b37736971 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Mon, 30 Oct 2017 17:38:16 +0800 Subject: sched: EAS: update trg_cpu to backup_cpu if no energy saving for target_cpu If no energy saving for target_cpu in the calculation of energy_diff(), backup_cpu will be set as the new dst_cpu for the next calculation. At this point, we also need update the new trg_cpu as backup_cpu to make sure the subsequent calculation of energy_diff() is correct. Signed-off-by: Ke Wang --- kernel/sched/fair.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 06b814b58d20..3b429c5ce721 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6723,6 +6723,7 @@ static int select_energy_cpu_brute(struct task_struct *p, int prev_cpu, int sync /* No energy saving for target_cpu, try backup */ target_cpu = tmp_backup; eenv.dst_cpu = target_cpu; + eenv.trg_cpu = target_cpu; if (tmp_backup < 0 || tmp_backup == prev_cpu || energy_diff(&eenv) >= 0) { -- cgit v1.2.3 From 47c87b2654376e7dda646ca5a2af067c5d368ca7 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Wed, 1 Nov 2017 16:07:38 +0800 Subject: sched: EAS: Fix the calculation of group util in group_idle_state() util_delta becomes not zero in eenv_before, which will affect the calculation of grp_util in group_idle_state(). Fix it under the new condition. 
Change-Id: Ic3853bb45876a8e388afcbe4e72d25fc42b1d7b0 Signed-off-by: Ke Wang --- kernel/sched/fair.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3b429c5ce721..5c65f3ad6da1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5429,13 +5429,6 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) /* Take non-cpuidle idling into account (active idle/arch_cpu_idle()) */ state++; - /* - * Try to estimate if a deeper idle state is - * achievable when we move the task. - */ - for_each_cpu(i, sched_group_cpus(sg)) - grp_util += cpu_util(i); - src_in_grp = cpumask_test_cpu(eenv->src_cpu, sched_group_cpus(sg)); dst_in_grp = cpumask_test_cpu(eenv->dst_cpu, sched_group_cpus(sg)); if (src_in_grp == dst_in_grp) { @@ -5444,10 +5437,16 @@ static int group_idle_state(struct energy_env *eenv, struct sched_group *sg) */ goto end; } - /* add or remove util as appropriate to indicate what group util - * will be (worst case - no concurrent execution) after moving the task + + /* + * Try to estimate if a deeper idle state is + * achievable when we move the task. */ - grp_util += src_in_grp ? -eenv->util_delta : eenv->util_delta; + for_each_cpu(i, sched_group_cpus(sg)) { + grp_util += cpu_util_wake(i, eenv->task); + if (unlikely(i == eenv->trg_cpu)) + grp_util += eenv->util_delta; + } if (grp_util <= ((long)sg->sgc->max_capacity * (int)sg->group_weight)) { -- cgit v1.2.3 From cd04e987d1da0eadc25c2186bd6bd93f22c3e851 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 11 Sep 2017 17:05:49 -0700 Subject: ANDROID: sched/rt: add schedtune accounting This patch adds schedtune enqueue/dequeue to RT scheduling class. Change-Id: If416e64319d62191f3aedd675d3e9a21fe2102fb Signed-off-by: Joel Fernandes --- kernel/sched/rt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 069f8982867f..88f28e996249 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -9,6 +9,7 @@ #include #include "walt.h" +#include "tune.h" int sched_rr_timeslice = RR_TIMESLICE; @@ -1321,6 +1322,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); + + schedtune_enqueue_task(p, cpu_of(rq)); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -1332,6 +1335,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) walt_dec_cumulative_runnable_avg(rq, p); dequeue_pushable_task(rq, p); + schedtune_dequeue_task(p, cpu_of(rq)); } /* -- cgit v1.2.3 From d194ba5d712f051ff6c025f3484bb72f219764e3 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Mon, 11 Sep 2017 17:10:37 -0700 Subject: ANDROID: sched/rt: schedtune: Add boost retention to RT Boosted RT tasks can be deboosted quickly, this makes boost usless for RT tasks and causes lots of glitching. Use timers to prevent de-boost too soon and wait for long enough such that next enqueue happens after a threshold. While this can be solved in the governor, there are following advantages: - The approach used is governor-independent - Reduces boost group lock contention for frequently sleepers/wakers - Works with schedfreq without any other schedfreq hacks. 
Bug: 30210506 Change-Id: I41788b235586988be446505deb7c0529758a9898 Signed-off-by: Joel Fernandes --- kernel/sched/core.c | 1 + kernel/sched/rt.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 1 + 3 files changed, 156 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0c9e332ceb3b..3030633d8900 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2200,6 +2200,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) init_dl_task_timer(&p->dl); __dl_clear_params(p); + init_rt_schedtune_timer(&p->rt); INIT_LIST_HEAD(&p->rt.run_list); #ifdef CONFIG_PREEMPT_NOTIFIERS diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 88f28e996249..f41435e7f75d 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -7,6 +7,7 @@ #include #include +#include #include "walt.h" #include "tune.h" @@ -986,6 +987,73 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) return 0; } +#define RT_SCHEDTUNE_INTERVAL 50000000ULL + +static void sched_rt_update_capacity_req(struct rq *rq); + +static enum hrtimer_restart rt_schedtune_timer(struct hrtimer *timer) +{ + struct sched_rt_entity *rt_se = container_of(timer, + struct sched_rt_entity, + schedtune_timer); + struct task_struct *p = rt_task_of(rt_se); + struct rq *rq = task_rq(p); + + raw_spin_lock(&rq->lock); + + /* + * Nothing to do if: + * - task has switched runqueues + * - task isn't RT anymore + */ + if (rq != task_rq(p) || (p->sched_class != &rt_sched_class)) + goto out; + + /* + * If task got enqueued back during callback time, it means we raced + * with the enqueue on another cpu, that's Ok, just do nothing as + * enqueue path would have tried to cancel us and we shouldn't run + * Also check the schedtune_enqueued flag as class-switch on a + * sleeping task may have already canceled the timer and done dq + */ + if (p->on_rq || !rt_se->schedtune_enqueued) + goto out; + + /* + * RT task is no longer active, cancel boost + */ + rt_se->schedtune_enqueued = false; + schedtune_dequeue_task(p, cpu_of(rq)); + sched_rt_update_capacity_req(rq); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); +out: + raw_spin_unlock(&rq->lock); + + /* + * This can free the task_struct if no more references. + */ + put_task_struct(p); + + return HRTIMER_NORESTART; +} + +void init_rt_schedtune_timer(struct sched_rt_entity *rt_se) +{ + struct hrtimer *timer = &rt_se->schedtune_timer; + + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + timer->function = rt_schedtune_timer; + rt_se->schedtune_enqueued = false; +} + +static void start_schedtune_timer(struct sched_rt_entity *rt_se) +{ + struct hrtimer *timer = &rt_se->schedtune_timer; + + hrtimer_start(timer, ns_to_ktime(RT_SCHEDTUNE_INTERVAL), + HRTIMER_MODE_REL_PINNED); +} + /* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class. @@ -1323,7 +1391,33 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); + if (!schedtune_task_boost(p)) + return; + + /* + * If schedtune timer is active, that means a boost was already + * done, just cancel the timer so that deboost doesn't happen. + * Otherwise, increase the boost. If an enqueued timer was + * cancelled, put the task reference. 
+ */ + if (hrtimer_try_to_cancel(&rt_se->schedtune_timer) == 1) + put_task_struct(p); + + /* + * schedtune_enqueued can be true in the following situation: + * enqueue_task_rt grabs rq lock before timer fires + * or before its callback acquires rq lock + * schedtune_enqueued can be false if timer callback is running + * and timer just released rq lock, or if the timer finished + * running and canceling the boost + */ + if (rt_se->schedtune_enqueued) + return; + + rt_se->schedtune_enqueued = true; schedtune_enqueue_task(p, cpu_of(rq)); + sched_rt_update_capacity_req(rq); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -1335,7 +1429,20 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) walt_dec_cumulative_runnable_avg(rq, p); dequeue_pushable_task(rq, p); + + if (!rt_se->schedtune_enqueued) + return; + + if (flags == DEQUEUE_SLEEP) { + get_task_struct(p); + start_schedtune_timer(rt_se); + return; + } + + rt_se->schedtune_enqueued = false; schedtune_dequeue_task(p, cpu_of(rq)); + sched_rt_update_capacity_req(rq); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); } /* @@ -1375,6 +1482,33 @@ static void yield_task_rt(struct rq *rq) #ifdef CONFIG_SMP static int find_lowest_rq(struct task_struct *task); +/* + * Perform a schedtune dequeue and cancelation of boost timers if needed. + * Should be called only with the rq->lock held. + */ +static void schedtune_dequeue_rt(struct rq *rq, struct task_struct *p) +{ + struct sched_rt_entity *rt_se = &p->rt; + + BUG_ON(!raw_spin_is_locked(&rq->lock)); + + if (!rt_se->schedtune_enqueued) + return; + + /* + * Incase of class change cancel any active timers. If an enqueued + * timer was cancelled, put the task ref. + */ + if (hrtimer_try_to_cancel(&rt_se->schedtune_timer) == 1) + put_task_struct(p); + + /* schedtune_enqueued is true, deboost it */ + rt_se->schedtune_enqueued = false; + schedtune_dequeue_task(p, task_cpu(p)); + sched_rt_update_capacity_req(rq); + cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); +} + static int select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags, int sibling_count_hint) @@ -1429,6 +1563,19 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags, rcu_read_unlock(); out: + /* + * If previous CPU was different, make sure to cancel any active + * schedtune timers and deboost. + */ + if (task_cpu(p) != cpu) { + unsigned long fl; + struct rq *prq = task_rq(p); + + raw_spin_lock_irqsave(&prq->lock, fl); + schedtune_dequeue_rt(prq, p); + raw_spin_unlock_irqrestore(&prq->lock, fl); + } + return cpu; } @@ -2205,6 +2352,13 @@ static void rq_offline_rt(struct rq *rq) */ static void switched_from_rt(struct rq *rq, struct task_struct *p) { + /* + * On class switch from rt, always cancel active schedtune timers, + * this handles the cases where we switch class for a task that is + * already rt-dequeued but has a running timer. 
+ */ + schedtune_dequeue_rt(rq, p); + /* * If there are other RT tasks then we will reschedule * and the scheduling of the other RT tasks will handle diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0238e94b0a1e..028e232103c2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1398,6 +1398,7 @@ extern void resched_cpu(int cpu); extern struct rt_bandwidth def_rt_bandwidth; extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); +extern void init_rt_schedtune_timer(struct sched_rt_entity *rt_se); extern struct dl_bandwidth def_dl_bandwidth; extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime); -- cgit v1.2.3 From df147c9e336cfcb4183db1eb9552b0429060cd0d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Nov 2017 15:13:26 +0530 Subject: cpufreq: Drop schedfreq governor We all should be using (and improving) the schedutil governor now. Get rid of the non-upstream governor. Tested on Hikey. Change-Id: Ic660756536e5da51952738c3c18b94e31f58cd57 Signed-off-by: Viresh Kumar --- kernel/sched/Makefile | 1 - kernel/sched/core.c | 86 ------- kernel/sched/cpufreq_sched.c | 525 ------------------------------------------- kernel/sched/fair.c | 89 +------- kernel/sched/rt.c | 49 +--- kernel/sched/sched.h | 75 ------- kernel/sysctl.c | 7 - 7 files changed, 4 insertions(+), 828 deletions(-) delete mode 100644 kernel/sched/cpufreq_sched.c (limited to 'kernel') diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index ca0d94096170..d7ec4f7dd0d9 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -22,5 +22,4 @@ obj-$(CONFIG_SCHED_DEBUG) += debug.o obj-$(CONFIG_SCHED_TUNE) += tune.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o -obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3030633d8900..889fb1aff1e0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2983,91 +2983,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } -#ifdef CONFIG_CPU_FREQ_GOV_SCHED - -static inline -unsigned long add_capacity_margin(unsigned long cpu_capacity) -{ - cpu_capacity = cpu_capacity * capacity_margin; - cpu_capacity /= SCHED_CAPACITY_SCALE; - return cpu_capacity; -} - -static inline -unsigned long sum_capacity_reqs(unsigned long cfs_cap, - struct sched_capacity_reqs *scr) -{ - unsigned long total = add_capacity_margin(cfs_cap + scr->rt); - return total += scr->dl; -} - -unsigned long boosted_cpu_util(int cpu); -static void sched_freq_tick_pelt(int cpu) -{ - unsigned long cpu_utilization = boosted_cpu_util(cpu); - unsigned long capacity_curr = capacity_curr_of(cpu); - struct sched_capacity_reqs *scr; - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - if (sum_capacity_reqs(cpu_utilization, scr) < capacity_curr) - return; - - /* - * To make free room for a task that is building up its "real" - * utilization and to harm its performance the least, request - * a jump to a higher OPP as soon as the margin of free capacity - * is impacted (specified by capacity_margin). - * Remember CPU utilization in sched_capacity_reqs should be normalised. 
- */ - cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, cpu_utilization); -} - -#ifdef CONFIG_SCHED_WALT -static void sched_freq_tick_walt(int cpu) -{ - unsigned long cpu_utilization = cpu_util_freq(cpu); - unsigned long capacity_curr = capacity_curr_of(cpu); - - if (walt_disabled || !sysctl_sched_use_walt_cpu_util) - return sched_freq_tick_pelt(cpu); - - /* - * Add a margin to the WALT utilization to check if we will need to - * increase frequency. - * NOTE: WALT tracks a single CPU signal for all the scheduling - * classes, thus this margin is going to be added to the DL class as - * well, which is something we do not do in sched_freq_tick_pelt case. - */ - if (add_capacity_margin(cpu_utilization) <= capacity_curr) - return; - - /* - * It is likely that the load is growing so we - * keep the added margin in our request as an - * extra boost. - * Remember CPU utilization in sched_capacity_reqs should be normalised. - */ - cpu_utilization = cpu_utilization * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, cpu_utilization); - -} -#define _sched_freq_tick(cpu) sched_freq_tick_walt(cpu) -#else -#define _sched_freq_tick(cpu) sched_freq_tick_pelt(cpu) -#endif /* CONFIG_SCHED_WALT */ - -static void sched_freq_tick(int cpu) -{ - if (!sched_freq()) - return; - - _sched_freq_tick(cpu); -} -#else -static inline void sched_freq_tick(int cpu) { } -#endif /* CONFIG_CPU_FREQ_GOV_SCHED */ - /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. @@ -3088,7 +3003,6 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); update_cpu_load_active(rq); calc_global_load_tick(rq); - sched_freq_tick(cpu); raw_spin_unlock(&rq->lock); perf_event_task_tick(); diff --git a/kernel/sched/cpufreq_sched.c b/kernel/sched/cpufreq_sched.c deleted file mode 100644 index ec0aed7a8f96..000000000000 --- a/kernel/sched/cpufreq_sched.c +++ /dev/null @@ -1,525 +0,0 @@ -/* - * Copyright (C) 2015 Michael Turquette - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#define CREATE_TRACE_POINTS -#include - -#include "sched.h" - -#define THROTTLE_DOWN_NSEC 50000000 /* 50ms default */ -#define THROTTLE_UP_NSEC 500000 /* 500us default */ - -struct static_key __read_mostly __sched_freq = STATIC_KEY_INIT_FALSE; -static bool __read_mostly cpufreq_driver_slow; - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED -static struct cpufreq_governor cpufreq_gov_sched; -#endif - -static DEFINE_PER_CPU(unsigned long, enabled); -DEFINE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); - -struct gov_tunables { - struct gov_attr_set attr_set; - unsigned int up_throttle_nsec; - unsigned int down_throttle_nsec; -}; - -/** - * gov_data - per-policy data internal to the governor - * @up_throttle: next throttling period expiry if increasing OPP - * @down_throttle: next throttling period expiry if decreasing OPP - * @up_throttle_nsec: throttle period length in nanoseconds if increasing OPP - * @down_throttle_nsec: throttle period length in nanoseconds if decreasing OPP - * @task: worker thread for dvfs transition that may block/sleep - * @irq_work: callback used to wake up worker thread - * @requested_freq: last frequency requested by the sched governor - * - * struct gov_data is the per-policy cpufreq_sched-specific data structure. A - * per-policy instance of it is created when the cpufreq_sched governor receives - * the CPUFREQ_GOV_START condition and a pointer to it exists in the gov_data - * member of struct cpufreq_policy. - * - * Readers of this data must call down_read(policy->rwsem). Writers must - * call down_write(policy->rwsem). - */ -struct gov_data { - ktime_t up_throttle; - ktime_t down_throttle; - struct gov_tunables *tunables; - struct list_head tunables_hook; - struct task_struct *task; - struct irq_work irq_work; - unsigned int requested_freq; -}; - -static void cpufreq_sched_try_driver_target(struct cpufreq_policy *policy, - unsigned int freq) -{ - struct gov_data *gd = policy->governor_data; - - /* avoid race with cpufreq_sched_stop */ - if (!down_write_trylock(&policy->rwsem)) - return; - - __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); - - gd->up_throttle = ktime_add_ns(ktime_get(), - gd->tunables->up_throttle_nsec); - gd->down_throttle = ktime_add_ns(ktime_get(), - gd->tunables->down_throttle_nsec); - up_write(&policy->rwsem); -} - -static bool finish_last_request(struct gov_data *gd, unsigned int cur_freq) -{ - ktime_t now = ktime_get(); - - ktime_t throttle = gd->requested_freq < cur_freq ? - gd->down_throttle : gd->up_throttle; - - if (ktime_after(now, throttle)) - return false; - - while (1) { - int usec_left = ktime_to_ns(ktime_sub(throttle, now)); - - usec_left /= NSEC_PER_USEC; - trace_cpufreq_sched_throttled(usec_left); - usleep_range(usec_left, usec_left + 100); - now = ktime_get(); - if (ktime_after(now, throttle)) - return true; - } -} - -/* - * we pass in struct cpufreq_policy. 
This is safe because changing out the - * policy requires a call to __cpufreq_governor(policy, CPUFREQ_GOV_STOP), - * which tears down all of the data structures and __cpufreq_governor(policy, - * CPUFREQ_GOV_START) will do a full rebuild, including this kthread with the - * new policy pointer - */ -static int cpufreq_sched_thread(void *data) -{ - struct sched_param param; - struct cpufreq_policy *policy; - struct gov_data *gd; - unsigned int new_request = 0; - unsigned int last_request = 0; - int ret; - - policy = (struct cpufreq_policy *) data; - gd = policy->governor_data; - - param.sched_priority = 50; - ret = sched_setscheduler_nocheck(gd->task, SCHED_FIFO, ¶m); - if (ret) { - pr_warn("%s: failed to set SCHED_FIFO\n", __func__); - do_exit(-EINVAL); - } else { - pr_debug("%s: kthread (%d) set to SCHED_FIFO\n", - __func__, gd->task->pid); - } - - do { - new_request = gd->requested_freq; - if (new_request == last_request) { - set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) - break; - schedule(); - } else { - /* - * if the frequency thread sleeps while waiting to be - * unthrottled, start over to check for a newer request - */ - if (finish_last_request(gd, policy->cur)) - continue; - last_request = new_request; - cpufreq_sched_try_driver_target(policy, new_request); - } - } while (!kthread_should_stop()); - - return 0; -} - -static void cpufreq_sched_irq_work(struct irq_work *irq_work) -{ - struct gov_data *gd; - - gd = container_of(irq_work, struct gov_data, irq_work); - if (!gd) - return; - - wake_up_process(gd->task); -} - -static void update_fdomain_capacity_request(int cpu) -{ - unsigned int freq_new, index_new, cpu_tmp; - struct cpufreq_policy *policy; - struct gov_data *gd; - unsigned long capacity = 0; - - /* - * Avoid grabbing the policy if possible. A test is still - * required after locking the CPU's policy to avoid racing - * with the governor changing. - */ - if (!per_cpu(enabled, cpu)) - return; - - policy = cpufreq_cpu_get(cpu); - if (IS_ERR_OR_NULL(policy)) - return; - - if (policy->governor != &cpufreq_gov_sched || - !policy->governor_data) - goto out; - - gd = policy->governor_data; - - /* find max capacity requested by cpus in this policy */ - for_each_cpu(cpu_tmp, policy->cpus) { - struct sched_capacity_reqs *scr; - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu_tmp); - capacity = max(capacity, scr->total); - } - - /* Convert the new maximum capacity request into a cpu frequency */ - freq_new = capacity * policy->cpuinfo.max_freq >> SCHED_CAPACITY_SHIFT; - if (cpufreq_frequency_table_target(policy, policy->freq_table, - freq_new, CPUFREQ_RELATION_L, - &index_new)) - goto out; - freq_new = policy->freq_table[index_new].frequency; - - if (freq_new > policy->max) - freq_new = policy->max; - - if (freq_new < policy->min) - freq_new = policy->min; - - trace_cpufreq_sched_request_opp(cpu, capacity, freq_new, - gd->requested_freq); - if (freq_new == gd->requested_freq) - goto out; - - gd->requested_freq = freq_new; - - /* - * Throttling is not yet supported on platforms with fast cpufreq - * drivers. 
- */ - if (cpufreq_driver_slow) - irq_work_queue_on(&gd->irq_work, cpu); - else - cpufreq_sched_try_driver_target(policy, freq_new); - -out: - cpufreq_cpu_put(policy); -} - -#ifdef CONFIG_SCHED_WALT -static inline unsigned long -requested_capacity(struct sched_capacity_reqs *scr) -{ - if (!walt_disabled && sysctl_sched_use_walt_cpu_util) - return scr->cfs; - return scr->cfs + scr->rt; -} -#else -#define requested_capacity(scr) (scr->cfs + scr->rt) -#endif - -void update_cpu_capacity_request(int cpu, bool request) -{ - unsigned long new_capacity; - struct sched_capacity_reqs *scr; - - /* The rq lock serializes access to the CPU's sched_capacity_reqs. */ - lockdep_assert_held(&cpu_rq(cpu)->lock); - - scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - - new_capacity = requested_capacity(scr); - new_capacity = new_capacity * capacity_margin - / SCHED_CAPACITY_SCALE; - new_capacity += scr->dl; - - if (new_capacity == scr->total) - return; - - trace_cpufreq_sched_update_capacity(cpu, request, scr, new_capacity); - - scr->total = new_capacity; - if (request) - update_fdomain_capacity_request(cpu); -} - -static inline void set_sched_freq(void) -{ - static_key_slow_inc(&__sched_freq); -} - -static inline void clear_sched_freq(void) -{ - static_key_slow_dec(&__sched_freq); -} - -/* Tunables */ -static struct gov_tunables *global_tunables; - -static inline struct gov_tunables *to_tunables(struct gov_attr_set *attr_set) -{ - return container_of(attr_set, struct gov_tunables, attr_set); -} - -static ssize_t up_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - - return sprintf(buf, "%u\n", tunables->up_throttle_nsec); -} - -static ssize_t up_throttle_nsec_store(struct gov_attr_set *attr_set, - const char *buf, size_t count) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - int ret; - long unsigned int val; - - ret = kstrtoul(buf, 0, &val); - if (ret < 0) - return ret; - tunables->up_throttle_nsec = val; - return count; -} - -static ssize_t down_throttle_nsec_show(struct gov_attr_set *attr_set, char *buf) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - - return sprintf(buf, "%u\n", tunables->down_throttle_nsec); -} - -static ssize_t down_throttle_nsec_store(struct gov_attr_set *attr_set, - const char *buf, size_t count) -{ - struct gov_tunables *tunables = to_tunables(attr_set); - int ret; - long unsigned int val; - - ret = kstrtoul(buf, 0, &val); - if (ret < 0) - return ret; - tunables->down_throttle_nsec = val; - return count; -} - -static struct governor_attr up_throttle_nsec = __ATTR_RW(up_throttle_nsec); -static struct governor_attr down_throttle_nsec = __ATTR_RW(down_throttle_nsec); - -static struct attribute *schedfreq_attributes[] = { - &up_throttle_nsec.attr, - &down_throttle_nsec.attr, - NULL -}; - -static struct kobj_type tunables_ktype = { - .default_attrs = schedfreq_attributes, - .sysfs_ops = &governor_sysfs_ops, -}; - -static int cpufreq_sched_policy_init(struct cpufreq_policy *policy) -{ - struct gov_data *gd; - int cpu; - int rc; - - for_each_cpu(cpu, policy->cpus) - memset(&per_cpu(cpu_sched_capacity_reqs, cpu), 0, - sizeof(struct sched_capacity_reqs)); - - gd = kzalloc(sizeof(*gd), GFP_KERNEL); - if (!gd) - return -ENOMEM; - - policy->governor_data = gd; - - if (!global_tunables) { - gd->tunables = kzalloc(sizeof(*gd->tunables), GFP_KERNEL); - if (!gd->tunables) - goto free_gd; - - gd->tunables->up_throttle_nsec = - policy->cpuinfo.transition_latency ? 
- policy->cpuinfo.transition_latency : - THROTTLE_UP_NSEC; - gd->tunables->down_throttle_nsec = - THROTTLE_DOWN_NSEC; - - rc = kobject_init_and_add(&gd->tunables->attr_set.kobj, - &tunables_ktype, - get_governor_parent_kobj(policy), - "%s", cpufreq_gov_sched.name); - if (rc) - goto free_tunables; - - gov_attr_set_init(&gd->tunables->attr_set, - &gd->tunables_hook); - - pr_debug("%s: throttle_threshold = %u [ns]\n", - __func__, gd->tunables->up_throttle_nsec); - - if (!have_governor_per_policy()) - global_tunables = gd->tunables; - } else { - gd->tunables = global_tunables; - gov_attr_set_get(&global_tunables->attr_set, - &gd->tunables_hook); - } - - policy->governor_data = gd; - if (cpufreq_driver_is_slow()) { - cpufreq_driver_slow = true; - gd->task = kthread_create(cpufreq_sched_thread, policy, - "kschedfreq:%d", - cpumask_first(policy->related_cpus)); - if (IS_ERR_OR_NULL(gd->task)) { - pr_err("%s: failed to create kschedfreq thread\n", - __func__); - goto free_tunables; - } - get_task_struct(gd->task); - kthread_bind_mask(gd->task, policy->related_cpus); - wake_up_process(gd->task); - init_irq_work(&gd->irq_work, cpufreq_sched_irq_work); - } - - set_sched_freq(); - - return 0; - -free_tunables: - kfree(gd->tunables); -free_gd: - policy->governor_data = NULL; - kfree(gd); - return -ENOMEM; -} - -static int cpufreq_sched_policy_exit(struct cpufreq_policy *policy) -{ - unsigned int count; - struct gov_data *gd = policy->governor_data; - - clear_sched_freq(); - if (cpufreq_driver_slow) { - kthread_stop(gd->task); - put_task_struct(gd->task); - } - - count = gov_attr_set_put(&gd->tunables->attr_set, &gd->tunables_hook); - if (!count) { - if (!have_governor_per_policy()) - global_tunables = NULL; - kfree(gd->tunables); - } - - policy->governor_data = NULL; - - kfree(gd); - return 0; -} - -static int cpufreq_sched_start(struct cpufreq_policy *policy) -{ - int cpu; - - for_each_cpu(cpu, policy->cpus) - per_cpu(enabled, cpu) = 1; - - return 0; -} - -static void cpufreq_sched_limits(struct cpufreq_policy *policy) -{ - unsigned int clamp_freq; - struct gov_data *gd = policy->governor_data;; - - pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz\n", - policy->cpu, policy->min, policy->max, - policy->cur); - - clamp_freq = clamp(gd->requested_freq, policy->min, policy->max); - - if (policy->cur != clamp_freq) - __cpufreq_driver_target(policy, clamp_freq, CPUFREQ_RELATION_L); -} - -static int cpufreq_sched_stop(struct cpufreq_policy *policy) -{ - int cpu; - - for_each_cpu(cpu, policy->cpus) - per_cpu(enabled, cpu) = 0; - - return 0; -} - -static int cpufreq_sched_setup(struct cpufreq_policy *policy, - unsigned int event) -{ - switch (event) { - case CPUFREQ_GOV_POLICY_INIT: - return cpufreq_sched_policy_init(policy); - case CPUFREQ_GOV_POLICY_EXIT: - return cpufreq_sched_policy_exit(policy); - case CPUFREQ_GOV_START: - return cpufreq_sched_start(policy); - case CPUFREQ_GOV_STOP: - return cpufreq_sched_stop(policy); - case CPUFREQ_GOV_LIMITS: - cpufreq_sched_limits(policy); - break; - } - return 0; -} - - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHED -static -#endif -struct cpufreq_governor cpufreq_gov_sched = { - .name = "sched", - .governor = cpufreq_sched_setup, - .owner = THIS_MODULE, -}; - -static int __init cpufreq_sched_init(void) -{ - int cpu; - - for_each_cpu(cpu, cpu_possible_mask) - per_cpu(enabled, cpu) = 0; - return cpufreq_register_governor(&cpufreq_gov_sched); -} - -/* Try to make this the default governor */ -fs_initcall(cpufreq_sched_init); diff --git a/kernel/sched/fair.c 
b/kernel/sched/fair.c index 5c65f3ad6da1..b5ea66e5551c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -54,7 +54,6 @@ unsigned int sysctl_sched_latency = 6000000ULL; unsigned int normalized_sysctl_sched_latency = 6000000ULL; unsigned int sysctl_sched_sync_hint_enable = 1; -unsigned int sysctl_sched_initial_task_util = 0; unsigned int sysctl_sched_cstate_aware = 1; #ifdef CONFIG_SCHED_WALT @@ -750,9 +749,7 @@ void init_entity_runnable_average(struct sched_entity *se) sa->load_sum = sa->load_avg * LOAD_AVG_MAX; /* * In previous Android versions, we used to have: - * sa->util_avg = sched_freq() ? - * sysctl_sched_initial_task_util : - * scale_load_down(SCHED_LOAD_SCALE); + * sa->util_avg = scale_load_down(SCHED_LOAD_SCALE); * sa->util_sum = sa->util_avg * LOAD_AVG_MAX; * However, that functionality has been moved to enqueue. * It is unclear if we should restore this in enqueue. @@ -4668,21 +4665,6 @@ unsigned long boosted_cpu_util(int cpu); #define boosted_cpu_util(cpu) cpu_util_freq(cpu) #endif -#ifdef CONFIG_SMP -static void update_capacity_of(int cpu) -{ - unsigned long req_cap; - - if (!sched_freq()) - return; - - /* Normalize scale-invariant capacity to cpu. */ - req_cap = boosted_cpu_util(cpu); - req_cap = req_cap * SCHED_CAPACITY_SCALE / capacity_orig_of(cpu); - set_cfs_cpu_capacity(cpu, true, req_cap); -} -#endif - /* * The enqueue_task method is called before nr_running is * increased. Here we update the fair scheduling stats and @@ -4695,7 +4677,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) struct sched_entity *se = &p->se; #ifdef CONFIG_SMP int task_new = flags & ENQUEUE_WAKEUP_NEW; - int task_wakeup = flags & ENQUEUE_WAKEUP; #endif /* @@ -4769,16 +4750,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) rq->rd->overutilized = true; trace_sched_overutilized(true); } - - /* - * We want to potentially trigger a freq switch - * request only for tasks that are waking up; this is - * because we get here also during load balancing, but - * in these cases it seems wise to trigger as single - * request after load balancing is done. - */ - if (task_new || task_wakeup) - update_capacity_of(cpu_of(rq)); } #endif /* CONFIG_SMP */ @@ -4854,25 +4825,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) */ schedtune_dequeue_task(p, cpu_of(rq)); - if (!se) { + if (!se) walt_dec_cumulative_runnable_avg(rq, p); - - /* - * We want to potentially trigger a freq switch - * request only for tasks that are going to sleep; - * this is because we get here also during load - * balancing, but in these cases it seems wise to - * trigger as single request after load balancing is - * done. - */ - if (task_sleep) { - if (rq->cfs.nr_running) - update_capacity_of(cpu_of(rq)); - else if (sched_freq()) - set_cfs_cpu_capacity(cpu_of(rq), false, 0); /* no normalization required for 0 */ - } - } - #endif /* CONFIG_SMP */ hrtick_update(rq); @@ -7709,10 +7663,6 @@ static void attach_one_task(struct rq *rq, struct task_struct *p) { raw_spin_lock(&rq->lock); attach_task(rq, p); - /* - * We want to potentially raise target_cpu's OPP. - */ - update_capacity_of(cpu_of(rq)); raw_spin_unlock(&rq->lock); } @@ -7734,11 +7684,6 @@ static void attach_tasks(struct lb_env *env) attach_task(env->dst_rq, p); } - /* - * We want to potentially raise env.dst_cpu's OPP. 
- */ - update_capacity_of(env->dst_cpu); - raw_spin_unlock(&env->dst_rq->lock); } @@ -9081,11 +9026,6 @@ more_balance: * ld_moved - cumulative load moved across iterations */ cur_ld_moved = detach_tasks(&env); - /* - * We want to potentially lower env.src_cpu's OPP. - */ - if (cur_ld_moved) - update_capacity_of(env.src_cpu); /* * We've detached some tasks from busiest_rq. Every @@ -9310,7 +9250,6 @@ static int idle_balance(struct rq *this_rq) struct sched_domain *sd; int pulled_task = 0; u64 curr_cost = 0; - long removed_util=0; idle_enter_fair(this_rq); @@ -9334,17 +9273,6 @@ static int idle_balance(struct rq *this_rq) raw_spin_unlock(&this_rq->lock); - /* - * If removed_util_avg is !0 we most probably migrated some task away - * from this_cpu. In this case we might be willing to trigger an OPP - * update, but we want to do so if we don't find anybody else to pull - * here (we will trigger an OPP update with the pulled task's enqueue - * anyway). - * - * Record removed_util before calling update_blocked_averages, and use - * it below (before returning) to see if an OPP update is required. - */ - removed_util = atomic_long_read(&(this_rq->cfs).removed_util_avg); update_blocked_averages(this_cpu); rcu_read_lock(); for_each_domain(this_cpu, sd) { @@ -9409,12 +9337,6 @@ out: if (pulled_task) { idle_exit_fair(this_rq); this_rq->idle_stamp = 0; - } else if (removed_util) { - /* - * No task pulled and someone has been migrated away. - * Good case to trigger an OPP update. - */ - update_capacity_of(this_cpu); } return pulled_task; @@ -9488,13 +9410,8 @@ static int active_load_balance_cpu_stop(void *data) update_rq_clock(busiest_rq); p = detach_one_task(&env); - if (p) { + if (p) schedstat_inc(sd, alb_pushed); - /* - * We want to potentially lower env.src_cpu's OPP. - */ - update_capacity_of(env.src_cpu); - } else schedstat_inc(sd, alb_failed); } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f41435e7f75d..ebf0d9329c86 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1636,41 +1636,6 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag #endif } -#ifdef CONFIG_SMP -static void sched_rt_update_capacity_req(struct rq *rq) -{ - u64 total, used, age_stamp, avg; - s64 delta; - - if (!sched_freq()) - return; - - sched_avg_update(rq); - /* - * Since we're reading these variables without serialization make sure - * we read them once before doing sanity checks on them. - */ - age_stamp = READ_ONCE(rq->age_stamp); - avg = READ_ONCE(rq->rt_avg); - delta = rq_clock(rq) - age_stamp; - - if (unlikely(delta < 0)) - delta = 0; - - total = sched_avg_period() + delta; - - used = div_u64(avg, total); - if (unlikely(used > SCHED_CAPACITY_SCALE)) - used = SCHED_CAPACITY_SCALE; - - set_rt_cpu_capacity(rq->cpu, 1, (unsigned long)(used)); -} -#else -static inline void sched_rt_update_capacity_req(struct rq *rq) -{ } - -#endif - static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, struct rt_rq *rt_rq) { @@ -1739,17 +1704,8 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) if (prev->sched_class == &rt_sched_class) update_curr_rt(rq); - if (!rt_rq->rt_queued) { - /* - * The next task to be picked on this rq will have a lower - * priority than rt tasks so we can spend some time to update - * the capacity used by rt tasks based on the last activity. - * This value will be the used as an estimation of the next - * activity. 
- */ - sched_rt_update_capacity_req(rq); + if (!rt_rq->rt_queued) return NULL; - } put_prev_task(rq, prev); @@ -2476,9 +2432,6 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) update_curr_rt(rq); - if (rq->rt.rt_nr_running) - sched_rt_update_capacity_req(rq); - watchdog(rq, p); /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 028e232103c2..782746140711 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1629,81 +1629,6 @@ static inline unsigned long cpu_util_freq(int cpu) #endif -#ifdef CONFIG_CPU_FREQ_GOV_SCHED -#define capacity_max SCHED_CAPACITY_SCALE -extern unsigned int capacity_margin; -extern struct static_key __sched_freq; - -static inline bool sched_freq(void) -{ - return static_key_false(&__sched_freq); -} - -/* - * sched_capacity_reqs expects capacity requests to be normalised. - * All capacities should sum to the range of 0-1024. - */ -DECLARE_PER_CPU(struct sched_capacity_reqs, cpu_sched_capacity_reqs); -void update_cpu_capacity_request(int cpu, bool request); - -static inline void set_cfs_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - struct sched_capacity_reqs *scr = &per_cpu(cpu_sched_capacity_reqs, cpu); - -#ifdef CONFIG_SCHED_WALT - if (!walt_disabled && sysctl_sched_use_walt_cpu_util) { - int rtdl = scr->rt + scr->dl; - /* - * WALT tracks the utilization of a CPU considering the load - * generated by all the scheduling classes. - * Since the following call to: - * update_cpu_capacity - * is already adding the RT and DL utilizations let's remove - * these contributions from the WALT signal. - */ - if (capacity > rtdl) - capacity -= rtdl; - else - capacity = 0; - } -#endif - if (scr->cfs != capacity) { - scr->cfs = capacity; - update_cpu_capacity_request(cpu, request); - } -} - -static inline void set_rt_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - if (per_cpu(cpu_sched_capacity_reqs, cpu).rt != capacity) { - per_cpu(cpu_sched_capacity_reqs, cpu).rt = capacity; - update_cpu_capacity_request(cpu, request); - } -} - -static inline void set_dl_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ - if (per_cpu(cpu_sched_capacity_reqs, cpu).dl != capacity) { - per_cpu(cpu_sched_capacity_reqs, cpu).dl = capacity; - update_cpu_capacity_request(cpu, request); - } -} -#else -static inline bool sched_freq(void) { return false; } -static inline void set_cfs_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -static inline void set_rt_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -static inline void set_dl_cpu_capacity(int cpu, bool request, - unsigned long capacity) -{ } -#endif - static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq)); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 55caf81a833f..4e2f98dd2052 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -342,13 +342,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "sched_initial_task_util", - .data = &sysctl_sched_initial_task_util, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "sched_cstate_aware", .data = &sysctl_sched_cstate_aware, -- cgit v1.2.3 From 3822fe484cef0ef3e37e7106bfd684639f64e77b Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 8 Nov 2017 00:43:37 +0000 Subject: Revert "ANDROID: sched/rt: schedtune: Add boost retention to RT" This reverts commit 
d194ba5d712f051ff6c025f3484bb72f219764e3. Reason for revert: Broke some builds. Will fix and resubmit. Change-Id: I4e6fa1562346eda1bbf058f1d5ace5ba6256ce07 --- kernel/sched/core.c | 1 - kernel/sched/rt.c | 154 --------------------------------------------------- kernel/sched/sched.h | 1 - 3 files changed, 156 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 889fb1aff1e0..1eb91a696069 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2200,7 +2200,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) init_dl_task_timer(&p->dl); __dl_clear_params(p); - init_rt_schedtune_timer(&p->rt); INIT_LIST_HEAD(&p->rt.run_list); #ifdef CONFIG_PREEMPT_NOTIFIERS diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index ebf0d9329c86..c8322ab130eb 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -7,7 +7,6 @@ #include #include -#include #include "walt.h" #include "tune.h" @@ -987,73 +986,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) return 0; } -#define RT_SCHEDTUNE_INTERVAL 50000000ULL - -static void sched_rt_update_capacity_req(struct rq *rq); - -static enum hrtimer_restart rt_schedtune_timer(struct hrtimer *timer) -{ - struct sched_rt_entity *rt_se = container_of(timer, - struct sched_rt_entity, - schedtune_timer); - struct task_struct *p = rt_task_of(rt_se); - struct rq *rq = task_rq(p); - - raw_spin_lock(&rq->lock); - - /* - * Nothing to do if: - * - task has switched runqueues - * - task isn't RT anymore - */ - if (rq != task_rq(p) || (p->sched_class != &rt_sched_class)) - goto out; - - /* - * If task got enqueued back during callback time, it means we raced - * with the enqueue on another cpu, that's Ok, just do nothing as - * enqueue path would have tried to cancel us and we shouldn't run - * Also check the schedtune_enqueued flag as class-switch on a - * sleeping task may have already canceled the timer and done dq - */ - if (p->on_rq || !rt_se->schedtune_enqueued) - goto out; - - /* - * RT task is no longer active, cancel boost - */ - rt_se->schedtune_enqueued = false; - schedtune_dequeue_task(p, cpu_of(rq)); - sched_rt_update_capacity_req(rq); - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); -out: - raw_spin_unlock(&rq->lock); - - /* - * This can free the task_struct if no more references. - */ - put_task_struct(p); - - return HRTIMER_NORESTART; -} - -void init_rt_schedtune_timer(struct sched_rt_entity *rt_se) -{ - struct hrtimer *timer = &rt_se->schedtune_timer; - - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - timer->function = rt_schedtune_timer; - rt_se->schedtune_enqueued = false; -} - -static void start_schedtune_timer(struct sched_rt_entity *rt_se) -{ - struct hrtimer *timer = &rt_se->schedtune_timer; - - hrtimer_start(timer, ns_to_ktime(RT_SCHEDTUNE_INTERVAL), - HRTIMER_MODE_REL_PINNED); -} - /* * Update the current task's runtime statistics. Skip current tasks that * are not in our scheduling class. @@ -1391,33 +1323,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); - if (!schedtune_task_boost(p)) - return; - - /* - * If schedtune timer is active, that means a boost was already - * done, just cancel the timer so that deboost doesn't happen. - * Otherwise, increase the boost. If an enqueued timer was - * cancelled, put the task reference. 
- */ - if (hrtimer_try_to_cancel(&rt_se->schedtune_timer) == 1) - put_task_struct(p); - - /* - * schedtune_enqueued can be true in the following situation: - * enqueue_task_rt grabs rq lock before timer fires - * or before its callback acquires rq lock - * schedtune_enqueued can be false if timer callback is running - * and timer just released rq lock, or if the timer finished - * running and canceling the boost - */ - if (rt_se->schedtune_enqueued) - return; - - rt_se->schedtune_enqueued = true; schedtune_enqueue_task(p, cpu_of(rq)); - sched_rt_update_capacity_req(rq); - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) @@ -1429,20 +1335,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) walt_dec_cumulative_runnable_avg(rq, p); dequeue_pushable_task(rq, p); - - if (!rt_se->schedtune_enqueued) - return; - - if (flags == DEQUEUE_SLEEP) { - get_task_struct(p); - start_schedtune_timer(rt_se); - return; - } - - rt_se->schedtune_enqueued = false; schedtune_dequeue_task(p, cpu_of(rq)); - sched_rt_update_capacity_req(rq); - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); } /* @@ -1482,33 +1375,6 @@ static void yield_task_rt(struct rq *rq) #ifdef CONFIG_SMP static int find_lowest_rq(struct task_struct *task); -/* - * Perform a schedtune dequeue and cancelation of boost timers if needed. - * Should be called only with the rq->lock held. - */ -static void schedtune_dequeue_rt(struct rq *rq, struct task_struct *p) -{ - struct sched_rt_entity *rt_se = &p->rt; - - BUG_ON(!raw_spin_is_locked(&rq->lock)); - - if (!rt_se->schedtune_enqueued) - return; - - /* - * Incase of class change cancel any active timers. If an enqueued - * timer was cancelled, put the task ref. - */ - if (hrtimer_try_to_cancel(&rt_se->schedtune_timer) == 1) - put_task_struct(p); - - /* schedtune_enqueued is true, deboost it */ - rt_se->schedtune_enqueued = false; - schedtune_dequeue_task(p, task_cpu(p)); - sched_rt_update_capacity_req(rq); - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); -} - static int select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags, int sibling_count_hint) @@ -1563,19 +1429,6 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags, rcu_read_unlock(); out: - /* - * If previous CPU was different, make sure to cancel any active - * schedtune timers and deboost. - */ - if (task_cpu(p) != cpu) { - unsigned long fl; - struct rq *prq = task_rq(p); - - raw_spin_lock_irqsave(&prq->lock, fl); - schedtune_dequeue_rt(prq, p); - raw_spin_unlock_irqrestore(&prq->lock, fl); - } - return cpu; } @@ -2308,13 +2161,6 @@ static void rq_offline_rt(struct rq *rq) */ static void switched_from_rt(struct rq *rq, struct task_struct *p) { - /* - * On class switch from rt, always cancel active schedtune timers, - * this handles the cases where we switch class for a task that is - * already rt-dequeued but has a running timer. 
- */ - schedtune_dequeue_rt(rq, p); - /* * If there are other RT tasks then we will reschedule * and the scheduling of the other RT tasks will handle diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 782746140711..203d64a0c947 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1398,7 +1398,6 @@ extern void resched_cpu(int cpu); extern struct rt_bandwidth def_rt_bandwidth; extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); -extern void init_rt_schedtune_timer(struct sched_rt_entity *rt_se); extern struct dl_bandwidth def_dl_bandwidth; extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime); -- cgit v1.2.3