| author | Joonwoo Park <joonwoop@codeaurora.org> | 2015-08-21 11:02:22 -0700 |
| committer | David Keitel <dkeitel@codeaurora.org> | 2016-03-23 20:02:31 -0700 |
| commit | 91a87102354241b57814e359827a69d0f2fa4924 | |
| tree | adfbdef2075908fb2308a08702ca5e79830f2e83 /kernel | |
| parent | 383ae6b29eb14d498e3a57f40fa61115d910646d | |
sched: precompute required frequency for CPU load
At present, in order to estimate the power cost of CPU load, the HMP
scheduler converts CPU load to the corresponding frequency on the fly,
which can be avoided.
Optimize and reduce the execution time of select_best_cpu() by
precomputing the CPU load to frequency conversion. This optimization
reduces the execution time of select_best_cpu() by about 20% on average.
Change-Id: I385c57f2ea9a50883b76ba6ca3deb673b827217f
[joonwoop@codeaurora.org: fixed minor conflict in kernel/sched/sched.h.
stripped out code for CONFIG_SCHED_QHMP.]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
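
As background for the change below, the precomputed quantity is a per-frequency "handleable demand": for each supported frequency step, the largest task demand the CPU can serve once the governor's headroom percentage is reserved. The following is a minimal userspace C sketch of that table construction, an illustration rather than the kernel code: the plain integer types and the names precompute_hdemand and headroom_pct are invented stand-ins, while in the patch itself this work happens in sched_update_freq_max_load() with get_freq_max_load() supplying the headroom percentage.

```c
#include <stdint.h>

/* Illustration-only stand-ins for the kernel structures touched by
 * this patch; field names mirror the diff below. */
struct cpu_pstate_pwr {
	uint32_t freq;		/* frequency step in kHz */
	uint32_t power;		/* power cost at this step */
};

struct freq_max_load_entry {
	uint64_t hdemand;	/* max demand servable at this step */
};

/*
 * Fill the hdemand table once per frequency-table update. max_demand
 * stands in for max_task_load() scaled to this CPU's capacity, and
 * headroom_pct plays the role of get_freq_max_load().
 */
static void precompute_hdemand(const struct cpu_pstate_pwr *costs, int len,
			       struct freq_max_load_entry *out,
			       uint64_t max_demand,
			       uint32_t max_possible_freq,
			       int headroom_pct)
{
	if (headroom_pct <= 0 || headroom_pct > 100)
		headroom_pct = 100;

	for (int i = 0; i < len; i++) {
		/* effective frequency after the governor's headroom */
		uint64_t hfreq = (uint64_t)costs[i].freq * headroom_pct / 100;

		/* largest demand this frequency step can handle */
		out[i].hdemand = max_demand * hfreq / max_possible_freq;
	}
}
```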
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched/core.c | 40 |
| -rw-r--r-- | kernel/sched/fair.c | 115 |
| -rw-r--r-- | kernel/sched/rt.c | 3 |
| -rw-r--r-- | kernel/sched/sched.h | 32 |
4 files changed, 117 insertions, 73 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1e638fc6ebce..3647ca390840 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1729,19 +1729,26 @@ u32 __weak get_freq_max_load(int cpu, u32 freq)
 }
 
 DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);
+static DEFINE_SPINLOCK(freq_max_load_lock);
 
 int sched_update_freq_max_load(const cpumask_t *cpumask)
 {
 	int i, cpu, ret;
-	unsigned int freq, max;
+	unsigned int freq;
 	struct cpu_pstate_pwr *costs;
 	struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
 	struct freq_max_load *max_load, *old_max_load;
+	struct freq_max_load_entry *entry;
+	u64 max_demand_capacity, max_demand;
+	unsigned long flags;
+	u32 hfreq;
+	int hpct;
 
 	if (!per_cpu_info || !sysctl_sched_enable_power_aware)
 		return 0;
 
-	mutex_lock(&policy_mutex);
+	spin_lock_irqsave(&freq_max_load_lock, flags);
+	max_demand_capacity = div64_u64(max_task_load(), max_possible_capacity);
 	for_each_cpu(cpu, cpumask) {
 		if (!per_cpu_info[cpu].ptable) {
 			ret = -EINVAL;
@@ -1752,24 +1759,35 @@ int sched_update_freq_max_load(const cpumask_t *cpumask)
 
 		/*
 		 * allocate len + 1 and leave the last power cost as 0 for
-		 * power_cost_at_freq() can stop iterating index when
+		 * power_cost() can stop iterating index when
 		 * per_cpu_info[cpu].len > len of max_load due to race between
 		 * cpu power stats update and get_cpu_pwr_stats().
 		 */
 		max_load = kzalloc(sizeof(struct freq_max_load) +
-				   sizeof(u32) * (per_cpu_info[cpu].len + 1),
-				   GFP_ATOMIC);
+				   sizeof(struct freq_max_load_entry) *
+				   (per_cpu_info[cpu].len + 1), GFP_ATOMIC);
 		if (unlikely(!max_load)) {
 			ret = -ENOMEM;
 			goto fail;
 		}
 
+		max_load->length = per_cpu_info[cpu].len;
+
+		max_demand = max_demand_capacity *
+			     cpu_rq(cpu)->max_possible_capacity;
+
 		i = 0;
 		costs = per_cpu_info[cpu].ptable;
 		while (costs[i].freq) {
+			entry = &max_load->freqs[i];
 			freq = costs[i].freq;
-			max = get_freq_max_load(cpu, freq);
-			max_load->freqs[i] = div64_u64((u64)freq * max, 100);
+			hpct = get_freq_max_load(cpu, freq);
+			if (hpct <= 0 || hpct > 100)
+				hpct = 100;
+			hfreq = div64_u64((u64)freq * hpct, 100);
+			entry->hdemand =
+			    div64_u64(max_demand * hfreq,
+				      cpu_rq(cpu)->max_possible_freq);
 			i++;
 		}
 
@@ -1778,7 +1796,7 @@ int sched_update_freq_max_load(const cpumask_t *cpumask)
 		kfree_rcu(old_max_load, rcu);
 	}
 
-	mutex_unlock(&policy_mutex);
+	spin_unlock_irqrestore(&freq_max_load_lock, flags);
 	return 0;
 
 fail:
@@ -1790,7 +1808,7 @@ fail:
 		}
 	}
 
-	mutex_unlock(&policy_mutex);
+	spin_unlock_irqrestore(&freq_max_load_lock, flags);
 	return ret;
 }
 #else /* CONFIG_SCHED_FREQ_INPUT */
@@ -2448,6 +2466,8 @@ int sched_set_window(u64 window_start, unsigned int window_size)
 
 	reset_all_window_stats(ws, window_size);
 
+	sched_update_freq_max_load(cpu_possible_mask);
+
 	mutex_unlock(&policy_mutex);
 
 	return 0;
@@ -2754,6 +2774,8 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
 	if (update_max) {
 		max_possible_capacity = highest_mpc;
 		max_load_scale_factor = highest_mplsf;
+
+		sched_update_freq_max_load(cpu_possible_mask);
 	}
 
 	__update_min_max_capacity();
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 118767ee9a1d..c9b16b85c3c2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2887,24 +2887,7 @@ static inline u64 cpu_load(int cpu)
 
 static inline u64 cpu_load_sync(int cpu, int sync)
 {
-	struct rq *rq = cpu_rq(cpu);
-	u64 load;
-
-	load = rq->hmp_stats.cumulative_runnable_avg;
-
-	/*
-	 * If load is being checked in a sync wakeup environment,
-	 * we may want to discount the load of the currently running
-	 * task.
-	 */
-	if (sync && cpu == smp_processor_id()) {
-		if (load > rq->curr->ravg.demand)
-			load -= rq->curr->ravg.demand;
-		else
-			load = 0;
-	}
-
-	return scale_load_to_cpu(load, cpu);
+	return scale_load_to_cpu(cpu_cravg_sync(cpu, sync), cpu);
 }
 
 static int
@@ -3052,12 +3035,20 @@ int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost)
 	return abs(delta) > cost_limit;
 }
 
-static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
+/*
+ * Return the cost of running task p on CPU cpu. This function
+ * currently assumes that task p is the only task which will run on
+ * the CPU.
+ */
+unsigned int power_cost(int cpu, u64 demand)
 {
-	int i = 0;
+	int first, mid, last;
 	struct cpu_pwr_stats *per_cpu_info = get_cpu_pwr_stats();
 	struct cpu_pstate_pwr *costs;
 	struct freq_max_load *max_load;
+	int total_static_pwr_cost = 0;
+	struct rq *rq = cpu_rq(cpu);
+	unsigned int pc;
 
 	if (!per_cpu_info || !per_cpu_info[cpu].ptable ||
 	    !sysctl_sched_enable_power_aware)
@@ -3066,49 +3057,52 @@ static unsigned int power_cost_at_freq(int cpu, unsigned int freq)
 		 * capacity as a rough stand-in for real CPU power
 		 * numbers, assuming bigger CPUs are more power
 		 * hungry.
 		 */
-		return cpu_rq(cpu)->max_possible_capacity;
-
-	costs = per_cpu_info[cpu].ptable;
+		return rq->max_possible_capacity;
 
 	rcu_read_lock();
 	max_load = rcu_dereference(per_cpu(freq_max_load, cpu));
-	while (costs[i].freq != 0) {
-		if (costs[i+1].freq == 0 ||
-		    (costs[i].freq >= freq &&
-		     (!max_load || max_load->freqs[i] >= freq))) {
-			rcu_read_unlock();
-			return costs[i].power;
-		}
-		i++;
+	if (!max_load) {
+		pc = rq->max_possible_capacity;
+		goto unlock;
 	}
-	rcu_read_unlock();
-	BUG();
-}
 
-/* Return the cost of running the total task load total_load on CPU cpu. */
-unsigned int power_cost(u64 total_load, int cpu)
-{
-	unsigned int task_freq;
-	struct rq *rq = cpu_rq(cpu);
-	u64 demand;
-	int total_static_pwr_cost = 0;
+	costs = per_cpu_info[cpu].ptable;
 
-	if (!sysctl_sched_enable_power_aware)
-		return rq->max_possible_capacity;
+	if (demand <= max_load->freqs[0].hdemand) {
+		pc = costs[0].power;
+		goto unlock;
+	} else if (demand > max_load->freqs[max_load->length - 1].hdemand) {
+		pc = costs[max_load->length - 1].power;
+		goto unlock;
+	}
 
-	/* calculate % of max freq needed */
-	demand = total_load * 100;
-	demand = div64_u64(demand, max_task_load());
+	first = 0;
+	last = max_load->length - 1;
+	mid = (last - first) >> 1;
+	while (1) {
+		if (demand <= max_load->freqs[mid].hdemand)
+			last = mid;
+		else
+			first = mid;
 
-	task_freq = demand * rq->max_possible_freq;
-	task_freq /= 100; /* khz needed */
+		if (last - first == 1)
+			break;
+		mid = first + ((last - first) >> 1);
+	}
+
+	pc = costs[last].power;
+
+unlock:
+	rcu_read_unlock();
 
 	if (idle_cpu(cpu) && rq->cstate) {
 		total_static_pwr_cost += rq->static_cpu_pwr_cost;
 		if (rq->dstate)
 			total_static_pwr_cost += rq->static_cluster_pwr_cost;
 	}
 
-	return power_cost_at_freq(cpu, task_freq) + total_static_pwr_cost;
+	return pc + total_static_pwr_cost;
 }
 
 #define UP_MIGRATION 1
@@ -3141,8 +3135,7 @@ static int skip_freq_domain(struct rq *task_rq, struct rq *rq, int reason)
 	return skip;
 }
 
-static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu,
-		    u64 task_load, int reason)
+static int skip_cpu(struct rq *task_rq, struct rq *rq, int cpu, int reason)
 {
 	int skip;
 
@@ -3203,8 +3196,9 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 		struct rq *rq = cpu_rq(i);
 
 		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i), sched_irqload(i),
-				     power_cost(scale_load_to_cpu(task_load(p) +
-				     cpu_load_sync(i, sync), i), i), cpu_temp(i));
+				     power_cost(i, task_load(p) +
+						cpu_cravg_sync(i, sync)),
+				     cpu_temp(i));
 
 		if (skip_freq_domain(trq, rq, reason)) {
 			cpumask_andnot(&search_cpus, &search_cpus,
@@ -3212,8 +3206,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 			continue;
 		}
 
-		tload = scale_load_to_cpu(task_load(p), i);
-		if (skip_cpu(trq, rq, i, tload, reason))
+		if (skip_cpu(trq, rq, i, reason))
 			continue;
 
 		cpu_load = cpu_load_sync(i, sync);
@@ -3232,6 +3225,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 		if (boost)
 			continue;
 
+		tload = scale_load_to_cpu(task_load(p), i);
 		if (!eligible_cpu(tload, cpu_load, i, sync) ||
 		    !task_load_will_fit(p, tload, i))
 			continue;
@@ -3241,7 +3235,8 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 		 * under spill.
 		 */
 
-		cpu_cost = power_cost(tload + cpu_load, i);
+		cpu_cost = power_cost(i, task_load(p) +
+				      cpu_cravg_sync(i, sync));
 		if (cpu_cost > min_cost)
 			continue;
 
@@ -3879,7 +3874,7 @@ static inline int select_best_cpu(struct task_struct *p, int target,
 	return 0;
 }
 
-static inline int power_cost(u64 total_load, int cpu)
+unsigned int power_cost(int cpu, u64 demand)
 {
 	return SCHED_CAPACITY_SCALE;
 }
@@ -5237,7 +5232,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 		/* Log effect on hmp stats after throttling */
 		trace_sched_cpu_load(rq, idle_cpu(cpu_of(rq)),
 				     sched_irqload(cpu_of(rq)),
-				     power_cost_at_freq(cpu_of(rq), 0),
+				     power_cost(cpu_of(rq), 0),
 				     cpu_temp(cpu_of(rq)));
 	}
 
@@ -5294,7 +5289,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 		/* Log effect on hmp stats after un-throttling */
 		trace_sched_cpu_load(rq, idle_cpu(cpu_of(rq)),
 				     sched_irqload(cpu_of(rq)),
-				     power_cost_at_freq(cpu_of(rq), 0),
+				     power_cost(cpu_of(rq), 0),
 				     cpu_temp(cpu_of(rq)));
 	}
 
@@ -8072,7 +8067,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i),
 				     sched_irqload(i),
-				     power_cost_at_freq(i, 0),
+				     power_cost(i, 0),
 				     cpu_temp(i));
 
 		/* Bias balancing toward cpus of our domain */
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4d490c90b03e..82b4052b621b 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1693,8 +1693,7 @@ static int find_lowest_rq_hmp(struct task_struct *task)
 	for_each_cpu(i, lowest_mask) {
 		cpu_load = scale_load_to_cpu(
 			cpu_rq(i)->hmp_stats.cumulative_runnable_avg, i);
-		cpu_cost = power_cost(cpu_load, i);
-
+		cpu_cost = power_cost(i, cpu_cravg_sync(i, 0));
 		trace_sched_cpu_load(cpu_rq(i), idle_cpu(i),
 				     sched_irqload(i), cpu_cost,
 				     cpu_temp(i));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index cb9114208ed0..fa04d61ad70a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -28,9 +28,15 @@ extern atomic_long_t calc_load_tasks;
 
 extern void calc_global_load_tick(struct rq *this_rq);
 
+struct freq_max_load_entry {
+	/* The maximum load, with the governor's headroom accounted for. */
+	u64 hdemand;
+};
+
 struct freq_max_load {
 	struct rcu_head rcu;
-	u32 freqs[0];
+	int length;
+	struct freq_max_load_entry freqs[0];
 };
 
 extern DEFINE_PER_CPU(struct freq_max_load *, freq_max_load);
@@ -1236,12 +1242,34 @@ static inline void clear_reserved(int cpu)
 	clear_bit(CPU_RESERVED, &rq->hmp_flags);
}
 
+static inline u64 cpu_cravg_sync(int cpu, int sync)
+{
+	struct rq *rq = cpu_rq(cpu);
+	u64 load;
+
+	load = rq->hmp_stats.cumulative_runnable_avg;
+
+	/*
+	 * If load is being checked in a sync wakeup environment,
+	 * we may want to discount the load of the currently running
+	 * task.
+	 */
+	if (sync && cpu == smp_processor_id()) {
+		if (load > rq->curr->ravg.demand)
+			load -= rq->curr->ravg.demand;
+		else
+			load = 0;
+	}
+
+	return load;
+}
+
 extern void check_for_migration(struct rq *rq, struct task_struct *p);
 extern void pre_big_task_count_change(const struct cpumask *cpus);
 extern void post_big_task_count_change(const struct cpumask *cpus);
 extern void set_hmp_defaults(void);
 extern int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost);
-extern unsigned int power_cost(u64 total_load, int cpu);
+extern unsigned int power_cost(int cpu, u64 demand);
 extern void reset_all_window_stats(u64 window_start, unsigned int window_size);
 extern void boost_kick(int cpu);
 extern int sched_boost(void);
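
With that table in place, the demand-to-power lookup in the new power_cost() becomes a binary search over hdemand instead of an on-the-fly load-to-frequency conversion. A matching userspace sketch follows, mirroring the patch's search but using the same illustration-only types and names as the sketch above; it assumes hdemand grows with frequency, which holds for a sorted frequency table.

```c
/*
 * Return the power cost for `demand`: the power of the lowest
 * frequency step whose precomputed hdemand can serve it, capped at
 * the highest step when demand exceeds every threshold. costs[] and
 * freqs[] are the illustration-only tables built by
 * precompute_hdemand() above.
 */
static uint32_t lookup_power(const struct cpu_pstate_pwr *costs,
			     const struct freq_max_load_entry *freqs,
			     int len, uint64_t demand)
{
	int first = 0, last = len - 1;

	if (demand <= freqs[0].hdemand)
		return costs[0].power;
	if (demand > freqs[last].hdemand)	/* over-demand: cap at fmax */
		return costs[last].power;

	/* Invariant: freqs[first].hdemand < demand <= freqs[last].hdemand */
	while (last - first > 1) {
		int mid = first + ((last - first) >> 1);

		if (demand <= freqs[mid].hdemand)
			last = mid;
		else
			first = mid;
	}

	return costs[last].power;
}
```

The search is O(log n) in the number of frequency steps and drops the per-call 64-bit divisions of the old path, which is consistent with the roughly 20% average reduction in select_best_cpu() runtime quoted in the commit message.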
