sched: balance power inefficient CPUs with one task

Normally the load balancer does not pay attention to CPUs with one task since it is not possible to subdivide that load any further to achieve better balance. With power aware scheduling however it may be desirable to relocate that one task if the CPU it is currently executing on is less power efficient than other CPUs in the system. Change-Id: Idf3f5e22b88048184323513f0052827b884526b6 Signed-off-by: Steve Muckle <smuckle@codeaurora.org> Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org> [rameezmustafa@codeaurora.org]: Port to msm-3.18] Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org [joonwoop@codeaurora.org: group_capacity_factor changed to group_no_capacity. group_classify is called by update_sg_lb_stats() too.] Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
author: Steve Muckle <smuckle@codeaurora.org> 2014-04-04 13:42:34 -0700
committer: David Keitel <dkeitel@codeaurora.org> 2016-03-23 19:59:25 -0700
commit: c4a015173ea9b5acee813214b2be854965071ef4 (patch)
tree: fb8bb920344936a0c1225c306193e5e95ed8562e
parent: 5b7076b4e584c6f8f053338ef8f10133738ca761 (diff)
1 files changed, 37 insertions, 8 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0f38e31eea4a..967432761b06 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6217,6 +6217,7 @@ enum fbq_type { regular, remote, all };
 #define LBF_DST_PINNED  0x04
 #define LBF_SOME_PINNED	0x08
 #define LBF_IGNORE_SMALL_TASKS 0x10
+#define LBF_PWR_ACTIVE_BALANCE 0x20
 
 struct lb_env {
 	struct sched_domain	*sd;
@@ -6713,6 +6714,7 @@ static unsigned long task_h_load(struct task_struct *p)
 
 enum group_type {
 	group_other = 0,
+	group_ea,
 	group_imbalanced,
 	group_overloaded,
 };
@@ -7038,13 +7040,28 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
 
 static inline enum
 group_type group_classify(struct sched_group *group,
-			  struct sg_lb_stats *sgs)
+			  struct sg_lb_stats *sgs, struct lb_env *env)
 {
-	if (sgs->group_no_capacity)
+	if (sgs->group_no_capacity) {
+		env->flags &= ~LBF_PWR_ACTIVE_BALANCE;
 		return group_overloaded;
+	}
 
-	if (sg_imbalanced(group))
+	if (sg_imbalanced(group)) {
+		env->flags &= ~LBF_PWR_ACTIVE_BALANCE;
 		return group_imbalanced;
+	}
+
+
+	/* Mark a less power-efficient CPU as busy only if we haven't
+	 * seen a busy group yet. We want to prioritize spreading
+	 * work over power optimization. */
+	if (group->group_weight == 1 && sgs->sum_nr_running &&
+	    power_cost_at_freq(env->dst_cpu, 0) <
+	    power_cost_at_freq(cpumask_first(sched_group_cpus(group)), 0)) {
+		env->flags |= LBF_PWR_ACTIVE_BALANCE;
+		return group_ea;
+	}
 
 	return group_other;
 }
@@ -7112,7 +7129,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	sgs->group_weight = group->group_weight;
 
 	sgs->group_no_capacity = group_is_overloaded(env, sgs);
-	sgs->group_type = group_classify(group, sgs);
+	sgs->group_type = group_classify(group, sgs, env);
 }
 
 /**
@@ -7138,8 +7155,14 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 	if (sgs->group_type > busiest->group_type)
 		return true;
 
-	if (sgs->group_type < busiest->group_type)
+	if (sgs->group_type < busiest->group_type) {
+		if (sgs->group_type == group_ea)
+			env->flags &= ~LBF_PWR_ACTIVE_BALANCE;
 		return false;
+	}
+
+	if (env->flags & LBF_PWR_ACTIVE_BALANCE)
+		return true;
 
 	if (sgs->avg_load <= busiest->avg_load)
 		return false;
@@ -7246,7 +7269,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		    group_has_capacity(env, &sds->local_stat) &&
 		    (sgs->sum_nr_running > 1)) {
 			sgs->group_no_capacity = 1;
-			sgs->group_type = group_classify(sg, sgs);
+			sgs->group_type = group_classify(sg, sgs, env);
 		}
 
 		if (update_sd_pick_busiest(env, sds, sg, sgs)) {
@@ -7677,6 +7700,9 @@ static int need_active_balance(struct lb_env *env)
 {
 	struct sched_domain *sd = env->sd;
 
+	if (env->flags & LBF_PWR_ACTIVE_BALANCE)
+		return 1;
+
 	if (env->idle == CPU_NEWLY_IDLE) {
 
 		/*
@@ -7905,14 +7931,17 @@ more_balance:
 	}
 
 	if (!ld_moved) {
-		schedstat_inc(sd, lb_failed[idle]);
+		if (!(env.flags & LBF_PWR_ACTIVE_BALANCE))
+			schedstat_inc(sd, lb_failed[idle]);
+
 		/*
 		 * Increment the failure counter only on periodic balance.
 		 * We do not want newidle balance, which can be very
 		 * frequent, pollute the failure counter causing
 		 * excessive cache_hot migrations and active balances.
 		 */
-		if (idle != CPU_NEWLY_IDLE)
+		if (idle != CPU_NEWLY_IDLE &&
+		    !(env.flags & LBF_PWR_ACTIVE_BALANCE))
 			sd->nr_balance_failed++;
 
 		if (need_active_balance(&env)) {
author	Steve Muckle <smuckle@codeaurora.org>	2014-04-04 13:42:34 -0700
committer	David Keitel <dkeitel@codeaurora.org>	2016-03-23 19:59:25 -0700
commit	c4a015173ea9b5acee813214b2be854965071ef4 (patch)
tree	fb8bb920344936a0c1225c306193e5e95ed8562e
parent	5b7076b4e584c6f8f053338ef8f10133738ca761 (diff)