author     Linux Build Service Account <lnxbuild@localhost>  2016-12-19 17:04:54 -0800
committer  Gerrit - the friendly Code Review server <code-review@localhost>  2016-12-19 17:04:54 -0800
commit     e26b0777dc92ab47d16b0134e57cd1c9e1083a9a
tree       6896cfb7a25310fb4f871b86d74f939d9727c3d4
parent     af833ae6a86db02de5cd1347779470485f80f5ce
parent     0dee0d1411e4ba837089a769a5bcce57a5a14df2
Merge "sched: Avoid waking idle cpu for short-burst tasks"
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched/fair.c   39
-rw-r--r--  kernel/sched/hmp.c    17
-rw-r--r--  kernel/sched/rt.c     18
-rw-r--r--  kernel/sched/sched.h  10
-rw-r--r--  kernel/sysctl.c        7
5 files changed, 82 insertions, 9 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 14acefa27ec1..87538f7d495a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2613,6 +2613,7 @@ static u32 __compute_runnable_contrib(u64 n)
#define SBC_FLAG_CSTATE_LOAD 0x100
#define SBC_FLAG_BEST_SIBLING 0x200
#define SBC_FLAG_WAKER_CPU 0x400
+#define SBC_FLAG_PACK_TASK 0x800
/* Cluster selection flag */
#define SBC_FLAG_COLOC_CLUSTER 0x10000
@@ -2629,6 +2630,7 @@ struct cpu_select_env {
u8 sync:1;
u8 ignore_prev_cpu:1;
enum sched_boost_policy boost_policy;
+ u8 pack_task:1;
int prev_cpu;
DECLARE_BITMAP(candidate_list, NR_CPUS);
DECLARE_BITMAP(backup_list, NR_CPUS);
@@ -2980,8 +2982,17 @@ static void update_cluster_stats(int cpu, struct cluster_cpu_stats *stats,
{
int cpu_cost;
- cpu_cost = power_cost(cpu, task_load(env->p) +
+ /*
+  * We try to find the least loaded *busy* CPU irrespective
+  * of the power cost.
+  */
+ if (env->pack_task)
+         cpu_cost = cpu_min_power_cost(cpu);
+
+ else
+         cpu_cost = power_cost(cpu, task_load(env->p) +
                  cpu_cravg_sync(cpu, env->sync));
+
if (cpu_cost <= stats->min_cost)
__update_cluster_stats(cpu, stats, env, cpu_cost);
}
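
The added comment deserves one more step of reasoning: with env->pack_task set, every CPU in a cluster reports the same constant cpu_min_power_cost(), so the cpu_cost <= stats->min_cost check stops discriminating between CPUs and the load-based tie-break inside __update_cluster_stats() is what actually picks the least loaded busy CPU. A compile-and-run userspace sketch of that collapse, using an illustrative candidate struct and ignoring the real code's extra C-state and sibling tie-breaks:

/* Illustrative sketch, not kernel code: with a constant per-cluster cost the
 * "cheapest CPU" search reduces to "least loaded busy CPU". */
#include <stdint.h>
#include <stdio.h>

struct candidate {
        int      cpu;
        int      power_cost;    /* constant per cluster in pack mode */
        uint64_t load;
};

/* Cheapest candidate wins; equal cost falls back to load, which is the role
 * __update_cluster_stats() plays in the patch. */
static int pick(const struct candidate *c, int n)
{
        int best = 0;

        for (int i = 1; i < n; i++)
                if (c[i].power_cost < c[best].power_cost ||
                    (c[i].power_cost == c[best].power_cost &&
                     c[i].load < c[best].load))
                        best = i;
        return c[best].cpu;
}

int main(void)
{
        /* Pack mode: every CPU reports the cluster's minimum cost, so the
         * least loaded busy CPU (CPU 1 here) is chosen. */
        struct candidate pack_mode[] = {
                { .cpu = 0, .power_cost = 20, .load = 700 },
                { .cpu = 1, .power_cost = 20, .load = 300 },
                { .cpu = 2, .power_cost = 20, .load = 500 },
        };

        printf("pack mode picks CPU %d\n", pick(pack_mode, 3));
        return 0;
}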
@@ -3056,6 +3067,15 @@ static inline int wake_to_idle(struct task_struct *p)
(p->flags & PF_WAKE_UP_IDLE) || sysctl_sched_wake_to_idle;
}
+static inline bool env_has_special_flags(struct cpu_select_env *env)
+{
+ if (env->need_idle || env->boost_policy != SCHED_BOOST_NONE ||
+ env->reason)
+ return true;
+
+ return false;
+}
+
static inline bool
bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
{
@@ -3063,9 +3083,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
struct task_struct *task = env->p;
struct sched_cluster *cluster;
- if (env->boost_policy != SCHED_BOOST_NONE || env->reason ||
- !task->ravg.mark_start ||
- env->need_idle || !sched_short_sleep_task_threshold)
+ if (!task->ravg.mark_start || !sched_short_sleep_task_threshold)
return false;
prev_cpu = env->prev_cpu;
@@ -3114,8 +3132,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
static inline bool
wake_to_waker_cluster(struct cpu_select_env *env)
{
- return env->boost_policy == SCHED_BOOST_NONE &&
- !env->need_idle && !env->reason && env->sync &&
+ return env->sync &&
task_load(current) > sched_big_waker_task_load &&
task_load(env->p) < sched_small_wakee_task_load;
}
@@ -3140,7 +3157,6 @@ cluster_allowed(struct task_struct *p, struct sched_cluster *cluster)
return !cpumask_empty(&tmp_mask);
}
-
/* return cheapest cpu that can fit this task */
static int select_best_cpu(struct task_struct *p, int target, int reason,
int sync)
@@ -3150,6 +3166,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
struct related_thread_group *grp;
unsigned int sbc_flag = 0;
int cpu = raw_smp_processor_id();
+ bool special;
struct cpu_select_env env = {
.p = p,
@@ -3162,6 +3179,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
.rtg = NULL,
.sbc_best_flag = 0,
.sbc_best_cluster_flag = 0,
+ .pack_task = false,
};
env.boost_policy = task_sched_boost(p) ?
@@ -3171,6 +3189,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
bitmap_zero(env.backup_list, NR_CPUS);
init_cluster_cpu_stats(&stats);
+ special = env_has_special_flags(&env);
rcu_read_lock();
@@ -3182,7 +3201,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
clear_bit(pref_cluster->id, env.candidate_list);
else
env.rtg = grp;
- } else {
+ } else if (!special) {
cluster = cpu_rq(cpu)->cluster;
if (wake_to_waker_cluster(&env)) {
if (bias_to_waker_cpu(p, cpu)) {
@@ -3203,6 +3222,10 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
}
}
+ if (!special && is_short_burst_task(p)) {
+ env.pack_task = true;
+ sbc_flag = SBC_FLAG_PACK_TASK;
+ }
retry:
cluster = select_least_power_cluster(&env);
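
Taken together, the fair.c changes gate packing on two conditions: the placement must not be "special" (no need_idle, no boost, no load-balance reason), and the task's tracked average burst must be below the new threshold. A compile-only userspace sketch of that predicate; struct placement_env here is illustrative, not the kernel's cpu_select_env:

#include <stdbool.h>
#include <stdint.h>

struct placement_env {
        bool need_idle;         /* caller asked for an idle CPU */
        bool boosted;           /* boost_policy != SCHED_BOOST_NONE */
        int  reason;            /* non-zero for load-balance placements */
};

static unsigned int sched_short_burst_ns;       /* 0 keeps packing disabled */

static bool should_pack(const struct placement_env *env, uint64_t avg_burst_ns)
{
        /* "Special" placements keep the original wake-to-idle behaviour. */
        if (env->need_idle || env->boosted || env->reason)
                return false;

        /* Only tasks whose average run burst is below the tunable pack. */
        return avg_burst_ns < sched_short_burst_ns;
}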
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index b2f3013bfe31..95e618ee1124 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -961,6 +961,13 @@ sched_long_cpu_selection_threshold = 100 * NSEC_PER_MSEC;
unsigned int __read_mostly sysctl_sched_restrict_cluster_spill;
+/*
+ * Scheduler tries to avoid waking up idle CPUs for tasks running
+ * in short bursts. If the task average burst is less than
+ * sysctl_sched_short_burst nanoseconds, it is eligible for packing.
+ */
+unsigned int __read_mostly sysctl_sched_short_burst;
+
static void
_update_up_down_migrate(unsigned int *up_migrate, unsigned int *down_migrate)
{
@@ -1553,7 +1560,13 @@ void init_new_task_load(struct task_struct *p, bool idle_task)
memset(&p->ravg, 0, sizeof(struct ravg));
p->cpu_cycles = 0;
p->ravg.curr_burst = 0;
- p->ravg.avg_burst = 0;
+ /*
+ * Initialize the avg_burst to twice the threshold, so that
+ * a task would not be classified as short burst right away
+ * after fork. It takes at least 6 sleep-wakeup cycles for
+ * the avg_burst to go below the threshold.
+ */
+ p->ravg.avg_burst = 2 * (u64)sysctl_sched_short_burst;
p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_KERNEL);
@@ -2987,6 +3000,8 @@ void reset_task_stats(struct task_struct *p)
p->ravg.curr_window_cpu = curr_window_ptr;
p->ravg.prev_window_cpu = prev_window_ptr;
+ p->ravg.avg_burst = 2 * (u64)sysctl_sched_short_burst;
+
/* Retain EXITING_TASK marker */
p->ravg.sum_history[0] = sum;
}
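
The "at least 6 sleep-wakeup cycles" remark follows from how avg_burst decays. Assuming it is averaged with the scheduler's usual update_avg()-style 1/8 EWMA, i.e. avg += (sample - avg) / 8 (the decay rule is an assumption here; only the 2x initial value and the six-cycle figure come from the patch comment), a task that forks at twice the threshold and then runs only zero-length bursts needs exactly six updates to drop below it: 2 * (7/8)^5 is about 1.03 while 2 * (7/8)^6 is about 0.90. A small userspace demo:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        const uint64_t threshold = 50000000ULL; /* example threshold: 50 ms */
        int64_t avg = 2 * (int64_t)threshold;   /* init_new_task_load() value */
        int cycles = 0;

        while ((uint64_t)avg >= threshold) {
                avg += (0 - avg) / 8;           /* assumed 1/8 EWMA, sample = 0 */
                cycles++;
        }
        printf("avg_burst drops below the threshold after %d cycles\n", cycles);
        return 0;                               /* prints 6 */
}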
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f8bc34c31c42..3fe00d6fa335 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1723,6 +1723,7 @@ static int find_lowest_rq_hmp(struct task_struct *task)
int i;
int restrict_cluster;
int boost_on_big;
+ int pack_task, wakeup_latency, least_wakeup_latency = INT_MAX;
boost_on_big = sched_boost() == FULL_THROTTLE_BOOST &&
sched_boost_policy() == SCHED_BOOST_ON_BIG;
@@ -1739,6 +1740,8 @@ static int find_lowest_rq_hmp(struct task_struct *task)
if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
return best_cpu; /* No targets found */
+ pack_task = is_short_burst_task(task);
+
/*
* At this point we have built a mask of cpus representing the
* lowest priority tasks in the system. Now we want to elect
@@ -1764,6 +1767,20 @@ static int find_lowest_rq_hmp(struct task_struct *task)
if (!restrict_cluster)
cpu_load = scale_load_to_cpu(cpu_load, i);
+ if (pack_task) {
+         wakeup_latency = cpu_rq(i)->wakeup_latency;
+
+         if (wakeup_latency > least_wakeup_latency)
+                 continue;
+
+         if (wakeup_latency < least_wakeup_latency) {
+                 least_wakeup_latency = wakeup_latency;
+                 min_load = cpu_load;
+                 best_cpu = i;
+                 continue;
+         }
+ }
+
if (cpu_load < min_load ||
(cpu_load == min_load &&
(i == prev_cpu || (best_cpu != prev_cpu &&
@@ -1772,6 +1789,7 @@ static int find_lowest_rq_hmp(struct task_struct *task)
best_cpu = i;
}
}
+
if (restrict_cluster && best_cpu != -1)
break;
}
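
For RT tasks the packing preference is a lexicographic ordering over the lowest-priority candidates: a CPU in a shallower idle state (lower wakeup_latency) always wins, and load only matters once latency ties, at which point the pre-existing load and prev_cpu comparison still applies. A compile-only sketch of that ordering; the struct is illustrative, not the kernel's rq, and the prev_cpu affinity tie-breaks are left out:

#include <stdbool.h>
#include <stdint.h>

struct candidate {
        int      wakeup_latency;        /* mirrors rq->wakeup_latency in the patch */
        uint64_t load;
};

/* Pack-mode ordering: shallower idle state first, load only breaks ties. */
static bool pack_prefers(const struct candidate *cand,
                         const struct candidate *best)
{
        if (cand->wakeup_latency != best->wakeup_latency)
                return cand->wakeup_latency < best->wakeup_latency;

        return cand->load < best->load;
}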
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index bcf3f019a300..ae7442007e8b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1216,6 +1216,11 @@ static inline int cpu_max_power_cost(int cpu)
return cpu_rq(cpu)->cluster->max_power_cost;
}
+static inline int cpu_min_power_cost(int cpu)
+{
+ return cpu_rq(cpu)->cluster->min_power_cost;
+}
+
static inline u32 cpu_cycles_to_freq(u64 cycles, u32 period)
{
return div64_u64(cycles, period);
@@ -1413,6 +1418,11 @@ static inline u64 cpu_cravg_sync(int cpu, int sync)
return load;
}
+static inline bool is_short_burst_task(struct task_struct *p)
+{
+ return p->ravg.avg_burst < sysctl_sched_short_burst;
+}
+
extern void check_for_migration(struct rq *rq, struct task_struct *p);
extern void pre_big_task_count_change(const struct cpumask *cpus);
extern void post_big_task_count_change(const struct cpumask *cpus);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c272e31f37ea..ba69f4c96d7c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -507,6 +507,13 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &three,
},
+ {
+ .procname = "sched_short_burst_ns",
+ .data = &sysctl_sched_short_burst,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#endif /* CONFIG_SCHED_HMP */
#ifdef CONFIG_SCHED_DEBUG
{
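
The new tunable surfaces as /proc/sys/kernel/sched_short_burst_ns and, having no initializer, defaults to 0, which keeps packing disabled because no u64 avg_burst can be below zero. A userspace sketch of turning it on; the 25 ms value is purely an example, not a recommended setting:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/sched_short_burst_ns", "w");

        if (!f) {
                perror("sched_short_burst_ns");
                return 1;
        }
        fprintf(f, "%u\n", 25000000U);  /* 25 ms burst threshold */
        return fclose(f) ? 1 : 0;
}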