author    Steve Muckle <smuckle@codeaurora.org>    2014-05-06 18:05:50 -0700
committer David Keitel <dkeitel@codeaurora.org>    2016-03-23 19:59:29 -0700
commit    f469bce8e2d2062568f753ca7e9099715f504df8 (patch)
tree      821999bd5edcf830dae9ab4094a04e8bc7c3fb5a
parent    e640249dbade56af7bc968fce2f5ede230602e6e (diff)
sched: add migration load change notifier for frequency guidance
When a task moves between CPUs in two different frequency domains the
cpufreq governor may wish to immediately modify the frequency of both
the source and destination CPUs of the migrating task.

A tunable is provided to establish what size task is considered
"significant" enough to warrant notifying cpufreq.

Also fix a bug that would cause load to not be accounted properly
during wakeup migrations.

Change-Id: Ie8f6b1cc4d43a602840dac18590b42a81327c95a
Signed-off-by: Steve Muckle <smuckle@codeaurora.org>
[rameezmustafa@codeaurora.org: Add double rq locking for set_task_cpu()]
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
-rw-r--r--  include/linux/sched.h         |  2
-rw-r--r--  include/linux/sched/sysctl.h  |  5
-rw-r--r--  kernel/sched/core.c           | 61
-rw-r--r--  kernel/sched/fair.c           |  4
-rw-r--r--  kernel/sched/sched.h          |  4
-rw-r--r--  kernel/sysctl.c               |  9
6 files changed, 78 insertions, 7 deletions
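The hook added here is a standard kernel atomic notifier chain, so a cpufreq
governor consumes it by registering a callback. Below is a minimal consumer
sketch, not part of this patch: the callback and notifier_block names are
hypothetical; only load_alert_notifier_head, the val == 0 argument, and the
cpu number cast into the data pointer come from the change itself.

    #include <linux/init.h>
    #include <linux/notifier.h>
    #include <linux/sched.h>

    /* Hypothetical governor-side consumer of load_alert_notifier_head.
     * The patch fires the chain twice per "significant" inter-domain
     * migration: once with the source cpu and once with the destination
     * cpu passed as the data pointer. */
    static int demo_load_alert_cb(struct notifier_block *nb,
                                  unsigned long val, void *data)
    {
            int cpu = (long)data;

            /* Re-evaluate the frequency of cpu's policy here. */
            return NOTIFY_OK;
    }

    static struct notifier_block demo_load_alert_nb = {
            .notifier_call = demo_load_alert_cb,
    };

    static int __init demo_load_alert_init(void)
    {
            return atomic_notifier_chain_register(&load_alert_notifier_head,
                                                  &demo_load_alert_nb);
    }

Teardown would mirror this with atomic_notifier_chain_unregister().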
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 763eb0312130..2d2a94575eaa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3171,6 +3171,8 @@ struct migration_notify_data {
int load;
};
+extern struct atomic_notifier_head load_alert_notifier_head;
+
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 66a978ca7a65..b65ee06f80c9 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -47,6 +47,8 @@ extern unsigned int sysctl_sched_window_stats_policy;
extern unsigned int sysctl_sched_init_task_load_pct;
#endif
+extern unsigned int sysctl_sched_task_migrate_notify_pct;
+
#ifdef CONFIG_SCHED_HMP
extern unsigned int sysctl_sched_enable_hmp_task_placement;
extern unsigned int sysctl_sched_mostly_idle_nr_run;
@@ -87,6 +89,9 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
loff_t *ppos);
#endif
+extern int sched_migrate_notify_proc_handler(struct ctl_table *table,
+ int write, void __user *buffer, size_t *lenp, loff_t *ppos);
+
extern int sched_hmp_proc_update_handler(struct ctl_table *table,
int write, void __user *buffer, size_t *lenp, loff_t *ppos);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d2b7d83fbd90..a96e2225755a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -92,6 +92,7 @@
#include <trace/events/sched.h>
ATOMIC_NOTIFIER_HEAD(migration_notifier_head);
+ATOMIC_NOTIFIER_HEAD(load_alert_notifier_head);
DEFINE_MUTEX(sched_domains_mutex);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -1095,6 +1096,29 @@ unsigned int __read_mostly sched_use_pelt;
unsigned int max_possible_efficiency = 1024;
unsigned int min_possible_efficiency = 1024;
+__read_mostly unsigned int sysctl_sched_task_migrate_notify_pct = 25;
+unsigned int sched_task_migrate_notify;
+
+int sched_migrate_notify_proc_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+ unsigned int *data = (unsigned int *)table->data;
+
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (ret || !write)
+ return ret;
+
+ if (*data > 100)
+ return -EINVAL;
+
+ sched_task_migrate_notify = div64_u64((u64)*data *
+ (u64)max_task_load(), 100);
+
+ return 0;
+}
+
/*
* Called when new window is starting for a task, to record cpu usage over
* recently concluded window(s). Normally 'samples' should be 1. It can be > 1
@@ -1687,21 +1711,46 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
perf_event_task_migrate(p);
#if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
- if (p->on_rq) {
+ if (p->on_rq || p->state == TASK_WAKING) {
struct rq *src_rq = task_rq(p);
struct rq *dest_rq = cpu_rq(new_cpu);
- p->on_rq = 0; /* Fixme */
- update_task_ravg(p, task_rq(p), 0, sched_clock());
- p->on_rq = 1; /* Fixme */
+ /* In the wakeup case the task has already had its
+ * statistics updated (and the rq is not locked). */
+ if (p->state != TASK_WAKING) {
+ p->on_rq = 0; /* todo */
+ update_task_ravg(p, task_rq(p), 0,
+ sched_clock());
+ p->on_rq = 1; /* todo */
+ }
+
+ if (p->state == TASK_WAKING)
+ double_rq_lock(src_rq, dest_rq);
+
update_task_ravg(dest_rq->curr, dest_rq,
- 1, sched_clock());
+ 1, sched_clock());
src_rq->curr_runnable_sum -= p->ravg.sum;
src_rq->prev_runnable_sum -= p->ravg.prev_window;
dest_rq->curr_runnable_sum += p->ravg.sum;
dest_rq->prev_runnable_sum += p->ravg.prev_window;
+
+ if (p->state == TASK_WAKING)
+ double_rq_unlock(src_rq, dest_rq);
+
+ /* Is p->ravg.prev_window significant? Trigger a load
+ * alert notifier if so. */
+ if (p->ravg.prev_window > sched_task_migrate_notify &&
+ !cpumask_test_cpu(new_cpu,
+ &src_rq->freq_domain_cpumask)) {
+ atomic_notifier_call_chain(
+ &load_alert_notifier_head, 0,
+ (void *)(long)task_cpu(p));
+ atomic_notifier_call_chain(
+ &load_alert_notifier_head, 0,
+ (void *)(long)new_cpu);
+ }
}
#endif
@@ -7899,6 +7948,8 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
return 0;
for_each_cpu(i, policy->related_cpus) {
+ cpumask_copy(&cpu_rq(i)->freq_domain_cpumask,
+ policy->related_cpus);
cpu_rq(i)->min_freq = policy->min;
cpu_rq(i)->max_freq = policy->max;
cpu_rq(i)->max_possible_freq = policy->cpuinfo.max_freq;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b04af1c436cc..6fe51274c748 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2551,7 +2551,7 @@ static inline unsigned int task_load(struct task_struct *p)
return p->ravg.demand;
}
-static inline unsigned int max_task_load(void)
+unsigned int max_task_load(void)
{
if (sched_use_pelt)
return LOAD_AVG_MAX;
@@ -6442,7 +6442,9 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
deactivate_task(env->src_rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING;
+ double_lock_balance(env->src_rq, env->dst_rq);
set_task_cpu(p, env->dst_cpu);
+ double_unlock_balance(env->src_rq, env->dst_rq);
}
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c5d593ba30f2..a0d35bbc2626 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -650,6 +650,8 @@ struct rq {
* max_possible_freq = maximum supported by hardware
*/
unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
+ struct cpumask freq_domain_cpumask;
+
u64 cumulative_runnable_avg;
int efficiency; /* Differentiate cpus with different IPC capability */
int load_scale_factor;
@@ -961,7 +963,7 @@ static inline u64 scale_task_load(u64 load, int cpu)
return load;
}
#endif
-
+unsigned int max_task_load(void);
static inline void
inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 06fe2f6591e7..4560a50a4558 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -292,6 +292,15 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_SCHED_FREQ_INPUT
+ {
+ .procname = "sched_task_migrate_notify",
+ .data = &sysctl_sched_task_migrate_notify_pct,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_migrate_notify_proc_handler,
+ },
+#endif
#if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
{
.procname = "sched_window_stats_policy",