summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/sched/core.c 117
-rw-r--r-- kernel/sched/fair.c 74
-rw-r--r-- kernel/sched/sched.h 18
-rw-r--r-- kernel/sysctl.c 7
4 files changed, 132 insertions, 84 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b1d48c53bf7e..d7f43c26e0c5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -860,7 +860,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
sched_info_queued(rq, p);
p->sched_class->enqueue_task(rq, p, flags);
trace_sched_enq_deq_task(p, 1);
- rq->cumulative_runnable_avg += p->se.ravg.demand;
+ inc_cumulative_runnable_avg(rq, p);
}
static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -870,8 +870,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
sched_info_dequeued(rq, p);
p->sched_class->dequeue_task(rq, p, flags);
trace_sched_enq_deq_task(p, 0);
- rq->cumulative_runnable_avg -= p->se.ravg.demand;
- BUG_ON((s64)rq->cumulative_runnable_avg < 0);
+ dec_cumulative_runnable_avg(rq, p);
}
void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1150,11 +1149,7 @@ static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int dest_
mnd.src_cpu = src_cpu;
mnd.dest_cpu = dest_cpu;
- if (sysctl_sched_ravg_window)
- mnd.load = div64_u64((u64)p->se.ravg.demand * 100,
- (u64)(sysctl_sched_ravg_window));
- else
- mnd.load = 0;
+ mnd.load = pct_task_load(p);
atomic_notifier_call_chain(&migration_notifier_head,
0, (void *)&mnd);
}
@@ -1757,6 +1752,15 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl
wq_worker_waking_up(p, cpu_of(rq));
}
+/* Window size (in ns) */
+__read_mostly unsigned int sched_ravg_window = 10000000;
+
+/* Min window size (in ns) = 10ms */
+__read_mostly unsigned int min_sched_ravg_window = 10000000;
+
+/* Max window size (in ns) = 1s */
+__read_mostly unsigned int max_sched_ravg_window = 1000000000;
+
/*
* Called when new window is starting for a task, to record cpu usage over
* recently concluded window(s). Normally 'samples' should be 1. It can be > 1
@@ -1766,9 +1770,9 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl
static inline void
update_history(struct rq *rq, struct task_struct *p, u32 runtime, int samples)
{
- u32 *hist = &p->se.ravg.sum_history[0];
+ u32 *hist = &p->ravg.sum_history[0];
int ridx, widx;
- u32 max = 0;
+ u32 sum = 0, avg;
/* Ignore windows where task had no activity */
if (!runtime)
@@ -1779,86 +1783,97 @@ update_history(struct rq *rq, struct task_struct *p, u32 runtime, int samples)
ridx = widx - samples;
for (; ridx >= 0; --widx, --ridx) {
hist[widx] = hist[ridx];
- if (hist[widx] > max)
- max = hist[widx];
+ sum += hist[widx];
}
for (widx = 0; widx < samples && widx < RAVG_HIST_SIZE; widx++) {
hist[widx] = runtime;
- if (hist[widx] > max)
- max = hist[widx];
+ sum += hist[widx];
}
- p->se.ravg.sum = 0;
+ p->ravg.sum = 0;
if (p->on_rq) {
- rq->cumulative_runnable_avg -= p->se.ravg.demand;
+ rq->cumulative_runnable_avg -= p->ravg.demand;
BUG_ON((s64)rq->cumulative_runnable_avg < 0);
}
- /*
- * Maximum demand seen over previous RAVG_HIST_SIZE windows drives
- * frequency demand for a task. Record maximum in 'demand' attribute.
- */
- p->se.ravg.demand = max;
+
+ avg = sum / RAVG_HIST_SIZE;
+
+ p->ravg.demand = max(avg, runtime);
+
if (p->on_rq)
- rq->cumulative_runnable_avg += p->se.ravg.demand;
+ rq->cumulative_runnable_avg += p->ravg.demand;
}
-/* Window size (in ns) */
-__read_mostly unsigned int sysctl_sched_ravg_window = 50000000;
+/*
+ * Parse the "sched_ravg_window=" boot parameter (window size in ns).
+ *
+ * get_option() stores through an 'int *', so parse into a signed local
+ * instead of handing it the address of the unsigned global, and commit the
+ * result only when an integer was actually found and it is non-negative.
+ * No range clamp is applied here: update_task_ravg() returns early when
+ * the window is below min_sched_ravg_window.
+ *
+ * Always returns 0.
+ */
+static int __init set_sched_ravg_window(char *str)
+{
+	int window;
+
+	/* get_option() returns 0 when no integer could be parsed */
+	if (get_option(&str, &window) && window >= 0)
+		sched_ravg_window = window;
+
+	return 0;
+}
+
+early_param("sched_ravg_window", set_sched_ravg_window);
void update_task_ravg(struct task_struct *p, struct rq *rq, int update_sum)
{
- u32 window_size = sysctl_sched_ravg_window;
+ u32 window_size = sched_ravg_window;
int new_window;
u64 wallclock = sched_clock();
+ if (sched_ravg_window < min_sched_ravg_window)
+ return;
+
do {
s64 delta = 0;
int n;
u64 now = wallclock;
new_window = 0;
- delta = now - p->se.ravg.window_start;
+ delta = now - p->ravg.window_start;
BUG_ON(delta < 0);
if (delta > window_size) {
- p->se.ravg.window_start += window_size;
- now = p->se.ravg.window_start;
+ p->ravg.window_start += window_size;
+ now = p->ravg.window_start;
new_window = 1;
}
if (update_sum) {
- delta = now - p->se.ravg.mark_start;
+ unsigned int cur_freq = rq->cur_freq;
+
+ delta = now - p->ravg.mark_start;
BUG_ON(delta < 0);
- if (likely(rq->cur_freq &&
- rq->cur_freq <= max_possible_freq))
- delta = div64_u64(delta * rq->cur_freq,
+ if (unlikely(cur_freq > max_possible_freq))
+ cur_freq = max_possible_freq;
+
+ delta = div64_u64(delta * cur_freq,
max_possible_freq);
- p->se.ravg.sum += delta;
- WARN_ON(p->se.ravg.sum > window_size);
+ p->ravg.sum += delta;
+ WARN_ON(p->ravg.sum > window_size);
}
if (!new_window)
break;
- update_history(rq, p, p->se.ravg.sum, 1);
+ update_history(rq, p, p->ravg.sum, 1);
- delta = wallclock - p->se.ravg.window_start;
+ delta = wallclock - p->ravg.window_start;
BUG_ON(delta < 0);
n = div64_u64(delta, window_size);
if (n) {
if (!update_sum)
- p->se.ravg.window_start = wallclock;
+ p->ravg.window_start = wallclock;
else
- p->se.ravg.window_start += n * window_size;
- BUG_ON(p->se.ravg.window_start > wallclock);
+ p->ravg.window_start += (u64)n *
+ (u64)window_size;
+ BUG_ON(p->ravg.window_start > wallclock);
if (update_sum)
update_history(rq, p, window_size, n);
}
- p->se.ravg.mark_start = p->se.ravg.window_start;
+ p->ravg.mark_start = p->ravg.window_start;
} while (new_window);
- p->se.ravg.mark_start = wallclock;
+ p->ravg.mark_start = wallclock;
}
/*
@@ -2162,11 +2177,8 @@ out:
mnd.src_cpu = src_cpu;
mnd.dest_cpu = cpu;
- if (sysctl_sched_ravg_window)
- mnd.load = div64_u64((u64)p->se.ravg.demand * 100,
- (u64)(sysctl_sched_ravg_window));
- else
- mnd.load = 0;
+ mnd.load = pct_task_load(p);
+
/*
* Call the migration notifier with mnd for foreground task
* migrations as well as for wakeups if their load is above
@@ -2281,8 +2293,6 @@ void __dl_clear_params(struct task_struct *p)
*/
static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
{
- int i;
-
p->on_rq = 0;
p->se.on_rq = 0;
@@ -2291,12 +2301,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
p->se.vruntime = 0;
- p->se.ravg.sum = 0;
- p->se.ravg.demand = 0;
- p->se.ravg.window_start = 0;
- p->se.ravg.mark_start = 0;
- for (i = 0; i < RAVG_HIST_SIZE; ++i)
- p->se.ravg.sum_history[i] = 0;
+ init_new_task_load(p);
INIT_LIST_HEAD(&p->se.group_node);
@@ -2564,7 +2569,6 @@ void wake_up_new_task(struct task_struct *p)
{
unsigned long flags;
struct rq *rq;
- u64 wallclock = sched_clock();
raw_spin_lock_irqsave(&p->pi_lock, flags);
/* Initialize new task's runnable average */
@@ -2580,8 +2584,6 @@ void wake_up_new_task(struct task_struct *p)
rq = __task_rq_lock(p);
activate_task(rq, p, 0);
- p->se.ravg.window_start = wallclock;
- p->se.ravg.mark_start = wallclock;
p->on_rq = TASK_ON_RQ_QUEUED;
trace_sched_wakeup_new(p);
check_preempt_curr(rq, p, WF_FORK);
@@ -7577,6 +7579,7 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,
if (val != CPUFREQ_POSTCHANGE)
return 0;
+ BUG_ON(!new_freq);
cpu_rq(cpu)->cur_freq = new_freq;
return 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cbf3d3697322..f62f9b7fd049 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2449,6 +2449,39 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
+/* Return the demand currently recorded in @p's ravg state. */
+static inline unsigned int task_load(struct task_struct *p)
+{
+	unsigned int demand = p->ravg.demand;
+
+	return demand;
+}
+
+/*
+ * Return sched_ravg_window, the value pct_task_load() uses as full scale
+ * when converting a task's demand to a percentage.
+ */
+static inline unsigned int max_task_load(void)
+{
+	unsigned int full_scale = sched_ravg_window;
+
+	return full_scale;
+}
+
+/*
+ * Return task demand in percentage scale, i.e. demand * 100 / window size.
+ *
+ * sched_ravg_window can be set from the boot command line and may be zero;
+ * report zero load in that case rather than dividing by zero.
+ */
+unsigned int pct_task_load(struct task_struct *p)
+{
+	unsigned int window = max_task_load();
+
+	if (!window)
+		return 0;
+
+	/* Widen before scaling so demand * 100 cannot wrap in 32 bits */
+	return div64_u64((u64)task_load(p) * 100, (u64)window);
+}
+
+/*
+ * Reset the ravg state of a new task: window_start and mark_start are both
+ * set to the current sched_clock() reading, while sum, demand and every
+ * sum_history slot are cleared.
+ */
+void init_new_task_load(struct task_struct *p)
+{
+	u64 now = sched_clock();
+	int slot = RAVG_HIST_SIZE;
+
+	p->ravg.window_start = now;
+	p->ravg.mark_start = now;
+
+	while (slot-- > 0)
+		p->ravg.sum_history[slot] = 0;
+
+	p->ravg.demand = 0;
+	p->ravg.sum = 0;
+}
+
#ifdef CONFIG_SMP
/* Precomputed fixed inverse multiplies for multiplication by y^n */
static const u32 runnable_avg_yN_inv[] = {
@@ -5558,6 +5591,7 @@ struct lb_env {
};
static DEFINE_PER_CPU(bool, dbs_boost_needed);
+static DEFINE_PER_CPU(int, dbs_boost_load_moved);
/*
* Is this task likely cache-hot:
@@ -5746,7 +5780,7 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
*
* Returns a task if successful and NULL otherwise.
*/
-static struct task_struct *detach_one_task(struct lb_env *env, int *total_run_moved)
+static struct task_struct *detach_one_task(struct lb_env *env)
{
struct task_struct *p, *n;
@@ -5765,10 +5799,7 @@ static struct task_struct *detach_one_task(struct lb_env *env, int *total_run_mo
* inside detach_tasks().
*/
schedstat_inc(env->sd, lb_gained[env->idle]);
-
- if (sysctl_sched_ravg_window)
- *total_run_moved += div64_u64((u64)p->se.ravg.demand *
- 100, (u64)(sysctl_sched_ravg_window));
+ per_cpu(dbs_boost_load_moved, env->dst_cpu) += pct_task_load(p);
return p;
}
@@ -5783,7 +5814,7 @@ static const unsigned int sched_nr_migrate_break = 32;
*
* Returns number of detached tasks if successful and 0 otherwise.
*/
-static int detach_tasks(struct lb_env *env, int *total_run_moved)
+static int detach_tasks(struct lb_env *env)
{
struct list_head *tasks = &env->src_rq->cfs_tasks;
struct task_struct *p;
@@ -5833,9 +5864,7 @@ static int detach_tasks(struct lb_env *env, int *total_run_moved)
detached++;
env->imbalance -= load;
- if (sysctl_sched_ravg_window)
- *total_run_moved += div64_u64((u64)p->se.ravg.demand *
- 100, (u64)(sysctl_sched_ravg_window));
+ per_cpu(dbs_boost_load_moved, env->dst_cpu) += pct_task_load(p);
#ifdef CONFIG_PREEMPT
/*
@@ -6975,7 +7004,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
int *continue_balancing)
{
int ld_moved = 0, cur_ld_moved, active_balance = 0;
- int total_run_moved = 0;
struct sched_domain *sd_parent = sd->parent;
struct sched_group *group = NULL;
struct rq *busiest = NULL;
@@ -7004,6 +7032,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
cpumask_copy(cpus, cpu_active_mask);
+ per_cpu(dbs_boost_load_moved, this_cpu) = 0;
schedstat_inc(sd, lb_count[idle]);
redo:
@@ -7049,7 +7078,7 @@ more_balance:
* cur_ld_moved - load moved in current iteration
* ld_moved - cumulative load moved across iterations
*/
- cur_ld_moved = detach_tasks(&env, &total_run_moved);
+ cur_ld_moved = detach_tasks(&env);
/*
* We've detached some tasks from busiest_rq. Every
@@ -7187,13 +7216,16 @@ more_balance:
if (per_cpu(dbs_boost_needed, this_cpu)) {
struct migration_notify_data mnd;
- per_cpu(dbs_boost_needed, this_cpu) = false;
-
mnd.src_cpu = cpu_of(busiest);
mnd.dest_cpu = this_cpu;
- mnd.load = total_run_moved;
+ mnd.load = per_cpu(dbs_boost_load_moved, this_cpu);
+ if (mnd.load > 100)
+ mnd.load = 100;
atomic_notifier_call_chain(&migration_notifier_head,
0, (void *)&mnd);
+ per_cpu(dbs_boost_needed, this_cpu) = false;
+ per_cpu(dbs_boost_load_moved, this_cpu) = 0;
+
}
}
if (likely(!active_balance)) {
@@ -7391,13 +7423,14 @@ static int active_load_balance_cpu_stop(void *data)
struct rq *busiest_rq = data;
int busiest_cpu = cpu_of(busiest_rq);
int target_cpu = busiest_rq->push_cpu;
- int total_run_moved = 0;
struct rq *target_rq = cpu_rq(target_cpu);
struct sched_domain *sd;
struct task_struct *p = NULL;
raw_spin_lock_irq(&busiest_rq->lock);
+ per_cpu(dbs_boost_load_moved, target_cpu) = 0;
+
/* make sure the requested cpu hasn't gone down in the meantime */
if (unlikely(busiest_cpu != smp_processor_id() ||
!busiest_rq->active_balance))
@@ -7434,7 +7467,7 @@ static int active_load_balance_cpu_stop(void *data)
schedstat_inc(sd, alb_count);
- p = detach_one_task(&env, &total_run_moved);
+ p = detach_one_task(&env);
if (p)
schedstat_inc(sd, alb_pushed);
else
@@ -7453,13 +7486,16 @@ out_unlock:
if (per_cpu(dbs_boost_needed, target_cpu)) {
struct migration_notify_data mnd;
- per_cpu(dbs_boost_needed, target_cpu) = false;
-
mnd.src_cpu = cpu_of(busiest_rq);
mnd.dest_cpu = target_cpu;
- mnd.load = total_run_moved;
+ mnd.load = per_cpu(dbs_boost_load_moved, target_cpu);
+ if (mnd.load > 100)
+ mnd.load = 100;
atomic_notifier_call_chain(&migration_notifier_head,
0, (void *)&mnd);
+
+ per_cpu(dbs_boost_needed, target_cpu) = false;
+ per_cpu(dbs_boost_load_moved, target_cpu) = 0;
}
return 0;
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b85da1bb4b49..fc3f624ca8c1 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -35,7 +35,6 @@ extern void update_cpu_load_active(struct rq *this_rq);
static inline void update_cpu_load_active(struct rq *this_rq) { }
#endif
-extern unsigned int sysctl_sched_ravg_window;
/*
* Helpers for converting nanosecond timing to jiffy resolution
*/
@@ -912,6 +911,23 @@ static inline void sched_ttwu_pending(void) { }
#include "stats.h"
#include "auto_group.h"
+extern unsigned int sched_ravg_window;
+extern unsigned int pct_task_load(struct task_struct *p);
+extern void init_new_task_load(struct task_struct *p);
+
+/* Add @p's demand to @rq's cumulative_runnable_avg. */
+static inline void
+inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
+{
+	rq->cumulative_runnable_avg = rq->cumulative_runnable_avg +
+				      p->ravg.demand;
+}
+
+/*
+ * Subtract @p's demand from @rq's cumulative_runnable_avg and BUG if the
+ * result, viewed as a signed 64-bit value, has gone negative.
+ */
+static inline void
+dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
+{
+	rq->cumulative_runnable_avg = rq->cumulative_runnable_avg -
+				      p->ravg.demand;
+	BUG_ON((s64)rq->cumulative_runnable_avg < 0);
+}
+
#ifdef CONFIG_CGROUP_SCHED
/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1314618f07f8..292979f360d5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -293,13 +293,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "sched_ravg_window",
- .data = &sysctl_sched_ravg_window,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
.procname = "sched_wakeup_load_threshold",
.data = &sysctl_sched_wakeup_load_threshold,
.maxlen = sizeof(unsigned int),