-rw-r--r--  include/linux/sched.h         15
-rw-r--r--  include/trace/events/sched.h  39
-rw-r--r--  kernel/fork.c                  2
-rw-r--r--  kernel/sched/core.c           24
-rw-r--r--  kernel/sched/hmp.c            92
-rw-r--r--  kernel/sched/sched.h           6
-rw-r--r--  kernel/smpboot.c               2
7 files changed, 141 insertions, 39 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a395d8a9ff73..06acefeffd4c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -356,7 +356,7 @@ extern int lockdep_tasklist_lock_is_held(void);
extern void sched_init(void);
extern void sched_init_smp(void);
extern asmlinkage void schedule_tail(struct task_struct *prev);
-extern void init_idle(struct task_struct *idle, int cpu);
+extern void init_idle(struct task_struct *idle, int cpu, bool hotplug);
extern void init_idle_bootup_task(struct task_struct *idle);
extern cpumask_var_t cpu_isolated_map;
@@ -1332,11 +1332,15 @@ struct ravg {
* sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency
* demand for tasks.
*
- * 'curr_window' represents task's contribution to cpu busy time
- * statistics (rq->curr_runnable_sum) in current window
+ * 'curr_window_cpu' represents task's contribution to cpu busy time on
+ * various CPUs in the current window
*
- * 'prev_window' represents task's contribution to cpu busy time
- * statistics (rq->prev_runnable_sum) in previous window
+ * 'prev_window_cpu' represents task's contribution to cpu busy time on
+ * various CPUs in the previous window
+ *
+ * 'curr_window' represents the sum of all entries in curr_window_cpu
+ *
+ * 'prev_window' represents the sum of all entries in prev_window_cpu
*
* 'pred_demand' represents task's current predicted cpu busy time
*
@@ -1346,6 +1350,7 @@ struct ravg {
u64 mark_start;
u32 sum, demand;
u32 sum_history[RAVG_HIST_SIZE_MAX];
+ u32 *curr_window_cpu, *prev_window_cpu;
u32 curr_window, prev_window;
u16 active_windows;
u32 pred_demand;
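The documentation above states the invariant the rest of the patch maintains: curr_window and prev_window remain the totals, while curr_window_cpu[] and prev_window_cpu[] break the same busy time out per CPU, so each scalar should always equal the sum of its array. A minimal sketch of that relation, using a hypothetical helper name and a plain typedef in place of the kernel's u32:

typedef unsigned int u32;

/* Hypothetical helper, not part of the patch: total contribution across
 * CPUs. Passed curr_window_cpu[] it should return curr_window; passed
 * prev_window_cpu[] it should return prev_window. */
static u32 window_cpu_total(const u32 *window_cpu, int nr_cpu_ids)
{
	u32 sum = 0;
	int i;

	for (i = 0; i < nr_cpu_ids; i++)
		sum += window_cpu[i];

	return sum;
}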
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index daf69b7df534..209355c66e02 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -260,6 +260,30 @@ TRACE_EVENT(sched_set_boost,
TP_printk("ref_count=%d", __entry->ref_count)
);
+#if defined(CREATE_TRACE_POINTS) && defined(CONFIG_SCHED_HMP)
+static inline void __window_data(u32 *dst, u32 *src)
+{
+ if (src)
+ memcpy(dst, src, nr_cpu_ids * sizeof(u32));
+ else
+ memset(dst, 0, nr_cpu_ids * sizeof(u32));
+}
+
+struct trace_seq;
+const char *__window_print(struct trace_seq *p, const u32 *buf, int buf_len)
+{
+ int i;
+ const char *ret = p->buffer + seq_buf_used(&p->seq);
+
+ for (i = 0; i < buf_len; i++)
+ trace_seq_printf(p, "%u ", buf[i]);
+
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+#endif
+
TRACE_EVENT(sched_update_task_ravg,
TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt,
@@ -288,10 +312,12 @@ TRACE_EVENT(sched_update_task_ravg,
__field( u64, rq_ps )
__field( u64, grp_cs )
__field( u64, grp_ps )
- __field( u64, grp_nt_cs )
- __field( u64, grp_nt_ps )
+ __field( u64, grp_nt_cs )
+ __field( u64, grp_nt_ps )
__field( u32, curr_window )
__field( u32, prev_window )
+ __dynamic_array(u32, curr_sum, nr_cpu_ids )
+ __dynamic_array(u32, prev_sum, nr_cpu_ids )
__field( u64, nt_cs )
__field( u64, nt_ps )
__field( u32, active_windows )
@@ -321,12 +347,14 @@ TRACE_EVENT(sched_update_task_ravg,
__entry->grp_nt_ps = cpu_time ? cpu_time->nt_prev_runnable_sum : 0;
__entry->curr_window = p->ravg.curr_window;
__entry->prev_window = p->ravg.prev_window;
+ __window_data(__get_dynamic_array(curr_sum), p->ravg.curr_window_cpu);
+ __window_data(__get_dynamic_array(prev_sum), p->ravg.prev_window_cpu);
__entry->nt_cs = rq->nt_curr_runnable_sum;
__entry->nt_ps = rq->nt_prev_runnable_sum;
__entry->active_windows = p->ravg.active_windows;
),
- TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu pred_demand %u rq_cs %llu rq_ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu"
+ TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu pred_demand %u rq_cs %llu rq_ps %llu cur_window %u (%s) prev_window %u (%s) nt_cs %llu nt_ps %llu active_wins %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu"
, __entry->wallclock, __entry->win_start, __entry->delta,
task_event_names[__entry->evt], __entry->cpu,
__entry->cur_freq, __entry->cur_pid,
@@ -334,7 +362,10 @@ TRACE_EVENT(sched_update_task_ravg,
__entry->delta_m, __entry->demand,
__entry->sum, __entry->irqtime, __entry->pred_demand,
__entry->rq_cs, __entry->rq_ps, __entry->curr_window,
- __entry->prev_window, __entry->nt_cs, __entry->nt_ps,
+ __window_print(p, __get_dynamic_array(curr_sum), nr_cpu_ids),
+ __entry->prev_window,
+ __window_print(p, __get_dynamic_array(prev_sum), nr_cpu_ids),
+ __entry->nt_cs, __entry->nt_ps,
__entry->active_windows, __entry->grp_cs,
__entry->grp_ps, __entry->grp_nt_cs, __entry->grp_nt_ps)
);
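The trace event gains two dynamic arrays sized by nr_cpu_ids. At record time __window_data() copies the task's per-CPU arrays into the ring buffer, zero-filling when the task has no arrays (e.g. an exiting task whose arrays have already been released). At output time __window_print() renders each array as a space-separated list that TP_printk places in parentheses after the existing curr_window/prev_window totals. A rough userspace stand-in for that formatting step, with hypothetical names and no trace_seq machinery:

#include <stdio.h>

typedef unsigned int u32;

/* Hypothetical stand-in for __window_print(): emit each per-CPU
 * contribution followed by a space, as in the "(...)" portion of the
 * trace output. */
static void print_window_cpu(const u32 *buf, int buf_len)
{
	int i;

	for (i = 0; i < buf_len; i++)
		printf("%u ", buf[i]);
}

int main(void)
{
	u32 curr_window_cpu[4] = { 512, 0, 256, 0 };

	printf("cur_window %u (", 768u);
	print_window_cpu(curr_window_cpu, 4);
	printf(")\n");		/* prints: cur_window 768 (512 0 256 0 ) */
	return 0;
}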
diff --git a/kernel/fork.c b/kernel/fork.c
index e89d0bae6f20..8a5962276788 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1684,7 +1684,7 @@ struct task_struct *fork_idle(int cpu)
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0);
if (!IS_ERR(task)) {
init_idle_pids(task->pids);
- init_idle(task, cpu);
+ init_idle(task, cpu, false);
}
return task;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 024fb1007c78..01bc9edc8b81 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2255,13 +2255,13 @@ void __dl_clear_params(struct task_struct *p)
void sched_exit(struct task_struct *p)
{
unsigned long flags;
- int cpu = get_cpu();
- struct rq *rq = cpu_rq(cpu);
+ struct rq *rq;
u64 wallclock;
sched_set_group_id(p, 0);
- raw_spin_lock_irqsave(&rq->lock, flags);
+ rq = task_rq_lock(p, &flags);
+
/* rq->curr == p */
wallclock = sched_ktime_clock();
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
@@ -2269,11 +2269,13 @@ void sched_exit(struct task_struct *p)
reset_task_stats(p);
p->ravg.mark_start = wallclock;
p->ravg.sum_history[0] = EXITING_TASK_MARKER;
+
+ kfree(p->ravg.curr_window_cpu);
+ kfree(p->ravg.prev_window_cpu);
+
enqueue_task(rq, p, 0);
clear_ed_task(p, rq);
- raw_spin_unlock_irqrestore(&rq->lock, flags);
-
- put_cpu();
+ task_rq_unlock(rq, p, &flags);
}
#endif /* CONFIG_SCHED_HMP */
@@ -2377,6 +2379,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
int cpu = get_cpu();
__sched_fork(clone_flags, p);
+ init_new_task_load(p, false);
/*
* We mark the process as running here. This guarantees that
* nobody will actually run it, and a signal or other external
@@ -2562,7 +2565,6 @@ void wake_up_new_task(struct task_struct *p)
struct rq *rq;
raw_spin_lock_irqsave(&p->pi_lock, flags);
- init_new_task_load(p);
add_new_task_to_grp(p);
/* Initialize new task's runnable average */
init_entity_runnable_average(&p->se);
@@ -5210,17 +5212,21 @@ void init_idle_bootup_task(struct task_struct *idle)
* init_idle - set up an idle thread for a given CPU
* @idle: task in question
* @cpu: cpu the idle task belongs to
+ * @cpu_up: differentiate between initial boot vs hotplug
*
* NOTE: this function does not set the idle thread's NEED_RESCHED
* flag, to make booting more robust.
*/
-void init_idle(struct task_struct *idle, int cpu)
+void init_idle(struct task_struct *idle, int cpu, bool cpu_up)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
__sched_fork(0, idle);
+ if (!cpu_up)
+ init_new_task_load(idle, true);
+
raw_spin_lock_irqsave(&idle->pi_lock, flags);
raw_spin_lock(&rq->lock);
@@ -8051,7 +8057,7 @@ void __init sched_init(void)
* but because we are the idle thread, we just pick up running again
* when this runqueue becomes "idle".
*/
- init_idle(current, smp_processor_id());
+ init_idle(current, smp_processor_id(), false);
calc_load_update = jiffies + LOAD_FREQ;
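The core.c changes pin down the lifetime of the new arrays: init_new_task_load() moves from wake_up_new_task() into sched_fork(), so the arrays exist before the task is visible to window accounting; init_idle() gains a flag so an idle thread allocates only on the initial-boot path and re-onlining a CPU does not reallocate the arrays for its reused idle thread (hence the bare true in smpboot.c); and sched_exit() now takes task_rq_lock() instead of locking the current CPU's runqueue and frees both arrays after resetting the task's stats. A simplified userspace model of that lifetime, with calloc()/free() standing in for kcalloc(..., GFP_ATOMIC)/kfree() and the BUG_ON() on allocation failure reduced to an error return:

#include <stdlib.h>

typedef unsigned int u32;

struct ravg_sketch {
	u32 *curr_window_cpu;
	u32 *prev_window_cpu;
};

/* Fork-time model: in the patch this happens in init_new_task_load(). */
static int ravg_alloc_model(struct ravg_sketch *r, int nr_cpu_ids)
{
	r->curr_window_cpu = calloc(nr_cpu_ids, sizeof(u32));
	r->prev_window_cpu = calloc(nr_cpu_ids, sizeof(u32));
	if (!r->curr_window_cpu || !r->prev_window_cpu)
		return -1;	/* the kernel code BUG()s here instead */
	return 0;
}

/* Exit-time model: in the patch the kfree() calls happen in sched_exit()
 * under task_rq_lock(). */
static void ravg_free_model(struct ravg_sketch *r)
{
	free(r->curr_window_cpu);
	free(r->prev_window_cpu);
	r->curr_window_cpu = NULL;
	r->prev_window_cpu = NULL;
}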
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 3d5de8ba70a2..6ede7a224430 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -1611,7 +1611,7 @@ unsigned int cpu_temp(int cpu)
return 0;
}
-void init_new_task_load(struct task_struct *p)
+void init_new_task_load(struct task_struct *p, bool idle_task)
{
int i;
u32 init_load_windows = sched_init_task_load_windows;
@@ -1623,6 +1623,15 @@ void init_new_task_load(struct task_struct *p)
memset(&p->ravg, 0, sizeof(struct ravg));
p->cpu_cycles = 0;
+ p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_ATOMIC);
+ p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_ATOMIC);
+
+ /* Don't have much choice. CPU frequency would be bogus */
+ BUG_ON(!p->ravg.curr_window_cpu || !p->ravg.prev_window_cpu);
+
+ if (idle_task)
+ return;
+
if (init_load_pct)
init_load_windows = div64_u64((u64)init_load_pct *
(u64)sched_ravg_window, 100);
@@ -2161,6 +2170,32 @@ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
p->ravg.pred_demand = new;
}
+static u32 empty_windows[NR_CPUS];
+
+static void rollover_task_window(struct task_struct *p, bool full_window)
+{
+ u32 *curr_cpu_windows = empty_windows;
+ u32 curr_window;
+ int i;
+
+ /* Rollover the sum */
+ curr_window = 0;
+
+ if (!full_window) {
+ curr_window = p->ravg.curr_window;
+ curr_cpu_windows = p->ravg.curr_window_cpu;
+ }
+
+ p->ravg.prev_window = curr_window;
+ p->ravg.curr_window = 0;
+
+ /* Roll over individual CPU contributions */
+ for (i = 0; i < nr_cpu_ids; i++) {
+ p->ravg.prev_window_cpu[i] = curr_cpu_windows[i];
+ p->ravg.curr_window_cpu[i] = 0;
+ }
+}
+
/*
* Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
*/
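rollover_task_window() factors the per-task window rollover out of update_cpu_busy_time() and extends it to the per-CPU breakdown: on a window boundary the just-completed window (total plus per-CPU contributions) becomes the previous window, unless a full window elapsed with no activity, in which case everything rolls over to zero; the current-window values always restart at zero. A compilable userspace model of that logic, with a fixed CPU count standing in for nr_cpu_ids:

#include <stdbool.h>

typedef unsigned int u32;

#define NR_CPUS_MODEL 4			/* stands in for nr_cpu_ids */

struct ravg_sketch {
	u32 curr_window, prev_window;
	u32 curr_window_cpu[NR_CPUS_MODEL];
	u32 prev_window_cpu[NR_CPUS_MODEL];
};

static const u32 empty_windows[NR_CPUS_MODEL];

/* Model of rollover_task_window(): full_window means at least one whole
 * window elapsed without the task running, so nothing carries over. */
static void rollover_task_window_model(struct ravg_sketch *r, bool full_window)
{
	const u32 *curr_cpu_windows = empty_windows;
	u32 curr_window = 0;
	int i;

	if (!full_window) {
		curr_window = r->curr_window;
		curr_cpu_windows = r->curr_window_cpu;
	}

	r->prev_window = curr_window;
	r->curr_window = 0;

	for (i = 0; i < NR_CPUS_MODEL; i++) {
		r->prev_window_cpu[i] = curr_cpu_windows[i];
		r->curr_window_cpu[i] = 0;
	}
}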
@@ -2181,6 +2216,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
int prev_sum_reset = 0;
bool new_task;
struct related_thread_group *grp;
+ int cpu = rq->cpu;
new_window = mark_start < window_start;
if (new_window) {
@@ -2240,15 +2276,9 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
* Handle per-task window rollover. We don't care about the idle
* task or exiting tasks.
*/
- if (new_window && !is_idle_task(p) && !exiting_task(p)) {
- u32 curr_window = 0;
+ if (new_window && !is_idle_task(p) && !exiting_task(p))
+ rollover_task_window(p, full_window);
- if (!full_window)
- curr_window = p->ravg.curr_window;
-
- p->ravg.prev_window = curr_window;
- p->ravg.curr_window = 0;
- }
if (flip_counters) {
u64 curr_sum = *curr_runnable_sum;
@@ -2310,8 +2340,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
if (new_task)
*nt_curr_runnable_sum += delta;
- if (!is_idle_task(p) && !exiting_task(p))
+ if (!is_idle_task(p) && !exiting_task(p)) {
p->ravg.curr_window += delta;
+ p->ravg.curr_window_cpu[cpu] += delta;
+ }
return;
}
@@ -2336,8 +2368,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
* contribution to previous completed window.
*/
delta = scale_exec_time(window_start - mark_start, rq);
- if (!exiting_task(p))
+ if (!exiting_task(p)) {
p->ravg.prev_window += delta;
+ p->ravg.prev_window_cpu[cpu] += delta;
+ }
} else {
/*
* Since at least one full window has elapsed,
@@ -2345,8 +2379,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
* full window (window_size).
*/
delta = scale_exec_time(window_size, rq);
- if (!exiting_task(p))
+ if (!exiting_task(p)) {
p->ravg.prev_window = delta;
+ p->ravg.prev_window_cpu[cpu] = delta;
+ }
}
*prev_runnable_sum += delta;
@@ -2359,8 +2395,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
if (new_task)
*nt_curr_runnable_sum += delta;
- if (!exiting_task(p))
+ if (!exiting_task(p)) {
p->ravg.curr_window = delta;
+ p->ravg.curr_window_cpu[cpu] = delta;
+ }
return;
}
@@ -2386,8 +2424,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
* contribution to previous completed window.
*/
delta = scale_exec_time(window_start - mark_start, rq);
- if (!is_idle_task(p) && !exiting_task(p))
+ if (!is_idle_task(p) && !exiting_task(p)) {
p->ravg.prev_window += delta;
+ p->ravg.prev_window_cpu[cpu] += delta;
+ }
} else {
/*
* Since at least one full window has elapsed,
@@ -2395,8 +2435,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
* full window (window_size).
*/
delta = scale_exec_time(window_size, rq);
- if (!is_idle_task(p) && !exiting_task(p))
+ if (!is_idle_task(p) && !exiting_task(p)) {
p->ravg.prev_window = delta;
+ p->ravg.prev_window_cpu[cpu] = delta;
+ }
}
/*
@@ -2413,8 +2455,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
if (new_task)
*nt_curr_runnable_sum += delta;
- if (!is_idle_task(p) && !exiting_task(p))
+ if (!is_idle_task(p) && !exiting_task(p)) {
p->ravg.curr_window = delta;
+ p->ravg.curr_window_cpu[cpu] = delta;
+ }
return;
}
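Throughout update_cpu_busy_time() the pattern is the same: wherever the scalar curr_window/prev_window was updated, the slot for the CPU the time was accrued on is now updated alongside it, which is what keeps the sum invariant from include/linux/sched.h true. A small self-contained check of that paired-update pattern (model code, not from the patch):

#include <assert.h>

typedef unsigned int u32;

struct ravg_sketch {
	u32 curr_window;
	u32 curr_window_cpu[4];		/* model: 4 CPUs */
};

/* Mirror every update of the scalar total into the per-CPU slot. */
static void account_busy(struct ravg_sketch *r, int cpu, u32 delta)
{
	r->curr_window += delta;
	r->curr_window_cpu[cpu] += delta;
}

int main(void)
{
	struct ravg_sketch r = { 0 };
	u32 sum = 0;
	int i;

	account_busy(&r, 0, 100);
	account_busy(&r, 2, 50);

	for (i = 0; i < 4; i++)
		sum += r.curr_window_cpu[i];
	assert(sum == r.curr_window);	/* total and breakdown agree */
	return 0;
}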
@@ -2829,11 +2873,23 @@ void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock)
void reset_task_stats(struct task_struct *p)
{
u32 sum = 0;
+ u32 *curr_window_ptr = NULL;
+ u32 *prev_window_ptr = NULL;
- if (exiting_task(p))
+ if (exiting_task(p)) {
sum = EXITING_TASK_MARKER;
+ } else {
+ curr_window_ptr = p->ravg.curr_window_cpu;
+ prev_window_ptr = p->ravg.prev_window_cpu;
+ memset(curr_window_ptr, 0, sizeof(u32) * nr_cpu_ids);
+ memset(prev_window_ptr, 0, sizeof(u32) * nr_cpu_ids);
+ }
memset(&p->ravg, 0, sizeof(struct ravg));
+
+ p->ravg.curr_window_cpu = curr_window_ptr;
+ p->ravg.prev_window_cpu = prev_window_ptr;
+
/* Retain EXITING_TASK marker */
p->ravg.sum_history[0] = sum;
}
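reset_task_stats() zeroes the whole embedded struct ravg with memset(), which would also wipe the two heap pointers and leak the arrays, so the patch saves the pointers (and clears the arrays' contents) first and restores them afterwards; for a task already marked as exiting, only the EXITING_TASK_MARKER is retained and the pointers end up NULL. The reset_all_task_stats() hunk additionally takes each task's pi_lock around the call. The save/restore idiom in isolation, as a hedged userspace model of the non-exiting case:

#include <string.h>

typedef unsigned int u32;

struct ravg_sketch {
	u32 sum_history[5];
	u32 *curr_window_cpu, *prev_window_cpu;
	u32 curr_window, prev_window;
};

/* Model of the pointer save/restore around the memset() in
 * reset_task_stats(): the heap-allocated per-CPU arrays must survive a
 * wholesale reset of the embedded struct. */
static void reset_ravg_model(struct ravg_sketch *r, int nr_cpu_ids)
{
	u32 *curr = r->curr_window_cpu;
	u32 *prev = r->prev_window_cpu;

	memset(curr, 0, nr_cpu_ids * sizeof(u32));
	memset(prev, 0, nr_cpu_ids * sizeof(u32));

	memset(r, 0, sizeof(*r));	/* wipes the pointer fields too */

	r->curr_window_cpu = curr;	/* restore the saved pointers */
	r->prev_window_cpu = prev;
}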
@@ -2889,7 +2945,9 @@ static void reset_all_task_stats(void)
read_lock(&tasklist_lock);
do_each_thread(g, p) {
+ raw_spin_lock(&p->pi_lock);
reset_task_stats(p);
+ raw_spin_unlock(&p->pi_lock);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 27b28369440d..f786767aa353 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1057,7 +1057,7 @@ extern unsigned int __read_mostly sched_upmigrate;
extern unsigned int __read_mostly sched_downmigrate;
extern unsigned int __read_mostly sysctl_sched_spill_nr_run;
-extern void init_new_task_load(struct task_struct *p);
+extern void init_new_task_load(struct task_struct *p, bool idle_task);
extern u64 sched_ktime_clock(void);
extern int got_boost_kick(void);
extern int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb);
@@ -1503,7 +1503,9 @@ static inline struct sched_cluster *rq_cluster(struct rq *rq)
return NULL;
}
-static inline void init_new_task_load(struct task_struct *p) { }
+static inline void init_new_task_load(struct task_struct *p, bool idle_task)
+{
+}
static inline u64 scale_load_to_cpu(u64 load, int cpu)
{
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 6949476a118f..3a0415803b09 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -32,7 +32,7 @@ struct task_struct *idle_thread_get(unsigned int cpu)
if (!tsk)
return ERR_PTR(-ENOMEM);
- init_idle(tsk, cpu);
+ init_idle(tsk, cpu, true);
return tsk;
}