 -rw-r--r--   include/linux/sched.h            15
 -rw-r--r--   include/trace/events/sched.h     39
 -rw-r--r--   kernel/fork.c                     2
 -rw-r--r--   kernel/sched/core.c              24
 -rw-r--r--   kernel/sched/hmp.c               92
 -rw-r--r--   kernel/sched/sched.h              6
 -rw-r--r--   kernel/smpboot.c                  2

7 files changed, 141 insertions, 39 deletions
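
In short, the patch makes the HMP window-based load tracker remember, per task, how much CPU busy time was contributed on each individual CPU instead of only a single per-task total: struct ravg gains nr_cpu_ids-sized curr_window_cpu[] and prev_window_cpu[] arrays, and the existing curr_window/prev_window fields become the sums of those arrays. As a minimal standalone sketch of that invariant (userspace C, not the kernel code itself; struct ravg_sketch and the helper names below are illustrative stand-ins, and allocation and error handling are simplified):

#include <stdlib.h>

/*
 * Simplified stand-in for the kernel's struct ravg after this patch:
 * curr_window/prev_window remain the per-task totals, while the new
 * arrays record how those totals split across CPUs.
 */
struct ravg_sketch {
    unsigned int curr_window;        /* sum of curr_window_cpu[] */
    unsigned int prev_window;        /* sum of prev_window_cpu[] */
    unsigned int *curr_window_cpu;   /* contribution per CPU, current window */
    unsigned int *prev_window_cpu;   /* contribution per CPU, previous window */
};

/* Roughly what init_new_task_load() now does: allocate and zero both arrays. */
int ravg_sketch_init(struct ravg_sketch *r, int nr_cpu_ids)
{
    r->curr_window = r->prev_window = 0;
    r->curr_window_cpu = calloc(nr_cpu_ids, sizeof(*r->curr_window_cpu));
    r->prev_window_cpu = calloc(nr_cpu_ids, sizeof(*r->prev_window_cpu));
    return (r->curr_window_cpu && r->prev_window_cpu) ? 0 : -1;
}

/*
 * Roughly what every accounting path in update_cpu_busy_time() now does:
 * charge the delta both to the per-task total and to the slot of the CPU
 * the task ran on, so curr_window stays equal to the sum of curr_window_cpu[].
 */
void ravg_sketch_account(struct ravg_sketch *r, int cpu, unsigned int delta)
{
    r->curr_window += delta;
    r->curr_window_cpu[cpu] += delta;
}

Keeping the per-task totals alongside the per-CPU split lets existing consumers of curr_window/prev_window keep working while the new arrays record where the time was actually accrued.
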
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a395d8a9ff73..06acefeffd4c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -356,7 +356,7 @@ extern int lockdep_tasklist_lock_is_held(void);
 extern void sched_init(void);
 extern void sched_init_smp(void);
 extern asmlinkage void schedule_tail(struct task_struct *prev);
-extern void init_idle(struct task_struct *idle, int cpu);
+extern void init_idle(struct task_struct *idle, int cpu, bool hotplug);
 extern void init_idle_bootup_task(struct task_struct *idle);

 extern cpumask_var_t cpu_isolated_map;
@@ -1332,11 +1332,15 @@ struct ravg {
      * sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency
      * demand for tasks.
      *
-     * 'curr_window' represents task's contribution to cpu busy time
-     * statistics (rq->curr_runnable_sum) in current window
+     * 'curr_window_cpu' represents task's contribution to cpu busy time on
+     * various CPUs in the current window
      *
-     * 'prev_window' represents task's contribution to cpu busy time
-     * statistics (rq->prev_runnable_sum) in previous window
+     * 'prev_window_cpu' represents task's contribution to cpu busy time on
+     * various CPUs in the previous window
+     *
+     * 'curr_window' represents the sum of all entries in curr_window_cpu
+     *
+     * 'prev_window' represents the sum of all entries in prev_window_cpu
      *
      * 'pred_demand' represents task's current predicted cpu busy time
      *
@@ -1346,6 +1350,7 @@ struct ravg {
     u64 mark_start;
     u32 sum, demand;
     u32 sum_history[RAVG_HIST_SIZE_MAX];
+    u32 *curr_window_cpu, *prev_window_cpu;
     u32 curr_window, prev_window;
     u16 active_windows;
     u32 pred_demand;
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index daf69b7df534..209355c66e02 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -260,6 +260,30 @@ TRACE_EVENT(sched_set_boost,
     TP_printk("ref_count=%d", __entry->ref_count)
 );

+#if defined(CREATE_TRACE_POINTS) && defined(CONFIG_SCHED_HMP)
+static inline void __window_data(u32 *dst, u32 *src)
+{
+    if (src)
+        memcpy(dst, src, nr_cpu_ids * sizeof(u32));
+    else
+        memset(dst, 0, nr_cpu_ids * sizeof(u32));
+}
+
+struct trace_seq;
+const char *__window_print(struct trace_seq *p, const u32 *buf, int buf_len)
+{
+    int i;
+    const char *ret = p->buffer + seq_buf_used(&p->seq);
+
+    for (i = 0; i < buf_len; i++)
+        trace_seq_printf(p, "%u ", buf[i]);
+
+    trace_seq_putc(p, 0);
+
+    return ret;
+}
+#endif
+
 TRACE_EVENT(sched_update_task_ravg,

     TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt,
@@ -288,10 +312,12 @@ TRACE_EVENT(sched_update_task_ravg,
         __field( u64, rq_ps )
         __field( u64, grp_cs )
         __field( u64, grp_ps )
-        __field( u64, grp_nt_cs )
-        __field( u64, grp_nt_ps )
+        __field( u64, grp_nt_cs )
+        __field( u64, grp_nt_ps )
         __field( u32, curr_window )
         __field( u32, prev_window )
+        __dynamic_array(u32, curr_sum, nr_cpu_ids )
+        __dynamic_array(u32, prev_sum, nr_cpu_ids )
         __field( u64, nt_cs )
         __field( u64, nt_ps )
         __field( u32, active_windows )
@@ -321,12 +347,14 @@ TRACE_EVENT(sched_update_task_ravg,
         __entry->grp_nt_ps = cpu_time ?
                 cpu_time->nt_prev_runnable_sum : 0;
         __entry->curr_window = p->ravg.curr_window;
         __entry->prev_window = p->ravg.prev_window;
+        __window_data(__get_dynamic_array(curr_sum), p->ravg.curr_window_cpu);
+        __window_data(__get_dynamic_array(prev_sum), p->ravg.prev_window_cpu);
         __entry->nt_cs = rq->nt_curr_runnable_sum;
         __entry->nt_ps = rq->nt_prev_runnable_sum;
         __entry->active_windows = p->ravg.active_windows;
     ),

-    TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu pred_demand %u rq_cs %llu rq_ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu"
+    TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu pred_demand %u rq_cs %llu rq_ps %llu cur_window %u (%s) prev_window %u (%s) nt_cs %llu nt_ps %llu active_wins %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu"
         , __entry->wallclock, __entry->win_start, __entry->delta,
         task_event_names[__entry->evt], __entry->cpu, __entry->cur_freq, __entry->cur_pid,
@@ -334,7 +362,10 @@ TRACE_EVENT(sched_update_task_ravg,
         __entry->delta_m, __entry->demand, __entry->sum, __entry->irqtime,
         __entry->pred_demand, __entry->rq_cs, __entry->rq_ps,
         __entry->curr_window,
-        __entry->prev_window, __entry->nt_cs, __entry->nt_ps,
+        __window_print(p, __get_dynamic_array(curr_sum), nr_cpu_ids),
+        __entry->prev_window,
+        __window_print(p, __get_dynamic_array(prev_sum), nr_cpu_ids),
+        __entry->nt_cs, __entry->nt_ps,
         __entry->active_windows, __entry->grp_cs, __entry->grp_ps,
         __entry->grp_nt_cs, __entry->grp_nt_ps)
 );
diff --git a/kernel/fork.c b/kernel/fork.c
index e89d0bae6f20..8a5962276788 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1684,7 +1684,7 @@ struct task_struct *fork_idle(int cpu)
     task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0);
     if (!IS_ERR(task)) {
         init_idle_pids(task->pids);
-        init_idle(task, cpu);
+        init_idle(task, cpu, false);
     }

     return task;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 024fb1007c78..01bc9edc8b81 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2255,13 +2255,13 @@ void __dl_clear_params(struct task_struct *p)
 void sched_exit(struct task_struct *p)
 {
     unsigned long flags;
-    int cpu = get_cpu();
-    struct rq *rq = cpu_rq(cpu);
+    struct rq *rq;
     u64 wallclock;

     sched_set_group_id(p, 0);

-    raw_spin_lock_irqsave(&rq->lock, flags);
+    rq = task_rq_lock(p, &flags);
+
     /* rq->curr == p */
     wallclock = sched_ktime_clock();
     update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
@@ -2269,11 +2269,13 @@ void sched_exit(struct task_struct *p)
     reset_task_stats(p);
     p->ravg.mark_start = wallclock;
     p->ravg.sum_history[0] = EXITING_TASK_MARKER;
+
+    kfree(p->ravg.curr_window_cpu);
+    kfree(p->ravg.prev_window_cpu);
+
     enqueue_task(rq, p, 0);
     clear_ed_task(p, rq);
-    raw_spin_unlock_irqrestore(&rq->lock, flags);
-
-    put_cpu();
+    task_rq_unlock(rq, p, &flags);
 }

 #endif /* CONFIG_SCHED_HMP */
@@ -2377,6 +2379,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
     int cpu = get_cpu();

     __sched_fork(clone_flags, p);
+    init_new_task_load(p, false);
     /*
      * We mark the process as running here. This guarantees that
      * nobody will actually run it, and a signal or other external
@@ -2562,7 +2565,6 @@ void wake_up_new_task(struct task_struct *p)
     struct rq *rq;

     raw_spin_lock_irqsave(&p->pi_lock, flags);
-    init_new_task_load(p);
     add_new_task_to_grp(p);
     /* Initialize new task's runnable average */
     init_entity_runnable_average(&p->se);
@@ -5210,17 +5212,21 @@ void init_idle_bootup_task(struct task_struct *idle)
  * init_idle - set up an idle thread for a given CPU
  * @idle: task in question
  * @cpu: cpu the idle task belongs to
+ * @cpu_up: differentiate between initial boot vs hotplug
  *
  * NOTE: this function does not set the idle thread's NEED_RESCHED
  * flag, to make booting more robust.
  */
-void init_idle(struct task_struct *idle, int cpu)
+void init_idle(struct task_struct *idle, int cpu, bool cpu_up)
 {
     struct rq *rq = cpu_rq(cpu);
     unsigned long flags;

     __sched_fork(0, idle);

+    if (!cpu_up)
+        init_new_task_load(idle, true);
+
     raw_spin_lock_irqsave(&idle->pi_lock, flags);
     raw_spin_lock(&rq->lock);

@@ -8051,7 +8057,7 @@ void __init sched_init(void)
      * but because we are the idle thread, we just pick up running again
      * when this runqueue becomes "idle".
      */
-    init_idle(current, smp_processor_id());
+    init_idle(current, smp_processor_id(), false);

     calc_load_update = jiffies + LOAD_FREQ;

diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 3d5de8ba70a2..6ede7a224430 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -1611,7 +1611,7 @@ unsigned int cpu_temp(int cpu)
         return 0;
 }

-void init_new_task_load(struct task_struct *p)
+void init_new_task_load(struct task_struct *p, bool idle_task)
 {
     int i;
     u32 init_load_windows = sched_init_task_load_windows;
@@ -1623,6 +1623,15 @@ void init_new_task_load(struct task_struct *p)
     memset(&p->ravg, 0, sizeof(struct ravg));
     p->cpu_cycles = 0;

+    p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_ATOMIC);
+    p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), GFP_ATOMIC);
+
+    /* Don't have much choice. CPU frequency would be bogus */
+    BUG_ON(!p->ravg.curr_window_cpu || !p->ravg.prev_window_cpu);
+
+    if (idle_task)
+        return;
+
     if (init_load_pct)
         init_load_windows = div64_u64((u64)init_load_pct *
                 (u64)sched_ravg_window, 100);
@@ -2161,6 +2170,32 @@ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
     p->ravg.pred_demand = new;
 }

+static u32 empty_windows[NR_CPUS];
+
+static void rollover_task_window(struct task_struct *p, bool full_window)
+{
+    u32 *curr_cpu_windows = empty_windows;
+    u32 curr_window;
+    int i;
+
+    /* Rollover the sum */
+    curr_window = 0;
+
+    if (!full_window) {
+        curr_window = p->ravg.curr_window;
+        curr_cpu_windows = p->ravg.curr_window_cpu;
+    }
+
+    p->ravg.prev_window = curr_window;
+    p->ravg.curr_window = 0;
+
+    /* Roll over individual CPU contributions */
+    for (i = 0; i < nr_cpu_ids; i++) {
+        p->ravg.prev_window_cpu[i] = curr_cpu_windows[i];
+        p->ravg.curr_window_cpu[i] = 0;
+    }
+}
+
 /*
  * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
  */
@@ -2181,6 +2216,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
     int prev_sum_reset = 0;
     bool new_task;
     struct related_thread_group *grp;
+    int cpu = rq->cpu;

     new_window = mark_start < window_start;
     if (new_window) {
@@ -2240,15 +2276,9 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
      * Handle per-task window rollover. We don't care about the idle
      * task or exiting tasks.
      */
-    if (new_window && !is_idle_task(p) && !exiting_task(p)) {
-        u32 curr_window = 0;
+    if (new_window && !is_idle_task(p) && !exiting_task(p))
+        rollover_task_window(p, full_window);

-        if (!full_window)
-            curr_window = p->ravg.curr_window;
-
-        p->ravg.prev_window = curr_window;
-        p->ravg.curr_window = 0;
-    }
     if (flip_counters) {
         u64 curr_sum = *curr_runnable_sum;

@@ -2310,8 +2340,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
         if (new_task)
             *nt_curr_runnable_sum += delta;

-        if (!is_idle_task(p) && !exiting_task(p))
+        if (!is_idle_task(p) && !exiting_task(p)) {
             p->ravg.curr_window += delta;
+            p->ravg.curr_window_cpu[cpu] += delta;
+        }

         return;
     }
@@ -2336,8 +2368,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
              * contribution to previous completed window.
              */
            delta = scale_exec_time(window_start - mark_start, rq);
-            if (!exiting_task(p))
+            if (!exiting_task(p)) {
                 p->ravg.prev_window += delta;
+                p->ravg.prev_window_cpu[cpu] += delta;
+            }
         } else {
             /*
              * Since at least one full window has elapsed,
@@ -2345,8 +2379,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
              * full window (window_size).
              */
            delta = scale_exec_time(window_size, rq);
-            if (!exiting_task(p))
+            if (!exiting_task(p)) {
                 p->ravg.prev_window = delta;
+                p->ravg.prev_window_cpu[cpu] = delta;
+            }
         }

         *prev_runnable_sum += delta;
@@ -2359,8 +2395,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
         if (new_task)
             *nt_curr_runnable_sum += delta;

-        if (!exiting_task(p))
+        if (!exiting_task(p)) {
             p->ravg.curr_window = delta;
+            p->ravg.curr_window_cpu[cpu] = delta;
+        }

         return;
     }
@@ -2386,8 +2424,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
              * contribution to previous completed window.
              */
            delta = scale_exec_time(window_start - mark_start, rq);
-            if (!is_idle_task(p) && !exiting_task(p))
+            if (!is_idle_task(p) && !exiting_task(p)) {
                 p->ravg.prev_window += delta;
+                p->ravg.prev_window_cpu[cpu] += delta;
+            }
         } else {
             /*
              * Since at least one full window has elapsed,
@@ -2395,8 +2435,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
              * full window (window_size).
              */
            delta = scale_exec_time(window_size, rq);
-            if (!is_idle_task(p) && !exiting_task(p))
+            if (!is_idle_task(p) && !exiting_task(p)) {
                 p->ravg.prev_window = delta;
+                p->ravg.prev_window_cpu[cpu] = delta;
+            }
         }

         /*
@@ -2413,8 +2455,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
         if (new_task)
             *nt_curr_runnable_sum += delta;

-        if (!is_idle_task(p) && !exiting_task(p))
+        if (!is_idle_task(p) && !exiting_task(p)) {
             p->ravg.curr_window = delta;
+            p->ravg.curr_window_cpu[cpu] = delta;
+        }

         return;
     }
@@ -2829,11 +2873,23 @@ void sched_account_irqstart(int cpu, struct task_struct *curr, u64 wallclock)
 void reset_task_stats(struct task_struct *p)
 {
     u32 sum = 0;
+    u32 *curr_window_ptr = NULL;
+    u32 *prev_window_ptr = NULL;

-    if (exiting_task(p))
+    if (exiting_task(p)) {
         sum = EXITING_TASK_MARKER;
+    } else {
+        curr_window_ptr = p->ravg.curr_window_cpu;
+        prev_window_ptr = p->ravg.prev_window_cpu;
+        memset(curr_window_ptr, 0, sizeof(u32) * nr_cpu_ids);
+        memset(prev_window_ptr, 0, sizeof(u32) * nr_cpu_ids);
+    }

     memset(&p->ravg, 0, sizeof(struct ravg));
+
+    p->ravg.curr_window_cpu = curr_window_ptr;
+    p->ravg.prev_window_cpu = prev_window_ptr;
+
     /* Retain EXITING_TASK marker */
     p->ravg.sum_history[0] = sum;
 }
@@ -2889,7 +2945,9 @@ static void reset_all_task_stats(void)

     read_lock(&tasklist_lock);
     do_each_thread(g, p) {
+        raw_spin_lock(&p->pi_lock);
         reset_task_stats(p);
+        raw_spin_unlock(&p->pi_lock);
     } while_each_thread(g, p);
     read_unlock(&tasklist_lock);
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 27b28369440d..f786767aa353 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1057,7 +1057,7 @@ extern unsigned int __read_mostly sched_upmigrate;
 extern unsigned int __read_mostly sched_downmigrate;
 extern unsigned int __read_mostly sysctl_sched_spill_nr_run;

-extern void init_new_task_load(struct task_struct *p);
+extern void init_new_task_load(struct task_struct *p, bool idle_task);
 extern u64 sched_ktime_clock(void);
 extern int got_boost_kick(void);
 extern int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb);
@@ -1503,7 +1503,9 @@ static inline struct sched_cluster *rq_cluster(struct rq *rq)
     return NULL;
 }

-static inline void init_new_task_load(struct task_struct *p) { }
+static inline void init_new_task_load(struct task_struct *p, bool idle_task)
+{
+}

 static inline u64 scale_load_to_cpu(u64 load, int cpu)
 {
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 6949476a118f..3a0415803b09 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -32,7 +32,7 @@ struct task_struct *idle_thread_get(unsigned int cpu)

     if (!tsk)
         return ERR_PTR(-ENOMEM);
-    init_idle(tsk, cpu);
+    init_idle(tsk, cpu, true);
     return tsk;
 }
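
The one genuinely new piece of logic above is the per-task window rollover. Below is a standalone sketch of what the new rollover_task_window() helper in kernel/sched/hmp.c does, reusing the struct ravg_sketch stand-in from the earlier sketch (rollover_task_window_sketch, SKETCH_NR_CPUS and the local empty_windows are illustrative names; in the patch the zero-filled array is NR_CPUS-sized and the loop runs to nr_cpu_ids): when a task crosses a window boundary its current-window values roll into the previous window, unless at least one full window has elapsed, in which case the previous window is simply zeroed.

/*
 * Sketch of the patch's rollover_task_window(), using struct ravg_sketch
 * from the previous sketch. empty_windows mirrors the patch's static
 * zero-filled array: when a full window has elapsed it is used as the
 * copy source so the previous-window values end up cleared.
 * Assumes nr_cpu_ids <= SKETCH_NR_CPUS.
 */
#define SKETCH_NR_CPUS 8    /* illustrative; the kernel sizes this by NR_CPUS */

static unsigned int empty_windows[SKETCH_NR_CPUS];

void rollover_task_window_sketch(struct ravg_sketch *r, int nr_cpu_ids,
                                 int full_window)
{
    unsigned int *curr_cpu_windows = empty_windows;
    unsigned int curr_window = 0;
    int i;

    /* If less than a full window elapsed, curr rolls into prev. */
    if (!full_window) {
        curr_window = r->curr_window;
        curr_cpu_windows = r->curr_window_cpu;
    }

    r->prev_window = curr_window;
    r->curr_window = 0;

    /* Roll over (or clear) the individual per-CPU contributions. */
    for (i = 0; i < nr_cpu_ids; i++) {
        r->prev_window_cpu[i] = curr_cpu_windows[i];
        r->curr_window_cpu[i] = 0;
    }
}

The empty_windows trick avoids a second loop: whether rolling over or clearing, the same copy runs, just with a different source array.
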
