diff options
| author | Srivatsa Vaddagiri <vatsa@codeaurora.org> | 2015-05-12 15:01:15 +0530 |
|---|---|---|
| committer | Kyle Yan <kyan@codeaurora.org> | 2016-05-26 15:28:59 -0700 |
| commit | e6aae1c3e01868f37df31e7542c3160fd194474f (patch) | |
| tree | 3042f0654c617d05884ecf9150dd511bad4cffe1 /include/trace | |
| parent | 450ba6dd70df79333e9949af731a6b40bfa37856 (diff) | |
sched: Aggregate for frequency
Related threads in a group could execute on different CPUs and hence
present a split-demand picture to the cpufreq governor. In other words, the governor
fails to see the net cpu demand of all related threads in a given
window if the threads' execution were to be split across CPUs. That
could result in sub-optimal frequency chosen in comparison to the
ideal frequency at which the aggregate work (taken up by related
threads) needs to be run.
This patch aggregates cpu execution stats in a window for all related
threads in a group. This helps present cpu busy time to the governor as if
all related threads were part of the same thread and thus helps select
the right frequency required by related threads. This aggregation
is done per-cluster.
Change-Id: I71e6047620066323721c6d542034ddd4b2950e7f
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
[joonwoop@codeaurora.org: Fixed notify_migration() to hold rcu read
lock as this version of Linux doesn't hold p->pi_lock when the
function gets called while keeping use of rcu_access_pointer() since
we never dereference return value.]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
Diffstat (limited to 'include/trace')
| -rw-r--r-- | include/trace/events/sched.h | 115 |
1 file changed, 81 insertions, 34 deletions
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 6c5fa35e2875..81415b78ef39 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -9,6 +9,8 @@ #include <linux/binfmts.h> struct rq; +struct group_cpu_time; +struct migration_sum_data; extern const char *task_event_names[]; /* @@ -269,9 +271,10 @@ TRACE_EVENT(sched_set_boost, TRACE_EVENT(sched_update_task_ravg, TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt, - u64 wallclock, u64 irqtime, u32 cycles, u32 exec_time), + u64 wallclock, u64 irqtime, u32 cycles, u32 exec_time, + struct group_cpu_time *cpu_time), - TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time), + TP_ARGS(p, rq, evt, wallclock, irqtime, cycles, exec_time, cpu_time), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -290,8 +293,12 @@ TRACE_EVENT(sched_update_task_ravg, __field( int, cpu ) #ifdef CONFIG_SCHED_FREQ_INPUT __field(unsigned int, pred_demand ) - __field( u64, cs ) - __field( u64, ps ) + __field( u64, rq_cs ) + __field( u64, rq_ps ) + __field( u64, grp_cs ) + __field( u64, grp_ps ) + __field( u64, grp_nt_cs ) + __field( u64, grp_nt_ps ) __field( u32, curr_window ) __field( u32, prev_window ) __field( u64, nt_cs ) @@ -318,8 +325,12 @@ TRACE_EVENT(sched_update_task_ravg, __entry->irqtime = irqtime; #ifdef CONFIG_SCHED_FREQ_INPUT __entry->pred_demand = p->ravg.pred_demand; - __entry->cs = rq->curr_runnable_sum; - __entry->ps = rq->prev_runnable_sum; + __entry->rq_cs = rq->curr_runnable_sum; + __entry->rq_ps = rq->prev_runnable_sum; + __entry->grp_cs = cpu_time ? cpu_time->curr_runnable_sum : 0; + __entry->grp_ps = cpu_time ? cpu_time->prev_runnable_sum : 0; + __entry->grp_nt_cs = cpu_time ? cpu_time->nt_curr_runnable_sum : 0; + __entry->grp_nt_ps = cpu_time ? 
cpu_time->nt_prev_runnable_sum : 0; __entry->curr_window = p->ravg.curr_window; __entry->prev_window = p->ravg.prev_window; __entry->nt_cs = rq->nt_curr_runnable_sum; @@ -330,7 +341,7 @@ TRACE_EVENT(sched_update_task_ravg, TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu" #ifdef CONFIG_SCHED_FREQ_INPUT - " pred_demand %u cs %llu ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u" + " pred_demand %u rq_cs %llu rq_ps %llu cur_window %u prev_window %u nt_cs %llu nt_ps %llu active_wins %u grp_cs %lld grp_ps %lld, grp_nt_cs %llu, grp_nt_ps: %llu" #endif , __entry->wallclock, __entry->win_start, __entry->delta, task_event_names[__entry->evt], __entry->cpu, @@ -339,10 +350,12 @@ TRACE_EVENT(sched_update_task_ravg, __entry->delta_m, __entry->demand, __entry->sum, __entry->irqtime #ifdef CONFIG_SCHED_FREQ_INPUT - , __entry->pred_demand, __entry->cs, __entry->ps, + , __entry->pred_demand, __entry->rq_cs, __entry->rq_ps, __entry->curr_window, __entry->prev_window, __entry->nt_cs, __entry->nt_ps, - __entry->active_windows + __entry->active_windows, + __entry->grp_cs, __entry->grp_ps, + __entry->grp_nt_cs, __entry->grp_nt_ps #endif ) ); @@ -506,31 +519,62 @@ TRACE_EVENT(sched_update_pred_demand, TRACE_EVENT(sched_migration_update_sum, - TP_PROTO(struct rq *rq, struct task_struct *p), + TP_PROTO(struct task_struct *p, enum migrate_types migrate_type, struct migration_sum_data *d), - TP_ARGS(rq, p), + TP_ARGS(p, migrate_type, d), TP_STRUCT__entry( - __field(int, cpu ) + __field(int, tcpu ) __field(int, pid ) __field( u64, cs ) __field( u64, ps ) __field( s64, nt_cs ) __field( s64, nt_ps ) + __field(enum migrate_types, migrate_type ) + __field( s64, src_cs ) + __field( s64, src_ps ) + __field( s64, dst_cs ) + __field( s64, dst_ps ) + __field( s64, src_nt_cs ) + __field( s64, src_nt_ps ) + __field( s64, dst_nt_cs ) + __field( s64, dst_nt_ps ) ), TP_fast_assign( - 
__entry->cpu = cpu_of(rq); - __entry->cs = rq->curr_runnable_sum; - __entry->ps = rq->prev_runnable_sum; - __entry->nt_cs = (s64)rq->nt_curr_runnable_sum; - __entry->nt_ps = (s64)rq->nt_prev_runnable_sum; + __entry->tcpu = task_cpu(p); __entry->pid = p->pid; - ), - - TP_printk("cpu %d: cs %llu ps %llu nt_cs %lld nt_ps %lld pid %d", - __entry->cpu, __entry->cs, __entry->ps, - __entry->nt_cs, __entry->nt_ps, __entry->pid) + __entry->migrate_type = migrate_type; + __entry->src_cs = d->src_rq ? + d->src_rq->curr_runnable_sum : + d->src_cpu_time->curr_runnable_sum; + __entry->src_ps = d->src_rq ? + d->src_rq->prev_runnable_sum : + d->src_cpu_time->prev_runnable_sum; + __entry->dst_cs = d->dst_rq ? + d->dst_rq->curr_runnable_sum : + d->dst_cpu_time->curr_runnable_sum; + __entry->dst_ps = d->dst_rq ? + d->dst_rq->prev_runnable_sum : + d->dst_cpu_time->prev_runnable_sum; + __entry->src_nt_cs = d->src_rq ? + d->src_rq->nt_curr_runnable_sum : + d->src_cpu_time->nt_curr_runnable_sum; + __entry->src_nt_ps = d->src_rq ? + d->src_rq->nt_prev_runnable_sum : + d->src_cpu_time->nt_prev_runnable_sum; + __entry->dst_nt_cs = d->dst_rq ? + d->dst_rq->nt_curr_runnable_sum : + d->dst_cpu_time->nt_curr_runnable_sum; + __entry->dst_nt_ps = d->dst_rq ? 
+ d->dst_rq->nt_prev_runnable_sum : + d->dst_cpu_time->nt_prev_runnable_sum; + ), + + TP_printk("pid %d task_cpu %d migrate_type %s src_cs %llu src_ps %llu dst_cs %lld dst_ps %lld src_nt_cs %llu src_nt_ps %llu dst_nt_cs %lld dst_nt_ps %lld", + __entry->pid, __entry->tcpu, migrate_type_names[__entry->migrate_type], + __entry->src_cs, __entry->src_ps, __entry->dst_cs, __entry->dst_ps, + __entry->src_nt_cs, __entry->src_nt_ps, __entry->dst_nt_cs, __entry->dst_nt_ps) ); TRACE_EVENT(sched_get_busy, @@ -562,15 +606,17 @@ TRACE_EVENT(sched_get_busy, TRACE_EVENT(sched_freq_alert, - TP_PROTO(int cpu, int pd_notif, u64 old_load, u64 new_load, - u64 old_pred, u64 new_pred), + TP_PROTO(int cpu, int pd_notif, int check_groups, struct rq *rq, + u64 new_load), - TP_ARGS(cpu, pd_notif, old_load, new_load, old_pred, new_pred), + TP_ARGS(cpu, pd_notif, check_groups, rq, new_load), TP_STRUCT__entry( __field( int, cpu ) __field( int, pd_notif ) - __field( u64, old_load ) + __field( int, check_groups ) + __field( u64, old_busy_time ) + __field( u64, ps ) __field( u64, new_load ) __field( u64, old_pred ) __field( u64, new_pred ) @@ -579,17 +625,18 @@ TRACE_EVENT(sched_freq_alert, TP_fast_assign( __entry->cpu = cpu; __entry->pd_notif = pd_notif; - __entry->old_load = old_load; + __entry->check_groups = check_groups; + __entry->old_busy_time = rq->old_busy_time; + __entry->ps = rq->prev_runnable_sum; __entry->new_load = new_load; - __entry->old_pred = old_pred; - __entry->new_pred = new_pred; + __entry->old_pred = rq->old_estimated_time; + __entry->new_pred = rq->hmp_stats.pred_demands_sum; ), - TP_printk("cpu %d pd_notif=%d old_load=%llu new_load=%llu " - "old_pred=%llu new_pred=%llu", - __entry->cpu, __entry->pd_notif, __entry->old_load, - __entry->new_load, __entry->old_pred, - __entry->new_pred) + TP_printk("cpu %d pd_notif=%d check_groups %d old_busy_time=%llu prev_sum=%lld new_load=%llu old_pred=%llu new_pred=%llu", + __entry->cpu, __entry->pd_notif, __entry->check_groups, + 
__entry->old_busy_time, __entry->ps, __entry->new_load, + __entry->old_pred, __entry->new_pred) ); #endif /* CONFIG_SCHED_FREQ_INPUT */ |
