Diffstat (limited to 'kernel/sched/hmp.c')
-rw-r--r--	kernel/sched/hmp.c	261
1 file changed, 221 insertions, 40 deletions
diff --git a/kernel/sched/hmp.c b/kernel/sched/hmp.c
index 35f4ea1761e2..8675ebeebf6a 100644
--- a/kernel/sched/hmp.c
+++ b/kernel/sched/hmp.c
@@ -825,15 +825,15 @@
 unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */
 unsigned int min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */
 
-/* Window size (in ns) */
-__read_mostly unsigned int sched_ravg_window = 10000000;
-
 /* Min window size (in ns) = 10ms */
 #define MIN_SCHED_RAVG_WINDOW 10000000
 
 /* Max window size (in ns) = 1s */
 #define MAX_SCHED_RAVG_WINDOW 1000000000
 
+/* Window size (in ns) */
+__read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW;
+
 /* Temporarily disable window-stats activity on all cpus */
 unsigned int __read_mostly sched_disable_window_stats;
 
@@ -853,6 +853,17 @@ static DEFINE_RWLOCK(related_thread_group_lock);
 	list_for_each_entry(grp, &related_thread_groups, list)
 
 /*
+ * Task load is categorized into buckets for the purpose of top task tracking.
+ * The entire range of load from 0 to sched_ravg_window needs to be covered
+ * in NUM_LOAD_INDICES number of buckets. Therefore the size of each bucket
+ * is given by sched_ravg_window / NUM_LOAD_INDICES. Since the default value
+ * of sched_ravg_window is MIN_SCHED_RAVG_WINDOW, use that to compute
+ * sched_load_granule.
+ */
+__read_mostly unsigned int sched_load_granule =
+	MIN_SCHED_RAVG_WINDOW / NUM_LOAD_INDICES;
+
+/*
  * Demand aggregation for frequency purpose:
  *
  * 'sched_freq_aggregate' controls aggregation of cpu demand of related threads
@@ -2172,6 +2183,113 @@ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
 	p->ravg.pred_demand = new;
 }
 
+/*
+ * Special case the last index and provide a fast path for index = 0.
+ * Note that sched_load_granule can change underneath us if we are not
+ * holding any runqueue locks while calling the two functions below.
+ */
+static u32 __maybe_unused top_task_load(struct rq *rq)
+{
+	int index = rq->prev_top;
+
+	if (!index) {
+		if (!rq->prev_runnable_sum)
+			return 0;
+		else
+			return sched_load_granule;
+	} else if (index == NUM_LOAD_INDICES - 1) {
+		return sched_ravg_window;
+	} else {
+		return (index + 1) * sched_load_granule;
+	}
+}
+
+static int load_to_index(u32 load)
+{
+	if (load < sched_load_granule)
+		return 0;
+	else if (load >= sched_ravg_window)
+		return NUM_LOAD_INDICES - 1;
+	else
+		return load / sched_load_granule;
+}
+
+static void update_top_tasks(struct task_struct *p, struct rq *rq,
+		u32 old_curr_window, int new_window, bool full_window)
+{
+	u8 *curr_table = rq->top_tasks[rq->curr_table];
+	u8 *prev_table = rq->top_tasks[1 - rq->curr_table];
+	int old_index, new_index, update_index;
+	u32 curr_window = p->ravg.curr_window;
+	u32 prev_window = p->ravg.prev_window;
+	bool zero_index_update;
+
+	if (old_curr_window == curr_window && !new_window)
+		return;
+
+	old_index = load_to_index(old_curr_window);
+	new_index = load_to_index(curr_window);
+
+	if (!new_window) {
+		zero_index_update = !old_curr_window && curr_window;
+		if (old_index != new_index || zero_index_update) {
+			if (old_curr_window)
+				curr_table[old_index] -= 1;
+			if (curr_window)
+				curr_table[new_index] += 1;
+			if (new_index > rq->curr_top)
+				rq->curr_top = new_index;
+		}
+
+		return;
+	}
+
+	/*
+	 * The window has rolled over for this task. By the time we get
+	 * here, curr/prev swaps would have already occurred. So we need
+	 * to use prev_window for the new index.
+	 */
+	update_index = load_to_index(prev_window);
+
+	if (full_window) {
+		/*
+		 * Two cases here. Either 'p' ran for the entire window or
+		 * it didn't run at all. In either case there is no entry
+		 * in the prev table. If 'p' ran the entire window, we just
+		 * need to create a new entry in the prev table. In this case
+		 * update_index will correspond to sched_ravg_window
+		 * so we can unconditionally update the top index.
+		 */
+		if (prev_window) {
+			prev_table[update_index] += 1;
+			rq->prev_top = update_index;
+		}
+	} else {
+		zero_index_update = !old_curr_window && prev_window;
+		if (old_index != update_index || zero_index_update) {
+			if (old_curr_window)
+				prev_table[old_index] -= 1;
+
+			prev_table[update_index] += 1;
+
+			if (update_index > rq->prev_top)
+				rq->prev_top = update_index;
+		}
+	}
+
+	if (curr_window) {
+		curr_table[new_index] += 1;
+
+		if (new_index > rq->curr_top)
+			rq->curr_top = new_index;
+	}
+}
+
+static inline void clear_top_tasks_table(u8 *table)
+{
+	memset(table, 0, NUM_LOAD_INDICES * sizeof(u8));
+}
+
 static u32 empty_windows[NR_CPUS];
 
 static void rollover_task_window(struct task_struct *p, bool full_window)
@@ -2219,6 +2337,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
 	bool new_task;
 	struct related_thread_group *grp;
 	int cpu = rq->cpu;
+	u32 old_curr_window;
 
 	new_window = mark_start < window_start;
 	if (new_window) {
@@ -2278,51 +2397,40 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
 	 * Handle per-task window rollover. We don't care about the idle
	 * task or exiting tasks.
 	 */
-	if (new_window && !is_idle_task(p) && !exiting_task(p))
-		rollover_task_window(p, full_window);
+	if (!is_idle_task(p) && !exiting_task(p)) {
+		old_curr_window = p->ravg.curr_window;
+		if (new_window)
+			rollover_task_window(p, full_window);
+	}
 
 	if (flip_counters) {
 		u64 curr_sum = *curr_runnable_sum;
 		u64 nt_curr_sum = *nt_curr_runnable_sum;
+		u8 curr_table = rq->curr_table;
+		u8 prev_table = 1 - curr_table;
+		int curr_top = rq->curr_top;
 
-		if (prev_sum_reset)
+		clear_top_tasks_table(rq->top_tasks[prev_table]);
+
+		if (prev_sum_reset) {
 			curr_sum = nt_curr_sum = 0;
+			curr_top = 0;
+			clear_top_tasks_table(rq->top_tasks[curr_table]);
+		}
 
 		*prev_runnable_sum = curr_sum;
 		*nt_prev_runnable_sum = nt_curr_sum;
 		*curr_runnable_sum = 0;
 		*nt_curr_runnable_sum = 0;
+
+		rq->curr_table = prev_table;
+		rq->prev_top = curr_top;
+		rq->curr_top = 0;
 	}
 
-	if (!account_busy_for_cpu_time(rq, p, irqtime, event)) {
-		/*
-		 * account_busy_for_cpu_time() = 0, so no update to the
-		 * task's current window needs to be made. This could be
-		 * for example
-		 *
-		 *	- a wakeup event on a task within the current
-		 *	  window (!new_window below, no action required),
-		 *	- switching to a new task from idle (PICK_NEXT_TASK)
-		 *	  in a new window where irqtime is 0 and we aren't
-		 *	  waiting on IO
-		 */
-
-		if (!new_window)
-			return;
-
-		/*
-		 * A new window has started. The RQ demand must be rolled
-		 * over if p is the current task.
-		 */
-		if (p_is_curr_task) {
-			/* p is idle task */
-			BUG_ON(p != rq->idle);
-		}
-
-		return;
-	}
+	if (!account_busy_for_cpu_time(rq, p, irqtime, event))
+		goto done;
 
 	if (!new_window) {
 		/*
@@ -2347,7 +2455,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
 			p->ravg.curr_window_cpu[cpu] += delta;
 		}
 
-		return;
+		goto done;
 	}
 
 	if (!p_is_curr_task) {
@@ -2402,7 +2510,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
 			p->ravg.curr_window_cpu[cpu] = delta;
 		}
 
-		return;
+		goto done;
 	}
 
 	if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) {
@@ -2462,7 +2570,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
 			p->ravg.curr_window_cpu[cpu] = delta;
 		}
 
-		return;
+		goto done;
 	}
 
 	if (irqtime) {
@@ -2507,7 +2615,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
 		return;
 	}
 
-	BUG();
+done:
+	if (!is_idle_task(p) && !exiting_task(p))
+		update_top_tasks(p, rq, old_curr_window,
+				new_window, full_window);
 }
 
 static inline u32 predict_and_update_buckets(struct rq *rq,
@@ -3028,6 +3139,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 	if (window_size) {
 		sched_ravg_window = window_size * TICK_NSEC;
 		set_hmp_defaults();
+		sched_load_granule = sched_ravg_window / NUM_LOAD_INDICES;
 	}
 
 	enable_window_stats();
@@ -3039,9 +3151,15 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 		rq->window_start = window_start;
 		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
 		rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
-		for (i = 0; i < NUM_SUBTRACTION_WINDOWS; i++)
+		for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
 			memset(&rq->load_subs[i], 0,
 				sizeof(struct load_subtractions));
+			clear_top_tasks_table(rq->top_tasks[i]);
+		}
+
+		rq->curr_table = 0;
+		rq->curr_top = 0;
+		rq->prev_top = 0;
 		reset_cpu_hmp_stats(cpu, 1);
 	}
 
@@ -3088,7 +3206,7 @@ static inline void account_load_subtractions(struct rq *rq)
 	struct load_subtractions *ls = rq->load_subs;
 	int i;
 
-	for (i = 0; i < NUM_SUBTRACTION_WINDOWS; i++) {
+	for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
 		if (ls[i].window_start == ws) {
 			rq->curr_runnable_sum -= ls[i].subs;
 			rq->nt_curr_runnable_sum -= ls[i].new_subs;
@@ -3357,7 +3475,7 @@ static bool get_subtraction_index(struct rq *rq, u64 ws)
 	u64 oldest = ULLONG_MAX;
 	int oldest_index = 0;
 
-	for (i = 0; i < NUM_SUBTRACTION_WINDOWS; i++) {
+	for (i = 0; i < NUM_TRACKED_WINDOWS; i++) {
 		u64 entry_ws = rq->load_subs[i].window_start;
 
 		if (ws == entry_ws)
@@ -3454,6 +3572,68 @@ static inline void inter_cluster_migration_fixup
 	BUG_ON((s64)src_rq->nt_curr_runnable_sum < 0);
 }
 
+static int find_next_top_index(u8 *tasks, int end)
+{
+	int i;
+
+	if (end <= 1)
+		return 0;
+
+	for (i = end - 1; i >= 0; i--) {
+		if (tasks[i])
+			return i;
+	}
+
+	return 0;
+}
+
+static void
+migrate_top_tasks(struct task_struct *p, struct rq *src_rq, struct rq *dst_rq)
+{
+	int index;
+	int top_index;
+	u32 curr_window = p->ravg.curr_window;
+	u32 prev_window = p->ravg.prev_window;
+	u8 src = src_rq->curr_table;
+	u8 dst = dst_rq->curr_table;
+	u8 *src_table;
+	u8 *dst_table;
+
+	if (curr_window) {
+		src_table = src_rq->top_tasks[src];
+		dst_table = dst_rq->top_tasks[dst];
+		index = load_to_index(curr_window);
+		src_table[index] -= 1;
+		dst_table[index] += 1;
+
+		if (index > dst_rq->curr_top)
+			dst_rq->curr_top = index;
+
+		top_index = src_rq->curr_top;
+		if (index == top_index && !src_table[index])
+			src_rq->curr_top =
+				find_next_top_index(src_table, top_index);
+	}
+
+	if (prev_window) {
+		src = 1 - src;
+		dst = 1 - dst;
+		src_table = src_rq->top_tasks[src];
+		dst_table = dst_rq->top_tasks[dst];
+		index = load_to_index(prev_window);
+		src_table[index] -= 1;
+		dst_table[index] += 1;
+
+		if (index > dst_rq->prev_top)
+			dst_rq->prev_top = index;
+
+		top_index = src_rq->prev_top;
+		if (index == top_index && !src_table[index])
+			src_rq->prev_top =
+				find_next_top_index(src_table, top_index);
+	}
+}
+
 void fixup_busy_time(struct task_struct *p, int new_cpu)
 {
 	struct rq *src_rq = task_rq(p);
@@ -3544,6 +3724,7 @@ void fixup_busy_time(struct task_struct *p, int new_cpu)
 				task_cpu(p), new_task);
 	}
 
+	migrate_top_tasks(p, src_rq, dest_rq);
 	if (p == src_rq->ed_task) {
 		src_rq->ed_task = NULL;
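
The bucket arithmetic introduced by this patch (sched_load_granule, load_to_index(), top_task_load()) can be illustrated outside the kernel. The standalone C sketch below is illustrative only: NUM_LOAD_INDICES = 1000 and the 10 ms window are assumed example values, and index_to_load() is a hypothetical helper that mirrors top_task_load() without the runqueue state.

/*
 * Standalone sketch (not kernel code) of the top-task bucket math.
 * NUM_LOAD_INDICES and the 10 ms window below are assumed example values;
 * the real constants live in the kernel headers.
 */
#include <stdio.h>

#define NUM_LOAD_INDICES	1000		/* assumed bucket count */
#define SCHED_RAVG_WINDOW	10000000u	/* assumed 10 ms window, in ns */

static const unsigned int load_granule = SCHED_RAVG_WINDOW / NUM_LOAD_INDICES;

/* Mirrors load_to_index(): clamp a window load into [0, NUM_LOAD_INDICES - 1]. */
static int load_to_index(unsigned int load)
{
	if (load < load_granule)
		return 0;
	if (load >= SCHED_RAVG_WINDOW)
		return NUM_LOAD_INDICES - 1;
	return load / load_granule;
}

/*
 * Hypothetical helper mirroring top_task_load(): report the upper edge of a
 * bucket, with the last bucket saturating at the full window. The kernel's
 * index-0 fast path (0 vs. one granule) is omitted here.
 */
static unsigned int index_to_load(int index)
{
	if (index == NUM_LOAD_INDICES - 1)
		return SCHED_RAVG_WINDOW;
	return (index + 1) * load_granule;
}

int main(void)
{
	unsigned int loads[] = { 0, 5000, 3000000, 9999999, 10000000 };
	unsigned long i;

	for (i = 0; i < sizeof(loads) / sizeof(loads[0]); i++) {
		int idx = load_to_index(loads[i]);

		printf("load %8u ns -> bucket %3d -> reported %8u ns\n",
		       loads[i], idx, index_to_load(idx));
	}
	return 0;
}

In this scheme a task's window load is quantized into NUM_LOAD_INDICES buckets of sched_load_granule nanoseconds each, so reporting only the top bucket index loses at most one granule of precision.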
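Likewise, the curr/prev double buffering of rq->top_tasks[] and the flip performed under flip_counters can be sketched in plain C. The names below (fake_rq, rollover_top_tasks) are invented for illustration; the real rollover in update_cpu_busy_time() also resets the runnable sums and handles prev_sum_reset.

/*
 * Minimal userspace sketch of the double-buffered top-task tables. The
 * names (fake_rq, rollover_top_tasks) are invented for illustration.
 */
#include <stdio.h>
#include <string.h>

#define NUM_LOAD_INDICES 1000	/* assumed, as in the sketch above */

struct fake_rq {
	unsigned char top_tasks[2][NUM_LOAD_INDICES];	/* per-bucket task counts */
	unsigned char curr_table;	/* which table tracks the current window */
	int curr_top;
	int prev_top;
};

/* Same idea as clear_top_tasks_table() in the patch. */
static void clear_table(unsigned char *table)
{
	memset(table, 0, NUM_LOAD_INDICES * sizeof(unsigned char));
}

/*
 * At a window boundary the stale "previous" table is recycled as the new
 * "current" table: wipe it, let prev_top inherit the old curr_top, and
 * start the new current top from zero.
 */
static void rollover_top_tasks(struct fake_rq *rq)
{
	unsigned char prev_table = 1 - rq->curr_table;

	clear_table(rq->top_tasks[prev_table]);
	rq->curr_table = prev_table;
	rq->prev_top = rq->curr_top;
	rq->curr_top = 0;
}

int main(void)
{
	struct fake_rq rq;

	memset(&rq, 0, sizeof(rq));
	rq.top_tasks[rq.curr_table][42] = 1;	/* one task landed in bucket 42 */
	rq.curr_top = 42;

	rollover_top_tasks(&rq);
	/* Prints: prev_top=42 curr_top=0 curr_table=1 */
	printf("prev_top=%d curr_top=%d curr_table=%d\n",
	       rq.prev_top, rq.curr_top, rq.curr_table);
	return 0;
}

Keeping two tables means a window rollover costs one memset of the recycled table plus a few constant-time updates; the downward scan in find_next_top_index() is only needed when a migration empties the current top bucket.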
