diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched/core.c | 192 | ||||
| -rw-r--r-- | kernel/sched/fair.c | 1 | ||||
| -rw-r--r-- | kernel/sched/sched.h | 15 |
3 files changed, 169 insertions, 39 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0cdd0cf0718f..3afa3c5d5ebd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -797,6 +797,9 @@ sched_set_cpu_cstate(int cpu, int cstate, int wakeup_energy, int wakeup_latency) static ktime_t ktime_last; static bool sched_ktime_suspended; +static bool use_cycle_counter; +static struct cpu_cycle_counter_cb cpu_cycle_counter_cb; + u64 sched_ktime_clock(void) { if (unlikely(sched_ktime_suspended)) @@ -1398,6 +1401,7 @@ static struct sched_cluster init_cluster = { .max_freq = 1, .min_freq = 1, .max_possible_freq = 1, + .cpu_cycle_max_scale_factor = 1, .dstate = 0, .dstate_wakeup_energy = 0, .dstate_wakeup_latency = 0, @@ -1546,6 +1550,7 @@ static struct sched_cluster *alloc_new_cluster(const struct cpumask *cpus) cluster->max_freq = 1; cluster->min_freq = 1; cluster->max_possible_freq = 1; + cluster->cpu_cycle_max_scale_factor = 1; cluster->dstate = 0; cluster->dstate_wakeup_energy = 0; cluster->dstate_wakeup_latency = 0; @@ -1612,6 +1617,44 @@ static void init_clusters(void) INIT_LIST_HEAD(&cluster_head); } +static inline void +__update_cpu_cycle_max_possible_freq(struct sched_cluster *cluster) +{ + int cpu = cluster_first_cpu(cluster); + + cluster->cpu_cycle_max_scale_factor = + div64_u64(cluster->max_possible_freq * NSEC_PER_USEC, + cpu_cycle_counter_cb.get_cpu_cycles_max_per_us(cpu)); +} + +static inline void +update_cpu_cycle_max_possible_freq(struct sched_cluster *cluster) +{ + if (!use_cycle_counter) + return; + + __update_cpu_cycle_max_possible_freq(cluster); +} + +int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb) +{ + struct sched_cluster *cluster = NULL; + + mutex_lock(&cluster_lock); + if (!cb->get_cpu_cycle_counter || !cb->get_cpu_cycles_max_per_us) { + mutex_unlock(&cluster_lock); + return -EINVAL; + } + + cpu_cycle_counter_cb = *cb; + for_each_sched_cluster(cluster) + __update_cpu_cycle_max_possible_freq(cluster); + use_cycle_counter = true; + mutex_unlock(&cluster_lock); + + return 0; +} + static int __init set_sched_enable_hmp(char *str) { int enable_hmp = 0; @@ -1718,12 +1761,24 @@ static inline void clear_boost_kick(int cpu) { } static inline void clear_hmp_request(int cpu) { } +int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb) +{ + return 0; +} + #ifdef CONFIG_SMP static void update_cluster_topology(void) { } #endif #endif /* CONFIG_SCHED_HMP */ +#define SCHED_MIN_FREQ 1 + +struct cpu_cycle { + u64 cycles; + u64 time; +}; + #if defined(CONFIG_SCHED_HMP) /* @@ -1865,19 +1920,17 @@ update_window_start(struct rq *rq, u64 wallclock) rq->window_start += (u64)nr_windows * (u64)sched_ravg_window; } -static inline u64 scale_exec_time(u64 delta, struct rq *rq) +#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y) + +static inline u64 scale_exec_time(u64 delta, struct rq *rq, + const struct cpu_cycle *cc) { int cpu = cpu_of(rq); - unsigned int cur_freq = cpu_cur_freq(cpu); int sf; - if (unlikely(cur_freq > max_possible_freq)) - cur_freq = max_possible_freq; - - /* round up div64 */ - delta = div64_u64(delta * cur_freq + max_possible_freq - 1, - max_possible_freq); - + delta = DIV64_U64_ROUNDUP(delta * cc->cycles * + cpu_cycle_max_scale_factor(cpu), + max_possible_freq * cc->time); sf = DIV_ROUND_UP(cpu_efficiency(cpu) * 1024, max_possible_efficiency); delta *= sf; @@ -2251,7 +2304,8 @@ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum) */ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, - int event, u64 wallclock, u64 irqtime) + int event, u64 wallclock, u64 irqtime, + const struct cpu_cycle *cc) { int new_window, nr_full_windows = 0; int p_is_curr_task = (p == rq->curr); @@ -2341,7 +2395,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, delta = wallclock - mark_start; else delta = irqtime; - delta = scale_exec_time(delta, rq); + delta = scale_exec_time(delta, rq, cc); rq->curr_runnable_sum += delta; if (new_task) rq->nt_curr_runnable_sum += delta; @@ -2366,14 +2420,15 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, if (!nr_full_windows) { /* A full window hasn't elapsed, account partial * contribution to previous completed window. */ - delta = scale_exec_time(window_start - mark_start, rq); + delta = scale_exec_time(window_start - mark_start, rq, + cc); if (!exiting_task(p)) p->ravg.prev_window += delta; } else { /* Since at least one full window has elapsed, * the contribution to the previous window is the * full window (window_size). */ - delta = scale_exec_time(window_size, rq); + delta = scale_exec_time(window_size, rq, cc); if (!exiting_task(p)) p->ravg.prev_window = delta; } @@ -2382,7 +2437,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, rq->nt_prev_runnable_sum += delta; /* Account piece of busy time in the current window. */ - delta = scale_exec_time(wallclock - window_start, rq); + delta = scale_exec_time(wallclock - window_start, rq, cc); rq->curr_runnable_sum += delta; if (new_task) rq->nt_curr_runnable_sum += delta; @@ -2408,7 +2463,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, if (!nr_full_windows) { /* A full window hasn't elapsed, account partial * contribution to previous completed window. */ - delta = scale_exec_time(window_start - mark_start, rq); + delta = scale_exec_time(window_start - mark_start, rq, + cc); if (!is_idle_task(p) && !exiting_task(p)) p->ravg.prev_window += delta; @@ -2421,7 +2477,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, /* Since at least one full window has elapsed, * the contribution to the previous window is the * full window (window_size). */ - delta = scale_exec_time(window_size, rq); + delta = scale_exec_time(window_size, rq, cc); if (!is_idle_task(p) && !exiting_task(p)) p->ravg.prev_window = delta; @@ -2439,7 +2495,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, rq->prev_runnable_sum = delta; /* Account piece of busy time in the current window. */ - delta = scale_exec_time(wallclock - window_start, rq); + delta = scale_exec_time(wallclock - window_start, rq, cc); rq->curr_runnable_sum = delta; if (new_task) rq->nt_curr_runnable_sum = delta; @@ -2471,7 +2527,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum; rq->nt_curr_runnable_sum = 0; if (mark_start > window_start) { - rq->curr_runnable_sum = scale_exec_time(irqtime, rq); + rq->curr_runnable_sum = scale_exec_time(irqtime, rq, + cc); return; } @@ -2480,12 +2537,12 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, delta = window_start - mark_start; if (delta > window_size) delta = window_size; - delta = scale_exec_time(delta, rq); + delta = scale_exec_time(delta, rq, cc); rq->prev_runnable_sum += delta; /* Process the remaining IRQ busy time in the current window. */ delta = wallclock - window_start; - rq->curr_runnable_sum = scale_exec_time(delta, rq); + rq->curr_runnable_sum = scale_exec_time(delta, rq, cc); return; } @@ -2515,7 +2572,7 @@ update_task_pred_demand(struct rq *rq, struct task_struct *p, int event) } static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq, - int event, u64 wallclock, u64 irqtime) + int event, u64 wallclock, u64 irqtime, const struct cpu_cycle *cc) { } @@ -2528,6 +2585,41 @@ static inline u32 predict_and_update_buckets(struct rq *rq, #endif /* CONFIG_SCHED_FREQ_INPUT */ +static void update_task_cpu_cycles(struct task_struct *p, int cpu) +{ + if (use_cycle_counter) + p->cpu_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu); +} + +static struct cpu_cycle +get_task_cpu_cycles(struct task_struct *p, struct rq *rq, int event, + u64 wallclock) +{ + u64 cur_cycles; + struct cpu_cycle cc; + int cpu = cpu_of(rq); + + if (!use_cycle_counter) { + cc.cycles = cpu_cur_freq(cpu); + cc.time = 1; + return cc; + } + + cur_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu); + if (unlikely(cur_cycles < p->cpu_cycles)) + cc.cycles = cur_cycles + (U64_MAX - p->cpu_cycles); + else + cc.cycles = cur_cycles - p->cpu_cycles; + cc.time = wallclock - p->ravg.mark_start; + BUG_ON((s64)cc.time < 0); + + p->cpu_cycles = cur_cycles; + + trace_sched_get_task_cpu_cycles(cpu, event, cc.cycles, cc.time); + + return cc; +} + static int account_busy_for_task_demand(struct task_struct *p, int event) { /* No need to bother updating task demand for exiting tasks @@ -2614,9 +2706,9 @@ done: } static void add_to_task_demand(struct rq *rq, struct task_struct *p, - u64 delta) + u64 delta, const struct cpu_cycle *cc) { - delta = scale_exec_time(delta, rq); + delta = scale_exec_time(delta, rq, cc); p->ravg.sum += delta; if (unlikely(p->ravg.sum > sched_ravg_window)) p->ravg.sum = sched_ravg_window; @@ -2673,7 +2765,8 @@ static void add_to_task_demand(struct rq *rq, struct task_struct *p, * depends on it! */ static void update_task_demand(struct task_struct *p, struct rq *rq, - int event, u64 wallclock) + int event, u64 wallclock, + const struct cpu_cycle *cc) { u64 mark_start = p->ravg.mark_start; u64 delta, window_start = rq->window_start; @@ -2696,7 +2789,7 @@ static void update_task_demand(struct task_struct *p, struct rq *rq, if (!new_window) { /* The simple case - busy time contained within the existing * window. */ - add_to_task_demand(rq, p, wallclock - mark_start); + add_to_task_demand(rq, p, wallclock - mark_start, cc); return; } @@ -2707,12 +2800,12 @@ static void update_task_demand(struct task_struct *p, struct rq *rq, window_start -= (u64)nr_full_windows * (u64)window_size; /* Process (window_start - mark_start) first */ - add_to_task_demand(rq, p, window_start - mark_start); + add_to_task_demand(rq, p, window_start - mark_start, cc); /* Push new sample(s) into task's demand history */ update_history(rq, p, p->ravg.sum, 1, event); if (nr_full_windows) - update_history(rq, p, scale_exec_time(window_size, rq), + update_history(rq, p, scale_exec_time(window_size, rq, cc), nr_full_windows, event); /* Roll window_start back to current to process any remainder @@ -2721,30 +2814,39 @@ static void update_task_demand(struct task_struct *p, struct rq *rq, /* Process (wallclock - window_start) next */ mark_start = window_start; - add_to_task_demand(rq, p, wallclock - mark_start); + add_to_task_demand(rq, p, wallclock - mark_start, cc); } /* Reflect task activity on its demand and cpu's busy time statistics */ -static void update_task_ravg(struct task_struct *p, struct rq *rq, - int event, u64 wallclock, u64 irqtime) +static struct cpu_cycle +update_task_ravg(struct task_struct *p, struct rq *rq, int event, + u64 wallclock, u64 irqtime) { + struct cpu_cycle cc = { .cycles = SCHED_MIN_FREQ, .time = 1 }; + if (sched_use_pelt || !rq->window_start || sched_disable_window_stats) - return; + return cc; lockdep_assert_held(&rq->lock); update_window_start(rq, wallclock); - if (!p->ravg.mark_start) + if (!p->ravg.mark_start) { + update_task_cpu_cycles(p, cpu_of(rq)); goto done; + } - update_task_demand(p, rq, event, wallclock); - update_cpu_busy_time(p, rq, event, wallclock, irqtime); + cc = get_task_cpu_cycles(p, rq, event, wallclock); + update_task_demand(p, rq, event, wallclock, &cc); + update_cpu_busy_time(p, rq, event, wallclock, irqtime, &cc); update_task_pred_demand(rq, p, event); done: - trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime); + trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime, + cc.cycles, cc.time); p->ravg.mark_start = wallclock; + + return cc; } void sched_account_irqtime(int cpu, struct task_struct *curr, @@ -2812,6 +2914,7 @@ static inline void mark_task_starting(struct task_struct *p) wallclock = sched_ktime_clock(); p->ravg.mark_start = p->last_wake_ts = wallclock; p->last_switch_out_ts = 0; + update_task_cpu_cycles(p, cpu_of(rq)); } static inline void set_window_start(struct rq *rq) @@ -3029,6 +3132,7 @@ void sched_get_cpus_busy(struct sched_load *busy, int early_detection[cpus]; int cpu, i = 0; unsigned int window_size; + struct cpu_cycle cc; if (unlikely(cpus == 0)) return; @@ -3047,8 +3151,10 @@ void sched_get_cpus_busy(struct sched_load *busy, for_each_cpu(cpu, query_cpus) { rq = cpu_rq(cpu); - update_task_ravg(rq->curr, rq, TASK_UPDATE, - sched_ktime_clock(), 0); + cc = update_task_ravg(rq->curr, rq, TASK_UPDATE, + sched_ktime_clock(), 0); + cur_freq[i] = cpu_cycles_to_freq(i, cc.cycles, cc.time); + load[i] = rq->old_busy_time = rq->prev_runnable_sum; nload[i] = rq->nt_prev_runnable_sum; pload[i] = rq->hmp_stats.pred_demands_sum; @@ -3066,7 +3172,6 @@ void sched_get_cpus_busy(struct sched_load *busy, notifier_sent[i] = rq->notifier_sent; early_detection[i] = (rq->ed_task != NULL); rq->notifier_sent = 0; - cur_freq[i] = cpu_cur_freq(cpu); max_freq[i] = cpu_max_freq(cpu); i++; } @@ -3212,6 +3317,8 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu) update_task_ravg(p, task_rq(p), TASK_MIGRATE, wallclock, 0); + update_task_cpu_cycles(p, new_cpu); + new_task = is_new_task(p); if (p->ravg.curr_window) { @@ -3531,6 +3638,7 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, sort_clusters(); update_all_clusters_stats(); + update_cpu_cycle_max_possible_freq(cluster); mutex_unlock(&cluster_lock); continue; } @@ -3685,10 +3793,16 @@ heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event) return 0; } -static inline void +static struct cpu_cycle update_task_ravg(struct task_struct *p, struct rq *rq, int event, u64 wallclock, u64 irqtime) { + static const struct cpu_cycle cc = { + .cycles = SCHED_MIN_FREQ, + .time = 1 + }; + + return cc; } static inline void mark_task_starting(struct task_struct *p) {} diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 997339470655..aa30f55dc5ee 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4808,6 +4808,7 @@ void init_new_task_load(struct task_struct *p) rcu_assign_pointer(p->grp, NULL); INIT_LIST_HEAD(&p->grp_list); memset(&p->ravg, 0, sizeof(struct ravg)); + p->cpu_cycles = 0; if (init_load_pct) { init_load_pelt = div64_u64((u64)init_load_pct * diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a11e74c191f3..19033bfc3f8e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -387,6 +387,11 @@ struct sched_cluster { * max_possible_freq = maximum supported by hardware */ unsigned int cur_freq, max_freq, min_freq, max_possible_freq; + /* + * cpu_cycle_max_scale_factor represents number of cycles per NSEC at + * CPU's fmax. + */ + u32 cpu_cycle_max_scale_factor; bool freq_init_done; int dstate, dstate_wakeup_latency, dstate_wakeup_energy; unsigned int static_cluster_pwr_cost; @@ -1120,6 +1125,16 @@ static inline int cpu_max_power_cost(int cpu) return cpu_rq(cpu)->cluster->max_power_cost; } +static inline int cpu_cycle_max_scale_factor(int cpu) +{ + return cpu_rq(cpu)->cluster->cpu_cycle_max_scale_factor; +} + +static inline u32 cpu_cycles_to_freq(int cpu, u64 cycles, u32 period) +{ + return div64_u64(cycles * cpu_cycle_max_scale_factor(cpu), period); +} + static inline bool hmp_capable(void) { return max_possible_capacity != min_max_possible_capacity; |
