summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/sched/core.c 192
-rw-r--r-- kernel/sched/fair.c 1
-rw-r--r-- kernel/sched/sched.h 15
3 files changed, 169 insertions, 39 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0cdd0cf0718f..3afa3c5d5ebd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -797,6 +797,9 @@ sched_set_cpu_cstate(int cpu, int cstate, int wakeup_energy, int wakeup_latency)
static ktime_t ktime_last;
static bool sched_ktime_suspended;
+static bool use_cycle_counter;
+static struct cpu_cycle_counter_cb cpu_cycle_counter_cb;
+
u64 sched_ktime_clock(void)
{
if (unlikely(sched_ktime_suspended))
@@ -1398,6 +1401,7 @@ static struct sched_cluster init_cluster = {
.max_freq = 1,
.min_freq = 1,
.max_possible_freq = 1,
+ .cpu_cycle_max_scale_factor = 1,
.dstate = 0,
.dstate_wakeup_energy = 0,
.dstate_wakeup_latency = 0,
@@ -1546,6 +1550,7 @@ static struct sched_cluster *alloc_new_cluster(const struct cpumask *cpus)
cluster->max_freq = 1;
cluster->min_freq = 1;
cluster->max_possible_freq = 1;
+ cluster->cpu_cycle_max_scale_factor = 1;
cluster->dstate = 0;
cluster->dstate_wakeup_energy = 0;
cluster->dstate_wakeup_latency = 0;
@@ -1612,6 +1617,44 @@ static void init_clusters(void)
INIT_LIST_HEAD(&cluster_head);
}
+/*
+ * Recompute @cluster's cpu_cycle_max_scale_factor as
+ * max_possible_freq * NSEC_PER_USEC divided by the maximum cycles per
+ * microsecond that the registered callback reports for the cluster's
+ * first CPU.
+ */
+static inline void
+__update_cpu_cycle_max_possible_freq(struct sched_cluster *cluster)
+{
+	int cpu = cluster_first_cpu(cluster);
+	u64 max_cycles_per_us =
+		cpu_cycle_counter_cb.get_cpu_cycles_max_per_us(cpu);
+
+	/* Dividing by zero is undefined; keep the previous factor instead. */
+	if (unlikely(!max_cycles_per_us))
+		return;
+
+	/*
+	 * Widen before multiplying: max_possible_freq is an unsigned int, so
+	 * the product could otherwise be computed in a type narrower than
+	 * 64 bits and wrap before div64_u64() sees it.
+	 */
+	cluster->cpu_cycle_max_scale_factor =
+		div64_u64((u64)cluster->max_possible_freq * NSEC_PER_USEC,
+			  max_cycles_per_us);
+}
+
+/*
+ * Refresh @cluster's cycle-counter scale factor, but only once a cycle
+ * counter has been registered (use_cycle_counter set); otherwise a no-op.
+ */
+static inline void
+update_cpu_cycle_max_possible_freq(struct sched_cluster *cluster)
+{
+	if (use_cycle_counter)
+		__update_cpu_cycle_max_possible_freq(cluster);
+}
+
+/*
+ * register_cpu_cycle_counter_cb() - install the CPU cycle counter callbacks.
+ * @cb: callback table; it is copied, so the caller's copy need not persist.
+ *
+ * Recomputes every cluster's cpu_cycle_max_scale_factor with the new
+ * callbacks and then sets use_cycle_counter, all under cluster_lock.
+ *
+ * Return: 0 on success, -EINVAL if @cb is NULL or lacks either callback.
+ */
+int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
+{
+	struct sched_cluster *cluster = NULL;
+
+	/* Validate before locking: the checks only read the caller's struct. */
+	if (!cb)
+		return -EINVAL;
+	if (!cb->get_cpu_cycle_counter || !cb->get_cpu_cycles_max_per_us)
+		return -EINVAL;
+
+	mutex_lock(&cluster_lock);
+	cpu_cycle_counter_cb = *cb;
+	for_each_sched_cluster(cluster)
+		__update_cpu_cycle_max_possible_freq(cluster);
+	/* Enable only after the callbacks and scale factors are in place. */
+	use_cycle_counter = true;
+	mutex_unlock(&cluster_lock);
+
+	return 0;
+}
+
static int __init set_sched_enable_hmp(char *str)
{
int enable_hmp = 0;
@@ -1718,12 +1761,24 @@ static inline void clear_boost_kick(int cpu) { }
static inline void clear_hmp_request(int cpu) { }
+/*
+ * Stub variant: ignores @cb, registers nothing and always reports success.
+ */
+int register_cpu_cycle_counter_cb(struct cpu_cycle_counter_cb *cb)
+{
+	return 0;
+}
+
#ifdef CONFIG_SMP
static void update_cluster_topology(void) { }
#endif
#endif /* CONFIG_SCHED_HMP */
+#define SCHED_MIN_FREQ 1
+
+/*
+ * A cycle count paired with the time span it covers.  Without a cycle
+ * counter, get_task_cpu_cycles() stores the current CPU frequency in
+ * @cycles and 1 in @time.
+ */
+struct cpu_cycle {
+	u64 cycles;	/* cycles elapsed, or the frequency fallback */
+	u64 time;	/* wallclock delta since the task's mark_start, or 1 */
+};
+
#if defined(CONFIG_SCHED_HMP)
/*
@@ -1865,19 +1920,17 @@ update_window_start(struct rq *rq, u64 wallclock)
rq->window_start += (u64)nr_windows * (u64)sched_ravg_window;
}
-static inline u64 scale_exec_time(u64 delta, struct rq *rq)
+/*
+ * 64-bit division of X by Y, rounded up.  Y is expanded twice, so it must
+ * be free of side effects, and X + Y - 1 must not overflow.  Both arguments
+ * are parenthesised so that any expression can be passed safely.
+ */
+#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + ((Y) - 1), (Y))
+
+static inline u64 scale_exec_time(u64 delta, struct rq *rq,
+ const struct cpu_cycle *cc)
{
int cpu = cpu_of(rq);
- unsigned int cur_freq = cpu_cur_freq(cpu);
int sf;
- if (unlikely(cur_freq > max_possible_freq))
- cur_freq = max_possible_freq;
-
- /* round up div64 */
- delta = div64_u64(delta * cur_freq + max_possible_freq - 1,
- max_possible_freq);
-
+ delta = DIV64_U64_ROUNDUP(delta * cc->cycles *
+ cpu_cycle_max_scale_factor(cpu),
+ max_possible_freq * cc->time);
sf = DIV_ROUND_UP(cpu_efficiency(cpu) * 1024, max_possible_efficiency);
delta *= sf;
@@ -2251,7 +2304,8 @@ void update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
* Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum)
*/
static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
- int event, u64 wallclock, u64 irqtime)
+ int event, u64 wallclock, u64 irqtime,
+ const struct cpu_cycle *cc)
{
int new_window, nr_full_windows = 0;
int p_is_curr_task = (p == rq->curr);
@@ -2341,7 +2395,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
delta = wallclock - mark_start;
else
delta = irqtime;
- delta = scale_exec_time(delta, rq);
+ delta = scale_exec_time(delta, rq, cc);
rq->curr_runnable_sum += delta;
if (new_task)
rq->nt_curr_runnable_sum += delta;
@@ -2366,14 +2420,15 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
if (!nr_full_windows) {
/* A full window hasn't elapsed, account partial
* contribution to previous completed window. */
- delta = scale_exec_time(window_start - mark_start, rq);
+ delta = scale_exec_time(window_start - mark_start, rq,
+ cc);
if (!exiting_task(p))
p->ravg.prev_window += delta;
} else {
/* Since at least one full window has elapsed,
* the contribution to the previous window is the
* full window (window_size). */
- delta = scale_exec_time(window_size, rq);
+ delta = scale_exec_time(window_size, rq, cc);
if (!exiting_task(p))
p->ravg.prev_window = delta;
}
@@ -2382,7 +2437,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
rq->nt_prev_runnable_sum += delta;
/* Account piece of busy time in the current window. */
- delta = scale_exec_time(wallclock - window_start, rq);
+ delta = scale_exec_time(wallclock - window_start, rq, cc);
rq->curr_runnable_sum += delta;
if (new_task)
rq->nt_curr_runnable_sum += delta;
@@ -2408,7 +2463,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
if (!nr_full_windows) {
/* A full window hasn't elapsed, account partial
* contribution to previous completed window. */
- delta = scale_exec_time(window_start - mark_start, rq);
+ delta = scale_exec_time(window_start - mark_start, rq,
+ cc);
if (!is_idle_task(p) && !exiting_task(p))
p->ravg.prev_window += delta;
@@ -2421,7 +2477,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
/* Since at least one full window has elapsed,
* the contribution to the previous window is the
* full window (window_size). */
- delta = scale_exec_time(window_size, rq);
+ delta = scale_exec_time(window_size, rq, cc);
if (!is_idle_task(p) && !exiting_task(p))
p->ravg.prev_window = delta;
@@ -2439,7 +2495,7 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
rq->prev_runnable_sum = delta;
/* Account piece of busy time in the current window. */
- delta = scale_exec_time(wallclock - window_start, rq);
+ delta = scale_exec_time(wallclock - window_start, rq, cc);
rq->curr_runnable_sum = delta;
if (new_task)
rq->nt_curr_runnable_sum = delta;
@@ -2471,7 +2527,8 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
rq->nt_prev_runnable_sum = rq->nt_curr_runnable_sum;
rq->nt_curr_runnable_sum = 0;
if (mark_start > window_start) {
- rq->curr_runnable_sum = scale_exec_time(irqtime, rq);
+ rq->curr_runnable_sum = scale_exec_time(irqtime, rq,
+ cc);
return;
}
@@ -2480,12 +2537,12 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
delta = window_start - mark_start;
if (delta > window_size)
delta = window_size;
- delta = scale_exec_time(delta, rq);
+ delta = scale_exec_time(delta, rq, cc);
rq->prev_runnable_sum += delta;
/* Process the remaining IRQ busy time in the current window. */
delta = wallclock - window_start;
- rq->curr_runnable_sum = scale_exec_time(delta, rq);
+ rq->curr_runnable_sum = scale_exec_time(delta, rq, cc);
return;
}
@@ -2515,7 +2572,7 @@ update_task_pred_demand(struct rq *rq, struct task_struct *p, int event)
}
static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
- int event, u64 wallclock, u64 irqtime)
+ int event, u64 wallclock, u64 irqtime, const struct cpu_cycle *cc)
{
}
@@ -2528,6 +2585,41 @@ static inline u32 predict_and_update_buckets(struct rq *rq,
#endif /* CONFIG_SCHED_FREQ_INPUT */
+/*
+ * Snapshot @cpu's cycle counter into p->cpu_cycles.  Does nothing until a
+ * cycle counter has been registered (use_cycle_counter).
+ */
+static void update_task_cpu_cycles(struct task_struct *p, int cpu)
+{
+	if (!use_cycle_counter)
+		return;
+
+	p->cpu_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+}
+
+/*
+ * Return the cycles @rq's CPU has counted since @p's last cycle snapshot,
+ * together with the time elapsed since p->ravg.mark_start, and refresh the
+ * snapshot in p->cpu_cycles.
+ *
+ * Without a registered cycle counter the result is { cpu_cur_freq(cpu), 1 }.
+ *
+ * NOTE(review): cc.time is 0 when @wallclock equals p->ravg.mark_start, and
+ * scale_exec_time() multiplies it into a divisor -- confirm callers cannot
+ * reach that case.
+ */
+static struct cpu_cycle
+get_task_cpu_cycles(struct task_struct *p, struct rq *rq, int event,
+		    u64 wallclock)
+{
+	u64 cur_cycles;
+	struct cpu_cycle cc;
+	int cpu = cpu_of(rq);
+
+	if (!use_cycle_counter) {
+		cc.cycles = cpu_cur_freq(cpu);
+		cc.time = 1;
+		return cc;
+	}
+
+	cur_cycles = cpu_cycle_counter_cb.get_cpu_cycle_counter(cpu);
+	/*
+	 * Unsigned subtraction is modulo 2^64, so this is the exact delta even
+	 * when the counter has wrapped past U64_MAX; a separate wrap-around
+	 * branch adding (U64_MAX - previous) would come out one cycle short.
+	 */
+	cc.cycles = cur_cycles - p->cpu_cycles;
+	cc.time = wallclock - p->ravg.mark_start;
+	/* A wallclock earlier than mark_start means the clock went backwards. */
+	BUG_ON((s64)cc.time < 0);
+
+	p->cpu_cycles = cur_cycles;
+
+	trace_sched_get_task_cpu_cycles(cpu, event, cc.cycles, cc.time);
+
+	return cc;
+}
+
static int account_busy_for_task_demand(struct task_struct *p, int event)
{
/* No need to bother updating task demand for exiting tasks
@@ -2614,9 +2706,9 @@ done:
}
static void add_to_task_demand(struct rq *rq, struct task_struct *p,
- u64 delta)
+ u64 delta, const struct cpu_cycle *cc)
{
- delta = scale_exec_time(delta, rq);
+ delta = scale_exec_time(delta, rq, cc);
p->ravg.sum += delta;
if (unlikely(p->ravg.sum > sched_ravg_window))
p->ravg.sum = sched_ravg_window;
@@ -2673,7 +2765,8 @@ static void add_to_task_demand(struct rq *rq, struct task_struct *p,
* depends on it!
*/
static void update_task_demand(struct task_struct *p, struct rq *rq,
- int event, u64 wallclock)
+ int event, u64 wallclock,
+ const struct cpu_cycle *cc)
{
u64 mark_start = p->ravg.mark_start;
u64 delta, window_start = rq->window_start;
@@ -2696,7 +2789,7 @@ static void update_task_demand(struct task_struct *p, struct rq *rq,
if (!new_window) {
/* The simple case - busy time contained within the existing
* window. */
- add_to_task_demand(rq, p, wallclock - mark_start);
+ add_to_task_demand(rq, p, wallclock - mark_start, cc);
return;
}
@@ -2707,12 +2800,12 @@ static void update_task_demand(struct task_struct *p, struct rq *rq,
window_start -= (u64)nr_full_windows * (u64)window_size;
/* Process (window_start - mark_start) first */
- add_to_task_demand(rq, p, window_start - mark_start);
+ add_to_task_demand(rq, p, window_start - mark_start, cc);
/* Push new sample(s) into task's demand history */
update_history(rq, p, p->ravg.sum, 1, event);
if (nr_full_windows)
- update_history(rq, p, scale_exec_time(window_size, rq),
+ update_history(rq, p, scale_exec_time(window_size, rq, cc),
nr_full_windows, event);
/* Roll window_start back to current to process any remainder
@@ -2721,30 +2814,39 @@ static void update_task_demand(struct task_struct *p, struct rq *rq,
/* Process (wallclock - window_start) next */
mark_start = window_start;
- add_to_task_demand(rq, p, wallclock - mark_start);
+ add_to_task_demand(rq, p, wallclock - mark_start, cc);
}
/* Reflect task activity on its demand and cpu's busy time statistics */
-static void update_task_ravg(struct task_struct *p, struct rq *rq,
- int event, u64 wallclock, u64 irqtime)
+static struct cpu_cycle
+update_task_ravg(struct task_struct *p, struct rq *rq, int event,
+ u64 wallclock, u64 irqtime)
{
+ struct cpu_cycle cc = { .cycles = SCHED_MIN_FREQ, .time = 1 };
+
if (sched_use_pelt || !rq->window_start || sched_disable_window_stats)
- return;
+ return cc;
lockdep_assert_held(&rq->lock);
update_window_start(rq, wallclock);
- if (!p->ravg.mark_start)
+ if (!p->ravg.mark_start) {
+ update_task_cpu_cycles(p, cpu_of(rq));
goto done;
+ }
- update_task_demand(p, rq, event, wallclock);
- update_cpu_busy_time(p, rq, event, wallclock, irqtime);
+ cc = get_task_cpu_cycles(p, rq, event, wallclock);
+ update_task_demand(p, rq, event, wallclock, &cc);
+ update_cpu_busy_time(p, rq, event, wallclock, irqtime, &cc);
update_task_pred_demand(rq, p, event);
done:
- trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime);
+ trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime,
+ cc.cycles, cc.time);
p->ravg.mark_start = wallclock;
+
+ return cc;
}
void sched_account_irqtime(int cpu, struct task_struct *curr,
@@ -2812,6 +2914,7 @@ static inline void mark_task_starting(struct task_struct *p)
wallclock = sched_ktime_clock();
p->ravg.mark_start = p->last_wake_ts = wallclock;
p->last_switch_out_ts = 0;
+ update_task_cpu_cycles(p, cpu_of(rq));
}
static inline void set_window_start(struct rq *rq)
@@ -3029,6 +3132,7 @@ void sched_get_cpus_busy(struct sched_load *busy,
int early_detection[cpus];
int cpu, i = 0;
unsigned int window_size;
+ struct cpu_cycle cc;
if (unlikely(cpus == 0))
return;
@@ -3047,8 +3151,10 @@ void sched_get_cpus_busy(struct sched_load *busy,
for_each_cpu(cpu, query_cpus) {
rq = cpu_rq(cpu);
- update_task_ravg(rq->curr, rq, TASK_UPDATE,
- sched_ktime_clock(), 0);
+		cc = update_task_ravg(rq->curr, rq, TASK_UPDATE,
+				      sched_ktime_clock(), 0);
+		/*
+		 * Look up the scale factor by CPU id: i only indexes the
+		 * per-query output arrays and need not equal cpu.
+		 */
+		cur_freq[i] = cpu_cycles_to_freq(cpu, cc.cycles, cc.time);
+
load[i] = rq->old_busy_time = rq->prev_runnable_sum;
nload[i] = rq->nt_prev_runnable_sum;
pload[i] = rq->hmp_stats.pred_demands_sum;
@@ -3066,7 +3172,6 @@ void sched_get_cpus_busy(struct sched_load *busy,
notifier_sent[i] = rq->notifier_sent;
early_detection[i] = (rq->ed_task != NULL);
rq->notifier_sent = 0;
- cur_freq[i] = cpu_cur_freq(cpu);
max_freq[i] = cpu_max_freq(cpu);
i++;
}
@@ -3212,6 +3317,8 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
update_task_ravg(p, task_rq(p), TASK_MIGRATE,
wallclock, 0);
+ update_task_cpu_cycles(p, new_cpu);
+
new_task = is_new_task(p);
if (p->ravg.curr_window) {
@@ -3531,6 +3638,7 @@ static int cpufreq_notifier_policy(struct notifier_block *nb,
sort_clusters();
update_all_clusters_stats();
+ update_cpu_cycle_max_possible_freq(cluster);
mutex_unlock(&cluster_lock);
continue;
}
@@ -3685,10 +3793,16 @@ heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event)
return 0;
}
-static inline void
+static struct cpu_cycle
update_task_ravg(struct task_struct *p, struct rq *rq,
int event, u64 wallclock, u64 irqtime)
{
+ static const struct cpu_cycle cc = {
+ .cycles = SCHED_MIN_FREQ,
+ .time = 1
+ };
+
+ return cc;
}
static inline void mark_task_starting(struct task_struct *p) {}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 997339470655..aa30f55dc5ee 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4808,6 +4808,7 @@ void init_new_task_load(struct task_struct *p)
rcu_assign_pointer(p->grp, NULL);
INIT_LIST_HEAD(&p->grp_list);
memset(&p->ravg, 0, sizeof(struct ravg));
+ p->cpu_cycles = 0;
if (init_load_pct) {
init_load_pelt = div64_u64((u64)init_load_pct *
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a11e74c191f3..19033bfc3f8e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -387,6 +387,11 @@ struct sched_cluster {
* max_possible_freq = maximum supported by hardware
*/
unsigned int cur_freq, max_freq, min_freq, max_possible_freq;
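+	/*
+	 * cpu_cycle_max_scale_factor is max_possible_freq * NSEC_PER_USEC
+	 * divided by the cycle counter's maximum cycles per microsecond.
+	 * Multiplying a cycles-per-time ratio by it gives a value that can
+	 * be compared with max_possible_freq.
+	 */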
+ u32 cpu_cycle_max_scale_factor;
bool freq_init_done;
int dstate, dstate_wakeup_latency, dstate_wakeup_energy;
unsigned int static_cluster_pwr_cost;
@@ -1120,6 +1125,16 @@ static inline int cpu_max_power_cost(int cpu)
return cpu_rq(cpu)->cluster->max_power_cost;
}
+/* Cycle-counter scale factor of the cluster that @cpu's runqueue belongs to. */
+static inline int cpu_cycle_max_scale_factor(int cpu)
+{
+	const struct sched_cluster *cluster = cpu_rq(cpu)->cluster;
+
+	return cluster->cpu_cycle_max_scale_factor;
+}
+
+/*
+ * Convert @cycles counted over @period into a frequency, using the scale
+ * factor of @cpu's cluster.  @period is 64-bit so that a u64 time delta is
+ * not silently truncated when passed in.
+ *
+ * Return: cycles * scale factor / period, or 0 when @period is 0.
+ */
+static inline u32 cpu_cycles_to_freq(int cpu, u64 cycles, u64 period)
+{
+	/* Dividing by zero is undefined; report "no estimate" instead. */
+	if (unlikely(!period))
+		return 0;
+
+	return div64_u64(cycles * cpu_cycle_max_scale_factor(cpu), period);
+}
+
static inline bool hmp_capable(void)
{
return max_possible_capacity != min_max_possible_capacity;