Diffstat (limited to 'kernel')
-rw-r--r--   kernel/sched/core.c   |   5
-rw-r--r--   kernel/sched/debug.c  |  12
-rw-r--r--   kernel/sched/fair.c   | 355
-rw-r--r--   kernel/sched/sched.h  |  29
4 files changed, 350 insertions, 51 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8ad6ea28b278..aee448df0f41 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2085,8 +2085,9 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 #ifdef CONFIG_SCHED_FREQ_INPUT
                 rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
 #endif
-                rq->hmp_stats.cumulative_runnable_avg = 0;
-                fixup_nr_big_small_task(cpu);
+                reset_cpu_hmp_stats(cpu, 1);
+
+                fixup_nr_big_small_task(cpu, 0);
         }
 
         if (sched_window_stats_policy != sysctl_sched_window_stats_policy) {
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index f0d1e0b40195..1154330bda65 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -227,6 +227,16 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
                         cfs_rq->throttled);
         SEQ_printf(m, "  .%-30s: %d\n", "throttle_count",
                         cfs_rq->throttle_count);
+        SEQ_printf(m, "  .%-30s: %d\n", "runtime_enabled",
+                        cfs_rq->runtime_enabled);
+#ifdef CONFIG_SCHED_HMP
+        SEQ_printf(m, "  .%-30s: %d\n", "nr_big_tasks",
+                        cfs_rq->hmp_stats.nr_big_tasks);
+        SEQ_printf(m, "  .%-30s: %d\n", "nr_small_tasks",
+                        cfs_rq->hmp_stats.nr_small_tasks);
+        SEQ_printf(m, "  .%-30s: %llu\n", "cumulative_runnable_avg",
+                        cfs_rq->hmp_stats.cumulative_runnable_avg);
+#endif
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -322,6 +332,8 @@ do {                                            \
 #ifdef CONFIG_SCHED_HMP
         P(hmp_stats.nr_big_tasks);
         P(hmp_stats.nr_small_tasks);
+        SEQ_printf(m, "  .%-30s: %llu\n", "hmp_stats.cumulative_runnable_avg",
+                        rq->hmp_stats.cumulative_runnable_avg);
 #endif
 #undef P
 #undef PN
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3b5f061ec020..fced56a17acf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3495,31 +3495,195 @@ dec_nr_big_small_task(struct hmp_sched_stats *stats, struct task_struct *p)
         BUG_ON(stats->nr_big_tasks < 0 || stats->nr_small_tasks < 0);
 }
 
-static void inc_rq_hmp_stats(struct rq *rq, struct task_struct *p)
+static void
+inc_rq_hmp_stats(struct rq *rq, struct task_struct *p, int change_cra)
 {
-        inc_cumulative_runnable_avg(&rq->hmp_stats, p);
         inc_nr_big_small_task(&rq->hmp_stats, p);
+        if (change_cra)
+                inc_cumulative_runnable_avg(&rq->hmp_stats, p);
 }
 
-static void dec_rq_hmp_stats(struct rq *rq, struct task_struct *p)
+static void
+dec_rq_hmp_stats(struct rq *rq, struct task_struct *p, int change_cra)
 {
-        dec_cumulative_runnable_avg(&rq->hmp_stats, p);
         dec_nr_big_small_task(&rq->hmp_stats, p);
+        if (change_cra)
+                dec_cumulative_runnable_avg(&rq->hmp_stats, p);
+}
+
+static void reset_hmp_stats(struct hmp_sched_stats *stats, int reset_cra)
+{
+        stats->nr_big_tasks = stats->nr_small_tasks = 0;
+        if (reset_cra)
+                stats->cumulative_runnable_avg = 0;
+}
+
+
+#ifdef CONFIG_CFS_BANDWIDTH
+
+static inline struct task_group *next_task_group(struct task_group *tg)
+{
+        tg = list_entry_rcu(tg->list.next, typeof(struct task_group), list);
+
+        return (&tg->list == &task_groups) ? NULL : tg;
+}
+
+/* Iterate over all cfs_rq in a cpu */
+#define for_each_cfs_rq(cfs_rq, tg, cpu)        \
+        for (tg = container_of(&task_groups, struct task_group, list); \
+                ((tg = next_task_group(tg)) && (cfs_rq = tg->cfs_rq[cpu]));)
+
+static void reset_cfs_rq_hmp_stats(int cpu, int reset_cra)
+{
+        struct task_group *tg;
+        struct cfs_rq *cfs_rq;
+
+        rcu_read_lock();
+
+        for_each_cfs_rq(cfs_rq, tg, cpu)
+                reset_hmp_stats(&cfs_rq->hmp_stats, reset_cra);
+
+        rcu_read_unlock();
 }
 
+#else /* CONFIG_CFS_BANDWIDTH */
+
+static inline void reset_cfs_rq_hmp_stats(int cpu, int reset_cra) { }
+
+#endif /* CONFIG_CFS_BANDWIDTH */
+
+/*
+ * reset_cpu_hmp_stats - reset HMP stats for a cpu
+ *      nr_big_tasks, nr_small_tasks
+ *      cumulative_runnable_avg (iff reset_cra is true)
+ */
+void reset_cpu_hmp_stats(int cpu, int reset_cra)
+{
+        reset_cfs_rq_hmp_stats(cpu, reset_cra);
+        reset_hmp_stats(&cpu_rq(cpu)->hmp_stats, reset_cra);
+}
+
+#ifdef CONFIG_CFS_BANDWIDTH
+
+static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
+
+static void inc_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+         struct task_struct *p, int change_cra);
+static void dec_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+         struct task_struct *p, int change_cra);
+
+/* Add task's contribution to a cpu' HMP statistics */
+static void
+_inc_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p, int change_cra)
+{
+        struct cfs_rq *cfs_rq;
+        struct sched_entity *se = &p->se;
+
+        /*
+         * Although below check is not strictly required (as
+         * inc/dec_nr_big_small_task and inc/dec_cumulative_runnable_avg called
+         * from inc_cfs_rq_hmp_stats() have similar checks), we gain a bit on
+         * efficiency by short-circuiting for_each_sched_entity() loop when
+         * !sched_enable_hmp || sched_disable_window_stats
+         */
+        if (!sched_enable_hmp || sched_disable_window_stats)
+                return;
+
+        for_each_sched_entity(se) {
+                cfs_rq = cfs_rq_of(se);
+                inc_cfs_rq_hmp_stats(cfs_rq, p, change_cra);
+                if (cfs_rq_throttled(cfs_rq))
+                        break;
+        }
+
+        /* Update rq->hmp_stats only if we didn't find any throttled cfs_rq */
+        if (!se)
+                inc_rq_hmp_stats(rq, p, change_cra);
+}
+
+/* Remove task's contribution from a cpu' HMP statistics */
+static void
+_dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p, int change_cra)
+{
+        struct cfs_rq *cfs_rq;
+        struct sched_entity *se = &p->se;
+
+        /* See comment on efficiency in _inc_hmp_sched_stats_fair */
+        if (!sched_enable_hmp || sched_disable_window_stats)
+                return;
+
+        for_each_sched_entity(se) {
+                cfs_rq = cfs_rq_of(se);
+                dec_cfs_rq_hmp_stats(cfs_rq, p, change_cra);
+                if (cfs_rq_throttled(cfs_rq))
+                        break;
+        }
+
+        /* Update rq->hmp_stats only if we didn't find any throttled cfs_rq */
+        if (!se)
+                dec_rq_hmp_stats(rq, p, change_cra);
+}
+
+static void inc_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
+{
+        _inc_hmp_sched_stats_fair(rq, p, 1);
+}
+
+static void dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
+{
+        _dec_hmp_sched_stats_fair(rq, p, 1);
+}
+
+static int task_will_be_throttled(struct task_struct *p);
+
+#else /* CONFIG_CFS_BANDWIDTH */
+
+static void
+inc_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
+{
+        inc_nr_big_small_task(&rq->hmp_stats, p);
+        inc_cumulative_runnable_avg(&rq->hmp_stats, p);
+}
+
+static void
+dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
+{
+        dec_nr_big_small_task(&rq->hmp_stats, p);
+        dec_cumulative_runnable_avg(&rq->hmp_stats, p);
+}
+
+static inline int task_will_be_throttled(struct task_struct *p)
+{
+        return 0;
+}
+
+static void
+_inc_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p, int change_cra)
+{
+        inc_nr_big_small_task(&rq->hmp_stats, p);
+}
+
+#endif /* CONFIG_CFS_BANDWIDTH */
+
 /*
  * Walk runqueue of cpu and re-initialize 'nr_big_tasks' and 'nr_small_tasks'
  * counters.
  */
-void fixup_nr_big_small_task(int cpu)
+void fixup_nr_big_small_task(int cpu, int reset_stats)
 {
         struct rq *rq = cpu_rq(cpu);
         struct task_struct *p;
 
-        rq->hmp_stats.nr_big_tasks = 0;
-        rq->hmp_stats.nr_small_tasks = 0;
+        /* fixup_nr_big_small_task() is called from two functions. In one of
+         * them stats are already reset, don't waste time resetting them again
+         */
+        if (reset_stats) {
+                /* Do not reset cumulative_runnable_avg */
+                reset_cpu_hmp_stats(cpu, 0);
+        }
+
         list_for_each_entry(p, &rq->cfs_tasks, se.group_node)
-                inc_nr_big_small_task(&rq->hmp_stats, p);
+                _inc_hmp_sched_stats_fair(rq, p, 0);
 }
 
 /* Disable interrupts and grab runqueue lock of all cpus listed in @cpus */
@@ -3543,7 +3707,7 @@ void post_big_small_task_count_change(const struct cpumask *cpus)
 
         /* Assumes local_irq_disable() keeps online cpumap stable */
         for_each_cpu(i, cpus)
-                fixup_nr_big_small_task(i);
+                fixup_nr_big_small_task(i, 1);
 
         for_each_cpu(i, cpus)
                 raw_spin_unlock(&cpu_rq(i)->lock);
@@ -3774,6 +3938,10 @@ static inline int migration_needed(struct rq *rq, struct task_struct *p)
         if (!sched_enable_hmp || p->state != TASK_RUNNING)
                 return 0;
 
+        /* No need to migrate task that is about to be throttled */
+        if (task_will_be_throttled(p))
+                return 0;
+
         if (sched_boost()) {
                 if (rq->capacity != max_capacity)
                         return MOVE_TO_BIG_CPU;
@@ -3887,20 +4055,6 @@ unsigned int cpu_temp(int cpu)
         return 0;
 }
 
-static void
-inc_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
-{
-        inc_cumulative_runnable_avg(&rq->hmp_stats, p);
-        inc_nr_big_small_task(&rq->hmp_stats, p);
-}
-
-static void
-dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
-{
-        dec_cumulative_runnable_avg(&rq->hmp_stats, p);
-        dec_nr_big_small_task(&rq->hmp_stats, p);
-}
-
 #else /* CONFIG_SCHED_HMP */
 
 #define sysctl_sched_enable_power_aware 0
@@ -3972,18 +4126,16 @@ unsigned int cpu_temp(int cpu)
         return 0;
 }
 
-static inline void inc_rq_hmp_stats(struct rq *rq, struct task_struct *p) { }
-static inline void dec_rq_hmp_stats(struct rq *rq, struct task_struct *p) { }
+static inline void
+inc_rq_hmp_stats(struct rq *rq, struct task_struct *p, int change_cra) { }
+static inline void
+dec_rq_hmp_stats(struct rq *rq, struct task_struct *p, int change_cra) { }
 
 static inline void
-inc_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
-{
-}
+inc_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p) { }
 
 static inline void
-dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p)
-{
-}
+dec_hmp_sched_stats_fair(struct rq *rq, struct task_struct *p) { }
 
 #endif /* CONFIG_SCHED_HMP */
 
@@ -4397,8 +4549,10 @@ static inline int idle_balance(struct rq *rq)
         return 0;
 }
 
-static inline void inc_rq_hmp_stats(struct rq *rq, struct task_struct *p) { }
-static inline void dec_rq_hmp_stats(struct rq *rq, struct task_struct *p) { }
+static inline void
+inc_rq_hmp_stats(struct rq *rq, struct task_struct *p, int change_cra) { }
+static inline void
+dec_rq_hmp_stats(struct rq *rq, struct task_struct *p, int change_cra) { }
 
 #endif /* CONFIG_SMP */
 
@@ -4452,6 +4606,62 @@ static inline void decay_scaled_stat(struct sched_avg *sa, u64 periods)
                 periods);
 }
 
+#ifdef CONFIG_CFS_BANDWIDTH
+
+static void init_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq)
+{
+        cfs_rq->hmp_stats.nr_big_tasks = 0;
+        cfs_rq->hmp_stats.nr_small_tasks = 0;
+        cfs_rq->hmp_stats.cumulative_runnable_avg = 0;
+}
+
+static void inc_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+         struct task_struct *p, int change_cra)
+{
+        inc_nr_big_small_task(&cfs_rq->hmp_stats, p);
+        if (change_cra)
+                inc_cumulative_runnable_avg(&cfs_rq->hmp_stats, p);
+}
+
+static void dec_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+         struct task_struct *p, int change_cra)
+{
+        dec_nr_big_small_task(&cfs_rq->hmp_stats, p);
+        if (change_cra)
+                dec_cumulative_runnable_avg(&cfs_rq->hmp_stats, p);
+}
+
+static void inc_throttled_cfs_rq_hmp_stats(struct hmp_sched_stats *stats,
+                         struct cfs_rq *cfs_rq)
+{
+        stats->nr_big_tasks += cfs_rq->hmp_stats.nr_big_tasks;
+        stats->nr_small_tasks += cfs_rq->hmp_stats.nr_small_tasks;
+        stats->cumulative_runnable_avg +=
+                                cfs_rq->hmp_stats.cumulative_runnable_avg;
+}
+
+static void dec_throttled_cfs_rq_hmp_stats(struct hmp_sched_stats *stats,
+                         struct cfs_rq *cfs_rq)
+{
+        stats->nr_big_tasks -= cfs_rq->hmp_stats.nr_big_tasks;
+        stats->nr_small_tasks -= cfs_rq->hmp_stats.nr_small_tasks;
+        stats->cumulative_runnable_avg -=
+                                cfs_rq->hmp_stats.cumulative_runnable_avg;
+
+        BUG_ON(stats->nr_big_tasks < 0 || stats->nr_small_tasks < 0 ||
+                (s64)stats->cumulative_runnable_avg < 0);
+}
+
+#else /* CONFIG_CFS_BANDWIDTH */
+
+static inline void inc_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+         struct task_struct *p, int change_cra) { }
+
+static inline void dec_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+         struct task_struct *p, int change_cra) { }
+
+#endif /* CONFIG_CFS_BANDWIDTH */
+
 #else /* CONFIG_SCHED_HMP */
 
 static inline void
@@ -4463,6 +4673,24 @@ static inline void decay_scaled_stat(struct sched_avg *sa, u64 periods)
 {
 }
 
+static inline void init_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq) { }
+
+static inline void inc_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+         struct task_struct *p, int change_cra) { }
+
+static inline void dec_cfs_rq_hmp_stats(struct cfs_rq *cfs_rq,
+         struct task_struct *p, int change_cra) { }
+
+static inline void inc_throttled_cfs_rq_hmp_stats(struct hmp_sched_stats *stats,
+                         struct cfs_rq *cfs_rq)
+{
+}
+
+static inline void dec_throttled_cfs_rq_hmp_stats(struct hmp_sched_stats *stats,
+                         struct cfs_rq *cfs_rq)
+{
+}
+
 #endif /* CONFIG_SCHED_HMP */
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -5078,6 +5306,33 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
         return cfs_bandwidth_used() && cfs_rq->throttled;
 }
 
+/*
+ * Check if task is part of a hierarchy where some cfs_rq does not have any
+ * runtime left.
+ *
+ * We can't rely on throttled_hierarchy() to do this test, as
+ * cfs_rq->throttle_count will not be updated yet when this function is called
+ * from scheduler_tick()
+ */
+static int task_will_be_throttled(struct task_struct *p)
+{
+        struct sched_entity *se = &p->se;
+        struct cfs_rq *cfs_rq;
+
+        if (!cfs_bandwidth_used())
+                return 0;
+
+        for_each_sched_entity(se) {
+                cfs_rq = cfs_rq_of(se);
+                if (!cfs_rq->runtime_enabled)
+                        continue;
+                if (cfs_rq->runtime_remaining <= 0)
+                        return 1;
+        }
+
+        return 0;
+}
+
 /* check whether cfs_rq, or any parent, is throttled */
 static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
 {
@@ -5157,13 +5412,16 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
                 if (dequeue)
                         dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
                 qcfs_rq->h_nr_running -= task_delta;
+                dec_throttled_cfs_rq_hmp_stats(&qcfs_rq->hmp_stats, cfs_rq);
 
                 if (qcfs_rq->load.weight)
                         dequeue = 0;
         }
 
-        if (!se)
+        if (!se) {
                 sub_nr_running(rq, task_delta);
+                dec_throttled_cfs_rq_hmp_stats(&rq->hmp_stats, cfs_rq);
+        }
 
         cfs_rq->throttled = 1;
         cfs_rq->throttled_clock = rq_clock(rq);
@@ -5184,6 +5442,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
                 start_cfs_bandwidth(cfs_b);
 
         raw_spin_unlock(&cfs_b->lock);
+
+        /* Log effect on hmp stats after throttling */
+        trace_sched_cpu_load(rq, idle_cpu(cpu_of(rq)),
+                             mostly_idle_cpu(cpu_of(rq)),
+                             sched_irqload(cpu_of(rq)),
+                             power_cost_at_freq(cpu_of(rq), 0),
+                             cpu_temp(cpu_of(rq)));
 }
 
 void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
@@ -5193,6 +5458,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
         struct sched_entity *se;
         int enqueue = 1;
         long task_delta;
+        struct cfs_rq *tcfs_rq = cfs_rq;
 
         se = cfs_rq->tg->se[cpu_of(rq)];
 
@@ -5220,17 +5486,27 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
                 if (enqueue)
                         enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
                 cfs_rq->h_nr_running += task_delta;
+                inc_throttled_cfs_rq_hmp_stats(&cfs_rq->hmp_stats, tcfs_rq);
 
                 if (cfs_rq_throttled(cfs_rq))
                         break;
         }
 
-        if (!se)
+        if (!se) {
                 add_nr_running(rq, task_delta);
+                inc_throttled_cfs_rq_hmp_stats(&rq->hmp_stats, tcfs_rq);
+        }
 
         /* determine whether we need to wake up potentially idle cpu */
         if (rq->curr == rq->idle && rq->cfs.nr_running)
                 resched_curr(rq);
+
+        /* Log effect on hmp stats after un-throttling */
+        trace_sched_cpu_load(rq, idle_cpu(cpu_of(rq)),
+                             mostly_idle_cpu(cpu_of(rq)),
+                             sched_irqload(cpu_of(rq)),
+                             power_cost_at_freq(cpu_of(rq), 0),
+                             cpu_temp(cpu_of(rq)));
 }
 
 static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
@@ -5551,6 +5827,7 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
         cfs_rq->runtime_enabled = 0;
         INIT_LIST_HEAD(&cfs_rq->throttled_list);
+        init_cfs_rq_hmp_stats(cfs_rq);
 }
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
@@ -5730,6 +6007,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                 if (cfs_rq_throttled(cfs_rq))
                         break;
                 cfs_rq->h_nr_running++;
+                inc_cfs_rq_hmp_stats(cfs_rq, p, 1);
 
                 flags = ENQUEUE_WAKEUP;
         }
@@ -5737,6 +6015,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
         for_each_sched_entity(se) {
                 cfs_rq = cfs_rq_of(se);
                 cfs_rq->h_nr_running++;
+                inc_cfs_rq_hmp_stats(cfs_rq, p, 1);
 
                 if (cfs_rq_throttled(cfs_rq))
                         break;
@@ -5747,7 +6026,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
         if (!se) {
                 add_nr_running(rq, 1);
-                inc_rq_hmp_stats(rq, p);
+                inc_rq_hmp_stats(rq, p, 1);
         }
         hrtick_update(rq);
 }
@@ -5778,6 +6057,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                 if (cfs_rq_throttled(cfs_rq))
                         break;
                 cfs_rq->h_nr_running--;
+                dec_cfs_rq_hmp_stats(cfs_rq, p, 1);
 
                 /* Don't dequeue parent if it has other entities besides us */
                 if (cfs_rq->load.weight) {
@@ -5798,6 +6078,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
         for_each_sched_entity(se) {
                 cfs_rq = cfs_rq_of(se);
                 cfs_rq->h_nr_running--;
+                dec_cfs_rq_hmp_stats(cfs_rq, p, 1);
 
                 if (cfs_rq_throttled(cfs_rq))
                         break;
@@ -5808,7 +6089,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
         if (!se) {
                 sub_nr_running(rq, 1);
-                dec_rq_hmp_stats(rq, p);
+                dec_rq_hmp_stats(rq, p, 1);
         }
         hrtick_update(rq);
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d514d36e4685..57a5685daac7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -422,6 +422,11 @@ struct cfs_rq {
         struct task_group *tg;  /* group that "owns" this runqueue */
 
 #ifdef CONFIG_CFS_BANDWIDTH
+
+#ifdef CONFIG_SCHED_HMP
+        struct hmp_sched_stats hmp_stats;
+#endif
+
         int runtime_enabled;
         u64 runtime_expires;
         s64 runtime_remaining;
@@ -990,20 +995,11 @@ extern unsigned int sched_upmigrate;
 extern unsigned int sched_downmigrate;
 extern unsigned int sched_init_task_load_pelt;
 extern unsigned int sched_init_task_load_windows;
-#ifdef CONFIG_SCHED_HMP
-extern void fixup_nr_big_small_task(int cpu);
 extern u64 scale_load_to_cpu(u64 load, int cpu);
-#else
-static inline void fixup_nr_big_small_task(int cpu) {}
-
-static inline u64 scale_load_to_cpu(u64 load, int cpu)
-{
-        return load;
-}
-#endif
 extern unsigned int sched_heavy_task;
-extern void fixup_nr_big_small_task(int cpu);
-unsigned int max_task_load(void);
+extern void reset_cpu_hmp_stats(int cpu, int reset_cra);
+extern void fixup_nr_big_small_task(int cpu, int reset_stats);
+extern unsigned int max_task_load(void);
 extern void sched_account_irqtime(int cpu, struct task_struct *curr,
                                  u64 delta, u64 wallclock);
 unsigned int cpu_temp(int cpu);
@@ -1080,6 +1076,15 @@ static inline int sched_cpu_high_irqload(int cpu)
 
 struct hmp_sched_stats;
 
+static inline void fixup_nr_big_small_task(int cpu, int reset_stats)
+{
+}
+
+static inline u64 scale_load_to_cpu(u64 load, int cpu)
+{
+        return load;
+}
+
 static inline int pct_task_load(struct task_struct *p) { return 0; }
 
 static inline int capacity(struct rq *rq)
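The bookkeeping the patch introduces can be summarized as follows: each cfs_rq (group run-queue) now carries its own hmp_stats aggregate, and throttle_cfs_rq()/unthrottle_cfs_rq() transfer that whole aggregate out of and back into the CPU's rq->hmp_stats rather than walking individual tasks. Below is a minimal standalone sketch of that transfer step only, with simplified types and hypothetical helper names (add_group_stats/sub_group_stats); it is an illustration, not the kernel code itself.

/*
 * sketch.c - illustration only, loosely modeled on the
 * inc/dec_throttled_cfs_rq_hmp_stats() helpers added by this patch.
 * Types and function names here are hypothetical stand-ins.
 */
#include <assert.h>

struct hmp_sched_stats {
        int nr_big_tasks;
        int nr_small_tasks;
        unsigned long long cumulative_runnable_avg;
};

/* Fold a group's aggregate back into the CPU totals (unthrottle path). */
static void add_group_stats(struct hmp_sched_stats *cpu,
                            const struct hmp_sched_stats *grp)
{
        cpu->nr_big_tasks += grp->nr_big_tasks;
        cpu->nr_small_tasks += grp->nr_small_tasks;
        cpu->cumulative_runnable_avg += grp->cumulative_runnable_avg;
}

/* Remove a group's aggregate from the CPU totals (throttle path). */
static void sub_group_stats(struct hmp_sched_stats *cpu,
                            const struct hmp_sched_stats *grp)
{
        cpu->nr_big_tasks -= grp->nr_big_tasks;
        cpu->nr_small_tasks -= grp->nr_small_tasks;
        cpu->cumulative_runnable_avg -= grp->cumulative_runnable_avg;
        /* Counterpart of the BUG_ON() sanity check in the patch. */
        assert(cpu->nr_big_tasks >= 0 && cpu->nr_small_tasks >= 0);
}

int main(void)
{
        struct hmp_sched_stats cpu = { 3, 5, 900 };
        struct hmp_sched_stats grp = { 1, 2, 300 };

        sub_group_stats(&cpu, &grp);    /* group gets throttled */
        add_group_stats(&cpu, &grp);    /* group gets unthrottled */
        assert(cpu.nr_big_tasks == 3 && cpu.cumulative_runnable_avg == 900);
        return 0;
}

Keeping a per-group aggregate makes the throttle/unthrottle update O(1) in the number of queued tasks, which is presumably why the patch adds hmp_stats to struct cfs_rq instead of re-walking cfs_tasks on every throttling event.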
