Diffstat (limited to 'kernel/sched/rt.c')
-rw-r--r-- | kernel/sched/rt.c | 202
1 file changed, 198 insertions, 4 deletions
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8ec86abe0ea1..ba4403e910d8 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -7,6 +7,7 @@
 
 #include <linux/slab.h>
 #include <linux/irq_work.h>
+#include <trace/events/sched.h>
 
 int sched_rr_timeslice = RR_TIMESLICE;
 
@@ -264,8 +265,12 @@ static void pull_rt_task(struct rq *this_rq);
 
 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
 {
-	/* Try to pull RT tasks here if we lower this rq's prio */
-	return rq->rt.highest_prio.curr > prev->prio;
+	/*
+	 * Try to pull RT tasks here if we lower this rq's prio and cpu is not
+	 * isolated
+	 */
+	return rq->rt.highest_prio.curr > prev->prio &&
+		!cpu_isolated(cpu_of(rq));
 }
 
 static inline int rt_overloaded(struct rq *rq)
@@ -889,6 +894,51 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 	return rt_task_of(rt_se)->prio;
 }
 
+static void dump_throttled_rt_tasks(struct rt_rq *rt_rq)
+{
+	struct rt_prio_array *array = &rt_rq->active;
+	struct sched_rt_entity *rt_se;
+	char buf[500];
+	char *pos = buf;
+	char *end = buf + sizeof(buf);
+	int idx;
+
+	pos += snprintf(pos, sizeof(buf),
+		"sched: RT throttling activated for rt_rq %p (cpu %d)\n",
+		rt_rq, cpu_of(rq_of_rt_rq(rt_rq)));
+
+	if (bitmap_empty(array->bitmap, MAX_RT_PRIO))
+		goto out;
+
+	pos += snprintf(pos, end - pos, "potential CPU hogs:\n");
+	idx = sched_find_first_bit(array->bitmap);
+	while (idx < MAX_RT_PRIO) {
+		list_for_each_entry(rt_se, array->queue + idx, run_list) {
+			struct task_struct *p;
+
+			if (!rt_entity_is_task(rt_se))
+				continue;
+
+			p = rt_task_of(rt_se);
+			if (pos < end)
+				pos += snprintf(pos, end - pos, "\t%s (%d)\n",
+					p->comm, p->pid);
+		}
+		idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx + 1);
+	}
+out:
+#ifdef CONFIG_PANIC_ON_RT_THROTTLING
+	/*
+	 * Use pr_err() in the BUG() case since printk_sched() will
+	 * not get flushed and deadlock is not a concern.
+	 */
+	pr_err("%s", buf);
+	BUG();
+#else
+	printk_deferred("%s", buf);
+#endif
+}
+
 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 {
 	u64 runtime = sched_rt_runtime(rt_rq);
@@ -912,8 +962,14 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 		 * but accrue some time due to boosting.
 		 */
 		if (likely(rt_b->rt_runtime)) {
+			static bool once = false;
+
 			rt_rq->rt_throttled = 1;
-			printk_deferred_once("sched: RT throttling activated\n");
+
+			if (!once) {
+				once = true;
+				dump_throttled_rt_tasks(rt_rq);
+			}
 		} else {
 			/*
 			 * In case we did anyway, make it go away,
@@ -1130,6 +1186,41 @@ void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
 
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+#ifdef CONFIG_SCHED_HMP
+
+static void
+inc_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p)
+{
+	inc_cumulative_runnable_avg(&rq->hmp_stats, p);
+}
+
+static void
+dec_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p)
+{
+	dec_cumulative_runnable_avg(&rq->hmp_stats, p);
+}
+
+static void
+fixup_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p,
+			 u32 new_task_load, u32 new_pred_demand)
+{
+	s64 task_load_delta = (s64)new_task_load - task_load(p);
+	s64 pred_demand_delta = PRED_DEMAND_DELTA;
+
+	fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta,
+				      pred_demand_delta);
+}
+
+#else /* CONFIG_SCHED_HMP */
+
+static inline void
+inc_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p) { }
+
+static inline void
+dec_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p) { }
+
+#endif /* CONFIG_SCHED_HMP */
+
 static inline
 unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
 {
@@ -1261,6 +1352,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 		rt_se->timeout = 0;
 
 	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
+	inc_hmp_sched_stats_rt(rq, p);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
@@ -1272,6 +1364,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
 	update_curr_rt(rq);
 	dequeue_rt_entity(rt_se);
+	dec_hmp_sched_stats_rt(rq, p);
 
 	dequeue_pushable_task(rq, p);
 }
@@ -1314,11 +1407,28 @@ static void yield_task_rt(struct rq *rq)
 static int find_lowest_rq(struct task_struct *task);
 
 static int
+select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
+{
+	int target;
+
+	rcu_read_lock();
+	target = find_lowest_rq(p);
+	if (target != -1)
+		cpu = target;
+	rcu_read_unlock();
+
+	return cpu;
+}
+
+static int
 select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
 	struct task_struct *curr;
 	struct rq *rq;
 
+	if (sched_enable_hmp)
+		return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
+
 	/* For anything but wake ups, just return the task_cpu */
 	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
 		goto out;
@@ -1556,6 +1666,76 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
 
+#ifdef CONFIG_SCHED_HMP
+
+static int find_lowest_rq_hmp(struct task_struct *task)
+{
+	struct cpumask *lowest_mask = *this_cpu_ptr(&local_cpu_mask);
+	struct cpumask candidate_mask = CPU_MASK_NONE;
+	struct sched_cluster *cluster;
+	int best_cpu = -1;
+	int prev_cpu = task_cpu(task);
+	u64 cpu_load, min_load = ULLONG_MAX;
+	int i;
+	int restrict_cluster = sched_boost() ? 0 :
+			sysctl_sched_restrict_cluster_spill;
+
+	/* Make sure the mask is initialized first */
+	if (unlikely(!lowest_mask))
+		return best_cpu;
+
+	if (task->nr_cpus_allowed == 1)
+		return best_cpu; /* No other targets possible */
+
+	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
+		return best_cpu; /* No targets found */
+
+	/*
+	 * At this point we have built a mask of cpus representing the
+	 * lowest priority tasks in the system.  Now we want to elect
+	 * the best one based on our affinity and topology.
+	 */
+
+	for_each_sched_cluster(cluster) {
+		cpumask_and(&candidate_mask, &cluster->cpus, lowest_mask);
+		cpumask_andnot(&candidate_mask, &candidate_mask,
+			       cpu_isolated_mask);
+
+		if (cpumask_empty(&candidate_mask))
+			continue;
+
+		for_each_cpu(i, &candidate_mask) {
+			if (sched_cpu_high_irqload(i))
+				continue;
+
+			cpu_load = cpu_rq(i)->hmp_stats.cumulative_runnable_avg;
+			if (!restrict_cluster)
+				cpu_load = scale_load_to_cpu(cpu_load, i);
+
+			if (cpu_load < min_load ||
+			    (cpu_load == min_load &&
+			     (i == prev_cpu || (best_cpu != prev_cpu &&
+			      cpus_share_cache(prev_cpu, i))))) {
+				min_load = cpu_load;
+				best_cpu = i;
+			}
+		}
+		if (restrict_cluster && best_cpu != -1)
+			break;
+	}
+
+	return best_cpu;
+}
+
+#else /* CONFIG_SCHED_HMP */
+
+static int find_lowest_rq_hmp(struct task_struct *task)
+{
+	return -1;
+}
+
+#endif /* CONFIG_SCHED_HMP */
+
 static int find_lowest_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
@@ -1563,6 +1743,9 @@ static int find_lowest_rq(struct task_struct *task)
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
 
+	if (sched_enable_hmp)
+		return find_lowest_rq_hmp(task);
+
 	/* Make sure the mask is initialized first */
 	if (unlikely(!lowest_mask))
 		return -1;
@@ -1780,7 +1963,9 @@ retry:
 	}
 
 	deactivate_task(rq, next_task, 0);
+	next_task->on_rq = TASK_ON_RQ_MIGRATING;
 	set_task_cpu(next_task, lowest_rq->cpu);
+	next_task->on_rq = TASK_ON_RQ_QUEUED;
 	activate_task(lowest_rq, next_task, 0);
 	ret = 1;
 
@@ -2034,7 +2219,9 @@ static void pull_rt_task(struct rq *this_rq)
 			resched = true;
 
 			deactivate_task(src_rq, p, 0);
+			p->on_rq = TASK_ON_RQ_MIGRATING;
 			set_task_cpu(p, this_cpu);
+			p->on_rq = TASK_ON_RQ_QUEUED;
 			activate_task(this_rq, p, 0);
 			/*
 			 * We continue with the search, just in
@@ -2101,7 +2288,8 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 	 * we may need to handle the pulling of RT tasks
 	 * now.
 	 */
-	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
+	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running ||
+	    cpu_isolated(cpu_of(rq)))
 		return;
 
 	queue_pull_task(rq);
@@ -2116,6 +2304,7 @@ void __init init_sched_rt_class(void)
 					GFP_KERNEL, cpu_to_node(i));
 	}
 }
+
 #endif /* CONFIG_SMP */
 
 /*
@@ -2290,6 +2479,11 @@ const struct sched_class rt_sched_class = {
 	.switched_to		= switched_to_rt,
 
 	.update_curr		= update_curr_rt,
+#ifdef CONFIG_SCHED_HMP
+	.inc_hmp_sched_stats	= inc_hmp_sched_stats_rt,
+	.dec_hmp_sched_stats	= dec_hmp_sched_stats_rt,
+	.fixup_hmp_sched_stats	= fixup_hmp_sched_stats_rt,
+#endif
};

#ifdef CONFIG_SCHED_DEBUG
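
The core of the patch is the CPU choice in find_lowest_rq_hmp(): walk the clusters, drop isolated and high-IRQ-load CPUs from the lowest-priority mask, keep the candidate with the smallest cumulative runnable load, and break ties in favour of the task's previous CPU, then of a CPU that shares a cache with it; when cluster spill is restricted, stop at the first cluster that yields a candidate. The stand-alone user-space sketch below demonstrates only the load/tie-break rule; the 4-CPU topology, the load values and the shares_cache() helper are invented for illustration and are not part of the kernel patch.

/*
 * Simplified model of the selection loop in find_lowest_rq_hmp().
 * Lower load wins; on a tie, prefer prev_cpu, else (as long as the
 * current best is not already prev_cpu) a CPU sharing a cache with
 * prev_cpu. All data here is hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

#define NR_CPUS 4

static const uint64_t cpu_load[NR_CPUS] = { 300, 100, 100, 100 };
static const int cluster_of[NR_CPUS]    = { 0, 0, 1, 1 };

static int shares_cache(int a, int b)
{
	return cluster_of[a] == cluster_of[b];
}

static int pick_lowest(int prev_cpu)
{
	uint64_t min_load = UINT64_MAX;
	int best_cpu = -1;
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_load[i] < min_load ||
		    (cpu_load[i] == min_load &&
		     (i == prev_cpu || (best_cpu != prev_cpu &&
		      shares_cache(prev_cpu, i))))) {
			min_load = cpu_load[i];
			best_cpu = i;
		}
	}
	return best_cpu;
}

int main(void)
{
	/* CPUs 1, 2 and 3 tie on load; prev_cpu = 2 wins the tie. */
	printf("best cpu = %d\n", pick_lowest(2));
	return 0;
}

Once best_cpu settles on prev_cpu, only a strictly lower load can displace it, which biases placement toward the task's previous CPU when several CPUs look equally loaded.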