Diffstat (limited to 'kernel/sched/rt.c')
-rw-r--r--  kernel/sched/rt.c | 202
1 file changed, 198 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8ec86abe0ea1..ba4403e910d8 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/irq_work.h>
+#include <trace/events/sched.h>
int sched_rr_timeslice = RR_TIMESLICE;
@@ -264,8 +265,12 @@ static void pull_rt_task(struct rq *this_rq);
static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
- /* Try to pull RT tasks here if we lower this rq's prio */
- return rq->rt.highest_prio.curr > prev->prio;
+ /*
+ * Try to pull RT tasks here if we lower this rq's prio and the
+ * CPU is not isolated.
+ */
+ return rq->rt.highest_prio.curr > prev->prio &&
+ !cpu_isolated(cpu_of(rq));
}
static inline int rt_overloaded(struct rq *rq)
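
cpu_isolated() is not an upstream helper; in this tree it presumably tests membership in the same cpu_isolated_mask that the find_lowest_rq_hmp() hunk further down also consults. A minimal sketch of what such a predicate could look like, assuming cpu_isolated_mask is a kernel-wide cpumask (hypothetical reconstruction, not necessarily the tree's actual definition):

	/* Hedged sketch: a plausible cpu_isolated() built on cpu_isolated_mask. */
	static inline bool cpu_isolated(int cpu)
	{
		/* True when the CPU has been removed from scheduler balancing. */
		return cpumask_test_cpu(cpu, cpu_isolated_mask);
	}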
@@ -889,6 +894,51 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
return rt_task_of(rt_se)->prio;
}
+static void dump_throttled_rt_tasks(struct rt_rq *rt_rq)
+{
+ struct rt_prio_array *array = &rt_rq->active;
+ struct sched_rt_entity *rt_se;
+ char buf[500];
+ char *pos = buf;
+ char *end = buf + sizeof(buf);
+ int idx;
+
+ pos += snprintf(pos, end - pos,
+ "sched: RT throttling activated for rt_rq %p (cpu %d)\n",
+ rt_rq, cpu_of(rq_of_rt_rq(rt_rq)));
+
+ if (bitmap_empty(array->bitmap, MAX_RT_PRIO))
+ goto out;
+
+ pos += snprintf(pos, end - pos, "potential CPU hogs:\n");
+ idx = sched_find_first_bit(array->bitmap);
+ while (idx < MAX_RT_PRIO) {
+ list_for_each_entry(rt_se, array->queue + idx, run_list) {
+ struct task_struct *p;
+
+ if (!rt_entity_is_task(rt_se))
+ continue;
+
+ p = rt_task_of(rt_se);
+ if (pos < end)
+ pos += snprintf(pos, end - pos, "\t%s (%d)\n",
+ p->comm, p->pid);
+ }
+ idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx + 1);
+ }
+out:
+#ifdef CONFIG_PANIC_ON_RT_THROTTLING
+ /*
+ * Use pr_err() in the BUG() case, since printk_deferred() output
+ * would not get flushed before the BUG() and deadlock is not a
+ * concern here.
+ */
+ pr_err("%s", buf);
+ BUG();
+#else
+ printk_deferred("%s", buf);
+#endif
+}
+
static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
u64 runtime = sched_rt_runtime(rt_rq);
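
The pos/end bookkeeping in dump_throttled_rt_tasks() follows the standard bounded-snprintf accumulation idiom. Since snprintf() returns the length that would have been written, not the number of bytes actually stored, pos can step past end after a truncated write; that is exactly why each append inside the loop is guarded by pos < end. A self-contained userspace sketch of the same pattern (names are demo-only):

	#include <stdio.h>

	/* Hedged sketch of the bounded-append idiom used above. */
	static void demo(void)
	{
		char buf[64];
		char *pos = buf;
		char *end = buf + sizeof(buf);

		pos += snprintf(pos, end - pos, "header\n");
		for (int i = 0; i < 100; i++) {
			if (pos < end)	/* a prior snprintf may have reported truncation */
				pos += snprintf(pos, end - pos, "item %d\n", i);
		}
		/* buf stays NUL-terminated; entries past the limit are dropped. */
		fputs(buf, stdout);
	}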
@@ -912,8 +962,14 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
* but accrue some time due to boosting.
*/
if (likely(rt_b->rt_runtime)) {
+ static bool once;
+
rt_rq->rt_throttled = 1;
- printk_deferred_once("sched: RT throttling activated\n");
+
+ if (!once) {
+ once = true;
+ dump_throttled_rt_tasks(rt_rq);
+ }
} else {
/*
* In case we did anyway, make it go away,
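
The dump replaces printk_deferred_once() with an open-coded static flag. Note the flag is global rather than per-rq and is not atomic, so two CPUs throttling at the same instant could in principle both dump. A stricter variant of the run-once idiom, sketched with an atomic test-and-set (an assumption that the benign race is worth closing, not what this patch does):

	static atomic_t dumped = ATOMIC_INIT(0);

	rt_rq->rt_throttled = 1;
	/* atomic_xchg() guarantees exactly one CPU wins the dump. */
	if (!atomic_xchg(&dumped, 1))
		dump_throttled_rt_tasks(rt_rq);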
@@ -1130,6 +1186,41 @@ void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
#endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_SCHED_HMP
+
+static void
+inc_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p)
+{
+ inc_cumulative_runnable_avg(&rq->hmp_stats, p);
+}
+
+static void
+dec_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p)
+{
+ dec_cumulative_runnable_avg(&rq->hmp_stats, p);
+}
+
+static void
+fixup_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p,
+ u32 new_task_load, u32 new_pred_demand)
+{
+ s64 task_load_delta = (s64)new_task_load - task_load(p);
+ s64 pred_demand_delta = PRED_DEMAND_DELTA;
+
+ fixup_cumulative_runnable_avg(&rq->hmp_stats, p, task_load_delta,
+ pred_demand_delta);
+}
+
+#else /* CONFIG_SCHED_HMP */
+
+static inline void
+inc_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p) { }
+
+static inline void
+dec_hmp_sched_stats_rt(struct rq *rq, struct task_struct *p) { }
+
+#endif /* CONFIG_SCHED_HMP */
+
static inline
unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
{
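
fixup_hmp_sched_stats_rt() computes signed deltas so that a shrinking task load cannot underflow the unsigned running aggregates. A small standalone sketch of the same arithmetic, assuming cumulative_runnable_avg is a u64 sum of per-task demands (names are illustrative, not the tree's exact fields):

	/* Hedged sketch: apply a signed delta to an unsigned running sum. */
	static void fixup_sum(u64 *sum, u32 old_load, u32 new_load)
	{
		s64 delta = (s64)new_load - old_load;

		/* Adding a possibly-negative s64 to a u64 wraps correctly in C. */
		*sum += delta;
	}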
@@ -1261,6 +1352,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
rt_se->timeout = 0;
enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
+ inc_hmp_sched_stats_rt(rq, p);
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
@@ -1272,6 +1364,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
update_curr_rt(rq);
dequeue_rt_entity(rt_se);
+ dec_hmp_sched_stats_rt(rq, p);
dequeue_pushable_task(rq, p);
}
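
The inc/dec hooks are placed so that every enqueue_task_rt() is balanced by a dequeue_task_rt(), keeping hmp_stats.cumulative_runnable_avg equal to the summed demand of the RT tasks currently runnable on the rq. A compact sketch of the invariant (illustrative types):

	/* Hedged sketch of the enqueue/dequeue accounting invariant. */
	struct stats { u64 cumulative_runnable_avg; };

	static void on_enqueue(struct stats *s, u32 demand)
	{
		s->cumulative_runnable_avg += demand;
	}

	static void on_dequeue(struct stats *s, u32 demand)
	{
		s->cumulative_runnable_avg -= demand;
	}
	/* After any balanced enqueue/dequeue sequence, the sum equals the
	 * total demand of the tasks still enqueued. */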
@@ -1314,11 +1407,28 @@ static void yield_task_rt(struct rq *rq)
static int find_lowest_rq(struct task_struct *task);
static int
+select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
+{
+ int target;
+
+ rcu_read_lock();
+ target = find_lowest_rq(p);
+ if (target != -1)
+ cpu = target;
+ rcu_read_unlock();
+
+ return cpu;
+}
+
+static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
struct task_struct *curr;
struct rq *rq;
+ if (sched_enable_hmp)
+ return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
+
/* For anything but wake ups, just return the task_cpu */
if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
goto out;
@@ -1556,6 +1666,76 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
+#ifdef CONFIG_SCHED_HMP
+
+static int find_lowest_rq_hmp(struct task_struct *task)
+{
+ struct cpumask *lowest_mask = *this_cpu_ptr(&local_cpu_mask);
+ struct cpumask candidate_mask = CPU_MASK_NONE;
+ struct sched_cluster *cluster;
+ int best_cpu = -1;
+ int prev_cpu = task_cpu(task);
+ u64 cpu_load, min_load = ULLONG_MAX;
+ int i;
+ int restrict_cluster = sched_boost() ? 0 :
+ sysctl_sched_restrict_cluster_spill;
+
+ /* Make sure the mask is initialized first */
+ if (unlikely(!lowest_mask))
+ return best_cpu;
+
+ if (task->nr_cpus_allowed == 1)
+ return best_cpu; /* No other targets possible */
+
+ if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
+ return best_cpu; /* No targets found */
+
+ /*
+ * At this point we have built a mask of cpus representing the
+ * lowest priority tasks in the system. Now we want to elect
+ * the best one based on our affinity and topology.
+ */
+
+ for_each_sched_cluster(cluster) {
+ cpumask_and(&candidate_mask, &cluster->cpus, lowest_mask);
+ cpumask_andnot(&candidate_mask, &candidate_mask,
+ cpu_isolated_mask);
+
+ if (cpumask_empty(&candidate_mask))
+ continue;
+
+ for_each_cpu(i, &candidate_mask) {
+ if (sched_cpu_high_irqload(i))
+ continue;
+
+ cpu_load = cpu_rq(i)->hmp_stats.cumulative_runnable_avg;
+ if (!restrict_cluster)
+ cpu_load = scale_load_to_cpu(cpu_load, i);
+
+ if (cpu_load < min_load ||
+ (cpu_load == min_load &&
+ (i == prev_cpu || (best_cpu != prev_cpu &&
+ cpus_share_cache(prev_cpu, i))))) {
+ min_load = cpu_load;
+ best_cpu = i;
+ }
+ }
+ if (restrict_cluster && best_cpu != -1)
+ break;
+ }
+
+ return best_cpu;
+}
+
+#else /* CONFIG_SCHED_HMP */
+
+static int find_lowest_rq_hmp(struct task_struct *task)
+{
+ return -1;
+}
+
+#endif /* CONFIG_SCHED_HMP */
+
static int find_lowest_rq(struct task_struct *task)
{
struct sched_domain *sd;
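
find_lowest_rq_hmp() walks clusters in order and, within each candidate mask, picks the least-loaded CPU, breaking load ties in favour of prev_cpu and then of CPUs sharing a cache with it. The nested condition is dense; stripped to its core it reads (paraphrase with identical semantics, not a replacement):

	/* Hedged paraphrase of the tie-break in find_lowest_rq_hmp(). */
	bool better = cpu_load < min_load;

	if (cpu_load == min_load)
		/* Prefer the task's previous CPU; failing that, prefer a CPU
		 * sharing cache with it, unless prev_cpu already won. */
		better = (i == prev_cpu) ||
			 (best_cpu != prev_cpu && cpus_share_cache(prev_cpu, i));

	if (better) {
		min_load = cpu_load;
		best_cpu = i;
	}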
@@ -1563,6 +1743,9 @@ static int find_lowest_rq(struct task_struct *task)
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
+ if (sched_enable_hmp)
+ return find_lowest_rq_hmp(task);
+
/* Make sure the mask is initialized first */
if (unlikely(!lowest_mask))
return -1;
@@ -1780,7 +1963,9 @@ retry:
}
deactivate_task(rq, next_task, 0);
+ next_task->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(next_task, lowest_rq->cpu);
+ next_task->on_rq = TASK_ON_RQ_QUEUED;
activate_task(lowest_rq, next_task, 0);
ret = 1;
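
Both this push path and the pull_rt_task() hunk below bracket set_task_cpu() with TASK_ON_RQ_MIGRATING/TASK_ON_RQ_QUEUED, so that observers testing task_on_rq_queued() or task_on_rq_migrating() see a consistent state while the task moves between runqueues. The pattern, in isolation:

	/* Migration bracket shared by the push and pull paths. */
	deactivate_task(src_rq, p, 0);
	p->on_rq = TASK_ON_RQ_MIGRATING;	/* visible as "migrating" */
	set_task_cpu(p, dst_cpu);
	p->on_rq = TASK_ON_RQ_QUEUED;		/* back to "queued" */
	activate_task(dst_rq, p, 0);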
@@ -2034,7 +2219,9 @@ static void pull_rt_task(struct rq *this_rq)
resched = true;
deactivate_task(src_rq, p, 0);
+ p->on_rq = TASK_ON_RQ_MIGRATING;
set_task_cpu(p, this_cpu);
+ p->on_rq = TASK_ON_RQ_QUEUED;
activate_task(this_rq, p, 0);
/*
* We continue with the search, just in
@@ -2101,7 +2288,8 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
* we may need to handle the pulling of RT tasks
* now.
*/
- if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
+ if (!task_on_rq_queued(p) || rq->rt.rt_nr_running ||
+ cpu_isolated(cpu_of(rq)))
return;
queue_pull_task(rq);
@@ -2116,6 +2304,7 @@ void __init init_sched_rt_class(void)
GFP_KERNEL, cpu_to_node(i));
}
}
+
#endif /* CONFIG_SMP */
/*
@@ -2290,6 +2479,11 @@ const struct sched_class rt_sched_class = {
.switched_to = switched_to_rt,
.update_curr = update_curr_rt,
+#ifdef CONFIG_SCHED_HMP
+ .inc_hmp_sched_stats = inc_hmp_sched_stats_rt,
+ .dec_hmp_sched_stats = dec_hmp_sched_stats_rt,
+ .fixup_hmp_sched_stats = fixup_hmp_sched_stats_rt,
+#endif
};
#ifdef CONFIG_SCHED_DEBUG
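
The three new sched_class methods give the HMP load tracker per-class entry points; a caller in the HMP core would presumably dispatch through the class pointer when a task's demand estimate changes. A hedged sketch of such a call site (hypothetical, not shown in this diff):

	/* Hypothetical HMP-core call site for the new class hooks. */
	static void update_task_demand(struct rq *rq, struct task_struct *p,
				       u32 new_load, u32 new_pred)
	{
		if (p->sched_class->fixup_hmp_sched_stats)
			p->sched_class->fixup_hmp_sched_stats(rq, p, new_load,
							      new_pred);
	}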