-rw-r--r--  include/linux/interrupt.h |  7
-rw-r--r--  kernel/sched/cpupri.c     | 37
-rw-r--r--  kernel/sched/rt.c         | 42
-rw-r--r--  kernel/sched/sched.h      |  5
-rw-r--r--  kernel/softirq.c          |  9
5 files changed, 92 insertions(+), 8 deletions(-)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index ad16809c8596..b3b1af8a8f8c 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -423,6 +423,12 @@ enum
};
#define SOFTIRQ_STOP_IDLE_MASK (~(1 << RCU_SOFTIRQ))
+/* Softirqs whose handling might be long: */
+#define LONG_SOFTIRQ_MASK ((1 << NET_TX_SOFTIRQ) | \
+ (1 << NET_RX_SOFTIRQ) | \
+ (1 << BLOCK_SOFTIRQ) | \
+ (1 << BLOCK_IOPOLL_SOFTIRQ) | \
+ (1 << TASKLET_SOFTIRQ))
/* map softirq index to softirq name. update 'softirq_to_name' in
* kernel/softirq.c when adding a new softirq.
@@ -458,6 +464,7 @@ extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
+DECLARE_PER_CPU(__u32, active_softirqs);
static inline struct task_struct *this_cpu_ksoftirqd(void)
{
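
Taken together, the two exports above let a remote observer ask "is that CPU tied up in a potentially long softirq?" by OR-ing the in-flight mask with the still-pending bits. A minimal sketch of that test, assuming kernel context (per_cpu() and __IRQ_STAT() are existing helpers; the function name here is hypothetical, and the real check lands in task_may_not_preempt() in kernel/sched/rt.c below):

static inline bool cpu_busy_with_long_softirq(int cpu)
{
	/* Softirqs being handled now, plus those queued behind them. */
	__u32 softirqs = per_cpu(active_softirqs, cpu) |
			 __IRQ_STAT(cpu, __softirq_pending);

	return softirqs & LONG_SOFTIRQ_MASK;
}
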
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 981fcd7dc394..1d00cf8c00fa 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -27,6 +27,8 @@
* of the License.
*/
+#include "sched.h"
+
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
@@ -51,6 +53,27 @@ static int convert_prio(int prio)
}
/**
+ * drop_nopreempt_cpus - remove likely non-preemptible CPUs from
+ * the mask
+ * @lowest_mask: mask with selected CPUs (non-NULL)
+ */
+static void
+drop_nopreempt_cpus(struct cpumask *lowest_mask)
+{
+ unsigned int cpu = cpumask_first(lowest_mask);
+
+ while (cpu < nr_cpu_ids) {
+ /* unlocked access */
+ struct task_struct *task = READ_ONCE(cpu_rq(cpu)->curr);
+
+ if (task_may_not_preempt(task, cpu))
+ cpumask_clear_cpu(cpu, lowest_mask);
+
+ cpu = cpumask_next(cpu, lowest_mask);
+ }
+}
+
+/**
* cpupri_find - find the best (lowest-pri) CPU in the system
* @cp: The cpupri context
* @p: The task
@@ -70,9 +93,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
{
int idx = 0;
int task_pri = convert_prio(p->prio);
+ bool drop_nopreempts = task_pri <= MAX_RT_PRIO;
BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES);
+retry:
for (idx = 0; idx < task_pri; idx++) {
struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
int skip = 0;
@@ -108,7 +133,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
if (lowest_mask) {
cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
-
+ if (drop_nopreempts)
+ drop_nopreempt_cpus(lowest_mask);
/*
* We have to ensure that we have at least one bit
* still set in the array, since the map could have
@@ -123,7 +149,14 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
return 1;
}
-
+ /*
+ * If dropping non-preemptible CPUs left no candidate, retry without
+ * the filter: a lowest-priority target still beats priority inversion.
+ */
+ if (drop_nopreempts) {
+ drop_nopreempts = false;
+ goto retry;
+ }
return 0;
}
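
The retry label turns cpupri_find() into a two-pass search: the first pass skips CPUs that look stuck in long softirqs, and only if that filter empties every priority level does an unfiltered second pass run, so a target is still found when one exists. A small userspace analogue of the pattern (all names hypothetical; compiles standalone with C99):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the cpupri vectors and the softirq check. */
static bool cpu_ok_at_prio(int cpu, int prio) { return prio >= 2; }
static bool cpu_busy_with_softirq(int cpu) { return cpu == 0; }

static int find_lowest_cpu(int ncpus, int task_prio)
{
	bool drop_busy = true;	/* first pass: skip softirq-busy CPUs */
retry:
	for (int prio = 0; prio < task_prio; prio++)
		for (int cpu = 0; cpu < ncpus; cpu++) {
			if (!cpu_ok_at_prio(cpu, prio))
				continue;
			if (drop_busy && cpu_busy_with_softirq(cpu))
				continue;
			return cpu;
		}
	if (drop_busy) {	/* second pass: accept busy CPUs too */
		drop_busy = false;
		goto retry;
	}
	return -1;
}

int main(void)
{
	/* cpu 0 qualifies at prio 2 but is busy, so cpu 1 is picked. */
	printf("picked cpu %d\n", find_lowest_cpu(4, 3));
	return 0;
}
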
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index fd27394354a1..c03d51a017bf 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -5,6 +5,7 @@
#include "sched.h"
+#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/irq_work.h>
#include <trace/events/sched.h>
@@ -1459,11 +1460,30 @@ select_task_rq_rt_hmp(struct task_struct *p, int cpu, int sd_flag, int flags)
}
#endif
+/*
+ * Return whether the task on the given cpu is currently non-preemptible
+ * while handling a potentially long softirq, or if the task is likely
+ * to block preemption soon because it is a ksoftirqd thread handling
+ * slow softirqs.
+ */
+bool
+task_may_not_preempt(struct task_struct *task, int cpu)
+{
+ __u32 softirqs = per_cpu(active_softirqs, cpu) |
+ __IRQ_STAT(cpu, __softirq_pending);
+ struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu);
+
+ return ((softirqs & LONG_SOFTIRQ_MASK) &&
+ (task == cpu_ksoftirqd ||
+ task_thread_info(task)->preempt_count & SOFTIRQ_MASK));
+}
+
static int
select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
{
struct task_struct *curr;
struct rq *rq;
+ bool may_not_preempt;
#ifdef CONFIG_SCHED_HMP
return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
@@ -1479,7 +1499,12 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
curr = READ_ONCE(rq->curr); /* unlocked access */
/*
- * If the current task on @p's runqueue is an RT task, then
+ * If the current task on @p's runqueue is a softirq task,
+ * it may run without preemption for a time that is
+ * ill-suited for a waiting RT task. Therefore, try to
+ * wake this RT task on another runqueue.
+ *
+ * Also, if the current task on @p's runqueue is an RT task, then
* try to see if we can wake this RT task up on another
* runqueue. Otherwise simply start this RT task
* on its current runqueue.
@@ -1500,17 +1525,22 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
* This test is optimistic, if we get it wrong the load-balancer
* will have to sort it out.
*/
- if (curr && unlikely(rt_task(curr)) &&
+ may_not_preempt = task_may_not_preempt(curr, cpu);
+ if (may_not_preempt ||
+ (unlikely(rt_task(curr)) &&
(curr->nr_cpus_allowed < 2 ||
- curr->prio <= p->prio)) {
+ curr->prio <= p->prio))) {
int target = find_lowest_rq(p);
/*
- * Don't bother moving it if the destination CPU is
- * not running a lower priority task.
+ * If the current cpu is non-preemptible, prefer the remote cpu
+ * even if it is running a higher-prio task.
+ * Otherwise, don't bother moving @p if the destination
+ * CPU is not running a lower-priority task.
*/
if (target != -1 &&
- p->prio < cpu_rq(target)->rt.highest_prio.curr)
+ (may_not_preempt ||
+ p->prio < cpu_rq(target)->rt.highest_prio.curr))
cpu = target;
}
rcu_read_unlock();
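
One note on the preempt_count half of task_may_not_preempt(): SOFTIRQ_MASK selects the softirq byte of preempt_count (bits 8-15, 0x0000ff00 in include/linux/preempt.h), which is non-zero while the CPU is serving a softirq or has bottom halves disabled via local_bh_disable(). Isolated as a sketch (hypothetical helper name):

static inline bool task_in_softirq_context(struct task_struct *task)
{
	/* A non-zero softirq byte means @task's CPU is inside softirq
	 * handling or under local_bh_disable(), so @task will not
	 * yield promptly.
	 */
	return task_thread_info(task)->preempt_count & SOFTIRQ_MASK;
}
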
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a8ad8e71076a..67b7da81f8a2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2681,6 +2681,11 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__release(rq2->lock);
}
+/*
+ * task_may_not_preempt - check whether a task may not be preemptible soon
+ */
+extern bool task_may_not_preempt(struct task_struct *task, int cpu);
+
#else /* CONFIG_SMP */
/*
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 479e4436f787..39ffd41594ce 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -57,6 +57,13 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp
DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+/*
+ * active_softirqs -- per cpu, a mask of the softirqs currently being
+ * handled. Approximate answers are acceptable to its readers, so the
+ * mask is read and written without synchronization.
+ */
+DEFINE_PER_CPU(__u32, active_softirqs);
+
const char * const softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
"TASKLET", "SCHED", "HRTIMER", "RCU"
@@ -253,6 +260,7 @@ asmlinkage __visible void __do_softirq(void)
restart:
/* Reset the pending bitmask before enabling irqs */
set_softirq_pending(0);
+ __this_cpu_write(active_softirqs, pending);
local_irq_enable();
@@ -282,6 +290,7 @@ restart:
pending >>= softirq_bit;
}
+ __this_cpu_write(active_softirqs, 0);
rcu_bh_qs();
local_irq_disable();
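
The two writes bracket exactly the window in which handlers run with interrupts enabled; remote readers such as cpupri_find() knowingly accept stale values. A userspace analogue of this publish/clear pattern, as a sketch with hypothetical names:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* One "CPU" advertises the softirq set it is handling; remote readers
 * poll with relaxed loads because approximate answers are acceptable.
 */
static _Atomic uint32_t active_set;

static int looks_busy(uint32_t long_mask)
{
	return !!(atomic_load_explicit(&active_set,
				       memory_order_relaxed) & long_mask);
}

static void handle(uint32_t pending)
{
	atomic_store_explicit(&active_set, pending, memory_order_relaxed);
	/* Handlers would run here, interrupts enabled on the real CPU. */
	printf("mid-handling, NET_RX busy: %d\n", looks_busy(1u << 3));
	atomic_store_explicit(&active_set, 0, memory_order_relaxed);
}

int main(void)
{
	handle(1u << 3);	/* bit 3 standing in for NET_RX */
	printf("after handling, NET_RX busy: %d\n", looks_busy(1u << 3));
	return 0;
}
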