diff options
Diffstat (limited to 'kernel/rcu/tree_plugin.h')
-rw-r--r-- | kernel/rcu/tree_plugin.h | 814 |
1 files changed, 590 insertions, 224 deletions
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 02ac0fb186b8..3ec85cb5d544 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -30,13 +30,24 @@ #include <linux/smpboot.h> #include "../time/tick-internal.h" -#define RCU_KTHREAD_PRIO 1 - #ifdef CONFIG_RCU_BOOST -#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO -#else -#define RCU_BOOST_PRIO RCU_KTHREAD_PRIO -#endif + +#include "../locking/rtmutex_common.h" + +/* rcuc/rcub kthread realtime priority */ +static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; +module_param(kthread_prio, int, 0644); + +/* + * Control variables for per-CPU and per-rcu_node kthreads. These + * handle all flavors of RCU. + */ +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); +DEFINE_PER_CPU(char, rcu_cpu_has_work); + +#endif /* #ifdef CONFIG_RCU_BOOST */ #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ @@ -71,9 +82,6 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE pr_info("\tRCU torture testing starts during boot.\n"); #endif -#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) - pr_info("\tDump stacks of tasks blocking RCU-preempt GP.\n"); -#endif #if defined(CONFIG_RCU_CPU_STALL_INFO) pr_info("\tAdditional per-CPU info printed with stalls.\n"); #endif @@ -84,36 +92,12 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); -#ifdef CONFIG_RCU_NOCB_CPU -#ifndef CONFIG_RCU_NOCB_CPU_NONE - if (!have_rcu_nocb_mask) { - zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL); - have_rcu_nocb_mask = true; - } -#ifdef CONFIG_RCU_NOCB_CPU_ZERO - pr_info("\tOffload RCU callbacks from CPU 0\n"); - cpumask_set_cpu(0, rcu_nocb_mask); -#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ -#ifdef CONFIG_RCU_NOCB_CPU_ALL - pr_info("\tOffload RCU callbacks from all CPUs\n"); - cpumask_copy(rcu_nocb_mask, cpu_possible_mask); -#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ - if (have_rcu_nocb_mask) { - if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { - pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); - cpumask_and(rcu_nocb_mask, cpu_possible_mask, - rcu_nocb_mask); - } - cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); - pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); - if (rcu_nocb_poll) - pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); - } -#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ +#ifdef CONFIG_RCU_BOOST + pr_info("\tRCU kthread priority: %d.\n", kthread_prio); +#endif } -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); static struct rcu_state *rcu_state_p = &rcu_preempt_state; @@ -133,7 +117,7 @@ static void __init rcu_bootup_announce(void) * Return the number of RCU-preempt batches processed thus far * for debug and statistics. */ -long rcu_batches_completed_preempt(void) +static long rcu_batches_completed_preempt(void) { return rcu_preempt_state.completed; } @@ -154,18 +138,19 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); * not in a quiescent state. There might be any number of tasks blocked * while in an RCU read-side critical section. * - * Unlike the other rcu_*_qs() functions, callers to this function - * must disable irqs in order to protect the assignment to - * ->rcu_read_unlock_special. - */ -static void rcu_preempt_qs(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - - if (rdp->passed_quiesce == 0) - trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); - rdp->passed_quiesce = 1; - current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + * As with the other rcu_*_qs() functions, callers to this function + * must disable preemption. + */ +static void rcu_preempt_qs(void) +{ + if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) { + trace_rcu_grace_period(TPS("rcu_preempt"), + __this_cpu_read(rcu_preempt_data.gpnum), + TPS("cpuqs")); + __this_cpu_write(rcu_preempt_data.passed_quiesce, 1); + barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ + current->rcu_read_unlock_special.b.need_qs = false; + } } /* @@ -181,7 +166,7 @@ static void rcu_preempt_qs(int cpu) * * Caller must disable preemption. */ -static void rcu_preempt_note_context_switch(int cpu) +static void rcu_preempt_note_context_switch(void) { struct task_struct *t = current; unsigned long flags; @@ -189,14 +174,14 @@ static void rcu_preempt_note_context_switch(int cpu) struct rcu_node *rnp; if (t->rcu_read_lock_nesting > 0 && - (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { + !t->rcu_read_unlock_special.b.blocked) { /* Possibly blocking in an RCU read-side critical section. */ - rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); + rdp = this_cpu_ptr(rcu_preempt_state.rda); rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; + t->rcu_read_unlock_special.b.blocked = true; t->rcu_blocked_node = rnp; /* @@ -238,7 +223,7 @@ static void rcu_preempt_note_context_switch(int cpu) : rnp->gpnum + 1); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else if (t->rcu_read_lock_nesting < 0 && - t->rcu_read_unlock_special) { + t->rcu_read_unlock_special.s) { /* * Complete exit from RCU read-side critical section on @@ -256,9 +241,7 @@ static void rcu_preempt_note_context_switch(int cpu) * grace period, then the fact that the task has been enqueued * means that we continue to block the current grace period. */ - local_irq_save(flags); - rcu_preempt_qs(cpu); - local_irq_restore(flags); + rcu_preempt_qs(); } /* @@ -336,10 +319,10 @@ void rcu_read_unlock_special(struct task_struct *t) unsigned long flags; struct list_head *np; #ifdef CONFIG_RCU_BOOST - struct rt_mutex *rbmp = NULL; + bool drop_boost_mutex = false; #endif /* #ifdef CONFIG_RCU_BOOST */ struct rcu_node *rnp; - int special; + union rcu_special special; /* NMI handlers cannot block and cannot safely manipulate state. */ if (in_nmi()) @@ -349,12 +332,13 @@ void rcu_read_unlock_special(struct task_struct *t) /* * If RCU core is waiting for this CPU to exit critical section, - * let it know that we have done so. + * let it know that we have done so. Because irqs are disabled, + * t->rcu_read_unlock_special cannot change. */ special = t->rcu_read_unlock_special; - if (special & RCU_READ_UNLOCK_NEED_QS) { - rcu_preempt_qs(smp_processor_id()); - if (!t->rcu_read_unlock_special) { + if (special.b.need_qs) { + rcu_preempt_qs(); + if (!t->rcu_read_unlock_special.s) { local_irq_restore(flags); return; } @@ -367,8 +351,8 @@ void rcu_read_unlock_special(struct task_struct *t) } /* Clean up if blocked during RCU read-side critical section. */ - if (special & RCU_READ_UNLOCK_BLOCKED) { - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; + if (special.b.blocked) { + t->rcu_read_unlock_special.b.blocked = false; /* * Remove this task from the list it blocked on. The @@ -398,11 +382,8 @@ void rcu_read_unlock_special(struct task_struct *t) #ifdef CONFIG_RCU_BOOST if (&t->rcu_node_entry == rnp->boost_tasks) rnp->boost_tasks = np; - /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */ - if (t->rcu_boost_mutex) { - rbmp = t->rcu_boost_mutex; - t->rcu_boost_mutex = NULL; - } + /* Snapshot ->boost_mtx ownership with rcu_node lock held. */ + drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; #endif /* #ifdef CONFIG_RCU_BOOST */ /* @@ -427,8 +408,10 @@ void rcu_read_unlock_special(struct task_struct *t) #ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ - if (rbmp) - rt_mutex_unlock(rbmp); + if (drop_boost_mutex) { + rt_mutex_unlock(&rnp->boost_mtx); + complete(&rnp->boost_completion); + } #endif /* #ifdef CONFIG_RCU_BOOST */ /* @@ -442,8 +425,6 @@ void rcu_read_unlock_special(struct task_struct *t) } } -#ifdef CONFIG_RCU_CPU_STALL_VERBOSE - /* * Dump detailed information for all tasks blocking the current RCU * grace period on the specified rcu_node structure. @@ -478,14 +459,6 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) rcu_print_detail_task_stall_rnp(rnp); } -#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ - -static void rcu_print_detail_task_stall(struct rcu_state *rsp) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ - #ifdef CONFIG_RCU_CPU_STALL_INFO static void rcu_print_task_stall_begin(struct rcu_node *rnp) @@ -648,17 +621,18 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, * * Caller must disable hard irqs. */ -static void rcu_preempt_check_callbacks(int cpu) +static void rcu_preempt_check_callbacks(void) { struct task_struct *t = current; if (t->rcu_read_lock_nesting == 0) { - rcu_preempt_qs(cpu); + rcu_preempt_qs(); return; } if (t->rcu_read_lock_nesting > 0 && - per_cpu(rcu_preempt_data, cpu).qs_pending) - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; + __this_cpu_read(rcu_preempt_data.qs_pending) && + !__this_cpu_read(rcu_preempt_data.passed_quiesce)) + t->rcu_read_unlock_special.b.need_qs = true; } #ifdef CONFIG_RCU_BOOST @@ -819,11 +793,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) * In fact, if you are using synchronize_rcu_expedited() in a loop, * please restructure your code to batch your updates, and then Use a * single synchronize_rcu() instead. - * - * Note that it is illegal to call this function while holding any lock - * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal - * to call this function from a CPU-hotplug notifier. Failing to observe - * these restriction will result in deadlock. */ void synchronize_rcu_expedited(void) { @@ -845,7 +814,11 @@ void synchronize_rcu_expedited(void) * being boosted. This simplifies the process of moving tasks * from leaf to root rcu_node structures. */ - get_online_cpus(); + if (!try_get_online_cpus()) { + /* CPU-hotplug operation in flight, fall back to normal GP. */ + wait_rcu_gp(call_rcu); + return; + } /* * Acquire lock, falling back to synchronize_rcu() if too many @@ -897,7 +870,8 @@ void synchronize_rcu_expedited(void) /* Clean up and exit. */ smp_mb(); /* ensure expedited GP seen before counter increment. */ - ACCESS_ONCE(sync_rcu_preempt_exp_count)++; + ACCESS_ONCE(sync_rcu_preempt_exp_count) = + sync_rcu_preempt_exp_count + 1; unlock_mb_ret: mutex_unlock(&sync_rcu_preempt_exp_mutex); mb_ret: @@ -941,11 +915,11 @@ void exit_rcu(void) return; t->rcu_read_lock_nesting = 1; barrier(); - t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; + t->rcu_read_unlock_special.b.blocked = true; __rcu_read_unlock(); } -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ +#else /* #ifdef CONFIG_PREEMPT_RCU */ static struct rcu_state *rcu_state_p = &rcu_sched_state; @@ -971,7 +945,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); * Because preemptible RCU does not exist, we never have to check for * CPUs being in quiescent states. */ -static void rcu_preempt_note_context_switch(int cpu) +static void rcu_preempt_note_context_switch(void) { } @@ -988,6 +962,7 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) /* Because preemptible RCU does not exist, no quieting of tasks. */ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) + __releases(rnp->lock) { raw_spin_unlock_irqrestore(&rnp->lock, flags); } @@ -1042,7 +1017,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, * Because preemptible RCU does not exist, it never has any callbacks * to check. */ -static void rcu_preempt_check_callbacks(int cpu) +static void rcu_preempt_check_callbacks(void) { } @@ -1095,7 +1070,7 @@ void exit_rcu(void) { } -#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_RCU_BOOST @@ -1149,7 +1124,6 @@ static void rcu_wake_cond(struct task_struct *t, int status) static int rcu_boost(struct rcu_node *rnp) { unsigned long flags; - struct rt_mutex mtx; struct task_struct *t; struct list_head *tb; @@ -1200,11 +1174,15 @@ static int rcu_boost(struct rcu_node *rnp) * section. */ t = container_of(tb, struct task_struct, rcu_node_entry); - rt_mutex_init_proxy_locked(&mtx, t); - t->rcu_boost_mutex = &mtx; + rt_mutex_init_proxy_locked(&rnp->boost_mtx, t); + init_completion(&rnp->boost_completion); raw_spin_unlock_irqrestore(&rnp->lock, flags); - rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ - rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ + /* Lock only for side effect: boosts task t's priority. */ + rt_mutex_lock(&rnp->boost_mtx); + rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */ + + /* Wait for boostee to be done w/boost_mtx before reinitializing. */ + wait_for_completion(&rnp->boost_completion); return ACCESS_ONCE(rnp->exp_tasks) != NULL || ACCESS_ONCE(rnp->boost_tasks) != NULL; @@ -1256,6 +1234,7 @@ static int rcu_boost_kthread(void *arg) * about it going away. */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) + __releases(rnp->lock) { struct task_struct *t; @@ -1347,7 +1326,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, smp_mb__after_unlock_lock(); rnp->boost_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); - sp.sched_priority = RCU_BOOST_PRIO; + sp.sched_priority = kthread_prio; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ return 0; @@ -1364,7 +1343,7 @@ static void rcu_cpu_kthread_setup(unsigned int cpu) { struct sched_param sp; - sp.sched_priority = RCU_KTHREAD_PRIO; + sp.sched_priority = kthread_prio; sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); } @@ -1457,14 +1436,13 @@ static struct smp_hotplug_thread rcu_cpu_thread_spec = { }; /* - * Spawn all kthreads -- called as soon as the scheduler is running. + * Spawn boost kthreads -- called as soon as the scheduler is running. */ -static int __init rcu_spawn_kthreads(void) +static void __init rcu_spawn_boost_kthreads(void) { struct rcu_node *rnp; int cpu; - rcu_scheduler_fully_active = 1; for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); @@ -1474,9 +1452,7 @@ static int __init rcu_spawn_kthreads(void) rcu_for_each_leaf_node(rcu_state_p, rnp) (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); } - return 0; } -early_initcall(rcu_spawn_kthreads); static void rcu_prepare_kthreads(int cpu) { @@ -1491,6 +1467,7 @@ static void rcu_prepare_kthreads(int cpu) #else /* #ifdef CONFIG_RCU_BOOST */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) + __releases(rnp->lock) { raw_spin_unlock_irqrestore(&rnp->lock, flags); } @@ -1513,12 +1490,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) { } -static int __init rcu_scheduler_really_started(void) +static void __init rcu_spawn_boost_kthreads(void) { - rcu_scheduler_fully_active = 1; - return 0; } -early_initcall(rcu_scheduler_really_started); static void rcu_prepare_kthreads(int cpu) { @@ -1538,10 +1512,10 @@ static void rcu_prepare_kthreads(int cpu) * any flavor of RCU. */ #ifndef CONFIG_RCU_NOCB_CPU_ALL -int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +int rcu_needs_cpu(unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(cpu, NULL); + return rcu_cpu_has_callbacks(NULL); } #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ @@ -1549,7 +1523,7 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up * after it. */ -static void rcu_cleanup_after_idle(int cpu) +static void rcu_cleanup_after_idle(void) { } @@ -1557,7 +1531,7 @@ static void rcu_cleanup_after_idle(int cpu) * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, * is nothing. */ -static void rcu_prepare_for_idle(int cpu) +static void rcu_prepare_for_idle(void) { } @@ -1619,7 +1593,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) /* Exit early if we advanced recently. */ if (jiffies == rdtp->last_advance_all) - return 0; + return false; rdtp->last_advance_all = jiffies; for_each_rcu_flavor(rsp) { @@ -1650,15 +1624,15 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) * The caller must have disabled interrupts. */ #ifndef CONFIG_RCU_NOCB_CPU_ALL -int rcu_needs_cpu(int cpu, unsigned long *dj) +int rcu_needs_cpu(unsigned long *dj) { - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); /* Snapshot to detect later posting of non-lazy callback. */ rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; /* If no callbacks, RCU doesn't need the CPU. */ - if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { + if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) { *dj = ULONG_MAX; return 0; } @@ -1692,12 +1666,12 @@ int rcu_needs_cpu(int cpu, unsigned long *dj) * * The caller must have disabled interrupts. */ -static void rcu_prepare_for_idle(int cpu) +static void rcu_prepare_for_idle(void) { #ifndef CONFIG_RCU_NOCB_CPU_ALL bool needwake; struct rcu_data *rdp; - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); struct rcu_node *rnp; struct rcu_state *rsp; int tne; @@ -1705,7 +1679,7 @@ static void rcu_prepare_for_idle(int cpu) /* Handle nohz enablement switches conservatively. */ tne = ACCESS_ONCE(tick_nohz_active); if (tne != rdtp->tick_nohz_enabled_snap) { - if (rcu_cpu_has_callbacks(cpu, NULL)) + if (rcu_cpu_has_callbacks(NULL)) invoke_rcu_core(); /* force nohz to see update. */ rdtp->tick_nohz_enabled_snap = tne; return; @@ -1714,7 +1688,7 @@ static void rcu_prepare_for_idle(int cpu) return; /* If this is a no-CBs CPU, no callbacks, just return. */ - if (rcu_is_nocb_cpu(cpu)) + if (rcu_is_nocb_cpu(smp_processor_id())) return; /* @@ -1738,7 +1712,7 @@ static void rcu_prepare_for_idle(int cpu) return; rdtp->last_accelerate = jiffies; for_each_rcu_flavor(rsp) { - rdp = per_cpu_ptr(rsp->rda, cpu); + rdp = this_cpu_ptr(rsp->rda); if (!*rdp->nxttail[RCU_DONE_TAIL]) continue; rnp = rdp->mynode; @@ -1757,10 +1731,10 @@ static void rcu_prepare_for_idle(int cpu) * any grace periods that elapsed while the CPU was idle, and if any * callbacks are now ready to invoke, initiate invocation. */ -static void rcu_cleanup_after_idle(int cpu) +static void rcu_cleanup_after_idle(void) { #ifndef CONFIG_RCU_NOCB_CPU_ALL - if (rcu_is_nocb_cpu(cpu)) + if (rcu_is_nocb_cpu(smp_processor_id())) return; if (rcu_try_advance_all_cbs()) invoke_rcu_core(); @@ -1842,7 +1816,7 @@ static int rcu_oom_notify(struct notifier_block *self, get_online_cpus(); for_each_online_cpu(cpu) { smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); - cond_resched(); + cond_resched_rcu_qs(); } put_online_cpus(); @@ -2060,6 +2034,49 @@ bool rcu_is_nocb_cpu(int cpu) #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* + * Kick the leader kthread for this NOCB group. + */ +static void wake_nocb_leader(struct rcu_data *rdp, bool force) +{ + struct rcu_data *rdp_leader = rdp->nocb_leader; + + if (!ACCESS_ONCE(rdp_leader->nocb_kthread)) + return; + if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) { + /* Prior smp_mb__after_atomic() orders against prior enqueue. */ + ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false; + wake_up(&rdp_leader->nocb_wq); + } +} + +/* + * Does the specified CPU need an RCU callback for the specified flavor + * of rcu_barrier()? + */ +static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + struct rcu_head *rhp; + + /* No-CBs CPUs might have callbacks on any of three lists. */ + rhp = ACCESS_ONCE(rdp->nocb_head); + if (!rhp) + rhp = ACCESS_ONCE(rdp->nocb_gp_head); + if (!rhp) + rhp = ACCESS_ONCE(rdp->nocb_follower_head); + + /* Having no rcuo kthread but CBs after scheduler starts is bad! */ + if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) { + /* RCU callback enqueued before CPU first came online??? */ + pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", + cpu, rhp->func); + WARN_ON_ONCE(1); + } + + return !!rhp; +} + +/* * Enqueue the specified string of rcu_head structures onto the specified * CPU's no-CBs lists. The CPU is specified by rdp, the head of the * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy @@ -2082,6 +2099,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, ACCESS_ONCE(*old_rhpp) = rhp; atomic_long_add(rhcount, &rdp->nocb_q_count); atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); + smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */ /* If we are not being polled and there is a kthread, awaken it ... */ t = ACCESS_ONCE(rdp->nocb_kthread); @@ -2093,19 +2111,28 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, len = atomic_long_read(&rdp->nocb_q_count); if (old_rhpp == &rdp->nocb_head) { if (!irqs_disabled_flags(flags)) { - wake_up(&rdp->nocb_wq); /* ... if queue was empty ... */ + /* ... if queue was empty ... */ + wake_nocb_leader(rdp, false); trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty")); } else { - rdp->nocb_defer_wakeup = true; + rdp->nocb_defer_wakeup = RCU_NOGP_WAKE; trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmptyIsDeferred")); } rdp->qlen_last_fqs_check = 0; } else if (len > rdp->qlen_last_fqs_check + qhimark) { - wake_up_process(t); /* ... or if many callbacks queued. */ + /* ... or if many callbacks queued. */ + if (!irqs_disabled_flags(flags)) { + wake_nocb_leader(rdp, true); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeOvf")); + } else { + rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeOvfIsDeferred")); + } rdp->qlen_last_fqs_check = LONG_MAX / 2; - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf")); } else { trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot")); } @@ -2126,7 +2153,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, { if (!rcu_is_nocb_cpu(rdp->cpu)) - return 0; + return false; __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags); if (__is_kfree_rcu_offset((unsigned long)rhp->func)) trace_rcu_kfree_callback(rdp->rsp->name, rhp, @@ -2137,7 +2164,18 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, trace_rcu_callback(rdp->rsp->name, rhp, -atomic_long_read(&rdp->nocb_q_count_lazy), -atomic_long_read(&rdp->nocb_q_count)); - return 1; + + /* + * If called from an extended quiescent state with interrupts + * disabled, invoke the RCU core in order to allow the idle-entry + * deferred-wakeup check to function. + */ + if (irqs_disabled_flags(flags) && + !rcu_is_watching() && + cpu_online(smp_processor_id())) + invoke_rcu_core(); + + return true; } /* @@ -2153,7 +2191,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ if (!rcu_is_nocb_cpu(smp_processor_id())) - return 0; + return false; rsp->qlen = 0; rsp->qlen_lazy = 0; @@ -2172,7 +2210,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, rsp->orphan_nxtlist = NULL; rsp->orphan_nxttail = &rsp->orphan_nxtlist; } - return 1; + return true; } /* @@ -2205,7 +2243,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); if (likely(d)) break; - flush_signals(current); + WARN_ON(signal_pending(current)); trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait")); } trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait")); @@ -2213,13 +2251,151 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) } /* + * Leaders come here to wait for additional callbacks to show up. + * This function does not return until callbacks appear. + */ +static void nocb_leader_wait(struct rcu_data *my_rdp) +{ + bool firsttime = true; + bool gotcbs; + struct rcu_data *rdp; + struct rcu_head **tail; + +wait_again: + + /* Wait for callbacks to appear. */ + if (!rcu_nocb_poll) { + trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep"); + wait_event_interruptible(my_rdp->nocb_wq, + !ACCESS_ONCE(my_rdp->nocb_leader_sleep)); + /* Memory barrier handled by smp_mb() calls below and repoll. */ + } else if (firsttime) { + firsttime = false; /* Don't drown trace log with "Poll"! */ + trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Poll"); + } + + /* + * Each pass through the following loop checks a follower for CBs. + * We are our own first follower. Any CBs found are moved to + * nocb_gp_head, where they await a grace period. + */ + gotcbs = false; + for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { + rdp->nocb_gp_head = ACCESS_ONCE(rdp->nocb_head); + if (!rdp->nocb_gp_head) + continue; /* No CBs here, try next follower. */ + + /* Move callbacks to wait-for-GP list, which is empty. */ + ACCESS_ONCE(rdp->nocb_head) = NULL; + rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); + rdp->nocb_gp_count = atomic_long_xchg(&rdp->nocb_q_count, 0); + rdp->nocb_gp_count_lazy = + atomic_long_xchg(&rdp->nocb_q_count_lazy, 0); + gotcbs = true; + } + + /* + * If there were no callbacks, sleep a bit, rescan after a + * memory barrier, and go retry. + */ + if (unlikely(!gotcbs)) { + if (!rcu_nocb_poll) + trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, + "WokeEmpty"); + WARN_ON(signal_pending(current)); + schedule_timeout_interruptible(1); + + /* Rescan in case we were a victim of memory ordering. */ + my_rdp->nocb_leader_sleep = true; + smp_mb(); /* Ensure _sleep true before scan. */ + for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) + if (ACCESS_ONCE(rdp->nocb_head)) { + /* Found CB, so short-circuit next wait. */ + my_rdp->nocb_leader_sleep = false; + break; + } + goto wait_again; + } + + /* Wait for one grace period. */ + rcu_nocb_wait_gp(my_rdp); + + /* + * We left ->nocb_leader_sleep unset to reduce cache thrashing. + * We set it now, but recheck for new callbacks while + * traversing our follower list. + */ + my_rdp->nocb_leader_sleep = true; + smp_mb(); /* Ensure _sleep true before scan of ->nocb_head. */ + + /* Each pass through the following loop wakes a follower, if needed. */ + for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { + if (ACCESS_ONCE(rdp->nocb_head)) + my_rdp->nocb_leader_sleep = false;/* No need to sleep.*/ + if (!rdp->nocb_gp_head) + continue; /* No CBs, so no need to wake follower. */ + + /* Append callbacks to follower's "done" list. */ + tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail); + *tail = rdp->nocb_gp_head; + atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count); + atomic_long_add(rdp->nocb_gp_count_lazy, + &rdp->nocb_follower_count_lazy); + smp_mb__after_atomic(); /* Store *tail before wakeup. */ + if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { + /* + * List was empty, wake up the follower. + * Memory barriers supplied by atomic_long_add(). + */ + wake_up(&rdp->nocb_wq); + } + } + + /* If we (the leader) don't have CBs, go wait some more. */ + if (!my_rdp->nocb_follower_head) + goto wait_again; +} + +/* + * Followers come here to wait for additional callbacks to show up. + * This function does not return until callbacks appear. + */ +static void nocb_follower_wait(struct rcu_data *rdp) +{ + bool firsttime = true; + + for (;;) { + if (!rcu_nocb_poll) { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + "FollowerSleep"); + wait_event_interruptible(rdp->nocb_wq, + ACCESS_ONCE(rdp->nocb_follower_head)); + } else if (firsttime) { + /* Don't drown trace log with "Poll"! */ + firsttime = false; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "Poll"); + } + if (smp_load_acquire(&rdp->nocb_follower_head)) { + /* ^^^ Ensure CB invocation follows _head test. */ + return; + } + if (!rcu_nocb_poll) + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + "WokeEmpty"); + WARN_ON(signal_pending(current)); + schedule_timeout_interruptible(1); + } +} + +/* * Per-rcu_data kthread, but only for no-CBs CPUs. Each kthread invokes - * callbacks queued by the corresponding no-CBs CPU. + * callbacks queued by the corresponding no-CBs CPU, however, there is + * an optional leader-follower relationship so that the grace-period + * kthreads don't have to do quite so many wakeups. */ static int rcu_nocb_kthread(void *arg) { int c, cl; - bool firsttime = 1; struct rcu_head *list; struct rcu_head *next; struct rcu_head **tail; @@ -2227,41 +2403,22 @@ static int rcu_nocb_kthread(void *arg) /* Each pass through this loop invokes one batch of callbacks */ for (;;) { - /* If not polling, wait for next batch of callbacks. */ - if (!rcu_nocb_poll) { - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, - TPS("Sleep")); - wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head); - /* Memory barrier provide by xchg() below. */ - } else if (firsttime) { - firsttime = 0; - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, - TPS("Poll")); - } - list = ACCESS_ONCE(rdp->nocb_head); - if (!list) { - if (!rcu_nocb_poll) - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, - TPS("WokeEmpty")); - schedule_timeout_interruptible(1); - flush_signals(current); - continue; - } - firsttime = 1; - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, - TPS("WokeNonEmpty")); - - /* - * Extract queued callbacks, update counts, and wait - * for a grace period to elapse. - */ - ACCESS_ONCE(rdp->nocb_head) = NULL; - tail = xchg(&rdp->nocb_tail, &rdp->nocb_head); - c = atomic_long_xchg(&rdp->nocb_q_count, 0); - cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0); - ACCESS_ONCE(rdp->nocb_p_count) += c; - ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl; - rcu_nocb_wait_gp(rdp); + /* Wait for callbacks. */ + if (rdp->nocb_leader == rdp) + nocb_leader_wait(rdp); + else + nocb_follower_wait(rdp); + + /* Pull the ready-to-invoke callbacks onto local list. */ + list = ACCESS_ONCE(rdp->nocb_follower_head); + BUG_ON(!list); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty"); + ACCESS_ONCE(rdp->nocb_follower_head) = NULL; + tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head); + c = atomic_long_xchg(&rdp->nocb_follower_count, 0); + cl = atomic_long_xchg(&rdp->nocb_follower_count_lazy, 0); + rdp->nocb_p_count += c; + rdp->nocb_p_count_lazy += cl; /* Each pass through the following loop invokes a callback. */ trace_rcu_batch_start(rdp->rsp->name, cl, c, -1); @@ -2286,15 +2443,16 @@ static int rcu_nocb_kthread(void *arg) list = next; } trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); - ACCESS_ONCE(rdp->nocb_p_count) -= c; - ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl; + ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c; + ACCESS_ONCE(rdp->nocb_p_count_lazy) = + rdp->nocb_p_count_lazy - cl; rdp->n_nocbs_invoked += c; } return 0; } /* Is a deferred wakeup of rcu_nocb_kthread() required? */ -static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) +static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) { return ACCESS_ONCE(rdp->nocb_defer_wakeup); } @@ -2302,11 +2460,79 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) /* Do a deferred wakeup of rcu_nocb_kthread(). */ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) { + int ndw; + if (!rcu_nocb_need_deferred_wakeup(rdp)) return; - ACCESS_ONCE(rdp->nocb_defer_wakeup) = false; - wake_up(&rdp->nocb_wq); - trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty")); + ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup); + ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT; + wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); +} + +void __init rcu_init_nohz(void) +{ + int cpu; + bool need_rcu_nocb_mask = true; + struct rcu_state *rsp; + +#ifdef CONFIG_RCU_NOCB_CPU_NONE + need_rcu_nocb_mask = false; +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ + +#if defined(CONFIG_NO_HZ_FULL) + if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask)) + need_rcu_nocb_mask = true; +#endif /* #if defined(CONFIG_NO_HZ_FULL) */ + + if (!have_rcu_nocb_mask && need_rcu_nocb_mask) { + if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) { + pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n"); + return; + } + have_rcu_nocb_mask = true; + } + if (!have_rcu_nocb_mask) + return; + +#ifdef CONFIG_RCU_NOCB_CPU_ZERO + pr_info("\tOffload RCU callbacks from CPU 0\n"); + cpumask_set_cpu(0, rcu_nocb_mask); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ +#ifdef CONFIG_RCU_NOCB_CPU_ALL + pr_info("\tOffload RCU callbacks from all CPUs\n"); + cpumask_copy(rcu_nocb_mask, cpu_possible_mask); +#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ +#if defined(CONFIG_NO_HZ_FULL) + if (tick_nohz_full_running) + cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask); +#endif /* #if defined(CONFIG_NO_HZ_FULL) */ + + if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { + pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); + cpumask_and(rcu_nocb_mask, cpu_possible_mask, + rcu_nocb_mask); + } + cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); + pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); + if (rcu_nocb_poll) + pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); + + for_each_rcu_flavor(rsp) { + for_each_cpu(cpu, rcu_nocb_mask) { + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + + /* + * If there are early callbacks, they will need + * to be moved to the nocb lists. + */ + WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] != + &rdp->nxtlist && + rdp->nxttail[RCU_NEXT_TAIL] != NULL); + init_nocb_callback_list(rdp); + } + rcu_organize_nocb_kthreads(rsp); + } } /* Initialize per-rcu_data variables for no-CBs CPUs. */ @@ -2314,38 +2540,145 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) { rdp->nocb_tail = &rdp->nocb_head; init_waitqueue_head(&rdp->nocb_wq); + rdp->nocb_follower_tail = &rdp->nocb_follower_head; } -/* Create a kthread for each RCU flavor for each no-CBs CPU. */ -static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) +/* + * If the specified CPU is a no-CBs CPU that does not already have its + * rcuo kthread for the specified RCU flavor, spawn it. If the CPUs are + * brought online out of order, this can require re-organizing the + * leader-follower relationships. + */ +static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu) { - int cpu; struct rcu_data *rdp; + struct rcu_data *rdp_last; + struct rcu_data *rdp_old_leader; + struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu); struct task_struct *t; - if (rcu_nocb_mask == NULL) + /* + * If this isn't a no-CBs CPU or if it already has an rcuo kthread, + * then nothing to do. + */ + if (!rcu_is_nocb_cpu(cpu) || rdp_spawn->nocb_kthread) return; + + /* If we didn't spawn the leader first, reorganize! */ + rdp_old_leader = rdp_spawn->nocb_leader; + if (rdp_old_leader != rdp_spawn && !rdp_old_leader->nocb_kthread) { + rdp_last = NULL; + rdp = rdp_old_leader; + do { + rdp->nocb_leader = rdp_spawn; + if (rdp_last && rdp != rdp_spawn) + rdp_last->nocb_next_follower = rdp; + if (rdp == rdp_spawn) { + rdp = rdp->nocb_next_follower; + } else { + rdp_last = rdp; + rdp = rdp->nocb_next_follower; + rdp_last->nocb_next_follower = NULL; + } + } while (rdp); + rdp_spawn->nocb_next_follower = rdp_old_leader; + } + + /* Spawn the kthread for this CPU and RCU flavor. */ + t = kthread_run(rcu_nocb_kthread, rdp_spawn, + "rcuo%c/%d", rsp->abbr, cpu); + BUG_ON(IS_ERR(t)); + ACCESS_ONCE(rdp_spawn->nocb_kthread) = t; +} + +/* + * If the specified CPU is a no-CBs CPU that does not already have its + * rcuo kthreads, spawn them. + */ +static void rcu_spawn_all_nocb_kthreads(int cpu) +{ + struct rcu_state *rsp; + + if (rcu_scheduler_fully_active) + for_each_rcu_flavor(rsp) + rcu_spawn_one_nocb_kthread(rsp, cpu); +} + +/* + * Once the scheduler is running, spawn rcuo kthreads for all online + * no-CBs CPUs. This assumes that the early_initcall()s happen before + * non-boot CPUs come online -- if this changes, we will need to add + * some mutual exclusion. + */ +static void __init rcu_spawn_nocb_kthreads(void) +{ + int cpu; + + for_each_online_cpu(cpu) + rcu_spawn_all_nocb_kthreads(cpu); +} + +/* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */ +static int rcu_nocb_leader_stride = -1; +module_param(rcu_nocb_leader_stride, int, 0444); + +/* + * Initialize leader-follower relationships for all no-CBs CPU. + */ +static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp) +{ + int cpu; + int ls = rcu_nocb_leader_stride; + int nl = 0; /* Next leader. */ + struct rcu_data *rdp; + struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */ + struct rcu_data *rdp_prev = NULL; + + if (!have_rcu_nocb_mask) + return; + if (ls == -1) { + ls = int_sqrt(nr_cpu_ids); + rcu_nocb_leader_stride = ls; + } + + /* + * Each pass through this loop sets up one rcu_data structure and + * spawns one rcu_nocb_kthread(). + */ for_each_cpu(cpu, rcu_nocb_mask) { rdp = per_cpu_ptr(rsp->rda, cpu); - t = kthread_run(rcu_nocb_kthread, rdp, - "rcuo%c/%d", rsp->abbr, cpu); - BUG_ON(IS_ERR(t)); - ACCESS_ONCE(rdp->nocb_kthread) = t; + if (rdp->cpu >= nl) { + /* New leader, set up for followers & next leader. */ + nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls; + rdp->nocb_leader = rdp; + rdp_leader = rdp; + } else { + /* Another follower, link to previous leader. */ + rdp->nocb_leader = rdp_leader; + rdp_prev->nocb_next_follower = rdp; + } + rdp_prev = rdp; } } /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ static bool init_nocb_callback_list(struct rcu_data *rdp) { - if (rcu_nocb_mask == NULL || - !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask)) + if (!rcu_is_nocb_cpu(rdp->cpu)) return false; + rdp->nxttail[RCU_NEXT_TAIL] = NULL; return true; } #else /* #ifdef CONFIG_RCU_NOCB_CPU */ +static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) +{ + WARN_ON_ONCE(1); /* Should be dead code. */ + return false; +} + static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) { } @@ -2361,21 +2694,21 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, bool lazy, unsigned long flags) { - return 0; + return false; } static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, struct rcu_data *rdp, unsigned long flags) { - return 0; + return false; } static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) { } -static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) +static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) { return false; } @@ -2384,7 +2717,11 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) { } -static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) +static void rcu_spawn_all_nocb_kthreads(int cpu) +{ +} + +static void __init rcu_spawn_nocb_kthreads(void) { } @@ -2415,16 +2752,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu) #ifdef CONFIG_NO_HZ_FULL_SYSIDLE -/* - * Define RCU flavor that holds sysidle state. This needs to be the - * most active flavor of RCU. - */ -#ifdef CONFIG_PREEMPT_RCU -static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state; -#else /* #ifdef CONFIG_PREEMPT_RCU */ -static struct rcu_state *rcu_sysidle_state = &rcu_sched_state; -#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ - static int full_sysidle_state; /* Current system-idle state. */ #define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */ #define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */ @@ -2438,9 +2765,14 @@ static int full_sysidle_state; /* Current system-idle state. */ * to detect full-system idle states, not RCU quiescent states and grace * periods. The caller must have disabled interrupts. */ -static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq) +static void rcu_sysidle_enter(int irq) { unsigned long j; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + + /* If there are no nohz_full= CPUs, no need to track this. */ + if (!tick_nohz_full_enabled()) + return; /* Adjust nesting, check for fully idle. */ if (irq) { @@ -2505,8 +2837,14 @@ void rcu_sysidle_force_exit(void) * usermode execution does -not- count as idle here! The caller must * have disabled interrupts. */ -static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq) +static void rcu_sysidle_exit(int irq) { + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + + /* If there are no nohz_full= CPUs, no need to track this. */ + if (!tick_nohz_full_enabled()) + return; + /* Adjust nesting, check for already non-idle. */ if (irq) { rdtp->dynticks_idle_nesting++; @@ -2561,12 +2899,16 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, unsigned long j; struct rcu_dynticks *rdtp = rdp->dynticks; + /* If there are no nohz_full= CPUs, don't check system-wide idleness. */ + if (!tick_nohz_full_enabled()) + return; + /* * If some other CPU has already reported non-idle, if this is * not the flavor of RCU that tracks sysidle state, or if this * is an offline or the timekeeping CPU, nothing to do. */ - if (!*isidle || rdp->rsp != rcu_sysidle_state || + if (!*isidle || rdp->rsp != rcu_state_p || cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) return; if (rcu_gp_in_progress(rdp->rsp)) @@ -2592,7 +2934,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, */ static bool is_sysidle_rcu_state(struct rcu_state *rsp) { - return rsp == rcu_sysidle_state; + return rsp == rcu_state_p; } /* @@ -2670,7 +3012,7 @@ static void rcu_sysidle_cancel(void) static void rcu_sysidle_report(struct rcu_state *rsp, int isidle, unsigned long maxj, bool gpkt) { - if (rsp != rcu_sysidle_state) + if (rsp != rcu_state_p) return; /* Wrong flavor, ignore. */ if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) return; /* Running state machine from timekeeping CPU. */ @@ -2687,6 +3029,10 @@ static void rcu_sysidle_report(struct rcu_state *rsp, int isidle, static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, unsigned long maxj) { + /* If there are no nohz_full= CPUs, no need to track this. */ + if (!tick_nohz_full_enabled()) + return; + rcu_sysidle_report(rsp, isidle, maxj, true); } @@ -2713,7 +3059,8 @@ static void rcu_sysidle_cb(struct rcu_head *rhp) /* * Check to see if the system is fully idle, other than the timekeeping CPU. - * The caller must have disabled interrupts. + * The caller must have disabled interrupts. This is not intended to be + * called unless tick_nohz_full_enabled(). */ bool rcu_sys_is_idle(void) { @@ -2739,13 +3086,12 @@ bool rcu_sys_is_idle(void) /* Scan all the CPUs looking for nonidle CPUs. */ for_each_possible_cpu(cpu) { - rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu); + rdp = per_cpu_ptr(rcu_state_p->rda, cpu); rcu_sysidle_check_cpu(rdp, &isidle, &maxj); if (!isidle) break; } - rcu_sysidle_report(rcu_sysidle_state, - isidle, maxj, false); + rcu_sysidle_report(rcu_state_p, isidle, maxj, false); oldrss = rss; rss = ACCESS_ONCE(full_sysidle_state); } @@ -2772,7 +3118,7 @@ bool rcu_sys_is_idle(void) * provided by the memory allocator. */ if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL && - !rcu_gp_in_progress(rcu_sysidle_state) && + !rcu_gp_in_progress(rcu_state_p) && !rsh.inuse && xchg(&rsh.inuse, 1) == 0) call_rcu(&rsh.rh, rcu_sysidle_cb); return false; @@ -2788,11 +3134,11 @@ static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp) #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq) +static void rcu_sysidle_enter(int irq) { } -static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq) +static void rcu_sysidle_exit(int irq) { } @@ -2843,12 +3189,32 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp) */ static void rcu_bind_gp_kthread(void) { -#ifdef CONFIG_NO_HZ_FULL - int cpu = ACCESS_ONCE(tick_do_timer_cpu); + int __maybe_unused cpu; - if (cpu < 0 || cpu >= nr_cpu_ids) + if (!tick_nohz_full_enabled()) return; - if (raw_smp_processor_id() != cpu) +#ifdef CONFIG_NO_HZ_FULL_SYSIDLE + cpu = tick_do_timer_cpu; + if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu) set_cpus_allowed_ptr(current, cpumask_of(cpu)); -#endif /* #ifdef CONFIG_NO_HZ_FULL */ +#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ + if (!is_housekeeping_cpu(raw_smp_processor_id())) + housekeeping_affine(current); +#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ +} + +/* Record the current task on dyntick-idle entry. */ +static void rcu_dynticks_task_enter(void) +{ +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id(); +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ +} + +/* Record no current task on dyntick-idle exit. */ +static void rcu_dynticks_task_exit(void) +{ +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1; +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ } |