From 8315f42295d2667a7f942f154b73a86fd7cb2227 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Jun 2014 13:42:20 -0700 Subject: rcu: Add call_rcu_tasks() This commit adds a new RCU-tasks flavor of RCU, which provides call_rcu_tasks(). This RCU flavor's quiescent states are voluntary context switch (not preemption!) and userspace execution (not the idle loop -- use some sort of schedule_on_each_cpu() if you need to handle the idle tasks). Note that unlike other RCU flavors, these quiescent states occur in tasks, not necessarily CPUs. Includes fixes from Steven Rostedt. This RCU flavor is assumed to have very infrequent latency-tolerant updaters. This assumption permits significant simplifications, including a single global callback list protected by a single global lock, along with a single task-private linked list containing all tasks that have not yet passed through a quiescent state. If experience shows this assumption to be incorrect, the required additional complexity will be added. Suggested-by: Steven Rostedt Signed-off-by: Paul E. 
McKenney --- include/linux/init_task.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2bb4c4f3531a..dffd9258ee60 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -117,6 +117,14 @@ extern struct group_info init_groups; #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif +#ifdef CONFIG_TASKS_RCU +#define INIT_TASK_RCU_TASKS(tsk) \ + .rcu_tasks_holdout = false, \ + .rcu_tasks_holdout_list = \ + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), +#else +#define INIT_TASK_RCU_TASKS(tsk) +#endif extern struct cred init_cred; @@ -224,6 +232,7 @@ extern struct task_group root_task_group; INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ + INIT_TASK_RCU_TASKS(tsk) \ INIT_CPUSET_SEQ(tsk) \ INIT_RT_MUTEXES(tsk) \ INIT_VTIME(tsk) \ -- cgit v1.2.3 From 176f8f7a52cc6d09d686f0d900abda6942a52fbb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Aug 2014 17:43:50 -0700 Subject: rcu: Make TASKS_RCU handle nohz_full= CPUs Currently TASKS_RCU would ignore a CPU running a task in nohz_full= usermode execution. There would be neither a context switch nor a scheduling-clock interrupt to tell TASKS_RCU that the task in question had passed through a quiescent state. The grace period would therefore extend indefinitely. This commit therefore makes RCU's dyntick-idle subsystem record the task_struct structure of the task that is running in dyntick-idle mode on each CPU. The TASKS_RCU grace period can then access this information and record a quiescent state on behalf of any CPU running in dyntick-idle usermode. Signed-off-by: Paul E. 
McKenney --- include/linux/init_task.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dffd9258ee60..03b274873b06 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -121,7 +121,8 @@ extern struct group_info init_groups; #define INIT_TASK_RCU_TASKS(tsk) \ .rcu_tasks_holdout = false, \ .rcu_tasks_holdout_list = \ - LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \ + .rcu_tasks_idle_cpu = -1, #else #define INIT_TASK_RCU_TASKS(tsk) #endif -- cgit v1.2.3 From 1d082fd061884a587c490c4fc8a2056ce1e47624 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Aug 2014 16:01:53 -0700 Subject: rcu: Remove local_irq_disable() in rcu_preempt_note_context_switch() The rcu_preempt_note_context_switch() function is on a scheduling fast path, so it would be good to avoid disabling irqs. The reason that irqs are disabled is to synchronize process-level and irq-handler access to the task_struct ->rcu_read_unlock_special bitmask. This commit therefore makes ->rcu_read_unlock_special instead be a union of bools with a short allowing single-access checks in RCU's __rcu_read_unlock(). This results in the process-level and irq-handler accesses being simple loads and stores, so that irqs need no longer be disabled. This commit therefore removes the irq disabling from rcu_preempt_note_context_switch(). Reported-by: Peter Zijlstra Signed-off-by: Paul E. 
McKenney --- include/linux/init_task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 03b274873b06..77fc43f8fb72 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -111,7 +111,7 @@ extern struct group_info init_groups; #ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ - .rcu_read_unlock_special = 0, \ + .rcu_read_unlock_special.s = 0, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ INIT_TASK_RCU_TREE_PREEMPT() #else -- cgit v1.2.3 From 28f6569ab7d036cd4ee94c26bb76dc1b3f3fc056 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Mon, 22 Sep 2014 14:00:48 -0400 Subject: rcu: Remove redundant TREE_PREEMPT_RCU config option PREEMPT_RCU and TREE_PREEMPT_RCU serve the same function after TINY_PREEMPT_RCU has been removed. This patch removes TREE_PREEMPT_RCU and uses PREEMPT_RCU config option in its place. Signed-off-by: Pranith Kumar Signed-off-by: Paul E. 
McKenney --- include/linux/init_task.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 77fc43f8fb72..d996aef8044f 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -102,7 +102,7 @@ extern struct group_info init_groups; #define INIT_IDS #endif -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_TREE_PREEMPT() \ .rcu_blocked_node = NULL, #else -- cgit v1.2.3 From d8b163c4c657478ef33c082cff78d03a4ca07bb2 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 11 Nov 2014 12:46:29 +0300 Subject: sched/numa: Init numa balancing fields of init_task We do not initialize init_task.numa_preferred_nid, but this value is inherited by the userspace "init" process: rest_init()->kernel_thread(kernel_init)->do_fork(CLONE_VM); __sched_fork() { if (clone_flags & CLONE_VM) p->numa_preferred_nid = current->numa_preferred_nid; else p->numa_preferred_nid = -1; } kernel_init() becomes the userspace "init" process. So, we propagate a garbage nid to userspace, and it may be used during numa balancing. Currently, we have no reports of this causing a problem, but it seems we should set it to be sure. Even if init_task.numa_preferred_nid is zero, we may meet a weird configuration without nid#0. On sparc64, where processors are numbered physically, I saw a machine without cpu#1, while cpu#2 existed. Possibly, something similar may happen with numa nodes. So, let's initialize it and be sure we're safe. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Eric Paris Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paul E. 
McKenney Cc: Sergey Dyasly Link: http://lkml.kernel.org/r/1415699189.15631.6.camel@tkhai Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/init_task.h') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 77fc43f8fb72..5f30ac8c82bc 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -166,6 +166,15 @@ extern struct task_group root_task_group; # define INIT_RT_MUTEXES(tsk) #endif +#ifdef CONFIG_NUMA_BALANCING +# define INIT_NUMA_BALANCING(tsk) \ + .numa_preferred_nid = -1, \ + .numa_group = NULL, \ + .numa_faults = NULL, +#else +# define INIT_NUMA_BALANCING(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -237,6 +246,7 @@ extern struct task_group root_task_group; INIT_CPUSET_SEQ(tsk) \ INIT_RT_MUTEXES(tsk) \ INIT_VTIME(tsk) \ + INIT_NUMA_BALANCING(tsk) \ } -- cgit v1.2.3