From 559f9badd11ddf399f88b18b4c0f110fd511ae53 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Wed, 14 Mar 2012 22:17:39 -0400
Subject: rcu: List-debug variants of rcu list routines.

* Make __list_add_rcu check the next->prev and prev->next pointers
  just like __list_add does.
* Make list_del_rcu use __list_del_entry, which does the same checking
  at deletion time.

Has been running for a week here without anything being tripped up,
but it seems worth adding for completeness just in case something
ever does corrupt those lists.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rculist.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index d079290843a9..a20c05096231 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -30,6 +30,7 @@
  * This is only for internal list manipulation where we know
  * the prev/next entries already!
  */
+#ifndef CONFIG_DEBUG_LIST
 static inline void __list_add_rcu(struct list_head *new,
 		struct list_head *prev, struct list_head *next)
 {
@@ -38,6 +39,10 @@ static inline void __list_add_rcu(struct list_head *new,
 	rcu_assign_pointer(list_next_rcu(prev), new);
 	next->prev = new;
 }
+#else
+extern void __list_add_rcu(struct list_head *new,
+		struct list_head *prev, struct list_head *next);
+#endif
 
 /**
  * list_add_rcu - add a new entry to rcu-protected list
@@ -108,7 +113,7 @@ static inline void list_add_tail_rcu(struct list_head *new,
  */
 static inline void list_del_rcu(struct list_head *entry)
 {
-	__list_del(entry->prev, entry->next);
+	__list_del_entry(entry);
 	entry->prev = LIST_POISON2;
 }
 
-- 
cgit v1.2.3


From f88022a4f650ac1778cafcc17d2e522283bdf590 Mon Sep 17 00:00:00 2001
From: Michel Machado <michel@digirati.com.br>
Date: Tue, 10 Apr 2012 14:07:40 -0400
Subject: rcu: Replace list_first_entry_rcu() with list_first_or_null_rcu()

The list_first_entry_rcu() macro is inherently unsafe because it cannot
be applied to an empty list.  But because RCU readers do not exclude
updaters, a list might become empty between the time that list_empty()
claimed it was non-empty and the time that list_first_entry_rcu() is
invoked.  Therefore, the list_empty() test cannot be separated from the
list_first_entry_rcu() call.  This commit therefore combines these two
macros to create a new list_first_or_null_rcu() macro that replaces
the old (and unsafe) list_first_entry_rcu() macro.

This patch incorporates Paul's review comments on the previous version of
this patch available here:

https://lkml.org/lkml/2012/4/2/536

This patch cannot break any upstream code because list_first_entry_rcu()
is not being used anywhere in the kernel (tested with grep(1)), and any
external code using it is probably broken as a result of using it.

Signed-off-by: Michel Machado <michel@digirati.com.br>
CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
CC: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rculist.h | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index a20c05096231..e0f0fab20415 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -233,18 +233,43 @@ static inline void list_splice_init_rcu(struct list_head *list,
 	})
 
 /**
- * list_first_entry_rcu - get the first element from a list
+ * Where are list_empty_rcu() and list_first_entry_rcu()?
+ *
+ * Implementing those functions following their counterparts list_empty() and
+ * list_first_entry() is not advisable because they lead to subtle race
+ * conditions as the following snippet shows:
+ *
+ * if (!list_empty_rcu(mylist)) {
+ *	struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member);
+ *	do_something(bar);
+ * }
+ *
+ * The list may not be empty when list_empty_rcu checks it, but it may be when
+ * list_first_entry_rcu rereads the ->next pointer.
+ *
+ * Rereading the ->next pointer is not a problem for list_empty() and
+ * list_first_entry() because they would be protected by a lock that blocks
+ * writers.
+ *
+ * See list_first_or_null_rcu for an alternative.
+ */
+
+/**
+ * list_first_or_null_rcu - get the first element from a list
  * @ptr:        the list head to take the element from.
  * @type:       the type of the struct this is embedded in.
  * @member:     the name of the list_struct within the struct.
  *
- * Note, that list is expected to be not empty.
+ * Note that if the list is empty, it returns NULL.
  *
  * This primitive may safely run concurrently with the _rcu list-mutation
  * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
  */
-#define list_first_entry_rcu(ptr, type, member) \
-	list_entry_rcu((ptr)->next, type, member)
+#define list_first_or_null_rcu(ptr, type, member) \
+	({struct list_head *__ptr = (ptr); \
+	  struct list_head *__next = ACCESS_ONCE(__ptr->next); /* read once */ \
+	  likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \
+	})
 
 /**
  * list_for_each_entry_rcu	-	iterate over rcu list of given type
-- 
cgit v1.2.3


From d8169d4c369e8aa2fda10df705a4957331b5a4db Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 19 Apr 2012 11:44:39 -0700
Subject: rcu: Make __kfree_rcu() less dependent on compiler choices

Currently, __kfree_rcu() is implemented as an inline function, and
contains a BUILD_BUG_ON() that malfunctions if __kfree_rcu() is compiled
as an out-of-line function.  Unfortunately, there are compiler settings
(e.g., -O0) that can result in __kfree_rcu() being compiled out of line,
resulting in annoying build breakage.  This commit therefore converts
both __kfree_rcu() and __is_kfree_rcu_offset() from inline functions to
macros to prevent such misbehavior on the part of the compiler.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 20fb776a1d4a..d5dfb109dfe1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -922,6 +922,21 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
 	kfree_call_rcu(head, (rcu_callback)offset);
 }
 
+/*
+ * Does the specified offset indicate that the corresponding rcu_head
+ * structure can be handled by kfree_rcu()?
+ */
+#define __is_kfree_rcu_offset(offset) ((offset) < 4096)
+
+/*
+ * Helper macro for kfree_rcu() to prevent argument-expansion eyestrain.
+ */
+#define __kfree_rcu(head, offset) \
+	do { \
+		BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \
+		kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \
+	} while (0)
+
 /**
  * kfree_rcu() - kfree an object after a grace period.
  * @ptr:	pointer to kfree
@@ -944,6 +959,9 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
  *
  * Note that the allowable offset might decrease in the future, for example,
  * to allow something like kmem_cache_free_rcu().
+ *
+ * The BUILD_BUG_ON check must not involve any function calls, hence the
+ * checks are done in macros here.
  */
 #define kfree_rcu(ptr, rcu_head)					\
 	__kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
-- 
cgit v1.2.3


From 6d8133919bac4270883b24328500875a49e71b36 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 23 Feb 2012 13:30:16 -0800
Subject: rcu: Document why rcu_blocking_is_gp() is safe

The rcu_blocking_is_gp() function tests to see if there is only one
online CPU, and if so, synchronize_sched() and friends become no-ops.
However, for larger systems, num_online_cpus() scans a large vector,
and might be preempted while doing so.  While preempted, any number
of CPUs might come online and go offline, potentially resulting in
num_online_cpus() returning 1 when there had never been only one
CPU online.  This could result in a too-short RCU grace period, which
could in turn result in total failure, except that the only way that
the grace period is too short is if there is an RCU read-side critical
section spanning it.  For RCU-sched and RCU-bh (which are the only
cases using rcu_blocking_is_gp()), RCU read-side critical sections
have either preemption or bh disabled, which prevents CPUs from going
offline.  This in turn prevents actual failures from occurring.

This commit therefore adds a large block comment to rcu_blocking_is_gp()
documenting why it is safe.  This commit also moves rcu_blocking_is_gp()
into kernel/rcutree.c, which should help prevent unwary developers from
mistaking it for a generally useful function.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutree.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index e8ee5dd0854c..b06363055ef8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -98,13 +98,6 @@ extern void rcu_force_quiescent_state(void);
 extern void rcu_bh_force_quiescent_state(void);
 extern void rcu_sched_force_quiescent_state(void);
 
-/* A context switch is a grace period for RCU-sched and RCU-bh. */
-static inline int rcu_blocking_is_gp(void)
-{
-	might_sleep();  /* Check for RCU read-side critical section. */
-	return num_online_cpus() == 1;
-}
-
 extern void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 
-- 
cgit v1.2.3


From cef50120b61c2af4ce34bc165e19cad66296f93d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Sun, 5 Feb 2012 07:42:44 -0800
Subject: rcu: Direct algorithmic SRCU implementation

The current implementation of synchronize_srcu_expedited() can cause
severe OS jitter due to its use of synchronize_sched(), which in turn
invokes try_stop_cpus(), which causes each CPU to be sent an IPI.
This can result in severe performance degradation for real-time workloads
and especially for short-iteration-length HPC workloads.  Furthermore,
because only one instance of try_stop_cpus() can be making forward progress
at a given time, only one instance of synchronize_srcu_expedited() can
make forward progress at a time, even if they are all operating on
distinct srcu_struct structures.

This commit, inspired by an earlier implementation by Peter Zijlstra
(https://lkml.org/lkml/2012/1/31/211) and by further offline discussions,
takes a strictly algorithmic bits-in-memory approach.  This has the
disadvantage of requiring one explicit memory-barrier instruction in
each of srcu_read_lock() and srcu_read_unlock(), but on the other hand
completely dispenses with OS jitter and furthermore allows SRCU to be
used freely by CPUs that RCU believes to be idle or offline.

The update-side implementation handles the single read-side memory
barrier by rechecking the per-CPU counters after summing them and
by running through the update-side state machine twice.

This implementation has passed moderate rcutorture testing on both
x86 and Power.  Also updated to use this_cpu_ptr() instead of per_cpu_ptr(),
as suggested by Peter Zijlstra.

Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 include/linux/srcu.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index d3d5fa54f25e..a478c8eb8479 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -31,13 +31,19 @@
 #include <linux/rcupdate.h>
 
 struct srcu_struct_array {
-	int c[2];
+	unsigned long c[2];
 };
 
+/* Bit definitions for field ->c above and ->snap below. */
+#define SRCU_USAGE_BITS		2
+#define SRCU_REF_MASK		(ULONG_MAX >> SRCU_USAGE_BITS)
+#define SRCU_USAGE_COUNT	(SRCU_REF_MASK + 1)
+
 struct srcu_struct {
-	int completed;
+	unsigned completed;
 	struct srcu_struct_array __percpu *per_cpu_ref;
 	struct mutex mutex;
+	unsigned long snap[NR_CPUS];
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-- 
cgit v1.2.3


From 440253c17fc4ed41d778492a7fb44dc0d756eccc Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Wed, 22 Feb 2012 13:29:06 -0800
Subject: rcu: Increment upper bit only for srcu_read_lock()

The purpose of the upper bit of SRCU's per-CPU counters is to guarantee
that no reasonable series of srcu_read_lock() and srcu_read_unlock()
operations can return the value of the counter to its original value.
This guarantee is required only after the index has been switched to
the other set of counters, so at most one srcu_read_lock() can affect
a given CPU's counter.  The number of srcu_read_unlock() operations
on a given counter is limited to the number of tasks in the system,
which given the Linux kernel's current structure is limited to far less
than 2^30 on 32-bit systems and far less than 2^62 on 64-bit systems.
(Something about a limited number of bytes in the kernel's address space.)

Therefore, if srcu_read_lock() increments the upper bits, then
srcu_read_unlock() need not do so.  In this case, an srcu_read_lock() and
an srcu_read_unlock() will flip the lower bit of the upper field of the
counter.  An unreasonably large additional number of srcu_read_unlock()
operations would be required to return the counter to its initial value,
thus preserving the guarantee.

This commit takes this approach, which further allows it to shrink
the size of the upper field to one bit, making the number of
srcu_read_unlock() operations required to return the counter to its
initial value even more unreasonable than before.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index a478c8eb8479..5b49d41868c8 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -35,7 +35,7 @@ struct srcu_struct_array {
 };
 
 /* Bit definitions for field ->c above and ->snap below. */
-#define SRCU_USAGE_BITS		2
+#define SRCU_USAGE_BITS		1
 #define SRCU_REF_MASK		(ULONG_MAX >> SRCU_USAGE_BITS)
 #define SRCU_USAGE_COUNT	(SRCU_REF_MASK + 1)
 
-- 
cgit v1.2.3


From b52ce066c55a6a53cf1f8d71308d74f908e31b99 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Mon, 27 Feb 2012 09:29:09 -0800
Subject: rcu: Implement a variant of Peter's SRCU algorithm

This commit implements a variant of Peter's algorithm, which may be found
at https://lkml.org/lkml/2012/2/1/119.

o	Make the checking lock-free to enable parallel checking.
	Parallel checking is required when (1) the original checking
	task is preempted for a long time, (2) synchronize_srcu_expedited()
	starts during an ongoing SRCU grace period, or (3) we wish to
	avoid acquiring a lock.

o	Since the checking is lock-free, we avoid a mutex in state machine
	for call_srcu().

o	Remove the SRCU_REF_MASK and remove the coupling with the flipping.
	This might allow us to remove the preempt_disable() in future
	versions, though such removal will need great care because it
	rescinds the one-old-reader-per-CPU guarantee.

o	Remove a smp_mb(), simplify the comments and make the smp_mb() pairs
	more intuitive.

Inspired-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 5b49d41868c8..15354db3e865 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -32,18 +32,13 @@
 
 struct srcu_struct_array {
 	unsigned long c[2];
+	unsigned long seq[2];
 };
 
-/* Bit definitions for field ->c above and ->snap below. */
-#define SRCU_USAGE_BITS		1
-#define SRCU_REF_MASK		(ULONG_MAX >> SRCU_USAGE_BITS)
-#define SRCU_USAGE_COUNT	(SRCU_REF_MASK + 1)
-
 struct srcu_struct {
 	unsigned completed;
 	struct srcu_struct_array __percpu *per_cpu_ref;
 	struct mutex mutex;
-	unsigned long snap[NR_CPUS];
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-- 
cgit v1.2.3


From 966f58c2f6df826f385706673a9bb1edcfd3499a Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 6 Mar 2012 17:57:33 +0800
Subject: rcu: Remove unused srcu_barrier()

The old srcu_barrier() macro is now unused.  This commit removes it so
that it may be used for the SRCU flavor of rcu_barrier(), which will in
turn be needed to allow the upcoming call_srcu() to be used from within
modules.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 15354db3e865..e5ce80452b62 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -44,12 +44,6 @@ struct srcu_struct {
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 };
 
-#ifndef CONFIG_PREEMPT
-#define srcu_barrier() barrier()
-#else /* #ifndef CONFIG_PREEMPT */
-#define srcu_barrier()
-#endif /* #else #ifndef CONFIG_PREEMPT */
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 int __init_srcu_struct(struct srcu_struct *sp, const char *name,
-- 
cgit v1.2.3


From 931ea9d1a6e06a5e3af03aa4aaaa7c7fd90e163f Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Mon, 19 Mar 2012 16:12:13 +0800
Subject: rcu: Implement per-domain single-threaded call_srcu() state machine

This commit implements an SRCU state machine in support of call_srcu().
The state machine is preemptible, light-weight, and single-threaded,
minimizing synchronization overhead.  In particular, there is no longer
any need for synchronize_srcu() to be guarded by a mutex.

Expedited processing is handled, at least in the absence of concurrent
grace-period operations on that same srcu_struct structure, by having
the synchronize_srcu_expedited() thread take on the role of the
workqueue thread for one iteration.

There is a reasonable probability that a given SRCU callback will
be invoked on the same CPU that registered it, however, there is no
guarantee.  Concurrent SRCU grace-period primitives can cause callbacks
to be executed elsewhere, even in absence of CPU-hotplug operations.

Callbacks execute in process context, but under the influence of
local_bh_disable(), so it is illegal to sleep in an SRCU callback
function.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index e5ce80452b62..55a5c52cbb25 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -29,16 +29,30 @@
 
 #include <linux/mutex.h>
 #include <linux/rcupdate.h>
+#include <linux/workqueue.h>
 
 struct srcu_struct_array {
 	unsigned long c[2];
 	unsigned long seq[2];
 };
 
+struct rcu_batch {
+	struct rcu_head *head, **tail;
+};
+
 struct srcu_struct {
 	unsigned completed;
 	struct srcu_struct_array __percpu *per_cpu_ref;
-	struct mutex mutex;
+	spinlock_t queue_lock; /* protect ->batch_queue, ->running */
+	bool running;
+	/* callbacks just queued */
+	struct rcu_batch batch_queue;
+	/* callbacks try to do the first check_zero */
+	struct rcu_batch batch_check0;
+	/* callbacks done with the first check_zero and the flip */
+	struct rcu_batch batch_check1;
+	struct rcu_batch batch_done;
+	struct delayed_work work;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -62,12 +76,33 @@ int init_srcu_struct(struct srcu_struct *sp);
 
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
+/**
+ * call_srcu() - Queue a callback for invocation after an SRCU grace period
+ * @sp: srcu_struct in which to queue the callback
+ * @head: structure to be used for queueing the SRCU callback.
+ * @func: function to be invoked after the SRCU grace period
+ *
+ * The callback function will be invoked some time after a full SRCU
+ * grace period elapses, in other words after all pre-existing SRCU
+ * read-side critical sections have completed.  However, the callback
+ * function might well execute concurrently with other SRCU read-side
+ * critical sections that started after call_srcu() was invoked.  SRCU
+ * read-side critical sections are delimited by srcu_read_lock() and
+ * srcu_read_unlock(), and may be nested.
+ *
+ * The callback will be invoked from process context, but must nevertheless
+ * be fast and must not block.
+ */
+void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
+		void (*func)(struct rcu_head *head));
+
 void cleanup_srcu_struct(struct srcu_struct *sp);
 int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
 void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
 void synchronize_srcu(struct srcu_struct *sp);
 void synchronize_srcu_expedited(struct srcu_struct *sp);
 long srcu_batches_completed(struct srcu_struct *sp);
+void srcu_barrier(struct srcu_struct *sp);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
-- 
cgit v1.2.3


From 616c310e83b872024271c915c1b9ab505b9efad9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Tue, 27 Mar 2012 16:02:08 -0700
Subject: rcu: Move PREEMPT_RCU preemption to switch_to() invocation

Currently, PREEMPT_RCU readers are enqueued upon entry to the scheduler.
This is inefficient because enqueuing is required only if there is a
context switch, and entry to the scheduler does not guarantee a context
switch.

The commit therefore moves the enqueuing to immediately precede the
call to switch_to() from the scheduler.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rcupdate.h |  1 +
 include/linux/rcutiny.h  |  6 ------
 include/linux/sched.h    | 10 ++++++++++
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 20fb776a1d4a..bbfe7854a6a6 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -184,6 +184,7 @@ static inline int rcu_preempt_depth(void)
 /* Internal to kernel */
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
+extern void rcu_preempt_note_context_switch(void);
 extern void rcu_check_callbacks(int cpu, int user);
 struct notifier_block;
 extern void rcu_idle_enter(void);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e93df77176d1..080b5bdda28e 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -87,10 +87,6 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 
 #ifdef CONFIG_TINY_RCU
 
-static inline void rcu_preempt_note_context_switch(void)
-{
-}
-
 static inline void exit_rcu(void)
 {
 }
@@ -102,7 +98,6 @@ static inline int rcu_needs_cpu(int cpu)
 
 #else /* #ifdef CONFIG_TINY_RCU */
 
-void rcu_preempt_note_context_switch(void);
 extern void exit_rcu(void);
 int rcu_preempt_needs_cpu(void);
 
@@ -116,7 +111,6 @@ static inline int rcu_needs_cpu(int cpu)
 static inline void rcu_note_context_switch(int cpu)
 {
 	rcu_sched_qs(cpu);
-	rcu_preempt_note_context_switch();
 }
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 81a173c0897d..8f3fd945070f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1905,12 +1905,22 @@ static inline void rcu_copy_process(struct task_struct *p)
 	INIT_LIST_HEAD(&p->rcu_node_entry);
 }
 
+static inline void rcu_switch_from(struct task_struct *prev)
+{
+	if (prev->rcu_read_lock_nesting != 0)
+		rcu_preempt_note_context_switch();
+}
+
 #else
 
 static inline void rcu_copy_process(struct task_struct *p)
 {
 }
 
+static inline void rcu_switch_from(struct task_struct *prev)
+{
+}
+
 #endif
 
 #ifdef CONFIG_SMP
-- 
cgit v1.2.3


From 9dd8fb16c36178df2066387d2abd44d8b4dca8c8 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Fri, 13 Apr 2012 12:54:22 -0700
Subject: rcu: Make exit_rcu() more precise and consolidate

When running preemptible RCU, if a task exits in an RCU read-side
critical section having blocked within that same RCU read-side critical
section, the task must be removed from the list of tasks blocking a
grace period (perhaps the current grace period, perhaps the next grace
period, depending on timing).  The exit() path invokes exit_rcu() to
do this cleanup.

However, the current implementation of exit_rcu() needlessly does the
cleanup even if the task did not block within the current RCU read-side
critical section, which wastes time and needlessly increases the size
of the state space.  Fix this by only doing the cleanup if the current
task is actually on the list of tasks blocking some grace period.

While we are at it, consolidate the two identical exit_rcu() functions
into a single function.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Linus Torvalds <torvalds@linux-foundation.org>

Conflicts:

	kernel/rcupdate.c
---
 include/linux/rcupdate.h |  1 +
 include/linux/rcutiny.h  |  5 -----
 include/linux/rcutree.h  | 12 ------------
 3 files changed, 1 insertion(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index bbfe7854a6a6..29665a3b3ac5 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -191,6 +191,7 @@ extern void rcu_idle_enter(void);
 extern void rcu_idle_exit(void);
 extern void rcu_irq_enter(void);
 extern void rcu_irq_exit(void);
+extern void exit_rcu(void);
 
 /**
  * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 080b5bdda28e..adb5e5a38cae 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -87,10 +87,6 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 
 #ifdef CONFIG_TINY_RCU
 
-static inline void exit_rcu(void)
-{
-}
-
 static inline int rcu_needs_cpu(int cpu)
 {
 	return 0;
@@ -98,7 +94,6 @@ static inline int rcu_needs_cpu(int cpu)
 
 #else /* #ifdef CONFIG_TINY_RCU */
 
-extern void exit_rcu(void);
 int rcu_preempt_needs_cpu(void);
 
 static inline int rcu_needs_cpu(int cpu)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index e8ee5dd0854c..782a8ab51bc1 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,18 +45,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
 	rcu_note_context_switch(cpu);
 }
 
-#ifdef CONFIG_TREE_PREEMPT_RCU
-
-extern void exit_rcu(void);
-
-#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-
-static inline void exit_rcu(void)
-{
-}
-
-#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
-
 extern void synchronize_rcu_bh(void);
 extern void synchronize_sched_expedited(void);
 extern void synchronize_rcu_expedited(void);
-- 
cgit v1.2.3