summaryrefslogtreecommitdiff
path: root/kernel/locking
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/locking')
-rw-r--r--kernel/locking/Makefile3
-rw-r--r--kernel/locking/lockdep.c27
-rw-r--r--kernel/locking/locktorture.c4
-rw-r--r--kernel/locking/mutex-debug.c12
-rw-r--r--kernel/locking/mutex-debug.h4
-rw-r--r--kernel/locking/mutex.c11
-rw-r--r--kernel/locking/mutex.h2
-rw-r--r--kernel/locking/osq_lock.c13
-rw-r--r--kernel/locking/percpu-rwsem.c229
-rw-r--r--kernel/locking/qspinlock.c8
-rw-r--r--kernel/locking/rtmutex.c68
-rw-r--r--kernel/locking/rtmutex_common.h5
-rw-r--r--kernel/locking/rwsem-xadd.c27
13 files changed, 288 insertions, 125 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 8e96f6cc2a4a..31322a4275cd 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,3 +1,6 @@
+# Any varying coverage in these files is non-deterministic
+# and is generally not a function of system call inputs.
+KCOV_INSTRUMENT := n
obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 60ace56618f6..774ab79d3ec7 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1264,11 +1264,11 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
this.parent = NULL;
this.class = class;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
arch_spin_lock(&lockdep_lock);
ret = __lockdep_count_forward_deps(&this);
arch_spin_unlock(&lockdep_lock);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return ret;
}
@@ -1291,11 +1291,11 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
this.parent = NULL;
this.class = class;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
arch_spin_lock(&lockdep_lock);
ret = __lockdep_count_backward_deps(&this);
arch_spin_unlock(&lockdep_lock);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return ret;
}
@@ -3128,10 +3128,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (depth) {
hlock = curr->held_locks + depth - 1;
if (hlock->class_idx == class_idx && nest_lock) {
- if (hlock->references)
+ if (hlock->references) {
+ /*
+ * Check: unsigned int references:12, overflow.
+ */
+ if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1))
+ return 0;
+
hlock->references++;
- else
+ } else {
hlock->references = 2;
+ }
return 1;
}
@@ -3819,7 +3826,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
- if (unlikely(!lock_stat))
+ if (unlikely(!lock_stat || !debug_locks))
return;
if (unlikely(current->lockdep_recursion))
@@ -3839,7 +3846,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
{
unsigned long flags;
- if (unlikely(!lock_stat))
+ if (unlikely(!lock_stat || !debug_locks))
return;
if (unlikely(current->lockdep_recursion))
@@ -4116,7 +4123,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
if (unlikely(!debug_locks))
return;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (i = 0; i < curr->lockdep_depth; i++) {
hlock = curr->held_locks + i;
@@ -4127,7 +4134,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock);
break;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8ef1919d63b2..d580b7d6ee6d 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -776,6 +776,8 @@ static void lock_torture_cleanup(void)
else
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: SUCCESS");
+ kfree(cxt.lwsa);
+ kfree(cxt.lrsa);
torture_cleanup_end();
}
@@ -917,6 +919,8 @@ static int __init lock_torture_init(void)
GFP_KERNEL);
if (reader_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
+ kfree(writer_tasks);
+ writer_tasks = NULL;
firsterr = -ENOMEM;
goto unwind;
}
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 3ef3736002d8..9c951fade415 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -49,21 +49,21 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter)
}
void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
- struct thread_info *ti)
+ struct task_struct *task)
{
SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
/* Mark the current thread as blocked on the lock: */
- ti->task->blocked_on = waiter;
+ task->blocked_on = waiter;
}
void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
- struct thread_info *ti)
+ struct task_struct *task)
{
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
- DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
- DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
- ti->task->blocked_on = NULL;
+ DEBUG_LOCKS_WARN_ON(waiter->task != task);
+ DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
+ task->blocked_on = NULL;
list_del_init(&waiter->list);
waiter->task = NULL;
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
index 0799fd3e4cfa..d06ae3bb46c5 100644
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -20,9 +20,9 @@ extern void debug_mutex_wake_waiter(struct mutex *lock,
extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
extern void debug_mutex_add_waiter(struct mutex *lock,
struct mutex_waiter *waiter,
- struct thread_info *ti);
+ struct task_struct *task);
extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
- struct thread_info *ti);
+ struct task_struct *task);
extern void debug_mutex_unlock(struct mutex *lock);
extern void debug_mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 89350f924c85..a70b90db3909 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -537,7 +537,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
goto skip_wait;
debug_mutex_lock_common(lock, &waiter);
- debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
+ debug_mutex_add_waiter(lock, &waiter, task);
/* add waiting tasks to the end of the waitqueue (FIFO): */
list_add_tail(&waiter.list, &lock->wait_list);
@@ -584,7 +584,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
}
__set_task_state(task, TASK_RUNNING);
- mutex_remove_waiter(lock, &waiter, current_thread_info());
+ mutex_remove_waiter(lock, &waiter, task);
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
@@ -605,7 +605,7 @@ skip_wait:
return 0;
err:
- mutex_remove_waiter(lock, &waiter, task_thread_info(task));
+ mutex_remove_waiter(lock, &waiter, task);
spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
mutex_release(&lock->dep_map, 1, ip);
@@ -719,6 +719,7 @@ static inline void
__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
{
unsigned long flags;
+ WAKE_Q(wake_q);
/*
* As a performance measurement, release the lock before doing other
@@ -746,11 +747,11 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
struct mutex_waiter, list);
debug_mutex_wake_waiter(lock, waiter);
-
- wake_up_process(waiter->task);
+ wake_q_add(&wake_q, waiter->task);
}
spin_unlock_mutex(&lock->wait_lock, flags);
+ wake_up_q(&wake_q);
}
/*
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 5cda397607f2..a68bae5e852a 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -13,7 +13,7 @@
do { spin_lock(lock); (void)(flags); } while (0)
#define spin_unlock_mutex(lock, flags) \
do { spin_unlock(lock); (void)(flags); } while (0)
-#define mutex_remove_waiter(lock, waiter, ti) \
+#define mutex_remove_waiter(lock, waiter, task) \
__list_del((waiter)->list.prev, (waiter)->list.next)
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index 05a37857ab55..8d7047ecef4e 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -104,6 +104,19 @@ bool osq_lock(struct optimistic_spin_queue *lock)
prev = decode_cpu(old);
node->prev = prev;
+
+ /*
+ * osq_lock() unqueue
+ *
+ * node->prev = prev osq_wait_next()
+ * WMB MB
+ * prev->next = node next->prev = prev // unqueue-C
+ *
+ * Here 'node->prev' and 'next->prev' are the same variable and we need
+ * to ensure these stores happen in-order to avoid corrupting the list.
+ */
+ smp_wmb();
+
WRITE_ONCE(prev->next, node);
/*
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f231e0bb311c..ce182599cf2e 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -8,151 +8,186 @@
#include <linux/sched.h>
#include <linux/errno.h>
-int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
+int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
const char *name, struct lock_class_key *rwsem_key)
{
- brw->fast_read_ctr = alloc_percpu(int);
- if (unlikely(!brw->fast_read_ctr))
+ sem->read_count = alloc_percpu(int);
+ if (unlikely(!sem->read_count))
return -ENOMEM;
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
- __init_rwsem(&brw->rw_sem, name, rwsem_key);
- rcu_sync_init(&brw->rss, RCU_SCHED_SYNC);
- atomic_set(&brw->slow_read_ctr, 0);
- init_waitqueue_head(&brw->write_waitq);
+ rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
+ __init_rwsem(&sem->rw_sem, name, rwsem_key);
+ init_waitqueue_head(&sem->writer);
+ sem->readers_block = 0;
return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
-void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
+void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
/*
* XXX: temporary kludge. The error path in alloc_super()
* assumes that percpu_free_rwsem() is safe after kzalloc().
*/
- if (!brw->fast_read_ctr)
+ if (!sem->read_count)
return;
- rcu_sync_dtor(&brw->rss);
- free_percpu(brw->fast_read_ctr);
- brw->fast_read_ctr = NULL; /* catch use after free bugs */
+ rcu_sync_dtor(&sem->rss);
+ free_percpu(sem->read_count);
+ sem->read_count = NULL; /* catch use after free bugs */
}
+EXPORT_SYMBOL_GPL(percpu_free_rwsem);
-/*
- * This is the fast-path for down_read/up_read. If it succeeds we rely
- * on the barriers provided by rcu_sync_enter/exit; see the comments in
- * percpu_down_write() and percpu_up_write().
- *
- * If this helper fails the callers rely on the normal rw_semaphore and
- * atomic_dec_and_test(), so in this case we have the necessary barriers.
- */
-static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
+int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
{
- bool success;
+ /*
+ * Due to having preemption disabled the decrement happens on
+ * the same CPU as the increment, avoiding the
+ * increment-on-one-CPU-and-decrement-on-another problem.
+ *
+ * If the reader misses the writer's assignment of readers_block, then
+ * the writer is guaranteed to see the reader's increment.
+ *
+ * Conversely, any readers that increment their sem->read_count after
+ * the writer looks are guaranteed to see the readers_block value,
+ * which in turn means that they are guaranteed to immediately
+ * decrement their sem->read_count, so that it doesn't matter that the
+ * writer missed them.
+ */
- preempt_disable();
- success = rcu_sync_is_idle(&brw->rss);
- if (likely(success))
- __this_cpu_add(*brw->fast_read_ctr, val);
- preempt_enable();
+ smp_mb(); /* A matches D */
- return success;
-}
+ /*
+ * If !readers_block the critical section starts here, matched by the
+ * release in percpu_up_write().
+ */
+ if (likely(!smp_load_acquire(&sem->readers_block)))
+ return 1;
-/*
- * Like the normal down_read() this is not recursive, the writer can
- * come after the first percpu_down_read() and create the deadlock.
- *
- * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
- * percpu_up_read() does rwsem_release(). This pairs with the usage
- * of ->rw_sem in percpu_down/up_write().
- */
-void percpu_down_read(struct percpu_rw_semaphore *brw)
-{
- might_sleep();
- rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
+ /*
+ * Per the above comment; we still have preemption disabled and
+ * will thus decrement on the same CPU as we incremented.
+ */
+ __percpu_up_read(sem);
- if (likely(update_fast_ctr(brw, +1)))
- return;
+ if (try)
+ return 0;
- /* Avoid rwsem_acquire_read() and rwsem_release() */
- __down_read(&brw->rw_sem);
- atomic_inc(&brw->slow_read_ctr);
- __up_read(&brw->rw_sem);
-}
-EXPORT_SYMBOL_GPL(percpu_down_read);
+ /*
+ * We either call schedule() in the wait, or we'll fall through
+ * and reschedule on the preempt_enable() in percpu_down_read().
+ */
+ preempt_enable_no_resched();
-int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
-{
- if (unlikely(!update_fast_ctr(brw, +1))) {
- if (!__down_read_trylock(&brw->rw_sem))
- return 0;
- atomic_inc(&brw->slow_read_ctr);
- __up_read(&brw->rw_sem);
- }
-
- rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 1, _RET_IP_);
+ /*
+ * Avoid lockdep for the down/up_read() we already have them.
+ */
+ __down_read(&sem->rw_sem);
+ this_cpu_inc(*sem->read_count);
+ __up_read(&sem->rw_sem);
+
+ preempt_disable();
return 1;
}
+EXPORT_SYMBOL_GPL(__percpu_down_read);
-void percpu_up_read(struct percpu_rw_semaphore *brw)
+void __percpu_up_read(struct percpu_rw_semaphore *sem)
{
- rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
-
- if (likely(update_fast_ctr(brw, -1)))
- return;
+ smp_mb(); /* B matches C */
+ /*
+ * In other words, if they see our decrement (presumably to aggregate
+ * zero, as that is the only time it matters) they will also see our
+ * critical section.
+ */
+ __this_cpu_dec(*sem->read_count);
- /* false-positive is possible but harmless */
- if (atomic_dec_and_test(&brw->slow_read_ctr))
- wake_up_all(&brw->write_waitq);
+ /* Prod writer to recheck readers_active */
+ wake_up(&sem->writer);
}
-EXPORT_SYMBOL_GPL(percpu_up_read);
+EXPORT_SYMBOL_GPL(__percpu_up_read);
+
+#define per_cpu_sum(var) \
+({ \
+ typeof(var) __sum = 0; \
+ int cpu; \
+ compiletime_assert_atomic_type(__sum); \
+ for_each_possible_cpu(cpu) \
+ __sum += per_cpu(var, cpu); \
+ __sum; \
+})
-static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
+/*
+ * Return true if the modular sum of the sem->read_count per-CPU variable is
+ * zero. If this sum is zero, then it is stable due to the fact that if any
+ * newly arriving readers increment a given counter, they will immediately
+ * decrement that same counter.
+ */
+static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
- unsigned int sum = 0;
- int cpu;
+ if (per_cpu_sum(*sem->read_count) != 0)
+ return false;
+
+ /*
+ * If we observed the decrement; ensure we see the entire critical
+ * section.
+ */
- for_each_possible_cpu(cpu) {
- sum += per_cpu(*brw->fast_read_ctr, cpu);
- per_cpu(*brw->fast_read_ctr, cpu) = 0;
- }
+ smp_mb(); /* C matches B */
- return sum;
+ return true;
}
-void percpu_down_write(struct percpu_rw_semaphore *brw)
+void percpu_down_write(struct percpu_rw_semaphore *sem)
{
+ /* Notify readers to take the slow path. */
+ rcu_sync_enter(&sem->rss);
+
+ down_write(&sem->rw_sem);
+
/*
- * Make rcu_sync_is_idle() == F and thus disable the fast-path in
- * percpu_down_read() and percpu_up_read(), and wait for gp pass.
- *
- * The latter synchronises us with the preceding readers which used
- * the fast-past, so we can not miss the result of __this_cpu_add()
- * or anything else inside their criticial sections.
+ * Notify new readers to block; up until now, and thus throughout the
+ * longish rcu_sync_enter() above, new readers could still come in.
*/
- rcu_sync_enter(&brw->rss);
+ WRITE_ONCE(sem->readers_block, 1);
- /* exclude other writers, and block the new readers completely */
- down_write(&brw->rw_sem);
+ smp_mb(); /* D matches A */
- /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
- atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
+ /*
+ * If they don't see our writer of readers_block, then we are
+ * guaranteed to see their sem->read_count increment, and therefore
+ * will wait for them.
+ */
- /* wait for all readers to complete their percpu_up_read() */
- wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
+ /* Wait for all now active readers to complete. */
+ wait_event(sem->writer, readers_active_check(sem));
}
EXPORT_SYMBOL_GPL(percpu_down_write);
-void percpu_up_write(struct percpu_rw_semaphore *brw)
+void percpu_up_write(struct percpu_rw_semaphore *sem)
{
- /* release the lock, but the readers can't use the fast-path */
- up_write(&brw->rw_sem);
/*
- * Enable the fast-path in percpu_down_read() and percpu_up_read()
- * but only after another gp pass; this adds the necessary barrier
- * to ensure the reader can't miss the changes done by us.
+ * Signal the writer is done, no fast path yet.
+ *
+ * One reason that we cannot just immediately flip to readers_fast is
+ * that new readers might fail to see the results of this writer's
+ * critical section.
+ *
+ * Therefore we force it through the slow path which guarantees an
+ * acquire and thereby guarantees the critical section's consistency.
+ */
+ smp_store_release(&sem->readers_block, 0);
+
+ /*
+ * Release the write lock, this will allow readers back in the game.
+ */
+ up_write(&sem->rw_sem);
+
+ /*
+ * Once this completes (at least one RCU-sched grace period hence) the
+ * reader fast path will be available again. Safe to use outside the
+ * exclusive write lock because its counting.
*/
- rcu_sync_exit(&brw->rss);
+ rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 8173bc7fec92..3b40c8809e52 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -423,6 +423,14 @@ queue:
tail = encode_tail(smp_processor_id(), idx);
node += idx;
+
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+
node->locked = 0;
node->next = NULL;
pv_init_node(node);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8251e75dd9c0..b066724d7a5b 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -65,8 +65,72 @@ static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
- if (!rt_mutex_has_waiters(lock))
- clear_rt_mutex_waiters(lock);
+ unsigned long owner, *p = (unsigned long *) &lock->owner;
+
+ if (rt_mutex_has_waiters(lock))
+ return;
+
+ /*
+ * The rbtree has no waiters enqueued, now make sure that the
+ * lock->owner still has the waiters bit set, otherwise the
+ * following can happen:
+ *
+ * CPU 0 CPU 1 CPU2
+ * l->owner=T1
+ * rt_mutex_lock(l)
+ * lock(l->lock)
+ * l->owner = T1 | HAS_WAITERS;
+ * enqueue(T2)
+ * boost()
+ * unlock(l->lock)
+ * block()
+ *
+ * rt_mutex_lock(l)
+ * lock(l->lock)
+ * l->owner = T1 | HAS_WAITERS;
+ * enqueue(T3)
+ * boost()
+ * unlock(l->lock)
+ * block()
+ * signal(->T2) signal(->T3)
+ * lock(l->lock)
+ * dequeue(T2)
+ * deboost()
+ * unlock(l->lock)
+ * lock(l->lock)
+ * dequeue(T3)
+ * ==> wait list is empty
+ * deboost()
+ * unlock(l->lock)
+ * lock(l->lock)
+ * fixup_rt_mutex_waiters()
+ * if (wait_list_empty(l) {
+ * l->owner = owner
+ * owner = l->owner & ~HAS_WAITERS;
+ * ==> l->owner = T1
+ * }
+ * lock(l->lock)
+ * rt_mutex_unlock(l) fixup_rt_mutex_waiters()
+ * if (wait_list_empty(l) {
+ * owner = l->owner & ~HAS_WAITERS;
+ * cmpxchg(l->owner, T1, NULL)
+ * ===> Success (l->owner = NULL)
+ *
+ * l->owner = owner
+ * ==> l->owner = T1
+ * }
+ *
+ * With the check for the waiter bit in place T3 on CPU2 will not
+ * overwrite. All tasks fiddling with the waiters bit are
+ * serialized by l->lock, so nothing else can modify the waiters
+ * bit. If the bit is set then nothing can change l->owner either
+ * so the simple RMW is safe. The cmpxchg() will simply fail if it
+ * happens in the middle of the RMW because the waiters bit is
+ * still set.
+ */
+ owner = READ_ONCE(*p);
+ if (owner & RT_MUTEX_HAS_WAITERS)
+ WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
}
/*
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 4f5f83c7d2d3..e317e1cbb3eb 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -75,8 +75,9 @@ task_top_pi_waiter(struct task_struct *p)
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
{
- return (struct task_struct *)
- ((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
+ unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
+
+ return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL);
}
/*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index a4d4de05b2d1..1be33caf157d 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -511,6 +511,33 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
unsigned long flags;
/*
+ * __rwsem_down_write_failed_common(sem)
+ * rwsem_optimistic_spin(sem)
+ * osq_unlock(sem->osq)
+ * ...
+ * atomic_long_add_return(&sem->count)
+ *
+ * - VS -
+ *
+ * __up_write()
+ * if (atomic_long_sub_return_release(&sem->count) < 0)
+ * rwsem_wake(sem)
+ * osq_is_locked(&sem->osq)
+ *
+ * And __up_write() must observe !osq_is_locked() when it observes the
+ * atomic_long_add_return() in order to not miss a wakeup.
+ *
+ * This boils down to:
+ *
+ * [S.rel] X = 1 [RmW] r0 = (Y += 0)
+ * MB RMB
+ * [RmW] Y += 1 [L] r1 = X
+ *
+ * exists (r0=1 /\ r1=0)
+ */
+ smp_rmb();
+
+ /*
* If a spinner is present, it is not necessary to do the wakeup.
* Try to do wakeup only if the trylock succeeds to minimize
* spinlock contention which may introduce too much delay in the