From 242489cfe97d44290e7f88b12591fab6c0819045 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:50 -0700 Subject: locking/mutexes: Standardize arguments in lock/unlock slowpaths Just how the locking-end behaves, when unlocking, go ahead and obtain the proper data structure immediately after the previous (asm-end) call exits and there are (probably) pending waiters. This simplifies a bit some of the layering. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra Cc: jason.low2@hp.com Cc: aswin@hp.com Cc: mingo@kernel.org Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1406752916-3341-1-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/mutex.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index ae712b25e492..ad0e3335c481 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -679,9 +679,8 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); * Release the lock, slowpath: */ static inline void -__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) +__mutex_unlock_common_slowpath(struct mutex *lock, int nested) { - struct mutex *lock = container_of(lock_count, struct mutex, count); unsigned long flags; /* @@ -716,7 +715,9 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) __visible void __mutex_unlock_slowpath(atomic_t *lock_count) { - __mutex_unlock_common_slowpath(lock_count, 1); + struct mutex *lock = container_of(lock_count, struct mutex, count); + + __mutex_unlock_common_slowpath(lock, 1); } #ifndef CONFIG_DEBUG_LOCK_ALLOC -- cgit v1.2.3 From 42fa566bd74aa7b95413fb00611ec983b488222d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:51 -0700 Subject: locking/mutexes: Document quick lock release when unlocking When unlocking, we always want to reach the slowpath with the lock's counter indicating it is unlocked. -- as returned by the asm fastpath call or by explicitly setting it. While doing so, at least in theory, we can optimize and allow faster lock stealing. When unlocking, we always want to reach the slowpath with the lock's counter indicating it is unlocked. -- as returned by the asm fastpath call or by explicitly setting it. While doing so, at least in theory, we can optimize and allow faster lock stealing. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra Cc: jason.low2@hp.com Cc: aswin@hp.com Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1406752916-3341-2-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/mutex.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index ad0e3335c481..93bec48f09ed 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -684,9 +684,16 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested) unsigned long flags; /* - * some architectures leave the lock unlocked in the fastpath failure + * As a performance measurement, release the lock before doing other + * wakeup related duties to follow. This allows other tasks to acquire + * the lock sooner, while still handling cleanups in past unlock calls. + * This can be done as we do not enforce strict equivalence between the + * mutex counter and wait_list. + * + * + * Some architectures leave the lock unlocked in the fastpath failure * case, others need to leave it locked. In the later case we have to - * unlock it here + * unlock it here - as the lock counter is currently 0 or negative. */ if (__mutex_slowpath_needs_to_unlock()) atomic_set(&lock->count, 1); -- cgit v1.2.3 From aa9fc0c19bee0cbc152e0e06488095fb69229236 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:52 -0700 Subject: locking/mcs: Remove obsolete comment ... as we clearly inline mcs_spin_lock() now. Signed-off-by: Davidlohr Bueso Acked-by: Jason Low Signed-off-by: Peter Zijlstra Cc: aswin@hp.com Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1406752916-3341-3-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/mcs_spinlock.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index 23e89c5930e9..4d60986fcbee 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -56,9 +56,6 @@ do { \ * If the lock has already been acquired, then this will proceed to spin * on this node->locked until the previous lock holder sets the node->locked * in mcs_spin_unlock(). - * - * We don't inline mcs_spin_lock() so that perf can correctly account for the - * time spent in this lock function. */ static inline void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -- cgit v1.2.3 From 76916515d9d84e6552ee5e218e0ed566ad75e600 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:53 -0700 Subject: locking/mutexes: Refactor optimistic spinning code When we fail to acquire the mutex in the fastpath, we end up calling __mutex_lock_common(). A *lot* goes on in this function. Move out the optimistic spinning code into mutex_optimistic_spin() and simplify the former a bit. Furthermore, this is similar to what we have in rwsems. No logical changes. Signed-off-by: Davidlohr Bueso Acked-by: Jason Low Signed-off-by: Peter Zijlstra Cc: aswin@hp.com Cc: mingo@kernel.org Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1406752916-3341-4-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/mutex.c | 396 ++++++++++++++++++++++++++----------------------- 1 file changed, 214 insertions(+), 182 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 93bec48f09ed..0d8b6ed93874 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -106,6 +106,92 @@ void __sched mutex_lock(struct mutex *lock) EXPORT_SYMBOL(mutex_lock); #endif +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, + struct ww_acquire_ctx *ww_ctx) +{ +#ifdef CONFIG_DEBUG_MUTEXES + /* + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, + * but released with a normal mutex_unlock in this call. + * + * This should never happen, always use ww_mutex_unlock. + */ + DEBUG_LOCKS_WARN_ON(ww->ctx); + + /* + * Not quite done after calling ww_acquire_done() ? + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); + + if (ww_ctx->contending_lock) { + /* + * After -EDEADLK you tried to + * acquire a different ww_mutex? Bad! + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); + + /* + * You called ww_mutex_lock after receiving -EDEADLK, + * but 'forgot' to unlock everything else first? + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); + ww_ctx->contending_lock = NULL; + } + + /* + * Naughty, using a different class will lead to undefined behavior! + */ + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); +#endif + ww_ctx->acquired++; +} + +/* + * after acquiring lock with fastpath or when we lost out in contested + * slowpath, set ctx and wake up any waiters so they can recheck. + * + * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, + * as the fastpath and opportunistic spinning are disabled in that case. + */ +static __always_inline void +ww_mutex_set_context_fastpath(struct ww_mutex *lock, + struct ww_acquire_ctx *ctx) +{ + unsigned long flags; + struct mutex_waiter *cur; + + ww_mutex_lock_acquired(lock, ctx); + + lock->ctx = ctx; + + /* + * The lock->ctx update should be visible on all cores before + * the atomic read is done, otherwise contended waiters might be + * missed. The contended waiters will either see ww_ctx == NULL + * and keep spinning, or it will acquire wait_lock, add itself + * to waiter list and sleep. + */ + smp_mb(); /* ^^^ */ + + /* + * Check if lock is contended, if not there is nobody to wake up + */ + if (likely(atomic_read(&lock->base.count) == 0)) + return; + + /* + * Uh oh, we raced in fastpath, wake up everyone in this case, + * so they can see the new lock->ctx. + */ + spin_lock_mutex(&lock->base.wait_lock, flags); + list_for_each_entry(cur, &lock->base.wait_list, list) { + debug_mutex_wake_waiter(&lock->base, cur); + wake_up_process(cur->task); + } + spin_unlock_mutex(&lock->base.wait_lock, flags); +} + + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* * In order to avoid a stampede of mutex spinners from acquiring the mutex @@ -180,6 +266,129 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) */ return retval; } + +/* + * Atomically try to take the lock when it is available + */ +static inline bool mutex_try_to_acquire(struct mutex *lock) +{ + return !mutex_is_locked(lock) && + (atomic_cmpxchg(&lock->count, 1, 0) == 1); +} + +/* + * Optimistic spinning. + * + * We try to spin for acquisition when we find that the lock owner + * is currently running on a (different) CPU and while we don't + * need to reschedule. The rationale is that if the lock owner is + * running, it is likely to release the lock soon. + * + * Since this needs the lock owner, and this mutex implementation + * doesn't track the owner atomically in the lock field, we need to + * track it non-atomically. + * + * We can't do this for DEBUG_MUTEXES because that relies on wait_lock + * to serialize everything. + * + * The mutex spinners are queued up using MCS lock so that only one + * spinner can compete for the mutex. However, if mutex spinning isn't + * going to happen, there is no point in going through the lock/unlock + * overhead. + * + * Returns true when the lock was taken, otherwise false, indicating + * that we need to jump to the slowpath and sleep. + */ +static bool mutex_optimistic_spin(struct mutex *lock, + struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) +{ + struct task_struct *task = current; + + if (!mutex_can_spin_on_owner(lock)) + goto done; + + if (!osq_lock(&lock->osq)) + goto done; + + while (true) { + struct task_struct *owner; + + if (use_ww_ctx && ww_ctx->acquired > 0) { + struct ww_mutex *ww; + + ww = container_of(lock, struct ww_mutex, base); + /* + * If ww->ctx is set the contents are undefined, only + * by acquiring wait_lock there is a guarantee that + * they are not invalid when reading. + * + * As such, when deadlock detection needs to be + * performed the optimistic spinning cannot be done. + */ + if (ACCESS_ONCE(ww->ctx)) + break; + } + + /* + * If there's an owner, wait for it to either + * release the lock or go to sleep. + */ + owner = ACCESS_ONCE(lock->owner); + if (owner && !mutex_spin_on_owner(lock, owner)) + break; + + /* Try to acquire the mutex if it is unlocked. */ + if (mutex_try_to_acquire(lock)) { + lock_acquired(&lock->dep_map, ip); + + if (use_ww_ctx) { + struct ww_mutex *ww; + ww = container_of(lock, struct ww_mutex, base); + + ww_mutex_set_context_fastpath(ww, ww_ctx); + } + + mutex_set_owner(lock); + osq_unlock(&lock->osq); + return true; + } + + /* + * When there's no owner, we might have preempted between the + * owner acquiring the lock and setting the owner field. If + * we're an RT task that will live-lock because we won't let + * the owner complete. + */ + if (!owner && (need_resched() || rt_task(task))) + break; + + /* + * The cpu_relax() call is a compiler barrier which forces + * everything in this loop to be re-loaded. We don't need + * memory barriers as we'll eventually observe the right + * values at the cost of a few extra spins. + */ + cpu_relax_lowlatency(); + } + + osq_unlock(&lock->osq); +done: + /* + * If we fell out of the spin path because of need_resched(), + * reschedule now, before we try-lock the mutex. This avoids getting + * scheduled out right after we obtained the mutex. + */ + if (need_resched()) + schedule_preempt_disabled(); + + return false; +} +#else +static bool mutex_optimistic_spin(struct mutex *lock, + struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) +{ + return false; +} #endif __visible __used noinline @@ -277,91 +486,6 @@ __mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) return 0; } -static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, - struct ww_acquire_ctx *ww_ctx) -{ -#ifdef CONFIG_DEBUG_MUTEXES - /* - * If this WARN_ON triggers, you used ww_mutex_lock to acquire, - * but released with a normal mutex_unlock in this call. - * - * This should never happen, always use ww_mutex_unlock. - */ - DEBUG_LOCKS_WARN_ON(ww->ctx); - - /* - * Not quite done after calling ww_acquire_done() ? - */ - DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); - - if (ww_ctx->contending_lock) { - /* - * After -EDEADLK you tried to - * acquire a different ww_mutex? Bad! - */ - DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); - - /* - * You called ww_mutex_lock after receiving -EDEADLK, - * but 'forgot' to unlock everything else first? - */ - DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); - ww_ctx->contending_lock = NULL; - } - - /* - * Naughty, using a different class will lead to undefined behavior! - */ - DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); -#endif - ww_ctx->acquired++; -} - -/* - * after acquiring lock with fastpath or when we lost out in contested - * slowpath, set ctx and wake up any waiters so they can recheck. - * - * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, - * as the fastpath and opportunistic spinning are disabled in that case. - */ -static __always_inline void -ww_mutex_set_context_fastpath(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) -{ - unsigned long flags; - struct mutex_waiter *cur; - - ww_mutex_lock_acquired(lock, ctx); - - lock->ctx = ctx; - - /* - * The lock->ctx update should be visible on all cores before - * the atomic read is done, otherwise contended waiters might be - * missed. The contended waiters will either see ww_ctx == NULL - * and keep spinning, or it will acquire wait_lock, add itself - * to waiter list and sleep. - */ - smp_mb(); /* ^^^ */ - - /* - * Check if lock is contended, if not there is nobody to wake up - */ - if (likely(atomic_read(&lock->base.count) == 0)) - return; - - /* - * Uh oh, we raced in fastpath, wake up everyone in this case, - * so they can see the new lock->ctx. - */ - spin_lock_mutex(&lock->base.wait_lock, flags); - list_for_each_entry(cur, &lock->base.wait_list, list) { - debug_mutex_wake_waiter(&lock->base, cur); - wake_up_process(cur->task); - } - spin_unlock_mutex(&lock->base.wait_lock, flags); -} - /* * Lock a mutex (possibly interruptible), slowpath: */ @@ -378,104 +502,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER - /* - * Optimistic spinning. - * - * We try to spin for acquisition when we find that the lock owner - * is currently running on a (different) CPU and while we don't - * need to reschedule. The rationale is that if the lock owner is - * running, it is likely to release the lock soon. - * - * Since this needs the lock owner, and this mutex implementation - * doesn't track the owner atomically in the lock field, we need to - * track it non-atomically. - * - * We can't do this for DEBUG_MUTEXES because that relies on wait_lock - * to serialize everything. - * - * The mutex spinners are queued up using MCS lock so that only one - * spinner can compete for the mutex. However, if mutex spinning isn't - * going to happen, there is no point in going through the lock/unlock - * overhead. - */ - if (!mutex_can_spin_on_owner(lock)) - goto slowpath; - - if (!osq_lock(&lock->osq)) - goto slowpath; - - for (;;) { - struct task_struct *owner; - - if (use_ww_ctx && ww_ctx->acquired > 0) { - struct ww_mutex *ww; - - ww = container_of(lock, struct ww_mutex, base); - /* - * If ww->ctx is set the contents are undefined, only - * by acquiring wait_lock there is a guarantee that - * they are not invalid when reading. - * - * As such, when deadlock detection needs to be - * performed the optimistic spinning cannot be done. - */ - if (ACCESS_ONCE(ww->ctx)) - break; - } - - /* - * If there's an owner, wait for it to either - * release the lock or go to sleep. - */ - owner = ACCESS_ONCE(lock->owner); - if (owner && !mutex_spin_on_owner(lock, owner)) - break; - - /* Try to acquire the mutex if it is unlocked. */ - if (!mutex_is_locked(lock) && - (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { - lock_acquired(&lock->dep_map, ip); - if (use_ww_ctx) { - struct ww_mutex *ww; - ww = container_of(lock, struct ww_mutex, base); - - ww_mutex_set_context_fastpath(ww, ww_ctx); - } - - mutex_set_owner(lock); - osq_unlock(&lock->osq); - preempt_enable(); - return 0; - } - - /* - * When there's no owner, we might have preempted between the - * owner acquiring the lock and setting the owner field. If - * we're an RT task that will live-lock because we won't let - * the owner complete. - */ - if (!owner && (need_resched() || rt_task(task))) - break; - - /* - * The cpu_relax() call is a compiler barrier which forces - * everything in this loop to be re-loaded. We don't need - * memory barriers as we'll eventually observe the right - * values at the cost of a few extra spins. - */ - cpu_relax_lowlatency(); + if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) { + /* got the lock, yay! */ + preempt_enable(); + return 0; } - osq_unlock(&lock->osq); -slowpath: - /* - * If we fell out of the spin path because of need_resched(), - * reschedule now, before we try-lock the mutex. This avoids getting - * scheduled out right after we obtained the mutex. - */ - if (need_resched()) - schedule_preempt_disabled(); -#endif + spin_lock_mutex(&lock->wait_lock, flags); /* -- cgit v1.2.3 From 7608a43d8f2e02f8b532f8e11481d7ecf8b5d3f9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:54 -0700 Subject: locking/mutexes: Use MUTEX_SPIN_ON_OWNER when appropriate 4badad35 ("locking/mutex: Disable optimistic spinning on some architectures") added a ARCH_SUPPORTS_ATOMIC_RMW flag to disable the mutex optimistic feature on specific archs. Because CONFIG_MUTEX_SPIN_ON_OWNER only depended on DEBUG and SMP, it was ok to have the ->owner field conditional a bit flexible. However by adding a new variable to the matter, we can waste space with the unused field, ie: CONFIG_SMP && (!CONFIG_MUTEX_SPIN_ON_OWNER && !CONFIG_DEBUG_MUTEX). Signed-off-by: Davidlohr Bueso Acked-by: Jason Low Signed-off-by: Peter Zijlstra Cc: aswin@hp.com Cc: Davidlohr Bueso Cc: Heiko Carstens Cc: Jason Low Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Tim Chen Link: http://lkml.kernel.org/r/1406752916-3341-5-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/mutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 4115fbf83b12..5cda397607f2 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -16,7 +16,7 @@ #define mutex_remove_waiter(lock, waiter, ti) \ __list_del((waiter)->list.prev, (waiter)->list.next) -#ifdef CONFIG_SMP +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER static inline void mutex_set_owner(struct mutex *lock) { lock->owner = current; -- cgit v1.2.3 From 214e0aed639ef40987bf6159fad303171a6de31e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:55 -0700 Subject: locking/Documentation: Move locking related docs into Documentation/locking/ Specifically: Documentation/locking/lockdep-design.txt Documentation/locking/lockstat.txt Documentation/locking/mutex-design.txt Documentation/locking/rt-mutex-design.txt Documentation/locking/rt-mutex.txt Documentation/locking/spinlocks.txt Documentation/locking/ww-mutex-design.txt Signed-off-by: Davidlohr Bueso Acked-by: Randy Dunlap Signed-off-by: Peter Zijlstra Cc: jason.low2@hp.com Cc: aswin@hp.com Cc: Alexei Starovoitov Cc: Al Viro Cc: Andrew Morton Cc: Chris Mason Cc: Dan Streetman Cc: David Airlie Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Jason Low Cc: Josef Bacik Cc: Kees Cook Cc: Linus Torvalds Cc: Lubomir Rintel Cc: Masanari Iida Cc: Paul E. McKenney Cc: Randy Dunlap Cc: Tim Chen Cc: Vineet Gupta Cc: fengguang.wu@intel.com Link: http://lkml.kernel.org/r/1406752916-3341-6-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/mutex.c | 2 +- kernel/locking/rtmutex.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 0d8b6ed93874..dadbf88c22c4 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -15,7 +15,7 @@ * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale * and Sven Dietrich. * - * Also see Documentation/mutex-design.txt. + * Also see Documentation/locking/mutex-design.txt. */ #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index a0ea2a141b3b..7c98873a3077 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -8,7 +8,7 @@ * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt * Copyright (C) 2006 Esben Nielsen * - * See Documentation/rt-mutex-design.txt for details. + * See Documentation/locking/rt-mutex-design.txt for details. */ #include #include -- cgit v1.2.3 From f0bab73cb539fb803c4d419951e8d28aa4964f8f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 6 Aug 2014 13:22:01 -0400 Subject: locking/lockdep: Restrict the use of recursive read_lock() with qrwlock Unlike the original unfair rwlock implementation, queued rwlock will grant lock according to the chronological sequence of the lock requests except when the lock requester is in the interrupt context. Consequently, recursive read_lock calls will now hang the process if there is a write_lock call somewhere in between the read_lock calls. This patch updates the lockdep implementation to look for recursive read_lock calls. A new read state (3) is used to mark those read_lock call that cannot be recursively called except in the interrupt context. The new read state does exhaust the 2 bits available in held_lock:read bit field. The addition of any new read state in the future may require a redesign of how all those bits are squeezed together in the held_lock structure. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra Cc: Maarten Lankhorst Cc: Rik van Riel Cc: Scott J Norton Cc: Fengguang Wu Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1407345722-61615-2-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 88d0d4420ad2..420ba685c4e5 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3597,6 +3597,12 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, raw_local_irq_save(flags); check_flags(flags); + /* + * An interrupt recursive read in interrupt context can be considered + * to be the same as a recursive read from checking perspective. + */ + if ((read == 3) && in_interrupt()) + read = 2; current->lockdep_recursion = 1; trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, -- cgit v1.2.3 From 315427691c7a064718b5ad7d378d7f1c1898a626 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Wed, 3 Sep 2014 03:17:24 -0700 Subject: locking/semaphore: Resolve some shadow warnings Resolve some shadow warnings resulting from using the name jiffies, which is a well-known global. This is not a problem of course, but it could be a trap for someone copying and pasting code, and it just makes W=2 a little cleaner. Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/1409739444-13635-1-git-send-email-jeffrey.t.kirsher@intel.com Signed-off-by: Ingo Molnar --- kernel/locking/semaphore.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 6815171a4fff..b8120abe594b 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -36,7 +36,7 @@ static noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); static noinline int __down_killable(struct semaphore *sem); -static noinline int __down_timeout(struct semaphore *sem, long jiffies); +static noinline int __down_timeout(struct semaphore *sem, long timeout); static noinline void __up(struct semaphore *sem); /** @@ -145,14 +145,14 @@ EXPORT_SYMBOL(down_trylock); /** * down_timeout - acquire the semaphore within a specified time * @sem: the semaphore to be acquired - * @jiffies: how long to wait before failing + * @timeout: how long to wait before failing * * Attempts to acquire the semaphore. If no more tasks are allowed to * acquire the semaphore, calling this function will put the task to sleep. * If the semaphore is not released within the specified number of jiffies, * this function returns -ETIME. It returns 0 if the semaphore was acquired. */ -int down_timeout(struct semaphore *sem, long jiffies) +int down_timeout(struct semaphore *sem, long timeout) { unsigned long flags; int result = 0; @@ -161,7 +161,7 @@ int down_timeout(struct semaphore *sem, long jiffies) if (likely(sem->count > 0)) sem->count--; else - result = __down_timeout(sem, jiffies); + result = __down_timeout(sem, timeout); raw_spin_unlock_irqrestore(&sem->lock, flags); return result; @@ -248,9 +248,9 @@ static noinline int __sched __down_killable(struct semaphore *sem) return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT); } -static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) +static noinline int __sched __down_timeout(struct semaphore *sem, long timeout) { - return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); + return __down_common(sem, TASK_UNINTERRUPTIBLE, timeout); } static noinline void __sched __up(struct semaphore *sem) -- cgit v1.2.3 From db0e716a1512179e8374a74c1f3184e9ce15d138 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 11 Sep 2014 22:34:25 -0700 Subject: locking/rwsem: Move EXPORT_SYMBOL() lines to follow function definition rw-semaphore is the only type of lock doing this ugliness of exporting at the end of the file. Signed-off-by: Davidlohr Bueso Cc: dave@stgolabs.net Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/1410500066-5909-1-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index d6203faf2eb1..12166ec9b7e7 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -246,6 +246,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) return sem; } +EXPORT_SYMBOL(rwsem_down_read_failed); static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) { @@ -465,6 +466,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) return sem; } +EXPORT_SYMBOL(rwsem_down_write_failed); /* * handle waking up a waiter on the semaphore @@ -485,6 +487,7 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) return sem; } +EXPORT_SYMBOL(rwsem_wake); /* * downgrade a write lock into a read lock @@ -506,8 +509,4 @@ struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) return sem; } - -EXPORT_SYMBOL(rwsem_down_read_failed); -EXPORT_SYMBOL(rwsem_down_write_failed); -EXPORT_SYMBOL(rwsem_wake); EXPORT_SYMBOL(rwsem_downgrade_wake); -- cgit v1.2.3 From 23a8e5c2d2a481fcf382490369c27b405a650212 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 11 Sep 2014 20:40:16 -0700 Subject: locktorture: Rename locktorture_runnable parameter ... to just 'torture_runnable'. It follows other variable naming and is shorter. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 0955b885d0dc..8c770b2c6e2a 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -87,9 +87,9 @@ static struct lock_writer_stress_stats *lwsa; #else #define LOCKTORTURE_RUNNABLE_INIT 0 #endif -int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; -module_param(locktorture_runnable, int, 0444); -MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at module init"); +int torture_runnable = LOCKTORTURE_RUNNABLE_INIT; +module_param(torture_runnable, int, 0444); +MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init"); /* Forward reference. */ static void lock_torture_cleanup(void); @@ -355,7 +355,7 @@ static int __init lock_torture_init(void) &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, }; - if (!torture_init_begin(torture_type, verbose, &locktorture_runnable)) + if (!torture_init_begin(torture_type, verbose, &torture_runnable)) return -EBUSY; /* Process args and tell the world that the torturer is on the job. */ -- cgit v1.2.3 From 42ddc75ddd478edac6ad9dc8c63abb4441541af2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 11 Sep 2014 20:40:18 -0700 Subject: locktorture: Support mutexes Add a "mutex_lock" torture test. The main difference with the already existing spinlock tests is that the latency of the critical region is much larger. We randomly delay for (arbitrarily) either 500 ms or, otherwise, 25 ms. While this can considerably reduce the amount of writes compared to non blocking locks, if run long enough it can have the same torturous effect. Furthermore it is more representative of mutex hold times and can stress better things like thrashing. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 8c770b2c6e2a..414ba45d580f 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -66,7 +67,7 @@ torture_param(bool, verbose, true, static char *torture_type = "spin_lock"; module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, - "Type of lock to torture (spin_lock, spin_lock_irq, ...)"); + "Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)"); static atomic_t n_lock_torture_errors; @@ -206,6 +207,42 @@ static struct lock_torture_ops spin_lock_irq_ops = { .name = "spin_lock_irq" }; +static DEFINE_MUTEX(torture_mutex); + +static int torture_mutex_lock(void) __acquires(torture_mutex) +{ + mutex_lock(&torture_mutex); + return 0; +} + +static void torture_mutex_delay(struct torture_random_state *trsp) +{ + const unsigned long longdelay_ms = 100; + + /* We want a long delay occasionally to force massive contention. */ + if (!(torture_random(trsp) % + (nrealwriters_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms * 5); + else + mdelay(longdelay_ms / 5); +#ifdef CONFIG_PREEMPT + if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) + preempt_schedule(); /* Allow test to be preempted. */ +#endif +} + +static void torture_mutex_unlock(void) __releases(torture_mutex) +{ + mutex_unlock(&torture_mutex); +} + +static struct lock_torture_ops mutex_lock_ops = { + .writelock = torture_mutex_lock, + .write_delay = torture_mutex_delay, + .writeunlock = torture_mutex_unlock, + .name = "mutex_lock" +}; + /* * Lock torture writer kthread. Repeatedly acquires and releases * the lock, checking for duplicate acquisitions. @@ -352,7 +389,7 @@ static int __init lock_torture_init(void) int i; int firsterr = 0; static struct lock_torture_ops *torture_ops[] = { - &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, + &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, &mutex_lock_ops, }; if (!torture_init_begin(torture_type, verbose, &torture_runnable)) -- cgit v1.2.3 From f095bfc0ea04829d6962edaf06a5c56e0c251f5b Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 11 Sep 2014 20:40:19 -0700 Subject: locktorture: Teach about lock debugging Regular locks are very different than locks with debugging. For instance for mutexes, debugging forces to only take the slowpaths. As such, the locktorture module should take this into account when printing related information -- specifically when printing user passed parameters, it seems the right place for such info. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 414ba45d580f..a6049fa2287e 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -64,6 +64,7 @@ torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable"); torture_param(bool, verbose, true, "Enable verbose debugging printk()s"); +static bool debug_lock = false; static char *torture_type = "spin_lock"; module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, @@ -349,8 +350,9 @@ lock_torture_print_module_parms(struct lock_torture_ops *cur_ops, const char *tag) { pr_alert("%s" TORTURE_FLAG - "--- %s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", - torture_type, tag, nrealwriters_stress, stat_interval, verbose, + "--- %s%s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", + torture_type, tag, debug_lock ? " [debug]": "", + nrealwriters_stress, stat_interval, verbose, shuffle_interval, stutter, shutdown_secs, onoff_interval, onoff_holdoff); } @@ -418,6 +420,15 @@ static int __init lock_torture_init(void) nrealwriters_stress = nwriters_stress; else nrealwriters_stress = 2 * num_online_cpus(); + +#ifdef CONFIG_DEBUG_MUTEXES + if (strncmp(torture_type, "mutex", 5) == 0) + debug_lock = true; +#endif +#ifdef CONFIG_DEBUG_SPINLOCK + if (strncmp(torture_type, "spin", 4) == 0) + debug_lock = true; +#endif lock_torture_print_module_parms(cur_ops, "Start of test"); /* Initialize the statistics so that each run gets its own numbers. */ -- cgit v1.2.3 From 1e6757a92189278c484799ea98fc69bdc528940e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 11 Sep 2014 20:40:20 -0700 Subject: locktorture: Make statistics generic The statistics structure can serve well for both reader and writer locks, thus simply rename some fields that mention 'write' and leave the declaration of lwsa. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index a6049fa2287e..de703a769c1d 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -78,11 +78,11 @@ static struct task_struct **writer_tasks; static int nrealwriters_stress; static bool lock_is_write_held; -struct lock_writer_stress_stats { - long n_write_lock_fail; - long n_write_lock_acquired; +struct lock_stress_stats { + long n_lock_fail; + long n_lock_acquired; }; -static struct lock_writer_stress_stats *lwsa; +static struct lock_stress_stats *lwsa; /* writer statistics */ #if defined(MODULE) #define LOCKTORTURE_RUNNABLE_INIT 1 @@ -250,7 +250,7 @@ static struct lock_torture_ops mutex_lock_ops = { */ static int lock_torture_writer(void *arg) { - struct lock_writer_stress_stats *lwsp = arg; + struct lock_stress_stats *lwsp = arg; static DEFINE_TORTURE_RANDOM(rand); VERBOSE_TOROUT_STRING("lock_torture_writer task started"); @@ -261,9 +261,9 @@ static int lock_torture_writer(void *arg) schedule_timeout_uninterruptible(1); cur_ops->writelock(); if (WARN_ON_ONCE(lock_is_write_held)) - lwsp->n_write_lock_fail++; + lwsp->n_lock_fail++; lock_is_write_held = 1; - lwsp->n_write_lock_acquired++; + lwsp->n_lock_acquired++; cur_ops->write_delay(&rand); lock_is_write_held = 0; cur_ops->writeunlock(); @@ -281,17 +281,17 @@ static void lock_torture_printk(char *page) bool fail = 0; int i; long max = 0; - long min = lwsa[0].n_write_lock_acquired; + long min = lwsa[0].n_lock_acquired; long long sum = 0; for (i = 0; i < nrealwriters_stress; i++) { - if (lwsa[i].n_write_lock_fail) + if (lwsa[i].n_lock_fail) fail = true; - sum += lwsa[i].n_write_lock_acquired; - if (max < lwsa[i].n_write_lock_fail) - max = lwsa[i].n_write_lock_fail; - if (min > lwsa[i].n_write_lock_fail) - min = lwsa[i].n_write_lock_fail; + sum += lwsa[i].n_lock_acquired; + if (max < lwsa[i].n_lock_fail) + max = lwsa[i].n_lock_fail; + if (min > lwsa[i].n_lock_fail) + min = lwsa[i].n_lock_fail; } page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); page += sprintf(page, @@ -441,8 +441,8 @@ static int __init lock_torture_init(void) goto unwind; } for (i = 0; i < nrealwriters_stress; i++) { - lwsa[i].n_write_lock_fail = 0; - lwsa[i].n_write_lock_acquired = 0; + lwsa[i].n_lock_fail = 0; + lwsa[i].n_lock_acquired = 0; } /* Start up the kthreads. */ -- cgit v1.2.3 From d36a7a0d5e8b5bff1671723d733eb61621b0cee4 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 11 Sep 2014 20:40:21 -0700 Subject: torture: Address race in module cleanup When performing module cleanups by calling torture_cleanup() the 'torture_type' string in nullified However, callers are not necessarily done, and might still need to reference the variable. This impacts both rcutorture and locktorture, causing printing things like: [ 94.226618] (null)-torture: Stopping lock_torture_writer task [ 94.226624] (null)-torture: Stopping lock_torture_stats task Thus delay this operation until the very end of the cleanup process. The consequence (which shouldn't matter for this kid of program) is, of course, that we delay the window between rmmod and modprobing, for instance in module_torture_begin(). Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index de703a769c1d..988267cc92c1 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -361,7 +361,7 @@ static void lock_torture_cleanup(void) { int i; - if (torture_cleanup()) + if (torture_cleanup_begin()) return; if (writer_tasks) { @@ -384,6 +384,7 @@ static void lock_torture_cleanup(void) else lock_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); + torture_cleanup_end(); } static int __init lock_torture_init(void) -- cgit v1.2.3 From 4f6332c1dce9c64ef6bf93842067250dd850e482 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 11 Sep 2014 21:40:41 -0700 Subject: locktorture: Add infrastructure for torturing read locks Most of it is based on what we already have for writers. This allows readers to be very independent (and thus configurable), enabling future module parameters to control things such as rw distribution. Furthermore, readers have their own delaying function, allowing us to test different rw critical region latencies, and stress locking internals. Similarly, statistics, for now will only serve for the number of lock acquisitions -- as opposed to writers, readers have no failure detection. In addition, introduce a new nreaders_stress module parameter. The default number of readers will be the same number of writers threads. Writer threads are interleaved with readers. Documentation is updated, respectively. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 176 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 156 insertions(+), 20 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 988267cc92c1..c1073d79e440 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -52,6 +52,8 @@ MODULE_AUTHOR("Paul E. McKenney "); torture_param(int, nwriters_stress, -1, "Number of write-locking stress-test threads"); +torture_param(int, nreaders_stress, -1, + "Number of read-locking stress-test threads"); torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable"); @@ -74,15 +76,19 @@ static atomic_t n_lock_torture_errors; static struct task_struct *stats_task; static struct task_struct **writer_tasks; +static struct task_struct **reader_tasks; static int nrealwriters_stress; static bool lock_is_write_held; +static int nrealreaders_stress; +static bool lock_is_read_held; struct lock_stress_stats { long n_lock_fail; long n_lock_acquired; }; static struct lock_stress_stats *lwsa; /* writer statistics */ +static struct lock_stress_stats *lrsa; /* reader statistics */ #if defined(MODULE) #define LOCKTORTURE_RUNNABLE_INIT 1 @@ -104,6 +110,9 @@ struct lock_torture_ops { int (*writelock)(void); void (*write_delay)(struct torture_random_state *trsp); void (*writeunlock)(void); + int (*readlock)(void); + void (*read_delay)(struct torture_random_state *trsp); + void (*readunlock)(void); unsigned long flags; const char *name; }; @@ -142,6 +151,9 @@ static struct lock_torture_ops lock_busted_ops = { .writelock = torture_lock_busted_write_lock, .write_delay = torture_lock_busted_write_delay, .writeunlock = torture_lock_busted_write_unlock, + .readlock = NULL, + .read_delay = NULL, + .readunlock = NULL, .name = "lock_busted" }; @@ -182,6 +194,9 @@ static struct lock_torture_ops spin_lock_ops = { .writelock = torture_spin_lock_write_lock, .write_delay = torture_spin_lock_write_delay, .writeunlock = torture_spin_lock_write_unlock, + .readlock = NULL, + .read_delay = NULL, + .readunlock = NULL, .name = "spin_lock" }; @@ -205,6 +220,9 @@ static struct lock_torture_ops spin_lock_irq_ops = { .writelock = torture_spin_lock_write_lock_irq, .write_delay = torture_spin_lock_write_delay, .writeunlock = torture_lock_spin_write_unlock_irq, + .readlock = NULL, + .read_delay = NULL, + .readunlock = NULL, .name = "spin_lock_irq" }; @@ -241,6 +259,9 @@ static struct lock_torture_ops mutex_lock_ops = { .writelock = torture_mutex_lock, .write_delay = torture_mutex_delay, .writeunlock = torture_mutex_unlock, + .readlock = NULL, + .read_delay = NULL, + .readunlock = NULL, .name = "mutex_lock" }; @@ -273,29 +294,58 @@ static int lock_torture_writer(void *arg) return 0; } +/* + * Lock torture reader kthread. Repeatedly acquires and releases + * the reader lock. + */ +static int lock_torture_reader(void *arg) +{ + struct lock_stress_stats *lrsp = arg; + static DEFINE_TORTURE_RANDOM(rand); + + VERBOSE_TOROUT_STRING("lock_torture_reader task started"); + set_user_nice(current, MAX_NICE); + + do { + if ((torture_random(&rand) & 0xfffff) == 0) + schedule_timeout_uninterruptible(1); + cur_ops->readlock(); + lock_is_read_held = 1; + lrsp->n_lock_acquired++; + cur_ops->read_delay(&rand); + lock_is_read_held = 0; + cur_ops->readunlock(); + stutter_wait("lock_torture_reader"); + } while (!torture_must_stop()); + torture_kthread_stopping("lock_torture_reader"); + return 0; +} + /* * Create an lock-torture-statistics message in the specified buffer. */ -static void lock_torture_printk(char *page) +static void __torture_print_stats(char *page, + struct lock_stress_stats *statp, bool write) { bool fail = 0; - int i; + int i, n_stress; long max = 0; - long min = lwsa[0].n_lock_acquired; + long min = statp[0].n_lock_acquired; long long sum = 0; - for (i = 0; i < nrealwriters_stress; i++) { - if (lwsa[i].n_lock_fail) + n_stress = write ? nrealwriters_stress : nrealreaders_stress; + for (i = 0; i < n_stress; i++) { + if (statp[i].n_lock_fail) fail = true; - sum += lwsa[i].n_lock_acquired; - if (max < lwsa[i].n_lock_fail) - max = lwsa[i].n_lock_fail; - if (min > lwsa[i].n_lock_fail) - min = lwsa[i].n_lock_fail; + sum += statp[i].n_lock_acquired; + if (max < statp[i].n_lock_fail) + max = statp[i].n_lock_fail; + if (min > statp[i].n_lock_fail) + min = statp[i].n_lock_fail; } - page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); page += sprintf(page, - "Writes: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", + "%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", + write ? "Writes" : "Reads ", sum, max, min, max / 2 > min ? "???" : "", fail, fail ? "!!!" : ""); if (fail) @@ -315,15 +365,32 @@ static void lock_torture_stats_print(void) int size = nrealwriters_stress * 200 + 8192; char *buf; + if (cur_ops->readlock) + size += nrealreaders_stress * 200 + 8192; + buf = kmalloc(size, GFP_KERNEL); if (!buf) { pr_err("lock_torture_stats_print: Out of memory, need: %d", size); return; } - lock_torture_printk(buf); + + __torture_print_stats(buf, lwsa, true); pr_alert("%s", buf); kfree(buf); + + if (cur_ops->readlock) { + buf = kmalloc(size, GFP_KERNEL); + if (!buf) { + pr_err("lock_torture_stats_print: Out of memory, need: %d", + size); + return; + } + + __torture_print_stats(buf, lrsa, false); + pr_alert("%s", buf); + kfree(buf); + } } /* @@ -350,10 +417,10 @@ lock_torture_print_module_parms(struct lock_torture_ops *cur_ops, const char *tag) { pr_alert("%s" TORTURE_FLAG - "--- %s%s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", + "--- %s%s: nwriters_stress=%d nreaders_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", torture_type, tag, debug_lock ? " [debug]": "", - nrealwriters_stress, stat_interval, verbose, - shuffle_interval, stutter, shutdown_secs, + nrealwriters_stress, nrealreaders_stress, stat_interval, + verbose, shuffle_interval, stutter, shutdown_secs, onoff_interval, onoff_holdoff); } @@ -372,6 +439,14 @@ static void lock_torture_cleanup(void) writer_tasks = NULL; } + if (reader_tasks) { + for (i = 0; i < nrealreaders_stress; i++) + torture_stop_kthread(lock_torture_reader, + reader_tasks[i]); + kfree(reader_tasks); + reader_tasks = NULL; + } + torture_stop_kthread(lock_torture_stats, stats_task); lock_torture_stats_print(); /* -After- the stats thread is stopped! */ @@ -389,7 +464,7 @@ static void lock_torture_cleanup(void) static int __init lock_torture_init(void) { - int i; + int i, j; int firsterr = 0; static struct lock_torture_ops *torture_ops[] = { &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, &mutex_lock_ops, @@ -430,7 +505,6 @@ static int __init lock_torture_init(void) if (strncmp(torture_type, "spin", 4) == 0) debug_lock = true; #endif - lock_torture_print_module_parms(cur_ops, "Start of test"); /* Initialize the statistics so that each run gets its own numbers. */ @@ -446,8 +520,37 @@ static int __init lock_torture_init(void) lwsa[i].n_lock_acquired = 0; } - /* Start up the kthreads. */ + if (cur_ops->readlock) { + if (nreaders_stress >= 0) + nrealreaders_stress = nreaders_stress; + else { + /* + * By default distribute evenly the number of + * readers and writers. We still run the same number + * of threads as the writer-only locks default. + */ + if (nwriters_stress < 0) /* user doesn't care */ + nrealwriters_stress = num_online_cpus(); + nrealreaders_stress = nrealwriters_stress; + } + + lock_is_read_held = 0; + lrsa = kmalloc(sizeof(*lrsa) * nrealreaders_stress, GFP_KERNEL); + if (lrsa == NULL) { + VERBOSE_TOROUT_STRING("lrsa: Out of memory"); + firsterr = -ENOMEM; + kfree(lwsa); + goto unwind; + } + for (i = 0; i < nrealreaders_stress; i++) { + lrsa[i].n_lock_fail = 0; + lrsa[i].n_lock_acquired = 0; + } + } + lock_torture_print_module_parms(cur_ops, "Start of test"); + + /* Prepare torture context. */ if (onoff_interval > 0) { firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval * HZ); @@ -478,11 +581,44 @@ static int __init lock_torture_init(void) firsterr = -ENOMEM; goto unwind; } - for (i = 0; i < nrealwriters_stress; i++) { + + if (cur_ops->readlock) { + reader_tasks = kzalloc(nrealreaders_stress * sizeof(reader_tasks[0]), + GFP_KERNEL); + if (reader_tasks == NULL) { + VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory"); + firsterr = -ENOMEM; + goto unwind; + } + } + + /* + * Create the kthreads and start torturing (oh, those poor little locks). + * + * TODO: Note that we interleave writers with readers, giving writers a + * slight advantage, by creating its kthread first. This can be modified + * for very specific needs, or even let the user choose the policy, if + * ever wanted. + */ + for (i = 0, j = 0; i < nrealwriters_stress || + j < nrealreaders_stress; i++, j++) { + if (i >= nrealwriters_stress) + goto create_reader; + + /* Create writer. */ firsterr = torture_create_kthread(lock_torture_writer, &lwsa[i], writer_tasks[i]); if (firsterr) goto unwind; + + create_reader: + if (cur_ops->readlock == NULL || (j >= nrealreaders_stress)) + continue; + /* Create reader. */ + firsterr = torture_create_kthread(lock_torture_reader, &lrsa[j], + reader_tasks[j]); + if (firsterr) + goto unwind; } if (stat_interval > 0) { firsterr = torture_create_kthread(lock_torture_stats, NULL, -- cgit v1.2.3 From 4a3b427f0b27c7e15edfa607524ff012a155337a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 11 Sep 2014 21:41:30 -0700 Subject: locktorture: Support rwsems We can easily do so with our new reader lock support. Just an arbitrary design default: readers have higher (5x) critical region latencies than writers: 50 ms and 10 ms, respectively. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 68 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index c1073d79e440..8480118c0ca8 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -265,6 +265,71 @@ static struct lock_torture_ops mutex_lock_ops = { .name = "mutex_lock" }; +static DECLARE_RWSEM(torture_rwsem); +static int torture_rwsem_down_write(void) __acquires(torture_rwsem) +{ + down_write(&torture_rwsem); + return 0; +} + +static void torture_rwsem_write_delay(struct torture_random_state *trsp) +{ + const unsigned long longdelay_ms = 100; + + /* We want a long delay occasionally to force massive contention. */ + if (!(torture_random(trsp) % + (nrealwriters_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms * 10); + else + mdelay(longdelay_ms / 10); +#ifdef CONFIG_PREEMPT + if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) + preempt_schedule(); /* Allow test to be preempted. */ +#endif +} + +static void torture_rwsem_up_write(void) __releases(torture_rwsem) +{ + up_write(&torture_rwsem); +} + +static int torture_rwsem_down_read(void) __acquires(torture_rwsem) +{ + down_read(&torture_rwsem); + return 0; +} + +static void torture_rwsem_read_delay(struct torture_random_state *trsp) +{ + const unsigned long longdelay_ms = 100; + + /* We want a long delay occasionally to force massive contention. */ + if (!(torture_random(trsp) % + (nrealwriters_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms * 2); + else + mdelay(longdelay_ms / 2); +#ifdef CONFIG_PREEMPT + if (!(torture_random(trsp) % (nrealreaders_stress * 20000))) + preempt_schedule(); /* Allow test to be preempted. */ +#endif +} + +static void torture_rwsem_up_read(void) __releases(torture_rwsem) +{ + up_read(&torture_rwsem); +} + +static struct lock_torture_ops rwsem_lock_ops = { + .writelock = torture_rwsem_down_write, + .write_delay = torture_rwsem_write_delay, + .writeunlock = torture_rwsem_up_write, + .readlock = torture_rwsem_down_read, + .read_delay = torture_rwsem_read_delay, + .readunlock = torture_rwsem_up_read, + .name = "rwsem_lock" +}; + /* * Lock torture writer kthread. Repeatedly acquires and releases * the lock, checking for duplicate acquisitions. @@ -467,7 +532,8 @@ static int __init lock_torture_init(void) int i, j; int firsterr = 0; static struct lock_torture_ops *torture_ops[] = { - &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, &mutex_lock_ops, + &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, + &mutex_lock_ops, &rwsem_lock_ops, }; if (!torture_init_begin(torture_type, verbose, &torture_runnable)) -- cgit v1.2.3 From 630952c22b04ada7e88ad93b87ad893cd818cc6b Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 11 Sep 2014 21:42:25 -0700 Subject: locktorture: Introduce torture context The amount of global variables is getting pretty ugly. Group variables related to the execution (ie: not parameters) in a new context structure. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 161 ++++++++++++++++++++++--------------------- 1 file changed, 82 insertions(+), 79 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 8480118c0ca8..540d5dfe1112 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -66,29 +66,22 @@ torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable"); torture_param(bool, verbose, true, "Enable verbose debugging printk()s"); -static bool debug_lock = false; static char *torture_type = "spin_lock"; module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)"); -static atomic_t n_lock_torture_errors; - static struct task_struct *stats_task; static struct task_struct **writer_tasks; static struct task_struct **reader_tasks; -static int nrealwriters_stress; static bool lock_is_write_held; -static int nrealreaders_stress; static bool lock_is_read_held; struct lock_stress_stats { long n_lock_fail; long n_lock_acquired; }; -static struct lock_stress_stats *lwsa; /* writer statistics */ -static struct lock_stress_stats *lrsa; /* reader statistics */ #if defined(MODULE) #define LOCKTORTURE_RUNNABLE_INIT 1 @@ -117,8 +110,18 @@ struct lock_torture_ops { const char *name; }; -static struct lock_torture_ops *cur_ops; - +struct lock_torture_cxt { + int nrealwriters_stress; + int nrealreaders_stress; + bool debug_lock; + atomic_t n_lock_torture_errors; + struct lock_torture_ops *cur_ops; + struct lock_stress_stats *lwsa; /* writer statistics */ + struct lock_stress_stats *lrsa; /* reader statistics */ +}; +static struct lock_torture_cxt cxt = { 0, 0, false, + ATOMIC_INIT(0), + NULL, NULL}; /* * Definitions for lock torture testing. */ @@ -134,10 +137,10 @@ static void torture_lock_busted_write_delay(struct torture_random_state *trsp) /* We want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (nrealwriters_stress * 2000 * longdelay_us))) + (cxt.nrealwriters_stress * 2000 * longdelay_us))) mdelay(longdelay_us); #ifdef CONFIG_PREEMPT - if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) + if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) preempt_schedule(); /* Allow test to be preempted. */ #endif } @@ -174,13 +177,13 @@ static void torture_spin_lock_write_delay(struct torture_random_state *trsp) * we want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (nrealwriters_stress * 2000 * longdelay_us))) + (cxt.nrealwriters_stress * 2000 * longdelay_us))) mdelay(longdelay_us); if (!(torture_random(trsp) % - (nrealwriters_stress * 2 * shortdelay_us))) + (cxt.nrealwriters_stress * 2 * shortdelay_us))) udelay(shortdelay_us); #ifdef CONFIG_PREEMPT - if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) + if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) preempt_schedule(); /* Allow test to be preempted. */ #endif } @@ -206,14 +209,14 @@ __acquires(torture_spinlock_irq) unsigned long flags; spin_lock_irqsave(&torture_spinlock, flags); - cur_ops->flags = flags; + cxt.cur_ops->flags = flags; return 0; } static void torture_lock_spin_write_unlock_irq(void) __releases(torture_spinlock) { - spin_unlock_irqrestore(&torture_spinlock, cur_ops->flags); + spin_unlock_irqrestore(&torture_spinlock, cxt.cur_ops->flags); } static struct lock_torture_ops spin_lock_irq_ops = { @@ -240,12 +243,12 @@ static void torture_mutex_delay(struct torture_random_state *trsp) /* We want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (nrealwriters_stress * 2000 * longdelay_ms))) + (cxt.nrealwriters_stress * 2000 * longdelay_ms))) mdelay(longdelay_ms * 5); else mdelay(longdelay_ms / 5); #ifdef CONFIG_PREEMPT - if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) + if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) preempt_schedule(); /* Allow test to be preempted. */ #endif } @@ -278,12 +281,12 @@ static void torture_rwsem_write_delay(struct torture_random_state *trsp) /* We want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (nrealwriters_stress * 2000 * longdelay_ms))) + (cxt.nrealwriters_stress * 2000 * longdelay_ms))) mdelay(longdelay_ms * 10); else mdelay(longdelay_ms / 10); #ifdef CONFIG_PREEMPT - if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) + if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) preempt_schedule(); /* Allow test to be preempted. */ #endif } @@ -305,12 +308,12 @@ static void torture_rwsem_read_delay(struct torture_random_state *trsp) /* We want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (nrealwriters_stress * 2000 * longdelay_ms))) + (cxt.nrealwriters_stress * 2000 * longdelay_ms))) mdelay(longdelay_ms * 2); else mdelay(longdelay_ms / 2); #ifdef CONFIG_PREEMPT - if (!(torture_random(trsp) % (nrealreaders_stress * 20000))) + if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000))) preempt_schedule(); /* Allow test to be preempted. */ #endif } @@ -345,14 +348,14 @@ static int lock_torture_writer(void *arg) do { if ((torture_random(&rand) & 0xfffff) == 0) schedule_timeout_uninterruptible(1); - cur_ops->writelock(); + cxt.cur_ops->writelock(); if (WARN_ON_ONCE(lock_is_write_held)) lwsp->n_lock_fail++; lock_is_write_held = 1; lwsp->n_lock_acquired++; - cur_ops->write_delay(&rand); + cxt.cur_ops->write_delay(&rand); lock_is_write_held = 0; - cur_ops->writeunlock(); + cxt.cur_ops->writeunlock(); stutter_wait("lock_torture_writer"); } while (!torture_must_stop()); torture_kthread_stopping("lock_torture_writer"); @@ -374,12 +377,12 @@ static int lock_torture_reader(void *arg) do { if ((torture_random(&rand) & 0xfffff) == 0) schedule_timeout_uninterruptible(1); - cur_ops->readlock(); + cxt.cur_ops->readlock(); lock_is_read_held = 1; lrsp->n_lock_acquired++; - cur_ops->read_delay(&rand); + cxt.cur_ops->read_delay(&rand); lock_is_read_held = 0; - cur_ops->readunlock(); + cxt.cur_ops->readunlock(); stutter_wait("lock_torture_reader"); } while (!torture_must_stop()); torture_kthread_stopping("lock_torture_reader"); @@ -398,7 +401,7 @@ static void __torture_print_stats(char *page, long min = statp[0].n_lock_acquired; long long sum = 0; - n_stress = write ? nrealwriters_stress : nrealreaders_stress; + n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress; for (i = 0; i < n_stress; i++) { if (statp[i].n_lock_fail) fail = true; @@ -414,7 +417,7 @@ static void __torture_print_stats(char *page, sum, max, min, max / 2 > min ? "???" : "", fail, fail ? "!!!" : ""); if (fail) - atomic_inc(&n_lock_torture_errors); + atomic_inc(&cxt.n_lock_torture_errors); } /* @@ -427,11 +430,11 @@ static void __torture_print_stats(char *page, */ static void lock_torture_stats_print(void) { - int size = nrealwriters_stress * 200 + 8192; + int size = cxt.nrealwriters_stress * 200 + 8192; char *buf; - if (cur_ops->readlock) - size += nrealreaders_stress * 200 + 8192; + if (cxt.cur_ops->readlock) + size += cxt.nrealreaders_stress * 200 + 8192; buf = kmalloc(size, GFP_KERNEL); if (!buf) { @@ -440,11 +443,11 @@ static void lock_torture_stats_print(void) return; } - __torture_print_stats(buf, lwsa, true); + __torture_print_stats(buf, cxt.lwsa, true); pr_alert("%s", buf); kfree(buf); - if (cur_ops->readlock) { + if (cxt.cur_ops->readlock) { buf = kmalloc(size, GFP_KERNEL); if (!buf) { pr_err("lock_torture_stats_print: Out of memory, need: %d", @@ -452,7 +455,7 @@ static void lock_torture_stats_print(void) return; } - __torture_print_stats(buf, lrsa, false); + __torture_print_stats(buf, cxt.lrsa, false); pr_alert("%s", buf); kfree(buf); } @@ -483,8 +486,8 @@ lock_torture_print_module_parms(struct lock_torture_ops *cur_ops, { pr_alert("%s" TORTURE_FLAG "--- %s%s: nwriters_stress=%d nreaders_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", - torture_type, tag, debug_lock ? " [debug]": "", - nrealwriters_stress, nrealreaders_stress, stat_interval, + torture_type, tag, cxt.debug_lock ? " [debug]": "", + cxt.nrealwriters_stress, cxt.nrealreaders_stress, stat_interval, verbose, shuffle_interval, stutter, shutdown_secs, onoff_interval, onoff_holdoff); } @@ -497,7 +500,7 @@ static void lock_torture_cleanup(void) return; if (writer_tasks) { - for (i = 0; i < nrealwriters_stress; i++) + for (i = 0; i < cxt.nrealwriters_stress; i++) torture_stop_kthread(lock_torture_writer, writer_tasks[i]); kfree(writer_tasks); @@ -505,7 +508,7 @@ static void lock_torture_cleanup(void) } if (reader_tasks) { - for (i = 0; i < nrealreaders_stress; i++) + for (i = 0; i < cxt.nrealreaders_stress; i++) torture_stop_kthread(lock_torture_reader, reader_tasks[i]); kfree(reader_tasks); @@ -515,14 +518,14 @@ static void lock_torture_cleanup(void) torture_stop_kthread(lock_torture_stats, stats_task); lock_torture_stats_print(); /* -After- the stats thread is stopped! */ - if (atomic_read(&n_lock_torture_errors)) - lock_torture_print_module_parms(cur_ops, + if (atomic_read(&cxt.n_lock_torture_errors)) + lock_torture_print_module_parms(cxt.cur_ops, "End of test: FAILURE"); else if (torture_onoff_failures()) - lock_torture_print_module_parms(cur_ops, + lock_torture_print_module_parms(cxt.cur_ops, "End of test: LOCK_HOTPLUG"); else - lock_torture_print_module_parms(cur_ops, + lock_torture_print_module_parms(cxt.cur_ops, "End of test: SUCCESS"); torture_cleanup_end(); } @@ -541,8 +544,8 @@ static int __init lock_torture_init(void) /* Process args and tell the world that the torturer is on the job. */ for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { - cur_ops = torture_ops[i]; - if (strcmp(torture_type, cur_ops->name) == 0) + cxt.cur_ops = torture_ops[i]; + if (strcmp(torture_type, cxt.cur_ops->name) == 0) break; } if (i == ARRAY_SIZE(torture_ops)) { @@ -555,40 +558,40 @@ static int __init lock_torture_init(void) torture_init_end(); return -EINVAL; } - if (cur_ops->init) - cur_ops->init(); /* no "goto unwind" prior to this point!!! */ + if (cxt.cur_ops->init) + cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */ if (nwriters_stress >= 0) - nrealwriters_stress = nwriters_stress; + cxt.nrealwriters_stress = nwriters_stress; else - nrealwriters_stress = 2 * num_online_cpus(); + cxt.nrealwriters_stress = 2 * num_online_cpus(); #ifdef CONFIG_DEBUG_MUTEXES if (strncmp(torture_type, "mutex", 5) == 0) - debug_lock = true; + cxt.debug_lock = true; #endif #ifdef CONFIG_DEBUG_SPINLOCK if (strncmp(torture_type, "spin", 4) == 0) - debug_lock = true; + cxt.debug_lock = true; #endif /* Initialize the statistics so that each run gets its own numbers. */ lock_is_write_held = 0; - lwsa = kmalloc(sizeof(*lwsa) * nrealwriters_stress, GFP_KERNEL); - if (lwsa == NULL) { - VERBOSE_TOROUT_STRING("lwsa: Out of memory"); + cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL); + if (cxt.lwsa == NULL) { + VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory"); firsterr = -ENOMEM; goto unwind; } - for (i = 0; i < nrealwriters_stress; i++) { - lwsa[i].n_lock_fail = 0; - lwsa[i].n_lock_acquired = 0; + for (i = 0; i < cxt.nrealwriters_stress; i++) { + cxt.lwsa[i].n_lock_fail = 0; + cxt.lwsa[i].n_lock_acquired = 0; } - if (cur_ops->readlock) { + if (cxt.cur_ops->readlock) { if (nreaders_stress >= 0) - nrealreaders_stress = nreaders_stress; + cxt.nrealreaders_stress = nreaders_stress; else { /* * By default distribute evenly the number of @@ -596,25 +599,25 @@ static int __init lock_torture_init(void) * of threads as the writer-only locks default. */ if (nwriters_stress < 0) /* user doesn't care */ - nrealwriters_stress = num_online_cpus(); - nrealreaders_stress = nrealwriters_stress; + cxt.nrealwriters_stress = num_online_cpus(); + cxt.nrealreaders_stress = cxt.nrealwriters_stress; } lock_is_read_held = 0; - lrsa = kmalloc(sizeof(*lrsa) * nrealreaders_stress, GFP_KERNEL); - if (lrsa == NULL) { - VERBOSE_TOROUT_STRING("lrsa: Out of memory"); + cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL); + if (cxt.lrsa == NULL) { + VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory"); firsterr = -ENOMEM; - kfree(lwsa); + kfree(cxt.lwsa); goto unwind; } - for (i = 0; i < nrealreaders_stress; i++) { - lrsa[i].n_lock_fail = 0; - lrsa[i].n_lock_acquired = 0; + for (i = 0; i < cxt.nrealreaders_stress; i++) { + cxt.lrsa[i].n_lock_fail = 0; + cxt.lrsa[i].n_lock_acquired = 0; } } - lock_torture_print_module_parms(cur_ops, "Start of test"); + lock_torture_print_module_parms(cxt.cur_ops, "Start of test"); /* Prepare torture context. */ if (onoff_interval > 0) { @@ -640,7 +643,7 @@ static int __init lock_torture_init(void) goto unwind; } - writer_tasks = kzalloc(nrealwriters_stress * sizeof(writer_tasks[0]), + writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]), GFP_KERNEL); if (writer_tasks == NULL) { VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); @@ -648,8 +651,8 @@ static int __init lock_torture_init(void) goto unwind; } - if (cur_ops->readlock) { - reader_tasks = kzalloc(nrealreaders_stress * sizeof(reader_tasks[0]), + if (cxt.cur_ops->readlock) { + reader_tasks = kzalloc(cxt.nrealreaders_stress * sizeof(reader_tasks[0]), GFP_KERNEL); if (reader_tasks == NULL) { VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory"); @@ -666,22 +669,22 @@ static int __init lock_torture_init(void) * for very specific needs, or even let the user choose the policy, if * ever wanted. */ - for (i = 0, j = 0; i < nrealwriters_stress || - j < nrealreaders_stress; i++, j++) { - if (i >= nrealwriters_stress) + for (i = 0, j = 0; i < cxt.nrealwriters_stress || + j < cxt.nrealreaders_stress; i++, j++) { + if (i >= cxt.nrealwriters_stress) goto create_reader; /* Create writer. */ - firsterr = torture_create_kthread(lock_torture_writer, &lwsa[i], + firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i], writer_tasks[i]); if (firsterr) goto unwind; create_reader: - if (cur_ops->readlock == NULL || (j >= nrealreaders_stress)) + if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress)) continue; /* Create reader. */ - firsterr = torture_create_kthread(lock_torture_reader, &lrsa[j], + firsterr = torture_create_kthread(lock_torture_reader, &cxt.lrsa[j], reader_tasks[j]); if (firsterr) goto unwind; -- cgit v1.2.3 From e34191fad8e5d9fe4e76f6d03b5e29e3eae7535a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 29 Sep 2014 06:14:23 -0700 Subject: locktorture: Support rwlocks Add a "rw_lock" torture test to stress kernel rwlocks and their irq variant. Reader critical regions are 5x longer than writers. As such a similar ratio of lock acquisitions is seen in the statistics. In the case of massive contention, both hold the lock for 1/10 of a second. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 115 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 540d5dfe1112..0762b25b4110 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -229,6 +230,110 @@ static struct lock_torture_ops spin_lock_irq_ops = { .name = "spin_lock_irq" }; +static DEFINE_RWLOCK(torture_rwlock); + +static int torture_rwlock_write_lock(void) __acquires(torture_rwlock) +{ + write_lock(&torture_rwlock); + return 0; +} + +static void torture_rwlock_write_delay(struct torture_random_state *trsp) +{ + const unsigned long shortdelay_us = 2; + const unsigned long longdelay_ms = 100; + + /* We want a short delay mostly to emulate likely code, and + * we want a long delay occasionally to force massive contention. + */ + if (!(torture_random(trsp) % + (cxt.nrealwriters_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms); + else + udelay(shortdelay_us); +} + +static void torture_rwlock_write_unlock(void) __releases(torture_rwlock) +{ + write_unlock(&torture_rwlock); +} + +static int torture_rwlock_read_lock(void) __acquires(torture_rwlock) +{ + read_lock(&torture_rwlock); + return 0; +} + +static void torture_rwlock_read_delay(struct torture_random_state *trsp) +{ + const unsigned long shortdelay_us = 10; + const unsigned long longdelay_ms = 100; + + /* We want a short delay mostly to emulate likely code, and + * we want a long delay occasionally to force massive contention. + */ + if (!(torture_random(trsp) % + (cxt.nrealreaders_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms); + else + udelay(shortdelay_us); +} + +static void torture_rwlock_read_unlock(void) __releases(torture_rwlock) +{ + read_unlock(&torture_rwlock); +} + +static struct lock_torture_ops rw_lock_ops = { + .writelock = torture_rwlock_write_lock, + .write_delay = torture_rwlock_write_delay, + .writeunlock = torture_rwlock_write_unlock, + .readlock = torture_rwlock_read_lock, + .read_delay = torture_rwlock_read_delay, + .readunlock = torture_rwlock_read_unlock, + .name = "rw_lock" +}; + +static int torture_rwlock_write_lock_irq(void) __acquires(torture_rwlock) +{ + unsigned long flags; + + write_lock_irqsave(&torture_rwlock, flags); + cxt.cur_ops->flags = flags; + return 0; +} + +static void torture_rwlock_write_unlock_irq(void) +__releases(torture_rwlock) +{ + write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags); +} + +static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock) +{ + unsigned long flags; + + read_lock_irqsave(&torture_rwlock, flags); + cxt.cur_ops->flags = flags; + return 0; +} + +static void torture_rwlock_read_unlock_irq(void) +__releases(torture_rwlock) +{ + write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags); +} + +static struct lock_torture_ops rw_lock_irq_ops = { + .writelock = torture_rwlock_write_lock_irq, + .write_delay = torture_rwlock_write_delay, + .writeunlock = torture_rwlock_write_unlock_irq, + .readlock = torture_rwlock_read_lock_irq, + .read_delay = torture_rwlock_read_delay, + .readunlock = torture_rwlock_read_unlock_irq, + .name = "rw_lock_irq" +}; + static DEFINE_MUTEX(torture_mutex); static int torture_mutex_lock(void) __acquires(torture_mutex) @@ -535,8 +640,11 @@ static int __init lock_torture_init(void) int i, j; int firsterr = 0; static struct lock_torture_ops *torture_ops[] = { - &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, - &mutex_lock_ops, &rwsem_lock_ops, + &lock_busted_ops, + &spin_lock_ops, &spin_lock_irq_ops, + &rw_lock_ops, &rw_lock_irq_ops, + &mutex_lock_ops, + &rwsem_lock_ops, }; if (!torture_init_begin(torture_type, verbose, &torture_runnable)) @@ -571,7 +679,8 @@ static int __init lock_torture_init(void) cxt.debug_lock = true; #endif #ifdef CONFIG_DEBUG_SPINLOCK - if (strncmp(torture_type, "spin", 4) == 0) + if ((strncmp(torture_type, "spin", 4) == 0) || + (strncmp(torture_type, "rw_lock", 7) == 0)) cxt.debug_lock = true; #endif -- cgit v1.2.3 From 219f800f99db6f4e43a582cb9e0d98931f13c012 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 29 Sep 2014 06:14:24 -0700 Subject: locktorture: Fix __acquire annotation for spinlock irq Its quite easy to get mixed up with the names -- 'torture_spinlock_irq' is not actually a valid spinlock name. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 0762b25b4110..9e9cd111fb0f 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -205,7 +205,7 @@ static struct lock_torture_ops spin_lock_ops = { }; static int torture_spin_lock_write_lock_irq(void) -__acquires(torture_spinlock_irq) +__acquires(torture_spinlock) { unsigned long flags; -- cgit v1.2.3 From a1229491006a3d55cc0d7e6d496be39915ccefdd Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 29 Sep 2014 06:14:25 -0700 Subject: locktorture: Cannot hold read and write lock ... trigger an error if so. Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 9e9cd111fb0f..b05dc46c4297 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -453,14 +453,19 @@ static int lock_torture_writer(void *arg) do { if ((torture_random(&rand) & 0xfffff) == 0) schedule_timeout_uninterruptible(1); + cxt.cur_ops->writelock(); if (WARN_ON_ONCE(lock_is_write_held)) lwsp->n_lock_fail++; lock_is_write_held = 1; + if (WARN_ON_ONCE(lock_is_read_held)) + lwsp->n_lock_fail++; /* rare, but... */ + lwsp->n_lock_acquired++; cxt.cur_ops->write_delay(&rand); lock_is_write_held = 0; cxt.cur_ops->writeunlock(); + stutter_wait("lock_torture_writer"); } while (!torture_must_stop()); torture_kthread_stopping("lock_torture_writer"); @@ -482,12 +487,17 @@ static int lock_torture_reader(void *arg) do { if ((torture_random(&rand) & 0xfffff) == 0) schedule_timeout_uninterruptible(1); + cxt.cur_ops->readlock(); lock_is_read_held = 1; + if (WARN_ON_ONCE(lock_is_write_held)) + lrsp->n_lock_fail++; /* rare, but... */ + lrsp->n_lock_acquired++; cxt.cur_ops->read_delay(&rand); lock_is_read_held = 0; cxt.cur_ops->readunlock(); + stutter_wait("lock_torture_reader"); } while (!torture_must_stop()); torture_kthread_stopping("lock_torture_reader"); -- cgit v1.2.3 From c98fed9fc6a7449affd941d8a8e9fcb0c72977d6 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 29 Sep 2014 06:14:26 -0700 Subject: locktorture: Cleanup header usage Remove some unnecessary ones and explicitly include rwsem.h Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney --- kernel/locking/locktorture.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index b05dc46c4297..ec8cce259779 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -20,32 +20,20 @@ * Author: Paul E. McKenney * Based on kernel/rcu/torture.c. */ -#include #include -#include #include #include -#include #include #include #include +#include #include #include #include #include -#include -#include #include -#include -#include -#include -#include -#include #include -#include #include -#include -#include #include MODULE_LICENSE("GPL"); -- cgit v1.2.3 From debfab74e453f079cd8b12b0604387a8c510ef3a Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 16 Sep 2014 17:16:57 -0700 Subject: locking/rwsem: Avoid double checking before try acquiring write lock Commit 9b0fc9c09f1b ("rwsem: skip initial trylock in rwsem_down_write_failed") checks for if there are known active lockers in order to avoid write trylocking using expensive cmpxchg() when it likely wouldn't get the lock. However, a subsequent patch was added such that we directly check for sem->count == RWSEM_WAITING_BIAS right before trying that cmpxchg(). Thus, commit 9b0fc9c09f1b now just adds overhead. This patch modifies it so that we only do a check for if count == RWSEM_WAITING_BIAS. Also, add a comment on why we do an "extra check" of count before the cmpxchg(). Signed-off-by: Jason Low Acked-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Aswin Chandramouleeswaran Cc: Chegu Vinod Cc: Peter Hurley Cc: Tim Chen Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1410913017.2447.22.camel@j-VirtualBox Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 12166ec9b7e7..7628c3fc37ca 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -250,16 +250,18 @@ EXPORT_SYMBOL(rwsem_down_read_failed); static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) { - if (!(count & RWSEM_ACTIVE_MASK)) { - /* try acquiring the write lock */ - if (sem->count == RWSEM_WAITING_BIAS && - cmpxchg(&sem->count, RWSEM_WAITING_BIAS, - RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { - if (!list_is_singular(&sem->wait_list)) - rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); - return true; - } + /* + * Try acquiring the write lock. Check count first in order + * to reduce unnecessary expensive cmpxchg() operations. + */ + if (count == RWSEM_WAITING_BIAS && + cmpxchg(&sem->count, RWSEM_WAITING_BIAS, + RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { + if (!list_is_singular(&sem->wait_list)) + rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + return true; } + return false; } -- cgit v1.2.3 From 8acd91e8620836a56ff62028ed28ba629f2881a0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 30 Sep 2014 15:26:00 +0200 Subject: locking/lockdep: Revert qrwlock recusive stuff Commit f0bab73cb539 ("locking/lockdep: Restrict the use of recursive read_lock() with qrwlock") changed lockdep to try and conform to the qrwlock semantics which differ from the traditional rwlock semantics. In particular qrwlock is fair outside of interrupt context, but in interrupt context readers will ignore all fairness. The problem modeling this is that read and write side have different lock state (interrupts) semantics but we only have a single representation of these. Therefore lockdep will get confused, thinking the lock can cause interrupt lock inversions. So revert it for now; the old rwlock semantics were already imperfectly modeled and the qrwlock extra won't fit either. If we want to properly fix this, I think we need to resurrect the work by Gautham did a few years ago that split the read and write state of locks: http://lwn.net/Articles/332801/ FWIW the locking selftest that would've failed (and was reported by Borislav earlier) is something like: RL(X1); /* IRQ-ON */ LOCK(A); UNLOCK(A); RU(X1); IRQ_ENTER(); RL(X1); /* IN-IRQ */ RU(X1); IRQ_EXIT(); At which point it would report that because A is an IRQ-unsafe lock we can suffer the following inversion: CPU0 CPU1 lock(A) lock(X1) lock(A) lock(X1) And this is 'wrong' because X1 can recurse (assuming the above lock are in fact read-lock) but lockdep doesn't know about this. Signed-off-by: Peter Zijlstra (Intel) Cc: Waiman Long Cc: ego@linux.vnet.ibm.com Cc: bp@alien8.de Cc: Linus Torvalds Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/20140930132600.GA7444@worktop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 420ba685c4e5..88d0d4420ad2 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3597,12 +3597,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, raw_local_irq_save(flags); check_flags(flags); - /* - * An interrupt recursive read in interrupt context can be considered - * to be the same as a recursive read from checking perspective. - */ - if ((read == 3) && in_interrupt()) - read = 2; current->lockdep_recursion = 1; trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, -- cgit v1.2.3 From 6f942a1f264e875c5f3ad6f505d7b500a3e7fa82 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Sep 2014 10:18:46 +0200 Subject: locking/mutex: Don't assume TASK_RUNNING We're going to make might_sleep() test for TASK_RUNNING, because blocking without TASK_RUNNING will destroy the task state by setting it to TASK_RUNNING. There are a few occasions where its 'valid' to call blocking primitives (and mutex_lock in particular) and not have TASK_RUNNING, typically such cases are right before we set TASK_RUNNING anyhow. Robustify the code by not assuming this; this has the beneficial side effect of allowing optional code emission for fixing the above might_sleep() false positives. Signed-off-by: Peter Zijlstra (Intel) Cc: tglx@linutronix.de Cc: ilya.dryomov@inktank.com Cc: umgwanakikbuti@gmail.com Cc: Oleg Nesterov Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140924082241.988560063@infradead.org Signed-off-by: Ingo Molnar --- kernel/locking/mutex.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index dadbf88c22c4..454195194d4a 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -378,8 +378,14 @@ done: * reschedule now, before we try-lock the mutex. This avoids getting * scheduled out right after we obtained the mutex. */ - if (need_resched()) + if (need_resched()) { + /* + * We _should_ have TASK_RUNNING here, but just in case + * we do not, make it so, otherwise we might get stuck. + */ + __set_current_state(TASK_RUNNING); schedule_preempt_disabled(); + } return false; } -- cgit v1.2.3 From a63b03e2d2477586440741677ecac45bcf28d7b1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 6 Jan 2015 10:29:35 +0000 Subject: mutex: Always clear owner field upon mutex_unlock() Currently if DEBUG_MUTEXES is enabled, the mutex->owner field is only cleared iff debug_locks is active. This exposes a race to other users of the field where the mutex->owner may be still set to a stale value, potentially upsetting mutex_spin_on_owner() among others. References: https://bugs.freedesktop.org/show_bug.cgi?id=87955 Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Acked-by: Davidlohr Bueso Cc: Daniel Vetter Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1420540175-30204-1-git-send-email-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- kernel/locking/mutex-debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 5cf6731b98e9..3ef3736002d8 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -80,13 +80,13 @@ void debug_mutex_unlock(struct mutex *lock) DEBUG_LOCKS_WARN_ON(lock->owner != current); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - mutex_clear_owner(lock); } /* * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug * mutexes so that we can do it here after we've verified state. */ + mutex_clear_owner(lock); atomic_set(&lock->count, 1); } -- cgit v1.2.3