WSL2-Linux-Kernel/include/linux/mutex.h

541 строка
18 KiB
C
Исходник Обычный вид История

/*
* Mutexes: blocking mutual exclusion locks
*
* started by Ingo Molnar:
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*
* This file contains the main data structure and API definitions.
*/
#ifndef __LINUX_MUTEX_H
#define __LINUX_MUTEX_H
#include <asm/current.h>
#include <linux/list.h>
#include <linux/spinlock_types.h>
#include <linux/linkage.h>
#include <linux/lockdep.h>
#include <linux/atomic.h>
/*
* Simple, straightforward mutexes with strict semantics:
*
* - only one task can hold the mutex at a time
* - only the owner can unlock the mutex
* - multiple unlocks are not permitted
* - recursive locking is not permitted
* - a mutex object must be initialized via the API
* - a mutex object must not be initialized via memset or copying
* - task may not exit with mutex held
* - memory areas where held locks reside must not be freed
* - held mutexes must not be reinitialized
* - mutexes may not be used in hardware or software interrupt
* contexts such as tasklets and timers
*
* These semantics are fully enforced when DEBUG_MUTEXES is
* enabled. Furthermore, besides enforcing the above rules, the mutex
* debugging code also implements a number of additional features
* that make lock debugging easier and faster:
*
* - uses symbolic names of mutexes, whenever they are printed in debug output
* - point-of-acquire tracking, symbolic lookup of function names
* - list of all locks held in the system, printout of them
* - owner tracking
* - detects self-recursing locks and prints out all relevant info
* - detects multi-task circular deadlocks and prints out all affected
* locks and tasks (and only those tasks)
*/
struct mutex {
/* 1: unlocked, 0: locked, negative: locked, possible waiters */
atomic_t count;
spinlock_t wait_lock;
struct list_head wait_list;
mutex: implement adaptive spinning Change mutex contention behaviour such that it will sometimes busy wait on acquisition - moving its behaviour closer to that of spinlocks. This concept got ported to mainline from the -rt tree, where it was originally implemented for rtmutexes by Steven Rostedt, based on work by Gregory Haskins. Testing with Ingo's test-mutex application (http://lkml.org/lkml/2006/1/8/50) gave a 345% boost for VFS scalability on my testbox: # ./test-mutex-shm V 16 10 | grep "^avg ops" avg ops/sec: 296604 # ./test-mutex-shm V 16 10 | grep "^avg ops" avg ops/sec: 85870 The key criteria for the busy wait is that the lock owner has to be running on a (different) cpu. The idea is that as long as the owner is running, there is a fair chance it'll release the lock soon, and thus we'll be better off spinning instead of blocking/scheduling. Since regular mutexes (as opposed to rtmutexes) do not atomically track the owner, we add the owner in a non-atomic fashion and deal with the races in the slowpath. Furthermore, to ease the testing of the performance impact of this new code, there is means to disable this behaviour runtime (without having to reboot the system), when scheduler debugging is enabled (CONFIG_SCHED_DEBUG=y), by issuing the following command: # echo NO_OWNER_SPIN > /debug/sched_features This command re-enables spinning again (this is also the default): # echo OWNER_SPIN > /debug/sched_features Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-01-12 16:01:47 +03:00
#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
struct task_struct *owner;
mutex: implement adaptive spinning Change mutex contention behaviour such that it will sometimes busy wait on acquisition - moving its behaviour closer to that of spinlocks. This concept got ported to mainline from the -rt tree, where it was originally implemented for rtmutexes by Steven Rostedt, based on work by Gregory Haskins. Testing with Ingo's test-mutex application (http://lkml.org/lkml/2006/1/8/50) gave a 345% boost for VFS scalability on my testbox: # ./test-mutex-shm V 16 10 | grep "^avg ops" avg ops/sec: 296604 # ./test-mutex-shm V 16 10 | grep "^avg ops" avg ops/sec: 85870 The key criteria for the busy wait is that the lock owner has to be running on a (different) cpu. The idea is that as long as the owner is running, there is a fair chance it'll release the lock soon, and thus we'll be better off spinning instead of blocking/scheduling. Since regular mutexes (as opposed to rtmutexes) do not atomically track the owner, we add the owner in a non-atomic fashion and deal with the races in the slowpath. Furthermore, to ease the testing of the performance impact of this new code, there is means to disable this behaviour runtime (without having to reboot the system), when scheduler debugging is enabled (CONFIG_SCHED_DEBUG=y), by issuing the following command: # echo NO_OWNER_SPIN > /debug/sched_features This command re-enables spinning again (this is also the default): # echo OWNER_SPIN > /debug/sched_features Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-01-12 16:01:47 +03:00
#endif
mutex: Queue mutex spinners with MCS lock to reduce cacheline contention The current mutex spinning code (with MUTEX_SPIN_ON_OWNER option turned on) allow multiple tasks to spin on a single mutex concurrently. A potential problem with the current approach is that when the mutex becomes available, all the spinning tasks will try to acquire the mutex more or less simultaneously. As a result, there will be a lot of cacheline bouncing especially on systems with a large number of CPUs. This patch tries to reduce this kind of contention by putting the mutex spinners into a queue so that only the first one in the queue will try to acquire the mutex. This will reduce contention and allow all the tasks to move forward faster. The queuing of mutex spinners is done using an MCS lock based implementation which will further reduce contention on the mutex cacheline than a similar ticket spinlock based implementation. This patch will add a new field into the mutex data structure for holding the MCS lock. This expands the mutex size by 8 bytes for 64-bit system and 4 bytes for 32-bit system. This overhead will be avoid if the MUTEX_SPIN_ON_OWNER option is turned off. The following table shows the jobs per minute (JPM) scalability data on an 8-node 80-core Westmere box with a 3.7.10 kernel. The numactl command is used to restrict the running of the fserver workloads to 1/2/4/8 nodes with hyperthreading off. +-----------------+-----------+-----------+-------------+----------+ | Configuration | Mean JPM | Mean JPM | Mean JPM | % Change | | | w/o patch | patch 1 | patches 1&2 | 1->1&2 | +-----------------+------------------------------------------------+ | | User Range 1100 - 2000 | +-----------------+------------------------------------------------+ | 8 nodes, HT off | 227972 | 227237 | 305043 | +34.2% | | 4 nodes, HT off | 393503 | 381558 | 394650 | +3.4% | | 2 nodes, HT off | 334957 | 325240 | 338853 | +4.2% | | 1 node , HT off | 198141 | 197972 | 198075 | +0.1% | +-----------------+------------------------------------------------+ | | User Range 200 - 1000 | +-----------------+------------------------------------------------+ | 8 nodes, HT off | 282325 | 312870 | 332185 | +6.2% | | 4 nodes, HT off | 390698 | 378279 | 393419 | +4.0% | | 2 nodes, HT off | 336986 | 326543 | 340260 | +4.2% | | 1 node , HT off | 197588 | 197622 | 197582 | 0.0% | +-----------------+-----------+-----------+-------------+----------+ At low user range 10-100, the JPM differences were within +/-1%. So they are not that interesting. The fserver workload uses mutex spinning extensively. With just the mutex change in the first patch, there is no noticeable change in performance. Rather, there is a slight drop in performance. This mutex spinning patch more than recovers the lost performance and show a significant increase of +30% at high user load with the full 8 nodes. Similar improvements were also seen in a 3.8 kernel. The table below shows the %time spent by different kernel functions as reported by perf when running the fserver workload at 1500 users with all 8 nodes. +-----------------------+-----------+---------+-------------+ | Function | % time | % time | % time | | | w/o patch | patch 1 | patches 1&2 | +-----------------------+-----------+---------+-------------+ | __read_lock_failed | 34.96% | 34.91% | 29.14% | | __write_lock_failed | 10.14% | 10.68% | 7.51% | | mutex_spin_on_owner | 3.62% | 3.42% | 2.33% | | mspin_lock | N/A | N/A | 9.90% | | __mutex_lock_slowpath | 1.46% | 0.81% | 0.14% | | _raw_spin_lock | 2.25% | 2.50% | 1.10% | +-----------------------+-----------+---------+-------------+ The fserver workload for an 8-node system is dominated by the contention in the read/write lock. Mutex contention also plays a role. With the first patch only, mutex contention is down (as shown by the __mutex_lock_slowpath figure) which help a little bit. We saw only a few percents improvement with that. By applying patch 2 as well, the single mutex_spin_on_owner figure is now split out into an additional mspin_lock figure. The time increases from 3.42% to 11.23%. It shows a great reduction in contention among the spinners leading to a 30% improvement. The time ratio 9.9/2.33=4.3 indicates that there are on average 4+ spinners waiting in the spin_lock loop for each spinner in the mutex_spin_on_owner loop. Contention in other locking functions also go down by quite a lot. The table below shows the performance change of both patches 1 & 2 over patch 1 alone in other AIM7 workloads (at 8 nodes, hyperthreading off). +--------------+---------------+----------------+-----------------+ | Workload | mean % change | mean % change | mean % change | | | 10-100 users | 200-1000 users | 1100-2000 users | +--------------+---------------+----------------+-----------------+ | alltests | 0.0% | -0.8% | +0.6% | | five_sec | -0.3% | +0.8% | +0.8% | | high_systime | +0.4% | +2.4% | +2.1% | | new_fserver | +0.1% | +14.1% | +34.2% | | shared | -0.5% | -0.3% | -0.4% | | short | -1.7% | -9.8% | -8.3% | +--------------+---------------+----------------+-----------------+ The short workload is the only one that shows a decline in performance probably due to the spinner locking and queuing overhead. Signed-off-by: Waiman Long <Waiman.Long@hp.com> Reviewed-by: Davidlohr Bueso <davidlohr.bueso@hp.com> Acked-by: Rik van Riel <riel@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Chandramouleeswaran Aswin <aswin@hp.com> Cc: Norton Scott J <scott.norton@hp.com> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: David Howells <dhowells@redhat.com> Cc: Dave Jones <davej@redhat.com> Cc: Clark Williams <williams@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/1366226594-5506-4-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2013-04-17 23:23:13 +04:00
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
void *spin_mlock; /* Spinner MCS lock */
#endif
mutex: implement adaptive spinning Change mutex contention behaviour such that it will sometimes busy wait on acquisition - moving its behaviour closer to that of spinlocks. This concept got ported to mainline from the -rt tree, where it was originally implemented for rtmutexes by Steven Rostedt, based on work by Gregory Haskins. Testing with Ingo's test-mutex application (http://lkml.org/lkml/2006/1/8/50) gave a 345% boost for VFS scalability on my testbox: # ./test-mutex-shm V 16 10 | grep "^avg ops" avg ops/sec: 296604 # ./test-mutex-shm V 16 10 | grep "^avg ops" avg ops/sec: 85870 The key criteria for the busy wait is that the lock owner has to be running on a (different) cpu. The idea is that as long as the owner is running, there is a fair chance it'll release the lock soon, and thus we'll be better off spinning instead of blocking/scheduling. Since regular mutexes (as opposed to rtmutexes) do not atomically track the owner, we add the owner in a non-atomic fashion and deal with the races in the slowpath. Furthermore, to ease the testing of the performance impact of this new code, there is means to disable this behaviour runtime (without having to reboot the system), when scheduler debugging is enabled (CONFIG_SCHED_DEBUG=y), by issuing the following command: # echo NO_OWNER_SPIN > /debug/sched_features This command re-enables spinning again (this is also the default): # echo OWNER_SPIN > /debug/sched_features Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
2009-01-12 16:01:47 +03:00
#ifdef CONFIG_DEBUG_MUTEXES
const char *name;
void *magic;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
};
/*
* This is the control structure for tasks blocked on mutex,
* which resides on the blocked task's kernel stack:
*/
struct mutex_waiter {
struct list_head list;
struct task_struct *task;
#ifdef CONFIG_DEBUG_MUTEXES
void *magic;
#endif
};
struct ww_class {
atomic_long_t stamp;
struct lock_class_key acquire_key;
struct lock_class_key mutex_key;
const char *acquire_name;
const char *mutex_name;
};
struct ww_acquire_ctx {
struct task_struct *task;
unsigned long stamp;
unsigned acquired;
#ifdef CONFIG_DEBUG_MUTEXES
unsigned done_acquire;
struct ww_class *ww_class;
struct ww_mutex *contending_lock;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
mutex: Add w/w mutex slowpath debugging Injects EDEADLK conditions at pseudo-random interval, with exponential backoff up to UINT_MAX (to ensure that every lock operation still completes in a reasonable time). This way we can test the wound slowpath even for ww mutex users where contention is never expected, and the ww deadlock avoidance algorithm is only needed for correctness against malicious userspace. An example would be protecting kernel modesetting properties, which thanks to single-threaded X isn't really expected to contend, ever. I've looked into using the CONFIG_FAULT_INJECTION infrastructure, but decided against it for two reasons: - EDEADLK handling is mandatory for ww mutex users and should never affect the outcome of a syscall. This is in contrast to -ENOMEM injection. So fine configurability isn't required. - The fault injection framework only allows to set a simple probability for failure. Now the probability that a ww mutex acquire stage with N locks will never complete (due to too many injected EDEADLK backoffs) is zero. But the expected number of ww_mutex_lock operations for the completely uncontended case would be O(exp(N)). The per-acuiqire ctx exponential backoff solution choosen here only results in O(log N) overhead due to injection and so O(log N * N) lock operations. This way we can fail with high probability (and so have good test coverage even for fancy backoff and lock acquisition paths) without running into patalogical cases. Note that EDEADLK will only ever be injected when we managed to acquire the lock. This prevents any behaviour changes for users which rely on the EALREADY semantics. Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: rostedt@goodmis.org Cc: daniel@ffwll.ch Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20130620113117.4001.21681.stgit@patser Signed-off-by: Ingo Molnar <mingo@kernel.org>
2013-06-20 15:31:17 +04:00
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
unsigned deadlock_inject_interval;
unsigned deadlock_inject_countdown;
#endif
};
struct ww_mutex {
struct mutex base;
struct ww_acquire_ctx *ctx;
#ifdef CONFIG_DEBUG_MUTEXES
struct ww_class *ww_class;
#endif
};
#ifdef CONFIG_DEBUG_MUTEXES
# include <linux/mutex-debug.h>
#else
# define __DEBUG_MUTEX_INITIALIZER(lockname)
/**
* mutex_init - initialize the mutex
* @mutex: the mutex to be initialized
*
* Initialize the mutex to unlocked state.
*
* It is not allowed to initialize an already locked mutex.
*/
# define mutex_init(mutex) \
do { \
static struct lock_class_key __key; \
\
__mutex_init((mutex), #mutex, &__key); \
} while (0)
static inline void mutex_destroy(struct mutex *lock) {}
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
, .dep_map = { .name = #lockname }
# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class) \
, .ww_class = &ww_class
#else
# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
# define __WW_CLASS_MUTEX_INITIALIZER(lockname, ww_class)
#endif
#define __MUTEX_INITIALIZER(lockname) \
{ .count = ATOMIC_INIT(1) \
, .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
__DEBUG_MUTEX_INITIALIZER(lockname) \
__DEP_MAP_MUTEX_INITIALIZER(lockname) }
#define __WW_CLASS_INITIALIZER(ww_class) \
{ .stamp = ATOMIC_LONG_INIT(0) \
, .acquire_name = #ww_class "_acquire" \
, .mutex_name = #ww_class "_mutex" }
#define __WW_MUTEX_INITIALIZER(lockname, class) \
{ .base = { \__MUTEX_INITIALIZER(lockname) } \
__WW_CLASS_MUTEX_INITIALIZER(lockname, class) }
#define DEFINE_MUTEX(mutexname) \
struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
#define DEFINE_WW_CLASS(classname) \
struct ww_class classname = __WW_CLASS_INITIALIZER(classname)
#define DEFINE_WW_MUTEX(mutexname, ww_class) \
struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class)
extern void __mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key);
/**
* ww_mutex_init - initialize the w/w mutex
* @lock: the mutex to be initialized
* @ww_class: the w/w class the mutex should belong to
*
* Initialize the w/w mutex to unlocked state and associate it with the given
* class.
*
* It is not allowed to initialize an already locked mutex.
*/
static inline void ww_mutex_init(struct ww_mutex *lock,
struct ww_class *ww_class)
{
__mutex_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key);
lock->ctx = NULL;
#ifdef CONFIG_DEBUG_MUTEXES
lock->ww_class = ww_class;
#endif
}
/**
* mutex_is_locked - is the mutex locked
* @lock: the mutex to be queried
*
* Returns 1 if the mutex is locked, 0 if unlocked.
*/
static inline int mutex_is_locked(struct mutex *lock)
{
return atomic_read(&lock->count) != 1;
}
/*
* See kernel/mutex.c for detailed documentation of these APIs.
* Also see Documentation/mutex-design.txt.
*/
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock);
extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
unsigned int subclass);
extern int __must_check mutex_lock_killable_nested(struct mutex *lock,
unsigned int subclass);
#define mutex_lock(lock) mutex_lock_nested(lock, 0)
#define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
#define mutex_lock_killable(lock) mutex_lock_killable_nested(lock, 0)
#define mutex_lock_nest_lock(lock, nest_lock) \
do { \
typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \
_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \
} while (0)
#else
extern void mutex_lock(struct mutex *lock);
extern int __must_check mutex_lock_interruptible(struct mutex *lock);
extern int __must_check mutex_lock_killable(struct mutex *lock);
# define mutex_lock_nested(lock, subclass) mutex_lock(lock)
# define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
# define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
#endif
/*
* NOTE: mutex_trylock() follows the spin_trylock() convention,
* not the down_trylock() convention!
*
* Returns 1 if the mutex has been acquired successfully, and 0 on contention.
*/
extern int mutex_trylock(struct mutex *lock);
extern void mutex_unlock(struct mutex *lock);
/**
* ww_acquire_init - initialize a w/w acquire context
* @ctx: w/w acquire context to initialize
* @ww_class: w/w class of the context
*
* Initializes an context to acquire multiple mutexes of the given w/w class.
*
* Context-based w/w mutex acquiring can be done in any order whatsoever within
* a given lock class. Deadlocks will be detected and handled with the
* wait/wound logic.
*
* Mixing of context-based w/w mutex acquiring and single w/w mutex locking can
* result in undetected deadlocks and is so forbidden. Mixing different contexts
* for the same w/w class when acquiring mutexes can also result in undetected
* deadlocks, and is hence also forbidden. Both types of abuse will be caught by
* enabling CONFIG_PROVE_LOCKING.
*
* Nesting of acquire contexts for _different_ w/w classes is possible, subject
* to the usual locking rules between different lock classes.
*
* An acquire context must be released with ww_acquire_fini by the same task
* before the memory is freed. It is recommended to allocate the context itself
* on the stack.
*/
static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
struct ww_class *ww_class)
{
ctx->task = current;
ctx->stamp = atomic_long_inc_return(&ww_class->stamp);
ctx->acquired = 0;
#ifdef CONFIG_DEBUG_MUTEXES
ctx->ww_class = ww_class;
ctx->done_acquire = 0;
ctx->contending_lock = NULL;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
debug_check_no_locks_freed((void *)ctx, sizeof(*ctx));
lockdep_init_map(&ctx->dep_map, ww_class->acquire_name,
&ww_class->acquire_key, 0);
mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_);
#endif
mutex: Add w/w mutex slowpath debugging Injects EDEADLK conditions at pseudo-random interval, with exponential backoff up to UINT_MAX (to ensure that every lock operation still completes in a reasonable time). This way we can test the wound slowpath even for ww mutex users where contention is never expected, and the ww deadlock avoidance algorithm is only needed for correctness against malicious userspace. An example would be protecting kernel modesetting properties, which thanks to single-threaded X isn't really expected to contend, ever. I've looked into using the CONFIG_FAULT_INJECTION infrastructure, but decided against it for two reasons: - EDEADLK handling is mandatory for ww mutex users and should never affect the outcome of a syscall. This is in contrast to -ENOMEM injection. So fine configurability isn't required. - The fault injection framework only allows to set a simple probability for failure. Now the probability that a ww mutex acquire stage with N locks will never complete (due to too many injected EDEADLK backoffs) is zero. But the expected number of ww_mutex_lock operations for the completely uncontended case would be O(exp(N)). The per-acuiqire ctx exponential backoff solution choosen here only results in O(log N) overhead due to injection and so O(log N * N) lock operations. This way we can fail with high probability (and so have good test coverage even for fancy backoff and lock acquisition paths) without running into patalogical cases. Note that EDEADLK will only ever be injected when we managed to acquire the lock. This prevents any behaviour changes for users which rely on the EALREADY semantics. Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Cc: rostedt@goodmis.org Cc: daniel@ffwll.ch Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20130620113117.4001.21681.stgit@patser Signed-off-by: Ingo Molnar <mingo@kernel.org>
2013-06-20 15:31:17 +04:00
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
ctx->deadlock_inject_interval = 1;
ctx->deadlock_inject_countdown = ctx->stamp & 0xf;
#endif
}
/**
* ww_acquire_done - marks the end of the acquire phase
* @ctx: the acquire context
*
* Marks the end of the acquire phase, any further w/w mutex lock calls using
* this context are forbidden.
*
* Calling this function is optional, it is just useful to document w/w mutex
* code and clearly designated the acquire phase from actually using the locked
* data structures.
*/
static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
lockdep_assert_held(ctx);
DEBUG_LOCKS_WARN_ON(ctx->done_acquire);
ctx->done_acquire = 1;
#endif
}
/**
* ww_acquire_fini - releases a w/w acquire context
* @ctx: the acquire context to free
*
* Releases a w/w acquire context. This must be called _after_ all acquired w/w
* mutexes have been released with ww_mutex_unlock.
*/
static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
mutex_release(&ctx->dep_map, 0, _THIS_IP_);
DEBUG_LOCKS_WARN_ON(ctx->acquired);
if (!config_enabled(CONFIG_PROVE_LOCKING))
/*
* lockdep will normally handle this,
* but fail without anyway
*/
ctx->done_acquire = 1;
if (!config_enabled(CONFIG_DEBUG_LOCK_ALLOC))
/* ensure ww_acquire_fini will still fail if called twice */
ctx->acquired = ~0U;
#endif
}
extern int __must_check __ww_mutex_lock(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx);
extern int __must_check __ww_mutex_lock_interruptible(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx);
/**
* ww_mutex_lock - acquire the w/w mutex
* @lock: the mutex to be acquired
* @ctx: w/w acquire context, or NULL to acquire only a single lock.
*
* Lock the w/w mutex exclusively for this task.
*
* Deadlocks within a given w/w class of locks are detected and handled with the
* wait/wound algorithm. If the lock isn't immediately avaiable this function
* will either sleep until it is (wait case). Or it selects the current context
* for backing off by returning -EDEADLK (wound case). Trying to acquire the
* same lock with the same context twice is also detected and signalled by
* returning -EALREADY. Returns 0 if the mutex was successfully acquired.
*
* In the wound case the caller must release all currently held w/w mutexes for
* the given context and then wait for this contending lock to be available by
* calling ww_mutex_lock_slow. Alternatively callers can opt to not acquire this
* lock and proceed with trying to acquire further w/w mutexes (e.g. when
* scanning through lru lists trying to free resources).
*
* The mutex must later on be released by the same task that
* acquired it. The task may not exit without first unlocking the mutex. Also,
* kernel memory where the mutex resides must not be freed with the mutex still
* locked. The mutex must first be initialized (or statically defined) before it
* can be locked. memset()-ing the mutex to 0 is not allowed. The mutex must be
* of the same w/w lock class as was used to initialize the acquire context.
*
* A mutex acquired with this function must be released with ww_mutex_unlock.
*/
static inline int ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
if (ctx)
return __ww_mutex_lock(lock, ctx);
else {
mutex_lock(&lock->base);
return 0;
}
}
/**
* ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible
* @lock: the mutex to be acquired
* @ctx: w/w acquire context
*
* Lock the w/w mutex exclusively for this task.
*
* Deadlocks within a given w/w class of locks are detected and handled with the
* wait/wound algorithm. If the lock isn't immediately avaiable this function
* will either sleep until it is (wait case). Or it selects the current context
* for backing off by returning -EDEADLK (wound case). Trying to acquire the
* same lock with the same context twice is also detected and signalled by
* returning -EALREADY. Returns 0 if the mutex was successfully acquired. If a
* signal arrives while waiting for the lock then this function returns -EINTR.
*
* In the wound case the caller must release all currently held w/w mutexes for
* the given context and then wait for this contending lock to be available by
* calling ww_mutex_lock_slow_interruptible. Alternatively callers can opt to
* not acquire this lock and proceed with trying to acquire further w/w mutexes
* (e.g. when scanning through lru lists trying to free resources).
*
* The mutex must later on be released by the same task that
* acquired it. The task may not exit without first unlocking the mutex. Also,
* kernel memory where the mutex resides must not be freed with the mutex still
* locked. The mutex must first be initialized (or statically defined) before it
* can be locked. memset()-ing the mutex to 0 is not allowed. The mutex must be
* of the same w/w lock class as was used to initialize the acquire context.
*
* A mutex acquired with this function must be released with ww_mutex_unlock.
*/
static inline int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
if (ctx)
return __ww_mutex_lock_interruptible(lock, ctx);
else
return mutex_lock_interruptible(&lock->base);
}
/**
* ww_mutex_lock_slow - slowpath acquiring of the w/w mutex
* @lock: the mutex to be acquired
* @ctx: w/w acquire context
*
* Acquires a w/w mutex with the given context after a wound case. This function
* will sleep until the lock becomes available.
*
* The caller must have released all w/w mutexes already acquired with the
* context and then call this function on the contended lock.
*
* Afterwards the caller may continue to (re)acquire the other w/w mutexes it
* needs with ww_mutex_lock. Note that the -EALREADY return code from
* ww_mutex_lock can be used to avoid locking this contended mutex twice.
*
* It is forbidden to call this function with any other w/w mutexes associated
* with the context held. It is forbidden to call this on anything else than the
* contending mutex.
*
* Note that the slowpath lock acquiring can also be done by calling
* ww_mutex_lock directly. This function here is simply to help w/w mutex
* locking code readability by clearly denoting the slowpath.
*/
static inline void
ww_mutex_lock_slow(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(!ctx->contending_lock);
#endif
ret = ww_mutex_lock(lock, ctx);
(void)ret;
}
/**
* ww_mutex_lock_slow_interruptible - slowpath acquiring of the w/w mutex,
* interruptible
* @lock: the mutex to be acquired
* @ctx: w/w acquire context
*
* Acquires a w/w mutex with the given context after a wound case. This function
* will sleep until the lock becomes available and returns 0 when the lock has
* been acquired. If a signal arrives while waiting for the lock then this
* function returns -EINTR.
*
* The caller must have released all w/w mutexes already acquired with the
* context and then call this function on the contended lock.
*
* Afterwards the caller may continue to (re)acquire the other w/w mutexes it
* needs with ww_mutex_lock. Note that the -EALREADY return code from
* ww_mutex_lock can be used to avoid locking this contended mutex twice.
*
* It is forbidden to call this function with any other w/w mutexes associated
* with the given context held. It is forbidden to call this on anything else
* than the contending mutex.
*
* Note that the slowpath lock acquiring can also be done by calling
* ww_mutex_lock_interruptible directly. This function here is simply to help
* w/w mutex locking code readability by clearly denoting the slowpath.
*/
static inline int __must_check
ww_mutex_lock_slow_interruptible(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(!ctx->contending_lock);
#endif
return ww_mutex_lock_interruptible(lock, ctx);
}
extern void ww_mutex_unlock(struct ww_mutex *lock);
/**
* ww_mutex_trylock - tries to acquire the w/w mutex without acquire context
* @lock: mutex to lock
*
* Trylocks a mutex without acquire context, so no deadlock detection is
* possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise.
*/
static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock)
{
return mutex_trylock(&lock->base);
}
/***
* ww_mutex_destroy - mark a w/w mutex unusable
* @lock: the mutex to be destroyed
*
* This function marks the mutex uninitialized, and any subsequent
* use of the mutex is forbidden. The mutex must not be locked when
* this function is called.
*/
static inline void ww_mutex_destroy(struct ww_mutex *lock)
{
mutex_destroy(&lock->base);
}
/**
* ww_mutex_is_locked - is the w/w mutex locked
* @lock: the mutex to be queried
*
* Returns 1 if the mutex is locked, 0 if unlocked.
*/
static inline bool ww_mutex_is_locked(struct ww_mutex *lock)
{
return mutex_is_locked(&lock->base);
}
extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
#ifndef CONFIG_HAVE_ARCH_MUTEX_CPU_RELAX
#define arch_mutex_cpu_relax() cpu_relax()
#endif
#endif