2013-08-29 05:13:26 +04:00
|
|
|
#ifndef __LINUX_LOCKREF_H
|
|
|
|
#define __LINUX_LOCKREF_H
|
|
|
|
|
|
|
|
/*
 * Locked reference counts.
 *
 * These are different from just plain atomic refcounts in that they
 * are atomic with respect to the spinlock that goes with them.  In
 * particular, there can be implementations that don't actually get
 * the spinlock for the common decrement/increment operations, but they
 * still have to check that the operation is done semantically as if
 * the spinlock had been taken (using a cmpxchg operation that covers
 * both the lock and the count word, or using memory transactions, for
 * example).
 */
|
|
|
|
|
|
|
|
#include <linux/spinlock.h>
|
2013-11-15 02:31:54 +04:00
|
|
|
#include <generated/bounds.h>
|
|
|
|
|
|
|
|
/*
 * Evaluates to non-zero when the lockless, cmpxchg-based lockref update
 * paths may be used.  All three conditions must hold:
 *
 *  - the architecture selected ARCH_USE_CMPXCHG_LOCKREF,
 *  - the kernel is built for SMP,
 *  - SPINLOCK_SIZE is at most 4, so that the lock and the int count fit
 *    together in one aligned 64-bit word.
 *
 * NOTE(review): SPINLOCK_SIZE is presumably the byte size of spinlock_t as
 * provided by <generated/bounds.h> -- confirm.
 *
 * Must stay usable in #if, so it is built only from preprocessor-evaluable
 * terms.
 */
#define USE_CMPXCHG_LOCKREF					\
	(IS_ENABLED(CONFIG_ARCH_USE_CMPXCHG_LOCKREF) &&		\
	 IS_ENABLED(CONFIG_SMP) &&				\
	 SPINLOCK_SIZE <= 4)
|
2013-08-29 05:13:26 +04:00
|
|
|
|
|
|
|
struct lockref {
|
lockref: implement lockless reference count updates using cmpxchg()
Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop. This is semantically identical to doing the reference
count update protected by the lock, but avoids the "wait for lock"
contention that you get when accesses to the reference count are
contended.
Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock means
that the lockless updates can never happen while somebody else holds the
spinlock.
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.
The lockref structure, in contrast, really is a *locked* reference
count. If you hold the spinlock, the reference count will be stable and
you can modify the reference count without using atomics, because even
the lockless updates will see and respect the state of the lock.
In order to enable the cmpxchg lockless code, the architecture needs to
do three things:
(1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
in an aligned u64, and have a "cmpxchg()" implementation that works
on such a u64 data type.
(2) define a helper function to test for a spinlock being unlocked
("arch_spin_value_unlocked()")
(3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
Kconfig file.
This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-09-02 23:12:15 +04:00
|
|
|
union {
|
2013-11-15 02:31:54 +04:00
|
|
|
#if USE_CMPXCHG_LOCKREF
|
lockref: implement lockless reference count updates using cmpxchg()
Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop. This is semantically identical to doing the reference
count update protected by the lock, but avoids the "wait for lock"
contention that you get when accesses to the reference count are
contended.
Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock means
that the lockless updates can never happen while somebody else holds the
spinlock.
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.
The lockref structure, in contrast, really is a *locked* reference
count. If you hold the spinlock, the reference count will be stable and
you can modify the reference count without using atomics, because even
the lockless updates will see and respect the state of the lock.
In order to enable the cmpxchg lockless code, the architecture needs to
do three things:
(1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
in an aligned u64, and have a "cmpxchg()" implementation that works
on such a u64 data type.
(2) define a helper function to test for a spinlock being unlocked
("arch_spin_value_unlocked()")
(3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
Kconfig file.
This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-09-02 23:12:15 +04:00
|
|
|
aligned_u64 lock_count;
|
|
|
|
#endif
|
|
|
|
struct {
|
|
|
|
spinlock_t lock;
|
2015-01-10 02:19:03 +03:00
|
|
|
int count;
|
lockref: implement lockless reference count updates using cmpxchg()
Instead of taking the spinlock, the lockless versions atomically check
that the lock is not taken, and do the reference count update using a
cmpxchg() loop. This is semantically identical to doing the reference
count update protected by the lock, but avoids the "wait for lock"
contention that you get when accesses to the reference count are
contended.
Note that a "lockref" is absolutely _not_ equivalent to an atomic_t.
Even when the lockref reference counts are updated atomically with
cmpxchg, the fact that they also verify the state of the spinlock means
that the lockless updates can never happen while somebody else holds the
spinlock.
So while "lockref_put_or_lock()" looks a lot like just another name for
"atomic_dec_and_lock()", and both optimize to lockless updates, they are
fundamentally different: the decrement done by atomic_dec_and_lock() is
truly independent of any lock (as long as it doesn't decrement to zero),
so a locked region can still see the count change.
The lockref structure, in contrast, really is a *locked* reference
count. If you hold the spinlock, the reference count will be stable and
you can modify the reference count without using atomics, because even
the lockless updates will see and respect the state of the lock.
In order to enable the cmpxchg lockless code, the architecture needs to
do three things:
(1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit
in an aligned u64, and have a "cmpxchg()" implementation that works
on such a u64 data type.
(2) define a helper function to test for a spinlock being unlocked
("arch_spin_value_unlocked()")
(3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its
Kconfig file.
This enables it for x86-64 (but not 32-bit, we'd need to make sure
cmpxchg() turns into the proper cmpxchg8b in order to enable it for
32-bit mode).
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-09-02 23:12:15 +04:00
|
|
|
};
|
|
|
|
};
|
2013-08-29 05:13:26 +04:00
|
|
|
};
|
|
|
|
|
2013-09-02 22:58:20 +04:00
|
|
|
/*
 * Basic get/put operations on a lockref.
 *
 * NOTE(review): only the declarations are visible here; the one-line
 * summaries below are inferred from the function names and the history
 * notes in this file, so confirm them against the implementation.
 *
 * lockref_get()          - take a reference.
 * lockref_put_return()   - drop a reference and return an int result
 *                          (presumably the new count or an error -- confirm).
 * lockref_get_not_zero() - presumably takes a reference only when the
 *                          count is not zero -- confirm.
 * lockref_get_or_lock()  - either takes a new reference on an object that
 *                          already had references, or takes the lock so the
 *                          caller can validate the object and increment the
 *                          count under the lock.
 * lockref_put_or_lock()  - counterpart for dropping a reference; resembles
 *                          atomic_dec_and_lock(), except that the lockless
 *                          update never happens while the lock is held.
 */
void lockref_get(struct lockref *lockref);
int lockref_put_return(struct lockref *lockref);
int lockref_get_not_zero(struct lockref *lockref);
int lockref_get_or_lock(struct lockref *lockref);
int lockref_put_or_lock(struct lockref *lockref);
|
2013-08-29 05:13:26 +04:00
|
|
|
|
lockref: add ability to mark lockrefs "dead"
The only actual current lockref user (dcache) uses zero reference counts
even for perfectly live dentries, because it's a cache: there may not be
any users, but that doesn't mean that we want to throw away the dentry.
At the same time, the dentry cache does have a notion of a truly "dead"
dentry that we must not even increment the reference count of, because
we have pruned it and it is not valid.
Currently that distinction is not visible in the lockref itself, and the
dentry cache validation uses "lockref_get_or_lock()" to either get a new
reference to a dentry that already had existing references (and thus
cannot be dead), or get the dentry lock so that we can then verify the
dentry and increment the reference count under the lock if that
verification was successful.
That's all somewhat complicated.
This adds the concept of being "dead" to the lockref itself, by simply
using a count that is negative. This allows a usage scenario where we
can increment the refcount of a dentry without having to validate it,
and pushing the special "we killed it" case into the lockref code.
The dentry code itself doesn't actually use this yet, and it's probably
too late in the merge window to do that code (the dentry_kill() code
with its "should I decrement the count" logic really is pretty complex
code), but let's introduce the concept at the lockref level now.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-09-08 02:49:18 +04:00
|
|
|
/*
 * "Dead" lockrefs.
 *
 * A lockref is made dead by giving it a negative count.  This lets a user
 * try to take a reference without first validating the object, with the
 * "it has been killed" case handled inside the lockref code.
 *
 * lockref_mark_dead()    - mark the lockref as dead.
 *                          NOTE(review): presumably must be called with the
 *                          lock held -- confirm against the implementation.
 * lockref_get_not_dead() - take a reference unless the lockref is dead.
 *                          NOTE(review): return convention (non-zero on
 *                          success?) is not visible here -- confirm.
 */
void lockref_mark_dead(struct lockref *lockref);
int lockref_get_not_dead(struct lockref *lockref);
|
|
|
|
|
2013-10-15 18:18:08 +04:00
|
|
|
/* Must be called under spinlock for reliable results */
|
|
|
|
static inline int __lockref_is_dead(const struct lockref *l)
|
|
|
|
{
|
|
|
|
return ((int)l->count < 0);
|
|
|
|
}
|
|
|
|
|
2013-08-29 05:13:26 +04:00
|
|
|
#endif /* __LINUX_LOCKREF_H */
|