powerpc: Fix atomic_xxx_return barrier semantics
The Documentation/memory-barriers.txt document requires that atomic operations that return a value act as a memory barrier both before and after the actual atomic operation. Our current implementation doesn't guarantee this. More specifically, while a load following the isync can not be issued before stwcx. has completed, that completion doesn't architecturally means that the result of stwcx. is visible to other processors (or any previous stores for that matter) (typically, the other processors L1 caches can still hold the old value). This has caused an actual crash in RCU torture testing on Power 7 This fixes it by changing those atomic ops to use new macros instead of RELEASE/ACQUIRE barriers, called ATOMIC_ENTRY and ATMOIC_EXIT barriers, which are then defined respectively to lwsync and sync. I haven't had a chance to measure the performance impact (or rather what I measured with kernel compiles is in the noise, I yet have to find a more precise benchmark) Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
Родитель
a9a8f77ac7
Коммит
b97021f855
|
@ -49,13 +49,13 @@ static __inline__ int atomic_add_return(int a, atomic_t *v)
|
||||||
int t;
|
int t;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: lwarx %0,0,%2 # atomic_add_return\n\
|
"1: lwarx %0,0,%2 # atomic_add_return\n\
|
||||||
add %0,%1,%0\n"
|
add %0,%1,%0\n"
|
||||||
PPC405_ERR77(0,%2)
|
PPC405_ERR77(0,%2)
|
||||||
" stwcx. %0,0,%2 \n\
|
" stwcx. %0,0,%2 \n\
|
||||||
bne- 1b"
|
bne- 1b"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
: "=&r" (t)
|
: "=&r" (t)
|
||||||
: "r" (a), "r" (&v->counter)
|
: "r" (a), "r" (&v->counter)
|
||||||
: "cc", "memory");
|
: "cc", "memory");
|
||||||
|
@ -85,13 +85,13 @@ static __inline__ int atomic_sub_return(int a, atomic_t *v)
|
||||||
int t;
|
int t;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: lwarx %0,0,%2 # atomic_sub_return\n\
|
"1: lwarx %0,0,%2 # atomic_sub_return\n\
|
||||||
subf %0,%1,%0\n"
|
subf %0,%1,%0\n"
|
||||||
PPC405_ERR77(0,%2)
|
PPC405_ERR77(0,%2)
|
||||||
" stwcx. %0,0,%2 \n\
|
" stwcx. %0,0,%2 \n\
|
||||||
bne- 1b"
|
bne- 1b"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
: "=&r" (t)
|
: "=&r" (t)
|
||||||
: "r" (a), "r" (&v->counter)
|
: "r" (a), "r" (&v->counter)
|
||||||
: "cc", "memory");
|
: "cc", "memory");
|
||||||
|
@ -119,13 +119,13 @@ static __inline__ int atomic_inc_return(atomic_t *v)
|
||||||
int t;
|
int t;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: lwarx %0,0,%1 # atomic_inc_return\n\
|
"1: lwarx %0,0,%1 # atomic_inc_return\n\
|
||||||
addic %0,%0,1\n"
|
addic %0,%0,1\n"
|
||||||
PPC405_ERR77(0,%1)
|
PPC405_ERR77(0,%1)
|
||||||
" stwcx. %0,0,%1 \n\
|
" stwcx. %0,0,%1 \n\
|
||||||
bne- 1b"
|
bne- 1b"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
: "=&r" (t)
|
: "=&r" (t)
|
||||||
: "r" (&v->counter)
|
: "r" (&v->counter)
|
||||||
: "cc", "xer", "memory");
|
: "cc", "xer", "memory");
|
||||||
|
@ -163,13 +163,13 @@ static __inline__ int atomic_dec_return(atomic_t *v)
|
||||||
int t;
|
int t;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: lwarx %0,0,%1 # atomic_dec_return\n\
|
"1: lwarx %0,0,%1 # atomic_dec_return\n\
|
||||||
addic %0,%0,-1\n"
|
addic %0,%0,-1\n"
|
||||||
PPC405_ERR77(0,%1)
|
PPC405_ERR77(0,%1)
|
||||||
" stwcx. %0,0,%1\n\
|
" stwcx. %0,0,%1\n\
|
||||||
bne- 1b"
|
bne- 1b"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
: "=&r" (t)
|
: "=&r" (t)
|
||||||
: "r" (&v->counter)
|
: "r" (&v->counter)
|
||||||
: "cc", "xer", "memory");
|
: "cc", "xer", "memory");
|
||||||
|
@ -194,7 +194,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
|
||||||
int t;
|
int t;
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: lwarx %0,0,%1 # __atomic_add_unless\n\
|
"1: lwarx %0,0,%1 # __atomic_add_unless\n\
|
||||||
cmpw 0,%0,%3 \n\
|
cmpw 0,%0,%3 \n\
|
||||||
beq- 2f \n\
|
beq- 2f \n\
|
||||||
|
@ -202,7 +202,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
|
||||||
PPC405_ERR77(0,%2)
|
PPC405_ERR77(0,%2)
|
||||||
" stwcx. %0,0,%1 \n\
|
" stwcx. %0,0,%1 \n\
|
||||||
bne- 1b \n"
|
bne- 1b \n"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
" subf %0,%2,%0 \n\
|
" subf %0,%2,%0 \n\
|
||||||
2:"
|
2:"
|
||||||
: "=&r" (t)
|
: "=&r" (t)
|
||||||
|
@ -226,7 +226,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
|
||||||
int t;
|
int t;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: lwarx %0,0,%1 # atomic_dec_if_positive\n\
|
"1: lwarx %0,0,%1 # atomic_dec_if_positive\n\
|
||||||
cmpwi %0,1\n\
|
cmpwi %0,1\n\
|
||||||
addi %0,%0,-1\n\
|
addi %0,%0,-1\n\
|
||||||
|
@ -234,7 +234,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
|
||||||
PPC405_ERR77(0,%1)
|
PPC405_ERR77(0,%1)
|
||||||
" stwcx. %0,0,%1\n\
|
" stwcx. %0,0,%1\n\
|
||||||
bne- 1b"
|
bne- 1b"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
"\n\
|
"\n\
|
||||||
2:" : "=&b" (t)
|
2:" : "=&b" (t)
|
||||||
: "r" (&v->counter)
|
: "r" (&v->counter)
|
||||||
|
@ -285,12 +285,12 @@ static __inline__ long atomic64_add_return(long a, atomic64_t *v)
|
||||||
long t;
|
long t;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: ldarx %0,0,%2 # atomic64_add_return\n\
|
"1: ldarx %0,0,%2 # atomic64_add_return\n\
|
||||||
add %0,%1,%0\n\
|
add %0,%1,%0\n\
|
||||||
stdcx. %0,0,%2 \n\
|
stdcx. %0,0,%2 \n\
|
||||||
bne- 1b"
|
bne- 1b"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
: "=&r" (t)
|
: "=&r" (t)
|
||||||
: "r" (a), "r" (&v->counter)
|
: "r" (a), "r" (&v->counter)
|
||||||
: "cc", "memory");
|
: "cc", "memory");
|
||||||
|
@ -319,12 +319,12 @@ static __inline__ long atomic64_sub_return(long a, atomic64_t *v)
|
||||||
long t;
|
long t;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: ldarx %0,0,%2 # atomic64_sub_return\n\
|
"1: ldarx %0,0,%2 # atomic64_sub_return\n\
|
||||||
subf %0,%1,%0\n\
|
subf %0,%1,%0\n\
|
||||||
stdcx. %0,0,%2 \n\
|
stdcx. %0,0,%2 \n\
|
||||||
bne- 1b"
|
bne- 1b"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
: "=&r" (t)
|
: "=&r" (t)
|
||||||
: "r" (a), "r" (&v->counter)
|
: "r" (a), "r" (&v->counter)
|
||||||
: "cc", "memory");
|
: "cc", "memory");
|
||||||
|
@ -351,12 +351,12 @@ static __inline__ long atomic64_inc_return(atomic64_t *v)
|
||||||
long t;
|
long t;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: ldarx %0,0,%1 # atomic64_inc_return\n\
|
"1: ldarx %0,0,%1 # atomic64_inc_return\n\
|
||||||
addic %0,%0,1\n\
|
addic %0,%0,1\n\
|
||||||
stdcx. %0,0,%1 \n\
|
stdcx. %0,0,%1 \n\
|
||||||
bne- 1b"
|
bne- 1b"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
: "=&r" (t)
|
: "=&r" (t)
|
||||||
: "r" (&v->counter)
|
: "r" (&v->counter)
|
||||||
: "cc", "xer", "memory");
|
: "cc", "xer", "memory");
|
||||||
|
@ -393,12 +393,12 @@ static __inline__ long atomic64_dec_return(atomic64_t *v)
|
||||||
long t;
|
long t;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: ldarx %0,0,%1 # atomic64_dec_return\n\
|
"1: ldarx %0,0,%1 # atomic64_dec_return\n\
|
||||||
addic %0,%0,-1\n\
|
addic %0,%0,-1\n\
|
||||||
stdcx. %0,0,%1\n\
|
stdcx. %0,0,%1\n\
|
||||||
bne- 1b"
|
bne- 1b"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
: "=&r" (t)
|
: "=&r" (t)
|
||||||
: "r" (&v->counter)
|
: "r" (&v->counter)
|
||||||
: "cc", "xer", "memory");
|
: "cc", "xer", "memory");
|
||||||
|
@ -418,13 +418,13 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
|
||||||
long t;
|
long t;
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: ldarx %0,0,%1 # atomic64_dec_if_positive\n\
|
"1: ldarx %0,0,%1 # atomic64_dec_if_positive\n\
|
||||||
addic. %0,%0,-1\n\
|
addic. %0,%0,-1\n\
|
||||||
blt- 2f\n\
|
blt- 2f\n\
|
||||||
stdcx. %0,0,%1\n\
|
stdcx. %0,0,%1\n\
|
||||||
bne- 1b"
|
bne- 1b"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
"\n\
|
"\n\
|
||||||
2:" : "=&r" (t)
|
2:" : "=&r" (t)
|
||||||
: "r" (&v->counter)
|
: "r" (&v->counter)
|
||||||
|
@ -450,14 +450,14 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
|
||||||
long t;
|
long t;
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: ldarx %0,0,%1 # __atomic_add_unless\n\
|
"1: ldarx %0,0,%1 # __atomic_add_unless\n\
|
||||||
cmpd 0,%0,%3 \n\
|
cmpd 0,%0,%3 \n\
|
||||||
beq- 2f \n\
|
beq- 2f \n\
|
||||||
add %0,%2,%0 \n"
|
add %0,%2,%0 \n"
|
||||||
" stdcx. %0,0,%1 \n\
|
" stdcx. %0,0,%1 \n\
|
||||||
bne- 1b \n"
|
bne- 1b \n"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
" subf %0,%2,%0 \n\
|
" subf %0,%2,%0 \n\
|
||||||
2:"
|
2:"
|
||||||
: "=&r" (t)
|
: "=&r" (t)
|
||||||
|
|
|
@ -124,14 +124,14 @@ static __inline__ unsigned long fn( \
|
||||||
return (old & mask); \
|
return (old & mask); \
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_TESTOP(test_and_set_bits, or, PPC_RELEASE_BARRIER,
|
DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER,
|
||||||
PPC_ACQUIRE_BARRIER, 0)
|
PPC_ATOMIC_EXIT_BARRIER, 0)
|
||||||
DEFINE_TESTOP(test_and_set_bits_lock, or, "",
|
DEFINE_TESTOP(test_and_set_bits_lock, or, "",
|
||||||
PPC_ACQUIRE_BARRIER, 1)
|
PPC_ACQUIRE_BARRIER, 1)
|
||||||
DEFINE_TESTOP(test_and_clear_bits, andc, PPC_RELEASE_BARRIER,
|
DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER,
|
||||||
PPC_ACQUIRE_BARRIER, 0)
|
PPC_ATOMIC_EXIT_BARRIER, 0)
|
||||||
DEFINE_TESTOP(test_and_change_bits, xor, PPC_RELEASE_BARRIER,
|
DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
|
||||||
PPC_ACQUIRE_BARRIER, 0)
|
PPC_ATOMIC_EXIT_BARRIER, 0)
|
||||||
|
|
||||||
static __inline__ int test_and_set_bit(unsigned long nr,
|
static __inline__ int test_and_set_bit(unsigned long nr,
|
||||||
volatile unsigned long *addr)
|
volatile unsigned long *addr)
|
||||||
|
|
|
@ -11,12 +11,13 @@
|
||||||
|
|
||||||
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
|
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
|
||||||
__asm__ __volatile ( \
|
__asm__ __volatile ( \
|
||||||
PPC_RELEASE_BARRIER \
|
PPC_ATOMIC_ENTRY_BARRIER \
|
||||||
"1: lwarx %0,0,%2\n" \
|
"1: lwarx %0,0,%2\n" \
|
||||||
insn \
|
insn \
|
||||||
PPC405_ERR77(0, %2) \
|
PPC405_ERR77(0, %2) \
|
||||||
"2: stwcx. %1,0,%2\n" \
|
"2: stwcx. %1,0,%2\n" \
|
||||||
"bne- 1b\n" \
|
"bne- 1b\n" \
|
||||||
|
PPC_ATOMIC_EXIT_BARRIER \
|
||||||
"li %1,0\n" \
|
"li %1,0\n" \
|
||||||
"3: .section .fixup,\"ax\"\n" \
|
"3: .section .fixup,\"ax\"\n" \
|
||||||
"4: li %1,%3\n" \
|
"4: li %1,%3\n" \
|
||||||
|
@ -92,14 +93,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
|
||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
|
|
||||||
__asm__ __volatile__ (
|
__asm__ __volatile__ (
|
||||||
PPC_RELEASE_BARRIER
|
PPC_ATOMIC_ENTRY_BARRIER
|
||||||
"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
|
"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
|
||||||
cmpw 0,%1,%4\n\
|
cmpw 0,%1,%4\n\
|
||||||
bne- 3f\n"
|
bne- 3f\n"
|
||||||
PPC405_ERR77(0,%3)
|
PPC405_ERR77(0,%3)
|
||||||
"2: stwcx. %5,0,%3\n\
|
"2: stwcx. %5,0,%3\n\
|
||||||
bne- 1b\n"
|
bne- 1b\n"
|
||||||
PPC_ACQUIRE_BARRIER
|
PPC_ATOMIC_EXIT_BARRIER
|
||||||
"3: .section .fixup,\"ax\"\n\
|
"3: .section .fixup,\"ax\"\n\
|
||||||
4: li %0,%6\n\
|
4: li %0,%6\n\
|
||||||
b 3b\n\
|
b 3b\n\
|
||||||
|
|
|
@ -42,11 +42,15 @@ static inline void isync(void)
|
||||||
START_LWSYNC_SECTION(97); \
|
START_LWSYNC_SECTION(97); \
|
||||||
isync; \
|
isync; \
|
||||||
MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
|
MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
|
||||||
#define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
|
#define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
|
||||||
#define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n"
|
#define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n"
|
||||||
|
#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n"
|
||||||
|
#define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n"
|
||||||
#else
|
#else
|
||||||
#define PPC_ACQUIRE_BARRIER
|
#define PPC_ACQUIRE_BARRIER
|
||||||
#define PPC_RELEASE_BARRIER
|
#define PPC_RELEASE_BARRIER
|
||||||
|
#define PPC_ATOMIC_ENTRY_BARRIER
|
||||||
|
#define PPC_ATOMIC_EXIT_BARRIER
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
|
|
Загрузка…
Ссылка в новой задаче