ARC: atomics: implement relaxed variants
The current ARC fetch/return atomics provide fully ordered semantics only with 2 full barriers around the operation. Instead implement them as relaxed variants without any barriers and rely on generic code to generate the fully-ordered, acquire and release variants by adding the appropriate full barriers. This helps elide some extra barriers in case of acquire/release/relaxed calls. bloat-o-meter for hsdk defconfig shows codegen improvements, although the numbers below are inflated due to unrelated inlining heuristic changes | bloat-o-meter vmlinux-643babe34fd7-non-relaxed vmlinux-45aa05cb44d7-relaxed | add/remove: 2/5 grow/shrink: 42/1222 up/down: 4158/-14312 (-10154) | Function old new delta | .. | sys_renameat 462 476 +14 | ip_mc_inc_group 424 436 +12 | do_read_cache_page 1882 1894 +12 | .. | refcount_dec_and_mutex_lock 254 250 -4 | refcount_dec_and_lock_irqsave 258 254 -4 | refcount_dec_and_lock 254 250 -4 | .. | tcp_v6_route_req 246 238 -8 | tcp_v4_destroy_sock 286 278 -8 | tcp_twsk_unique 352 344 -8 Link: https://lore.kernel.org/r/20180830144344.GW24142@hirez.programming.kicks-ass.net Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Vineet Gupta <vgupta@kernel.org>
This commit is contained in:
Родитель
7e8f8cbb43
Коммит
b64be68369
|
@ -22,16 +22,10 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \
|
|||
} \
|
||||
|
||||
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
|
||||
static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
|
||||
static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned int val; \
|
||||
\
|
||||
/* \
|
||||
* Explicit full memory barrier needed before/after as \
|
||||
* LLOCK/SCOND themselves don't provide any such semantics \
|
||||
*/ \
|
||||
smp_mb(); \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %[val], [%[ctr]] \n" \
|
||||
" " #asm_op " %[val], %[val], %[i] \n" \
|
||||
|
@ -42,22 +36,17 @@ static inline int arch_atomic_##op##_return(int i, atomic_t *v) \
|
|||
[i] "ir" (i) \
|
||||
: "cc"); \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
return val; \
|
||||
}
|
||||
|
||||
#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
|
||||
#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
|
||||
|
||||
#define ATOMIC_FETCH_OP(op, c_op, asm_op) \
|
||||
static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
|
||||
static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
|
||||
{ \
|
||||
unsigned int val, orig; \
|
||||
\
|
||||
/* \
|
||||
* Explicit full memory barrier needed before/after as \
|
||||
* LLOCK/SCOND themselves don't provide any such semantics \
|
||||
*/ \
|
||||
smp_mb(); \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: llock %[orig], [%[ctr]] \n" \
|
||||
" " #asm_op " %[val], %[orig], %[i] \n" \
|
||||
|
@ -69,11 +58,17 @@ static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \
|
|||
[i] "ir" (i) \
|
||||
: "cc"); \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
return orig; \
|
||||
}
|
||||
|
||||
#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
|
||||
#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
|
||||
|
||||
#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
|
||||
#define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed
|
||||
#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
|
||||
#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
|
||||
|
||||
#define ATOMIC_OPS(op, c_op, asm_op) \
|
||||
ATOMIC_OP(op, c_op, asm_op) \
|
||||
ATOMIC_OP_RETURN(op, c_op, asm_op) \
|
||||
|
@ -93,7 +88,6 @@ ATOMIC_OPS(or, |=, or)
|
|||
ATOMIC_OPS(xor, ^=, xor)
|
||||
|
||||
#define arch_atomic_andnot arch_atomic_andnot
|
||||
#define arch_atomic_fetch_andnot arch_atomic_fetch_andnot
|
||||
|
||||
#undef ATOMIC_OPS
|
||||
#undef ATOMIC_FETCH_OP
|
||||
|
|
|
@ -64,12 +64,10 @@ static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \
|
|||
} \
|
||||
|
||||
#define ATOMIC64_OP_RETURN(op, op1, op2) \
|
||||
static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \
|
||||
static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
|
||||
{ \
|
||||
s64 val; \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: \n" \
|
||||
" llockd %0, [%1] \n" \
|
||||
|
@ -81,18 +79,17 @@ static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \
|
|||
: "r"(&v->counter), "ir"(a) \
|
||||
: "cc"); /* memory clobber comes from smp_mb() */ \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
return val; \
|
||||
}
|
||||
|
||||
#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
|
||||
#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
|
||||
|
||||
#define ATOMIC64_FETCH_OP(op, op1, op2) \
|
||||
static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \
|
||||
static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
|
||||
{ \
|
||||
s64 val, orig; \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
__asm__ __volatile__( \
|
||||
"1: \n" \
|
||||
" llockd %0, [%2] \n" \
|
||||
|
@ -104,11 +101,17 @@ static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \
|
|||
: "r"(&v->counter), "ir"(a) \
|
||||
: "cc"); /* memory clobber comes from smp_mb() */ \
|
||||
\
|
||||
smp_mb(); \
|
||||
\
|
||||
return orig; \
|
||||
}
|
||||
|
||||
#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
|
||||
#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
|
||||
|
||||
#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
|
||||
#define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed
|
||||
#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
|
||||
#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
|
||||
|
||||
#define ATOMIC64_OPS(op, op1, op2) \
|
||||
ATOMIC64_OP(op, op1, op2) \
|
||||
ATOMIC64_OP_RETURN(op, op1, op2) \
|
||||
|
@ -128,7 +131,6 @@ ATOMIC64_OPS(or, or, or)
|
|||
ATOMIC64_OPS(xor, xor, xor)
|
||||
|
||||
#define arch_atomic64_andnot arch_atomic64_andnot
|
||||
#define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot
|
||||
|
||||
#undef ATOMIC64_OPS
|
||||
#undef ATOMIC64_FETCH_OP
|
||||
|
|
Загрузка…
Ссылка в новой задаче