locking/mutex: Optimize __mutex_trylock_fast()
Use try_cmpxchg to avoid the pointless TEST instruction, and add the
(missing) atomic_long_try_cmpxchg*() wrappery.

On x86_64 this gives:

Before:

0000000000000710 <mutex_lock>:
 710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx
 717:   00 00
                        715: R_X86_64_32S       current_task
 719:   31 c0                   xor    %eax,%eax
 71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)
 720:   48 85 c0                test   %rax,%rax
 723:   75 02                   jne    727 <mutex_lock+0x17>
 725:   f3 c3                   repz retq
 727:   eb d7                   jmp    700 <__mutex_lock_slowpath>
 729:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)

After:

0000000000000710 <mutex_lock>:
 710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx
 717:   00 00
                        715: R_X86_64_32S       current_task
 719:   31 c0                   xor    %eax,%eax
 71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)
 720:   75 02                   jne    724 <mutex_lock+0x14>
 722:   f3 c3                   repz retq
 724:   eb da                   jmp    700 <__mutex_lock_slowpath>
 726:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
 72d:   00 00 00

On ARM64 this gives:

Before:

0000000000000638 <mutex_lock>:
 638:   d5384101        mrs     x1, sp_el0
 63c:   d2800002        mov     x2, #0x0
 640:   f9800011        prfm    pstl1strm, [x0]
 644:   c85ffc03        ldaxr   x3, [x0]
 648:   ca020064        eor     x4, x3, x2
 64c:   b5000064        cbnz    x4, 658 <mutex_lock+0x20>
 650:   c8047c01        stxr    w4, x1, [x0]
 654:   35ffff84        cbnz    w4, 644 <mutex_lock+0xc>
 658:   b40000c3        cbz     x3, 670 <mutex_lock+0x38>
 65c:   a9bf7bfd        stp     x29, x30, [sp,#-16]!
 660:   910003fd        mov     x29, sp
 664:   97ffffef        bl      620 <__mutex_lock_slowpath>
 668:   a8c17bfd        ldp     x29, x30, [sp],#16
 66c:   d65f03c0        ret
 670:   d65f03c0        ret

After:

0000000000000638 <mutex_lock>:
 638:   d5384101        mrs     x1, sp_el0
 63c:   d2800002        mov     x2, #0x0
 640:   f9800011        prfm    pstl1strm, [x0]
 644:   c85ffc03        ldaxr   x3, [x0]
 648:   ca020064        eor     x4, x3, x2
 64c:   b5000064        cbnz    x4, 658 <mutex_lock+0x20>
 650:   c8047c01        stxr    w4, x1, [x0]
 654:   35ffff84        cbnz    w4, 644 <mutex_lock+0xc>
 658:   b5000043        cbnz    x3, 660 <mutex_lock+0x28>
 65c:   d65f03c0        ret
 660:   a9bf7bfd        stp     x29, x30, [sp,#-16]!
 664:   910003fd        mov     x29, sp
 668:   97ffffee        bl      620 <__mutex_lock_slowpath>
 66c:   a8c17bfd        ldp     x29, x30, [sp],#16
 670:   d65f03c0        ret

Reported-by: Matthew Wilcox <mawilcox@microsoft.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Parent: de5b55c1d4
Commit: c427f69564
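The win comes from the shape of the try_cmpxchg API: a plain cmpxchg returns the old value, which the caller has to compare again (the extra TEST above), while try_cmpxchg returns a boolean and writes the observed value back through the "old" pointer, so the branch can use the flags already set by the CMPXCHG itself. Below is a minimal userspace sketch of the two shapes using the GCC/Clang atomic builtins; it is an illustration only, not the kernel's atomic_long_*() wrappers, and the memory ordering is simplified.

#include <stdbool.h>

/* cmpxchg style: returns the previous value; the caller compares it again. */
static bool trylock_cmpxchg(unsigned long *owner, unsigned long curr)
{
	/*
	 * Note: __sync_val_compare_and_swap() is a full barrier, not the
	 * acquire-ordered variant the kernel uses; ordering is not the
	 * point of this sketch.
	 */
	return __sync_val_compare_and_swap(owner, 0UL, curr) == 0UL;
}

/* try_cmpxchg style: returns success directly; *expected is updated on failure. */
static bool trylock_try_cmpxchg(unsigned long *owner, unsigned long curr)
{
	unsigned long zero = 0UL;

	return __atomic_compare_exchange_n(owner, &zero, curr, false,
					   __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}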
include/asm-generic/atomic-long.h:

@@ -25,6 +25,7 @@ typedef atomic64_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
 #define ATOMIC_LONG_PFX(x)	atomic64 ## x
+#define ATOMIC_LONG_TYPE	s64
 
 #else
 
@@ -32,6 +33,7 @@ typedef atomic_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
 #define ATOMIC_LONG_PFX(x)	atomic ## x
+#define ATOMIC_LONG_TYPE	int
 
 #endif
 
@@ -90,6 +92,21 @@ ATOMIC_LONG_ADD_SUB_OP(sub, _release)
 #define atomic_long_cmpxchg(l, old, new) \
 	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
 
+
+#define atomic_long_try_cmpxchg_relaxed(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg_acquire(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg_release(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+
 #define atomic_long_xchg_relaxed(v, new) \
 	(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 #define atomic_long_xchg_acquire(v, new) \
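For illustration, on a 64-bit kernel (where the hunks above define ATOMIC_LONG_PFX(x) as atomic64 ## x and ATOMIC_LONG_TYPE as s64) the acquire variant used by the mutex fast path below expands roughly to:

	atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr)
		/* expands to */
	atomic64_try_cmpxchg_acquire((atomic64_t *)(&lock->owner),
				     (s64 *)(&zero), (s64)(curr))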
kernel/locking/mutex.c:

@@ -139,8 +139,9 @@ static inline bool __mutex_trylock(struct mutex *lock)
 static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
 {
 	unsigned long curr = (unsigned long)current;
+	unsigned long zero = 0UL;
 
-	if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr))
+	if (atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr))
 		return true;
 
 	return false;
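For context, the <mutex_lock> disassembly quoted in the commit message is the non-debug fast-path wrapper that inlines the function patched above; in kernel/locking/mutex.c of this era it reads roughly as:

void __sched mutex_lock(struct mutex *lock)
{
	might_sleep();

	if (!__mutex_trylock_fast(lock))
		__mutex_lock_slowpath(lock);
}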