locking/barriers: Replace smp_cond_acquire() with smp_cond_load_acquire()
This new form allows using hardware assisted waiting. Some hardware (ARM64 and x86) allow monitoring an address for changes, so by providing a pointer we can use this to replace the cpu_relax() with hardware optimized methods in the future. Requested-by: Will Deacon <will.deacon@arm.com> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Родитель
245050c287
Коммит
1f03e8d291
|
@ -305,21 +305,34 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
|
||||||
})
|
})
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
|
* smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
|
||||||
|
* @ptr: pointer to the variable to wait on
|
||||||
* @cond: boolean expression to wait for
|
* @cond: boolean expression to wait for
|
||||||
*
|
*
|
||||||
* Equivalent to using smp_load_acquire() on the condition variable but employs
|
* Equivalent to using smp_load_acquire() on the condition variable but employs
|
||||||
* the control dependency of the wait to reduce the barrier on many platforms.
|
* the control dependency of the wait to reduce the barrier on many platforms.
|
||||||
*
|
*
|
||||||
|
* Due to C lacking lambda expressions we load the value of *ptr into a
|
||||||
|
* pre-named variable @VAL to be used in @cond.
|
||||||
|
*
|
||||||
* The control dependency provides a LOAD->STORE order, the additional RMB
|
* The control dependency provides a LOAD->STORE order, the additional RMB
|
||||||
* provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
|
* provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
|
||||||
* aka. ACQUIRE.
|
* aka. ACQUIRE.
|
||||||
*/
|
*/
|
||||||
#define smp_cond_acquire(cond) do { \
|
#ifndef smp_cond_load_acquire
|
||||||
while (!(cond)) \
|
#define smp_cond_load_acquire(ptr, cond_expr) ({ \
|
||||||
cpu_relax(); \
|
typeof(ptr) __PTR = (ptr); \
|
||||||
smp_rmb(); /* ctrl + rmb := acquire */ \
|
typeof(*ptr) VAL; \
|
||||||
} while (0)
|
for (;;) { \
|
||||||
|
VAL = READ_ONCE(*__PTR); \
|
||||||
|
if (cond_expr) \
|
||||||
|
break; \
|
||||||
|
cpu_relax(); \
|
||||||
|
} \
|
||||||
|
smp_rmb(); /* ctrl + rmb := acquire */ \
|
||||||
|
VAL; \
|
||||||
|
})
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* __KERNEL__ */
|
#endif /* __KERNEL__ */
|
||||||
|
|
||||||
|
|
|
@ -475,7 +475,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
||||||
* sequentiality; this is because not all clear_pending_set_locked()
|
* sequentiality; this is because not all clear_pending_set_locked()
|
||||||
* implementations imply full barriers.
|
* implementations imply full barriers.
|
||||||
*/
|
*/
|
||||||
smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
|
smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* take ownership and clear the pending bit.
|
* take ownership and clear the pending bit.
|
||||||
|
@ -562,7 +562,7 @@ queue:
|
||||||
*
|
*
|
||||||
* The PV pv_wait_head_or_lock function, if active, will acquire
|
* The PV pv_wait_head_or_lock function, if active, will acquire
|
||||||
* the lock and return a non-zero value. So we have to skip the
|
* the lock and return a non-zero value. So we have to skip the
|
||||||
* smp_cond_acquire() call. As the next PV queue head hasn't been
|
* smp_cond_load_acquire() call. As the next PV queue head hasn't been
|
||||||
* designated yet, there is no way for the locked value to become
|
* designated yet, there is no way for the locked value to become
|
||||||
* _Q_SLOW_VAL. So both the set_locked() and the
|
* _Q_SLOW_VAL. So both the set_locked() and the
|
||||||
* atomic_cmpxchg_relaxed() calls will be safe.
|
* atomic_cmpxchg_relaxed() calls will be safe.
|
||||||
|
@ -573,7 +573,7 @@ queue:
|
||||||
if ((val = pv_wait_head_or_lock(lock, node)))
|
if ((val = pv_wait_head_or_lock(lock, node)))
|
||||||
goto locked;
|
goto locked;
|
||||||
|
|
||||||
smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
|
val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
|
||||||
|
|
||||||
locked:
|
locked:
|
||||||
/*
|
/*
|
||||||
|
@ -593,9 +593,9 @@ locked:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
* The smp_cond_acquire() call above has provided the necessary
|
* The smp_cond_load_acquire() call above has provided the
|
||||||
* acquire semantics required for locking. At most two
|
* necessary acquire semantics required for locking. At most
|
||||||
* iterations of this loop may be ran.
|
* two iterations of this loop may be ran.
|
||||||
*/
|
*/
|
||||||
old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
|
old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
|
||||||
if (old == val)
|
if (old == val)
|
||||||
|
|
|
@ -1935,7 +1935,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
|
||||||
* chain to provide order. Instead we do:
|
* chain to provide order. Instead we do:
|
||||||
*
|
*
|
||||||
* 1) smp_store_release(X->on_cpu, 0)
|
* 1) smp_store_release(X->on_cpu, 0)
|
||||||
* 2) smp_cond_acquire(!X->on_cpu)
|
* 2) smp_cond_load_acquire(!X->on_cpu)
|
||||||
*
|
*
|
||||||
* Example:
|
* Example:
|
||||||
*
|
*
|
||||||
|
@ -1946,7 +1946,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
|
||||||
* sched-out X
|
* sched-out X
|
||||||
* smp_store_release(X->on_cpu, 0);
|
* smp_store_release(X->on_cpu, 0);
|
||||||
*
|
*
|
||||||
* smp_cond_acquire(!X->on_cpu);
|
* smp_cond_load_acquire(&X->on_cpu, !VAL);
|
||||||
* X->state = WAKING
|
* X->state = WAKING
|
||||||
* set_task_cpu(X,2)
|
* set_task_cpu(X,2)
|
||||||
*
|
*
|
||||||
|
@ -1972,7 +1972,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
|
||||||
* This means that any means of doing remote wakeups must order the CPU doing
|
* This means that any means of doing remote wakeups must order the CPU doing
|
||||||
* the wakeup against the CPU the task is going to end up running on. This,
|
* the wakeup against the CPU the task is going to end up running on. This,
|
||||||
* however, is already required for the regular Program-Order guarantee above,
|
* however, is already required for the regular Program-Order guarantee above,
|
||||||
* since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire).
|
* since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire).
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -2045,7 +2045,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||||
* This ensures that tasks getting woken will be fully ordered against
|
* This ensures that tasks getting woken will be fully ordered against
|
||||||
* their previous state and preserve Program Order.
|
* their previous state and preserve Program Order.
|
||||||
*/
|
*/
|
||||||
smp_cond_acquire(!p->on_cpu);
|
smp_cond_load_acquire(&p->on_cpu, !VAL);
|
||||||
|
|
||||||
p->sched_contributes_to_load = !!task_contributes_to_load(p);
|
p->sched_contributes_to_load = !!task_contributes_to_load(p);
|
||||||
p->state = TASK_WAKING;
|
p->state = TASK_WAKING;
|
||||||
|
|
|
@ -1113,7 +1113,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
|
||||||
* In particular, the load of prev->state in finish_task_switch() must
|
* In particular, the load of prev->state in finish_task_switch() must
|
||||||
* happen before this.
|
* happen before this.
|
||||||
*
|
*
|
||||||
* Pairs with the smp_cond_acquire() in try_to_wake_up().
|
* Pairs with the smp_cond_load_acquire() in try_to_wake_up().
|
||||||
*/
|
*/
|
||||||
smp_store_release(&prev->on_cpu, 0);
|
smp_store_release(&prev->on_cpu, 0);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -107,7 +107,7 @@ void __init call_function_init(void)
|
||||||
*/
|
*/
|
||||||
static __always_inline void csd_lock_wait(struct call_single_data *csd)
|
static __always_inline void csd_lock_wait(struct call_single_data *csd)
|
||||||
{
|
{
|
||||||
smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
|
smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void csd_lock(struct call_single_data *csd)
|
static __always_inline void csd_lock(struct call_single_data *csd)
|
||||||
|
|
Загрузка…
Ссылка в новой задаче