Bug 1784018 - Remove deprecated OSSpinLocks r=glandium

On macOS versions prior to 10.15 os_unfair_locks cannot spin in kernel-space,
which degrades performance significantly. To work around this we spin in
user-space like OSSpinLock does, for a bounded number of attempts (the code
uses a retry count of 100, which is what OSSpinLock uses on ARM, rather than
the 1000 it uses on x86-64), invoking the x86-specific pause instruction
between locking attempts to avoid starving a thread that might be running on
the same physical core.

Differential Revision: https://phabricator.services.mozilla.com/D154205
This commit is contained in:
Gabriele Svelto 2022-08-24 09:18:57 +00:00
Родитель 1a8fbf7ec2
Коммит 44cbaf4248
2 изменённых файлов: 47 добавлений и 39 удалений

Просмотреть файл

@ -7,7 +7,7 @@
#if defined(XP_DARWIN)
// static
bool Mutex::UseUnfairLocks() {
bool Mutex::SpinInKernelSpace() {
if (__builtin_available(macOS 10.15, *)) {
return true;
}
@ -16,6 +16,6 @@ bool Mutex::UseUnfairLocks() {
}
// static
bool Mutex::gFallbackToOSSpinLock = !UseUnfairLocks();
bool Mutex::gSpinInKernelSpace = SpinInKernelSpace();
#endif // defined(XP_DARWIN)

Просмотреть файл

@ -10,7 +10,7 @@
#if defined(XP_WIN)
# include <windows.h>
#elif defined(XP_DARWIN)
# include <libkern/OSAtomic.h>
# include "mozilla/Assertions.h"
# include <os/lock.h>
#else
# include <pthread.h>
@ -32,12 +32,6 @@ OS_UNFAIR_LOCK_AVAILABILITY
OS_EXPORT OS_NOTHROW OS_NONNULL_ALL void os_unfair_lock_lock_with_options(
os_unfair_lock_t lock, os_unfair_lock_options_t options);
}
static_assert(OS_UNFAIR_LOCK_INIT._os_unfair_lock_opaque == OS_SPINLOCK_INIT,
"OS_UNFAIR_LOCK_INIT and OS_SPINLOCK_INIT have the same "
"value");
static_assert(sizeof(os_unfair_lock) == sizeof(OSSpinLock),
"os_unfair_lock and OSSpinLock are the same size");
#endif // defined(XP_DARWIN)
// Mutexes based on spinlocks. We can't use normal pthread spinlocks in all
@ -48,10 +42,7 @@ struct MOZ_CAPABILITY Mutex {
#if defined(XP_WIN)
CRITICAL_SECTION mMutex;
#elif defined(XP_DARWIN)
union {
os_unfair_lock mUnfairLock;
OSSpinLock mSpinLock;
} mMutex;
os_unfair_lock mMutex;
#else
pthread_mutex_t mMutex;
#endif
@ -63,10 +54,7 @@ struct MOZ_CAPABILITY Mutex {
return false;
}
#elif defined(XP_DARWIN)
// The hack below works because both OS_UNFAIR_LOCK_INIT and
// OS_SPINLOCK_INIT initialize the lock to 0 and in both case it's a 32-bit
// integer.
mMutex.mUnfairLock = OS_UNFAIR_LOCK_INIT;
mMutex = OS_UNFAIR_LOCK_INIT;
#elif defined(XP_LINUX) && !defined(ANDROID)
pthread_mutexattr_t attr;
if (pthread_mutexattr_init(&attr) != 0) {
@ -90,19 +78,46 @@ struct MOZ_CAPABILITY Mutex {
#if defined(XP_WIN)
EnterCriticalSection(&mMutex);
#elif defined(XP_DARWIN)
if (Mutex::gFallbackToOSSpinLock) {
OSSpinLockLock(&mMutex.mSpinLock);
} else {
// We rely on a non-public function to improve performance here.
// The OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION flag informs the kernel that
// the calling thread is able to make progress even in absence of actions
// from other threads and the OS_UNFAIR_LOCK_ADAPTIVE_SPIN one causes the
// kernel to spin on a contested lock if the owning thread is running on
// the same physical core (presumably only on x86 CPUs given that ARM
// macs don't have cores capable of SMT).
// We rely on a non-public function to improve performance here.
// The OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION flag informs the kernel that
// the calling thread is able to make progress even in absence of actions
// from other threads and the OS_UNFAIR_LOCK_ADAPTIVE_SPIN one causes the
// kernel to spin on a contested lock if the owning thread is running on
// the same physical core (presumably only on x86 CPUs given that ARM
// macs don't have cores capable of SMT). On versions of macOS older than
// 10.15 the latter is not available and we spin in userspace instead.
if (Mutex::gSpinInKernelSpace) {
os_unfair_lock_lock_with_options(
&mMutex.mUnfairLock,
&mMutex,
OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION | OS_UNFAIR_LOCK_ADAPTIVE_SPIN);
} else {
# if defined(__x86_64__)
// On older versions of macOS (10.14 and older) the
// `OS_UNFAIR_LOCK_ADAPTIVE_SPIN` flag is not supported by the kernel,
// so we spin in user-space instead, like `OSSpinLock` does:
// https://github.com/apple/darwin-libplatform/blob/215b09856ab5765b7462a91be7076183076600df/src/os/lock.c#L183-L198
// Note that `OSSpinLock` uses 1000 iterations on x86-64:
// https://github.com/apple/darwin-libplatform/blob/215b09856ab5765b7462a91be7076183076600df/src/os/lock.c#L93
// ...but we only use 100 like it does on ARM:
// https://github.com/apple/darwin-libplatform/blob/215b09856ab5765b7462a91be7076183076600df/src/os/lock.c#L90
// We choose this value because it yields the same results in our
// benchmarks but is less likely to have detrimental effects caused by
// excessive spinning.
uint32_t retries = 100;
do {
if (os_unfair_lock_trylock(&mMutex)) {
return;
}
__asm__ __volatile__("pause");
} while (retries--);
os_unfair_lock_lock_with_options(&mMutex,
OS_UNFAIR_LOCK_DATA_SYNCHRONIZATION);
# else
MOZ_CRASH("User-space spin-locks should never be used on ARM");
# endif // defined(__x86_64__)
}
#else
pthread_mutex_lock(&mMutex);
@ -113,19 +128,15 @@ struct MOZ_CAPABILITY Mutex {
#if defined(XP_WIN)
LeaveCriticalSection(&mMutex);
#elif defined(XP_DARWIN)
if (Mutex::gFallbackToOSSpinLock) {
OSSpinLockUnlock(&mMutex.mSpinLock);
} else {
os_unfair_lock_unlock(&mMutex.mUnfairLock);
}
os_unfair_lock_unlock(&mMutex);
#else
pthread_mutex_unlock(&mMutex);
#endif
}
#if defined(XP_DARWIN)
static bool UseUnfairLocks();
static bool gFallbackToOSSpinLock;
static bool SpinInKernelSpace();
static bool gSpinInKernelSpace;
#endif // XP_DARWIN
};
@ -157,10 +168,7 @@ struct MOZ_CAPABILITY StaticMutex {
typedef Mutex StaticMutex;
# if defined(XP_DARWIN)
// The hack below works because both OS_UNFAIR_LOCK_INIT and OS_SPINLOCK_INIT
// initialize the lock to 0 and in both case it's a 32-bit integer.
# define STATIC_MUTEX_INIT \
{ .mUnfairLock = OS_UNFAIR_LOCK_INIT }
# define STATIC_MUTEX_INIT OS_UNFAIR_LOCK_INIT
# elif defined(XP_LINUX) && !defined(ANDROID)
# define STATIC_MUTEX_INIT PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
# else