/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* * Implements (almost always) lock-free atomic operations. The operations here * are a subset of that which can be found in C++11's header, with a * different API to enforce consistent memory ordering constraints. * * Anyone caught using |volatile| for inter-thread memory safety needs to be * sent a copy of this header and the C++11 standard. */ #ifndef mozilla_Atomics_h #define mozilla_Atomics_h #include "mozilla/Assertions.h" #include "mozilla/Attributes.h" #include "mozilla/Compiler.h" #include "mozilla/TypeTraits.h" #include /* * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK * does not have . So be sure to check for support * along with C++0x support. */ #if defined(__clang__) || defined(__GNUC__) /* * Clang doesn't like from libstdc++ before 4.7 due to the * loose typing of the atomic builtins. GCC 4.5 and 4.6 lacks inline * definitions for unspecialized std::atomic and causes linking errors. * Therefore, we require at least 4.7.0 for using libstdc++. */ # if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0) # define MOZ_HAVE_CXX11_ATOMICS # elif MOZ_USING_LIBCXX # define MOZ_HAVE_CXX11_ATOMICS # endif #elif defined(_MSC_VER) && _MSC_VER >= 1700 # if defined(DEBUG) /* * Provide our own failure code since we're having trouble linking to * std::_Debug_message (bug 982310). */ # define _INVALID_MEMORY_ORDER MOZ_CRASH("Invalid memory order") # endif # define MOZ_HAVE_CXX11_ATOMICS #endif namespace mozilla { /** * An enum of memory ordering possibilities for atomics. * * Memory ordering is the observable state of distinct values in memory. * (It's a separate concept from atomicity, which concerns whether an * operation can ever be observed in an intermediate state. Don't * conflate the two!) Given a sequence of operations in source code on * memory, it is *not* always the case that, at all times and on all * cores, those operations will appear to have occurred in that exact * sequence. First, the compiler might reorder that sequence, if it * thinks another ordering will be more efficient. Second, the CPU may * not expose so consistent a view of memory. CPUs will often perform * their own instruction reordering, above and beyond that performed by * the compiler. And each core has its own memory caches, and accesses * (reads and writes both) to "memory" may only resolve to out-of-date * cache entries -- not to the "most recently" performed operation in * some global sense. Any access to a value that may be used by * multiple threads, potentially across multiple cores, must therefore * have a memory ordering imposed on it, for all code on all * threads/cores to have a sufficiently coherent worldview. * * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and * http://en.cppreference.com/w/cpp/atomic/memory_order go into more * detail on all this, including examples of how each mode works. * * Note that for simplicity and practicality, not all of the modes in * C++11 are supported. The missing C++11 modes are either subsumed by * the modes we provide below, or not relevant for the CPUs we support * in Gecko. These three modes are confusing enough as it is! */ enum MemoryOrdering { /* * Relaxed ordering is the simplest memory ordering: none at all. * When the result of a write is observed, nothing may be inferred * about other memory. Writes ostensibly performed "before" on the * writing thread may not yet be visible. Writes performed "after" on * the writing thread may already be visible, if the compiler or CPU * reordered them. (The latter can happen if reads and/or writes get * held up in per-processor caches.) Relaxed ordering means * operations can always use cached values (as long as the actual * updates to atomic values actually occur, correctly, eventually), so * it's usually the fastest sort of atomic access. For this reason, * *it's also the most dangerous kind of access*. * * Relaxed ordering is good for things like process-wide statistics * counters that don't need to be consistent with anything else, so * long as updates themselves are atomic. (And so long as any * observations of that value can tolerate being out-of-date -- if you * need some sort of up-to-date value, you need some sort of other * synchronizing operation.) It's *not* good for locks, mutexes, * reference counts, etc. that mediate access to other memory, or must * be observably consistent with other memory. * * x86 architectures don't take advantage of the optimization * opportunities that relaxed ordering permits. Thus it's possible * that using relaxed ordering will "work" on x86 but fail elsewhere * (ARM, say, which *does* implement non-sequentially-consistent * relaxed ordering semantics). Be extra-careful using relaxed * ordering if you can't easily test non-x86 architectures! */ Relaxed, /* * When an atomic value is updated with ReleaseAcquire ordering, and * that new value is observed with ReleaseAcquire ordering, prior * writes (atomic or not) are also observable. What ReleaseAcquire * *doesn't* give you is any observable ordering guarantees for * ReleaseAcquire-ordered operations on different objects. For * example, if there are two cores that each perform ReleaseAcquire * operations on separate objects, each core may or may not observe * the operations made by the other core. The only way the cores can * be synchronized with ReleaseAcquire is if they both * ReleaseAcquire-access the same object. This implies that you can't * necessarily describe some global total ordering of ReleaseAcquire * operations. * * ReleaseAcquire ordering is good for (as the name implies) atomic * operations on values controlling ownership of things: reference * counts, mutexes, and the like. However, if you are thinking about * using these to implement your own locks or mutexes, you should take * a good, hard look at actual lock or mutex primitives first. */ ReleaseAcquire, /* * When an atomic value is updated with SequentiallyConsistent * ordering, all writes observable when the update is observed, just * as with ReleaseAcquire ordering. But, furthermore, a global total * ordering of SequentiallyConsistent operations *can* be described. * For example, if two cores perform SequentiallyConsistent operations * on separate objects, one core will observably perform its update * (and all previous operations will have completed), then the other * core will observably perform its update (and all previous * operations will have completed). (Although those previous * operations aren't themselves ordered -- they could be intermixed, * or ordered if they occur on atomic values with ordering * requirements.) SequentiallyConsistent is the *simplest and safest* * ordering of atomic operations -- it's always as if one operation * happens, then another, then another, in some order -- and every * core observes updates to happen in that single order. Because it * has the most synchronization requirements, operations ordered this * way also tend to be slowest. * * SequentiallyConsistent ordering can be desirable when multiple * threads observe objects, and they all have to agree on the * observable order of changes to them. People expect * SequentiallyConsistent ordering, even if they shouldn't, when * writing code, atomic or otherwise. SequentiallyConsistent is also * the ordering of choice when designing lockless data structures. If * you don't know what order to use, use this one. */ SequentiallyConsistent, }; } // namespace mozilla // Build up the underlying intrinsics. #ifdef MOZ_HAVE_CXX11_ATOMICS # include namespace mozilla { namespace detail { /* * We provide CompareExchangeFailureOrder to work around a bug in some * versions of GCC's header. See bug 898491. */ template struct AtomicOrderConstraints; template<> struct AtomicOrderConstraints { static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed; static const std::memory_order LoadOrder = std::memory_order_relaxed; static const std::memory_order StoreOrder = std::memory_order_relaxed; static const std::memory_order CompareExchangeFailureOrder = std::memory_order_relaxed; }; template<> struct AtomicOrderConstraints { static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel; static const std::memory_order LoadOrder = std::memory_order_acquire; static const std::memory_order StoreOrder = std::memory_order_release; static const std::memory_order CompareExchangeFailureOrder = std::memory_order_acquire; }; template<> struct AtomicOrderConstraints { static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst; static const std::memory_order LoadOrder = std::memory_order_seq_cst; static const std::memory_order StoreOrder = std::memory_order_seq_cst; static const std::memory_order CompareExchangeFailureOrder = std::memory_order_seq_cst; }; template struct IntrinsicBase { typedef std::atomic ValueType; typedef AtomicOrderConstraints OrderedOp; }; template struct IntrinsicMemoryOps : public IntrinsicBase { typedef IntrinsicBase Base; static T load(const typename Base::ValueType& aPtr) { return aPtr.load(Base::OrderedOp::LoadOrder); } static void store(typename Base::ValueType& aPtr, T aVal) { aPtr.store(aVal, Base::OrderedOp::StoreOrder); } static T exchange(typename Base::ValueType& aPtr, T aVal) { return aPtr.exchange(aVal, Base::OrderedOp::AtomicRMWOrder); } static bool compareExchange(typename Base::ValueType& aPtr, T aOldVal, T aNewVal) { return aPtr.compare_exchange_strong(aOldVal, aNewVal, Base::OrderedOp::AtomicRMWOrder, Base::OrderedOp::CompareExchangeFailureOrder); } }; template struct IntrinsicAddSub : public IntrinsicBase { typedef IntrinsicBase Base; static T add(typename Base::ValueType& aPtr, T aVal) { return aPtr.fetch_add(aVal, Base::OrderedOp::AtomicRMWOrder); } static T sub(typename Base::ValueType& aPtr, T aVal) { return aPtr.fetch_sub(aVal, Base::OrderedOp::AtomicRMWOrder); } }; template struct IntrinsicAddSub : public IntrinsicBase { typedef IntrinsicBase Base; static T* add(typename Base::ValueType& aPtr, ptrdiff_t aVal) { return aPtr.fetch_add(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder); } static T* sub(typename Base::ValueType& aPtr, ptrdiff_t aVal) { return aPtr.fetch_sub(fixupAddend(aVal), Base::OrderedOp::AtomicRMWOrder); } private: /* * GCC 4.6's header has a bug where adding X to an * atomic is not the same as adding X to a T*. Hence the need * for this function to provide the correct addend. */ static ptrdiff_t fixupAddend(ptrdiff_t aVal) { #if defined(__clang__) || defined(_MSC_VER) return aVal; #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \ !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0) return aVal * sizeof(T); #else return aVal; #endif } }; template struct IntrinsicIncDec : public IntrinsicAddSub { typedef IntrinsicBase Base; static T inc(typename Base::ValueType& aPtr) { return IntrinsicAddSub::add(aPtr, 1); } static T dec(typename Base::ValueType& aPtr) { return IntrinsicAddSub::sub(aPtr, 1); } }; template struct AtomicIntrinsics : public IntrinsicMemoryOps, public IntrinsicIncDec { typedef IntrinsicBase Base; static T or_(typename Base::ValueType& aPtr, T aVal) { return aPtr.fetch_or(aVal, Base::OrderedOp::AtomicRMWOrder); } static T xor_(typename Base::ValueType& aPtr, T aVal) { return aPtr.fetch_xor(aVal, Base::OrderedOp::AtomicRMWOrder); } static T and_(typename Base::ValueType& aPtr, T aVal) { return aPtr.fetch_and(aVal, Base::OrderedOp::AtomicRMWOrder); } }; template struct AtomicIntrinsics : public IntrinsicMemoryOps, public IntrinsicIncDec { }; } // namespace detail } // namespace mozilla #elif defined(__GNUC__) namespace mozilla { namespace detail { /* * The __sync_* family of intrinsics is documented here: * * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html * * While these intrinsics are deprecated in favor of the newer __atomic_* * family of intrincs: * * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html * * any GCC version that supports the __atomic_* intrinsics will also support * the header and so will be handled above. We provide a version of * atomics using the __sync_* intrinsics to support older versions of GCC. * * All __sync_* intrinsics that we use below act as full memory barriers, for * both compiler and hardware reordering, except for __sync_lock_test_and_set, * which is a only an acquire barrier. When we call __sync_lock_test_and_set, * we add a barrier above it as appropriate. */ template struct Barrier; /* * Some processors (in particular, x86) don't require quite so many calls to * __sync_sychronize as our specializations of Barrier produce. If * performance turns out to be an issue, defining these specializations * on a per-processor basis would be a good first tuning step. */ template<> struct Barrier { static void beforeLoad() {} static void afterLoad() {} static void beforeStore() {} static void afterStore() {} }; template<> struct Barrier { static void beforeLoad() {} static void afterLoad() { __sync_synchronize(); } static void beforeStore() { __sync_synchronize(); } static void afterStore() {} }; template<> struct Barrier { static void beforeLoad() { __sync_synchronize(); } static void afterLoad() { __sync_synchronize(); } static void beforeStore() { __sync_synchronize(); } static void afterStore() { __sync_synchronize(); } }; template struct IntrinsicMemoryOps { static T load(const T& aPtr) { Barrier::beforeLoad(); T val = aPtr; Barrier::afterLoad(); return val; } static void store(T& aPtr, T aVal) { Barrier::beforeStore(); aPtr = aVal; Barrier::afterStore(); } static T exchange(T& aPtr, T aVal) { // __sync_lock_test_and_set is only an acquire barrier; loads and stores // can't be moved up from after to before it, but they can be moved down // from before to after it. We may want a stricter ordering, so we need // an explicit barrier. Barrier::beforeStore(); return __sync_lock_test_and_set(&aPtr, aVal); } static bool compareExchange(T& aPtr, T aOldVal, T aNewVal) { return __sync_bool_compare_and_swap(&aPtr, aOldVal, aNewVal); } }; template struct IntrinsicAddSub { typedef T ValueType; static T add(T& aPtr, T aVal) { return __sync_fetch_and_add(&aPtr, aVal); } static T sub(T& aPtr, T aVal) { return __sync_fetch_and_sub(&aPtr, aVal); } }; template struct IntrinsicAddSub { typedef T* ValueType; /* * The reinterpret_casts are needed so that * __sync_fetch_and_{add,sub} will properly type-check. * * Also, these functions do not provide standard semantics for * pointer types, so we need to adjust the addend. */ static ValueType add(ValueType& aPtr, ptrdiff_t aVal) { ValueType amount = reinterpret_cast(aVal * sizeof(T)); return __sync_fetch_and_add(&aPtr, amount); } static ValueType sub(ValueType& aPtr, ptrdiff_t aVal) { ValueType amount = reinterpret_cast(aVal * sizeof(T)); return __sync_fetch_and_sub(&aPtr, amount); } }; template struct IntrinsicIncDec : public IntrinsicAddSub { static T inc(T& aPtr) { return IntrinsicAddSub::add(aPtr, 1); } static T dec(T& aPtr) { return IntrinsicAddSub::sub(aPtr, 1); } }; template struct AtomicIntrinsics : public IntrinsicMemoryOps, public IntrinsicIncDec { static T or_( T& aPtr, T aVal) { return __sync_fetch_and_or(&aPtr, aVal); } static T xor_(T& aPtr, T aVal) { return __sync_fetch_and_xor(&aPtr, aVal); } static T and_(T& aPtr, T aVal) { return __sync_fetch_and_and(&aPtr, aVal); } }; template struct AtomicIntrinsics : public IntrinsicMemoryOps, public IntrinsicIncDec { }; } // namespace detail } // namespace mozilla #elif defined(_MSC_VER) /* * Windows comes with a full complement of atomic operations. * Unfortunately, most of those aren't available for Windows XP (even if * the compiler supports intrinsics for them), which is the oldest * version of Windows we support. Therefore, we only provide operations * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows * versions, we support 64-bit datatypes as well. * * To avoid namespace pollution issues, we declare whatever functions we * need ourselves. */ extern "C" { long __cdecl _InterlockedExchangeAdd(long volatile* aDst, long aVal); long __cdecl _InterlockedOr(long volatile* aDst, long aVal); long __cdecl _InterlockedXor(long volatile* aDst, long aVal); long __cdecl _InterlockedAnd(long volatile* aDst, long aVal); long __cdecl _InterlockedExchange(long volatile *aDst, long aVal); long __cdecl _InterlockedCompareExchange(long volatile *aDst, long aNewVal, long aOldVal); } # pragma intrinsic(_InterlockedExchangeAdd) # pragma intrinsic(_InterlockedOr) # pragma intrinsic(_InterlockedXor) # pragma intrinsic(_InterlockedAnd) # pragma intrinsic(_InterlockedExchange) # pragma intrinsic(_InterlockedCompareExchange) namespace mozilla { namespace detail { # if !defined(_M_IX86) && !defined(_M_X64) /* * The implementations below are optimized for x86ish systems. You * will have to modify them if you are porting to Windows on a * different architecture. */ # error "Unknown CPU type" # endif /* * The PrimitiveIntrinsics template should define |Type|, the datatype of size * DataSize upon which we operate, and the following eight functions. * * static Type add(Type* aPtr, Type aVal); * static Type sub(Type* aPtr, Type aVal); * static Type or_(Type* aPtr, Type aVal); * static Type xor_(Type* aPtr, Type aVal); * static Type and_(Type* aPtr, Type aVal); * * These functions perform the obvious operation on the value contained in * |*aPtr| combined with |aVal| and return the value previously stored in * |*aPtr|. * * static void store(Type* aPtr, Type aVal); * * This function atomically stores |aVal| into |*aPtr| and must provide a full * memory fence after the store to prevent compiler and hardware instruction * reordering. It should also act as a compiler barrier to prevent reads and * writes from moving to after the store. * * static Type exchange(Type* aPtr, Type aVal); * * This function atomically stores |aVal| into |*aPtr| and returns the * previous contents of |*aPtr|; * * static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal); * * This function atomically performs the following operation: * * if (*aPtr == aOldVal) { * *aPtr = aNewVal; * return true; * } else { * return false; * } * */ template struct PrimitiveIntrinsics; template<> struct PrimitiveIntrinsics<4> { typedef long Type; static Type add(Type* aPtr, Type aVal) { return _InterlockedExchangeAdd(aPtr, aVal); } static Type sub(Type* aPtr, Type aVal) { /* * _InterlockedExchangeSubtract isn't available before Windows 7, * and we must support Windows XP. */ return _InterlockedExchangeAdd(aPtr, -aVal); } static Type or_(Type* aPtr, Type aVal) { return _InterlockedOr(aPtr, aVal); } static Type xor_(Type* aPtr, Type aVal) { return _InterlockedXor(aPtr, aVal); } static Type and_(Type* aPtr, Type aVal) { return _InterlockedAnd(aPtr, aVal); } static void store(Type* aPtr, Type aVal) { _InterlockedExchange(aPtr, aVal); } static Type exchange(Type* aPtr, Type aVal) { return _InterlockedExchange(aPtr, aVal); } static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal) { return _InterlockedCompareExchange(aPtr, aNewVal, aOldVal) == aOldVal; } }; # if defined(_M_X64) extern "C" { long long __cdecl _InterlockedExchangeAdd64(long long volatile* aDst, long long aVal); long long __cdecl _InterlockedOr64(long long volatile* aDst, long long aVal); long long __cdecl _InterlockedXor64(long long volatile* aDst, long long aVal); long long __cdecl _InterlockedAnd64(long long volatile* aDst, long long aVal); long long __cdecl _InterlockedExchange64(long long volatile* aDst, long long aVal); long long __cdecl _InterlockedCompareExchange64(long long volatile* aDst, long long aNewVal, long long aOldVal); } # pragma intrinsic(_InterlockedExchangeAdd64) # pragma intrinsic(_InterlockedOr64) # pragma intrinsic(_InterlockedXor64) # pragma intrinsic(_InterlockedAnd64) # pragma intrinsic(_InterlockedExchange64) # pragma intrinsic(_InterlockedCompareExchange64) template <> struct PrimitiveIntrinsics<8> { typedef __int64 Type; static Type add(Type* aPtr, Type aVal) { return _InterlockedExchangeAdd64(aPtr, aVal); } static Type sub(Type* aPtr, Type aVal) { /* * There is no _InterlockedExchangeSubtract64. */ return _InterlockedExchangeAdd64(aPtr, -aVal); } static Type or_(Type* aPtr, Type aVal) { return _InterlockedOr64(aPtr, aVal); } static Type xor_(Type* aPtr, Type aVal) { return _InterlockedXor64(aPtr, aVal); } static Type and_(Type* aPtr, Type aVal) { return _InterlockedAnd64(aPtr, aVal); } static void store(Type* aPtr, Type aVal) { _InterlockedExchange64(aPtr, aVal); } static Type exchange(Type* aPtr, Type aVal) { return _InterlockedExchange64(aPtr, aVal); } static bool compareExchange(Type* aPtr, Type aOldVal, Type aNewVal) { return _InterlockedCompareExchange64(aPtr, aNewVal, aOldVal) == aOldVal; } }; # endif extern "C" { void _ReadWriteBarrier(); } # pragma intrinsic(_ReadWriteBarrier) template struct Barrier; /* * We do not provide an afterStore method in Barrier, as Relaxed and * ReleaseAcquire orderings do not require one, and the required barrier * for SequentiallyConsistent is handled by PrimitiveIntrinsics. */ template<> struct Barrier { static void beforeLoad() {} static void afterLoad() {} static void beforeStore() {} }; template<> struct Barrier { static void beforeLoad() {} static void afterLoad() { _ReadWriteBarrier(); } static void beforeStore() { _ReadWriteBarrier(); } }; template<> struct Barrier { static void beforeLoad() { _ReadWriteBarrier(); } static void afterLoad() { _ReadWriteBarrier(); } static void beforeStore() { _ReadWriteBarrier(); } }; template struct CastHelper { static PrimType toPrimType(T aVal) { return static_cast(aVal); } static T fromPrimType(PrimType aVal) { return static_cast(aVal); } }; template struct CastHelper { static PrimType toPrimType(T* aVal) { return reinterpret_cast(aVal); } static T* fromPrimType(PrimType aVal) { return reinterpret_cast(aVal); } }; template struct IntrinsicBase { typedef T ValueType; typedef PrimitiveIntrinsics Primitives; typedef typename Primitives::Type PrimType; static_assert(sizeof(PrimType) == sizeof(T), "Selection of PrimitiveIntrinsics was wrong"); typedef CastHelper Cast; }; template struct IntrinsicMemoryOps : public IntrinsicBase { typedef typename IntrinsicBase::ValueType ValueType; typedef typename IntrinsicBase::Primitives Primitives; typedef typename IntrinsicBase::PrimType PrimType; typedef typename IntrinsicBase::Cast Cast; static ValueType load(const ValueType& aPtr) { Barrier::beforeLoad(); ValueType val = aPtr; Barrier::afterLoad(); return val; } static void store(ValueType& aPtr, ValueType aVal) { // For SequentiallyConsistent, Primitives::store() will generate the // proper memory fence. Everything else just needs a barrier before // the store. if (Order == SequentiallyConsistent) { Primitives::store(reinterpret_cast(&aPtr), Cast::toPrimType(aVal)); } else { Barrier::beforeStore(); aPtr = aVal; } } static ValueType exchange(ValueType& aPtr, ValueType aVal) { PrimType oldval = Primitives::exchange(reinterpret_cast(&aPtr), Cast::toPrimType(aVal)); return Cast::fromPrimType(oldval); } static bool compareExchange(ValueType& aPtr, ValueType aOldVal, ValueType aNewVal) { return Primitives::compareExchange(reinterpret_cast(&aPtr), Cast::toPrimType(aOldVal), Cast::toPrimType(aNewVal)); } }; template struct IntrinsicApplyHelper : public IntrinsicBase { typedef typename IntrinsicBase::ValueType ValueType; typedef typename IntrinsicBase::PrimType PrimType; typedef typename IntrinsicBase::Cast Cast; typedef PrimType (*BinaryOp)(PrimType*, PrimType); typedef PrimType (*UnaryOp)(PrimType*); static ValueType applyBinaryFunction(BinaryOp aOp, ValueType& aPtr, ValueType aVal) { PrimType* primTypePtr = reinterpret_cast(&aPtr); PrimType primTypeVal = Cast::toPrimType(aVal); return Cast::fromPrimType(aOp(primTypePtr, primTypeVal)); } static ValueType applyUnaryFunction(UnaryOp aOp, ValueType& aPtr) { PrimType* primTypePtr = reinterpret_cast(&aPtr); return Cast::fromPrimType(aOp(primTypePtr)); } }; template struct IntrinsicAddSub : public IntrinsicApplyHelper { typedef typename IntrinsicApplyHelper::ValueType ValueType; typedef typename IntrinsicBase::Primitives Primitives; static ValueType add(ValueType& aPtr, ValueType aVal) { return applyBinaryFunction(&Primitives::add, aPtr, aVal); } static ValueType sub(ValueType& aPtr, ValueType aVal) { return applyBinaryFunction(&Primitives::sub, aPtr, aVal); } }; template struct IntrinsicAddSub : public IntrinsicApplyHelper { typedef typename IntrinsicApplyHelper::ValueType ValueType; static ValueType add(ValueType& aPtr, ptrdiff_t aAmount) { return applyBinaryFunction(&Primitives::add, aPtr, (ValueType)(aAmount * sizeof(ValueType))); } static ValueType sub(ValueType& aPtr, ptrdiff_t aAmount) { return applyBinaryFunction(&Primitives::sub, aPtr, (ValueType)(aAmount * sizeof(ValueType))); } }; template struct IntrinsicIncDec : public IntrinsicAddSub { typedef typename IntrinsicAddSub::ValueType ValueType; static ValueType inc(ValueType& aPtr) { return add(aPtr, 1); } static ValueType dec(ValueType& aPtr) { return sub(aPtr, 1); } }; template struct AtomicIntrinsics : public IntrinsicMemoryOps, public IntrinsicIncDec { typedef typename IntrinsicIncDec::ValueType ValueType; static ValueType or_(ValueType& aPtr, T aVal) { return applyBinaryFunction(&Primitives::or_, aPtr, aVal); } static ValueType xor_(ValueType& aPtr, T aVal) { return applyBinaryFunction(&Primitives::xor_, aPtr, aVal); } static ValueType and_(ValueType& aPtr, T aVal) { return applyBinaryFunction(&Primitives::and_, aPtr, aVal); } }; template struct AtomicIntrinsics : public IntrinsicMemoryOps, public IntrinsicIncDec { typedef typename IntrinsicMemoryOps::ValueType ValueType; }; } // namespace detail } // namespace mozilla #else # error "Atomic compiler intrinsics are not supported on your platform" #endif namespace mozilla { namespace detail { template class AtomicBase { // We only support 32-bit types on 32-bit Windows, which constrains our // implementation elsewhere. But we support pointer-sized types everywhere. static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8), "mozilla/Atomics.h only supports 32-bit and pointer-sized types"); protected: typedef typename detail::AtomicIntrinsics Intrinsics; typename Intrinsics::ValueType mValue; public: MOZ_CONSTEXPR AtomicBase() : mValue() {} MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {} // Note: we can't provide operator T() here because Atomic inherits // from AtomcBase with T=uint32_t and not T=bool. If we implemented // operator T() here, it would cause errors when comparing Atomic with // a regular bool. T operator=(T aVal) { Intrinsics::store(mValue, aVal); return aVal; } /** * Performs an atomic swap operation. aVal is stored and the previous * value of this variable is returned. */ T exchange(T aVal) { return Intrinsics::exchange(mValue, aVal); } /** * Performs an atomic compare-and-swap operation and returns true if it * succeeded. This is equivalent to atomically doing * * if (mValue == aOldValue) { * mValue = aNewValue; * return true; * } else { * return false; * } */ bool compareExchange(T aOldValue, T aNewValue) { return Intrinsics::compareExchange(mValue, aOldValue, aNewValue); } private: template AtomicBase(const AtomicBase& aCopy) MOZ_DELETE; }; template class AtomicBaseIncDec : public AtomicBase { typedef typename detail::AtomicBase Base; public: MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {} MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {} using Base::operator=; operator T() const { return Base::Intrinsics::load(Base::mValue); } T operator++(int) { return Base::Intrinsics::inc(Base::mValue); } T operator--(int) { return Base::Intrinsics::dec(Base::mValue); } T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; } T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; } private: template AtomicBaseIncDec(const AtomicBaseIncDec& aCopy) MOZ_DELETE; }; } // namespace detail /** * A wrapper for a type that enforces that all memory accesses are atomic. * * In general, where a variable |T foo| exists, |Atomic foo| can be used in * its place. Implementations for integral and pointer types are provided * below. * * Atomic accesses are sequentially consistent by default. You should * use the default unless you are tall enough to ride the * memory-ordering roller coaster (if you're not sure, you aren't) and * you have a compelling reason to do otherwise. * * There is one exception to the case of atomic memory accesses: providing an * initial value of the atomic value is not guaranteed to be atomic. This is a * deliberate design choice that enables static atomic variables to be declared * without introducing extra static constructors. */ template class Atomic; /** * Atomic implementation for integral types. * * In addition to atomic store and load operations, compound assignment and * increment/decrement operators are implemented which perform the * corresponding read-modify-write operation atomically. Finally, an atomic * swap method is provided. */ template class Atomic::value && !IsSame::value>::Type> : public detail::AtomicBaseIncDec { typedef typename detail::AtomicBaseIncDec Base; public: MOZ_CONSTEXPR Atomic() : Base() {} MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {} using Base::operator=; T operator+=(T aDelta) { return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta; } T operator-=(T aDelta) { return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta; } T operator|=(T aVal) { return Base::Intrinsics::or_(Base::mValue, aVal) | aVal; } T operator^=(T aVal) { return Base::Intrinsics::xor_(Base::mValue, aVal) ^ aVal; } T operator&=(T aVal) { return Base::Intrinsics::and_(Base::mValue, aVal) & aVal; } private: Atomic(Atomic& aOther) MOZ_DELETE; }; /** * Atomic implementation for pointer types. * * An atomic compare-and-swap primitive for pointer variables is provided, as * are atomic increment and decement operators. Also provided are the compound * assignment operators for addition and subtraction. Atomic swap (via * exchange()) is included as well. */ template class Atomic : public detail::AtomicBaseIncDec { typedef typename detail::AtomicBaseIncDec Base; public: MOZ_CONSTEXPR Atomic() : Base() {} MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {} using Base::operator=; T* operator+=(ptrdiff_t aDelta) { return Base::Intrinsics::add(Base::mValue, aDelta) + aDelta; } T* operator-=(ptrdiff_t aDelta) { return Base::Intrinsics::sub(Base::mValue, aDelta) - aDelta; } private: Atomic(Atomic& aOther) MOZ_DELETE; }; /** * Atomic implementation for enum types. * * The atomic store and load operations and the atomic swap method is provided. */ template class Atomic::value>::Type> : public detail::AtomicBase { typedef typename detail::AtomicBase Base; public: MOZ_CONSTEXPR Atomic() : Base() {} MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {} operator T() const { return Base::Intrinsics::load(Base::mValue); } using Base::operator=; private: Atomic(Atomic& aOther) MOZ_DELETE; }; /** * Atomic implementation for boolean types. * * The atomic store and load operations and the atomic swap method is provided. * * Note: * * - sizeof(Atomic) != sizeof(bool) for some implementations of * bool and/or some implementations of std::atomic. This is allowed in * [atomic.types.generic]p9. * * - It's not obvious whether the 8-bit atomic functions on Windows are always * inlined or not. If they are not inlined, the corresponding functions in the * runtime library are not available on Windows XP. This is why we implement * Atomic with an underlying type of uint32_t. */ template class Atomic : protected detail::AtomicBase { typedef typename detail::AtomicBase Base; public: MOZ_CONSTEXPR Atomic() : Base() {} MOZ_CONSTEXPR Atomic(bool aInit) : Base(aInit) {} // We provide boolean wrappers for the underlying AtomicBase methods. operator bool() const { return Base::Intrinsics::load(Base::mValue); } bool operator=(bool aVal) { return Base::operator=(aVal); } bool exchange(bool aVal) { return Base::exchange(aVal); } bool compareExchange(bool aOldValue, bool aNewValue) { return Base::compareExchange(aOldValue, aNewValue); } private: Atomic(Atomic& aOther) MOZ_DELETE; }; } // namespace mozilla #endif /* mozilla_Atomics_h */