Bug 1142593 - factor atomics into the platform layer. r=waldo

2015-04-09 08:09:58 +02:00 · 2015-04-09 08:09:58 +02:00 · 797d1f085f
--- a/js/src/builtin/AtomicsObject.cpp
+++ b/js/src/builtin/AtomicsObject.cpp
@ -55,98 +55,18 @@

 #include "prmjtime.h"

+#include "jit/AtomicOperations.h"
+
 #include "js/Class.h"
 #include "vm/GlobalObject.h"
 #include "vm/SharedTypedArrayObject.h"
 #include "vm/TypedArrayObject.h"

 #include "jsobjinlines.h"
+#include "jit/AtomicOperations-inl.h"

 using namespace js;

-#if defined(MOZ_HAVE_CXX11_ATOMICS)
-# define CXX11_ATOMICS
-#elif defined(__clang__) || defined(__GNUC__)
-# define GNU_ATOMICS
-#elif _MSC_VER >= 1700 && _MSC_VER < 1800
-// Visual Studion 2012
-# define CXX11_ATOMICS
-# include <atomic>
-#elif defined(_MSC_VER)
-// Visual Studio 2010
-# define GNU_ATOMICS
-static inline void
-__sync_synchronize()
-{
-# if JS_BITS_PER_WORD == 32
-    // If configured for SSE2+ we can use the MFENCE instruction, available
-    // through the _mm_mfence intrinsic.  But for non-SSE2 systems we have
-    // to do something else.  Linux uses "lock add [esp], 0", so why not?
-    __asm lock add [esp], 0;
-# else
-    _mm_mfence();
-# endif
-}
-
-# define MSC_CAS(T, U, cmpxchg) \
-    static inline T \
-    __sync_val_compare_and_swap(T* addr, T oldval, T newval) { \
-        return (T)cmpxchg((U volatile*)addr, (U)oldval, (U)newval); \
-    }
-
-MSC_CAS(int8_t, char, _InterlockedCompareExchange8)
-MSC_CAS(uint8_t, char, _InterlockedCompareExchange8)
-MSC_CAS(int16_t, short, _InterlockedCompareExchange16)
-MSC_CAS(uint16_t, short, _InterlockedCompareExchange16)
-MSC_CAS(int32_t, long, _InterlockedCompareExchange)
-MSC_CAS(uint32_t, long, _InterlockedCompareExchange)
-
-# define MSC_FETCHADDOP(T, U, xadd) \
-    static inline T \
-    __sync_fetch_and_add(T* addr, T val) { \
-        return (T)xadd((U volatile*)addr, (U)val); \
-    } \
-    static inline T \
-    __sync_fetch_and_sub(T* addr, T val) { \
-        return (T)xadd((U volatile*)addr, (U)-val); \
-    }
-
-MSC_FETCHADDOP(int8_t, char, _InterlockedExchangeAdd8)
-MSC_FETCHADDOP(uint8_t, char, _InterlockedExchangeAdd8)
-MSC_FETCHADDOP(int16_t, short, _InterlockedExchangeAdd16)
-MSC_FETCHADDOP(uint16_t, short, _InterlockedExchangeAdd16)
-MSC_FETCHADDOP(int32_t, long, _InterlockedExchangeAdd)
-MSC_FETCHADDOP(uint32_t, long, _InterlockedExchangeAdd)
-
-# define MSC_FETCHBITOP(T, U, andop, orop, xorop) \
-    static inline T \
-    __sync_fetch_and_and(T* addr, T val) { \
-        return (T)andop((U volatile*)addr, (U)val);  \
-    } \
-    static inline T \
-    __sync_fetch_and_or(T* addr, T val) { \
-        return (T)orop((U volatile*)addr, (U)val);  \
-    } \
-    static inline T \
-    __sync_fetch_and_xor(T* addr, T val) { \
-        return (T)xorop((U volatile*)addr, (U)val);  \
-    } \
-
-MSC_FETCHBITOP(int8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8)
-MSC_FETCHBITOP(uint8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8)
-MSC_FETCHBITOP(int16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16)
-MSC_FETCHBITOP(uint16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16)
-MSC_FETCHBITOP(int32_t, long,  _InterlockedAnd, _InterlockedOr, _InterlockedXor)
-MSC_FETCHBITOP(uint32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor)
-
-# undef MSC_CAS
-# undef MSC_FETCHADDOP
-# undef MSC_FETCHBITOP
-
-#elif defined(ENABLE_SHARED_ARRAY_BUFFER)
-# error "Either disable JS shared memory or use a compiler that supports C++11 atomics or GCC/clang atomics"
-#endif
-
 const Class AtomicsObject::class_ = {
    "Atomics",
    JSCLASS_HAS_CACHED_PROTO(JSProto_Atomics)
@ -193,11 +113,7 @@ GetSharedTypedArrayIndex(JSContext* cx, HandleValue v, Handle<SharedTypedArrayOb
 void
 js::atomics_fullMemoryBarrier()
 {
-#if defined(CXX11_ATOMICS)
-    std::atomic_thread_fence(std::memory_order_seq_cst);
-#elif defined(GNU_ATOMICS)
-    __sync_synchronize();
-#endif
+    jit::AtomicOperations::fenceSeqCst();
 }

 static bool
@ -219,72 +135,53 @@ static int32_t
 do_cmpxchg(Scalar::Type viewType, int32_t oldCandidate, int32_t newCandidate, void* viewData,
           uint32_t offset, bool* badArrayType)
 {
-    // CAS always sets oldval to the old value of the cell.
-    // addr must be a T*, and oldval and newval should be variables of type T
-
-#if defined(CXX11_ATOMICS)
-# define CAS(T, addr, oldval, newval)                                    \
-    do {                                                                \
-        std::atomic_compare_exchange_strong(reinterpret_cast<std::atomic<T>*>(addr), &oldval, newval); \
-    } while(0)
-#elif defined(GNU_ATOMICS)
-# define CAS(T, addr, oldval, newval)                                    \
-    do {                                                                \
-        oldval = __sync_val_compare_and_swap(addr, (oldval), (newval)); \
-    } while(0)
-#else
-# define CAS(a, b, c, newval)  (void)newval
-#endif
-
    switch (viewType) {
      case Scalar::Int8: {
          int8_t oldval = (int8_t)oldCandidate;
          int8_t newval = (int8_t)newCandidate;
-          CAS(int8_t, (int8_t*)viewData + offset, oldval, newval);
+          oldval = jit::AtomicOperations::compareExchangeSeqCst((int8_t*)viewData + offset, oldval, newval);
          return oldval;
      }
      case Scalar::Uint8: {
          uint8_t oldval = (uint8_t)oldCandidate;
          uint8_t newval = (uint8_t)newCandidate;
-          CAS(uint8_t, (uint8_t*)viewData + offset, oldval, newval);
+          oldval = jit::AtomicOperations::compareExchangeSeqCst((uint8_t*)viewData + offset, oldval, newval);
          return oldval;
      }
      case Scalar::Uint8Clamped: {
          uint8_t oldval = ClampIntForUint8Array(oldCandidate);
          uint8_t newval = ClampIntForUint8Array(newCandidate);
-          CAS(uint8_t, (uint8_t*)viewData + offset, oldval, newval);
+          oldval = jit::AtomicOperations::compareExchangeSeqCst((uint8_t*)viewData + offset, oldval, newval);
          return oldval;
      }
      case Scalar::Int16: {
          int16_t oldval = (int16_t)oldCandidate;
          int16_t newval = (int16_t)newCandidate;
-          CAS(int16_t, (int16_t*)viewData + offset, oldval, newval);
+          oldval = jit::AtomicOperations::compareExchangeSeqCst((int16_t*)viewData + offset, oldval, newval);
          return oldval;
      }
      case Scalar::Uint16: {
          uint16_t oldval = (uint16_t)oldCandidate;
          uint16_t newval = (uint16_t)newCandidate;
-          CAS(uint16_t, (uint16_t*)viewData + offset, oldval, newval);
+          oldval = jit::AtomicOperations::compareExchangeSeqCst((uint16_t*)viewData + offset, oldval, newval);
          return oldval;
      }
      case Scalar::Int32: {
          int32_t oldval = oldCandidate;
          int32_t newval = newCandidate;
-          CAS(int32_t, (int32_t*)viewData + offset, oldval, newval);
+          oldval = jit::AtomicOperations::compareExchangeSeqCst((int32_t*)viewData + offset, oldval, newval);
          return oldval;
      }
      case Scalar::Uint32: {
          uint32_t oldval = (uint32_t)oldCandidate;
          uint32_t newval = (uint32_t)newCandidate;
-          CAS(uint32_t, (uint32_t*)viewData + offset, oldval, newval);
+          oldval = jit::AtomicOperations::compareExchangeSeqCst((uint32_t*)viewData + offset, oldval, newval);
          return (int32_t)oldval;
      }
      default:
        *badArrayType = true;
        return 0;
    }
-
-    // Do not undef CAS, it is used later
 }

 bool
@ -346,69 +243,41 @@ js::atomics_load(JSContext* cx, unsigned argc, Value* vp)
    if (!inRange)
        return atomics_fence_impl(cx, r);

-    // LOAD sets v to the value of *addr
-    // addr must be a T*, and v must be a variable of type T
-
-#if defined(CXX11_ATOMICS)
-# define LOAD(T, addr, v)                                                \
-    do {                                                                \
-        v = std::atomic_load(reinterpret_cast<std::atomic<T>*>(addr));  \
-    } while(0)
-#elif defined(GNU_ATOMICS)
-# define LOAD(T, addr, v)                        \
-    do {                                        \
-        __sync_synchronize();                   \
-        v = *(addr);                            \
-        __sync_synchronize();                   \
-    } while(0)
-#else
-# define LOAD(a, b, v)  v = 0
-#endif
-
    switch (view->type()) {
      case Scalar::Uint8:
      case Scalar::Uint8Clamped: {
-          uint8_t v;
-          LOAD(uint8_t, (uint8_t*)view->viewData() + offset, v);
+          uint8_t v = jit::AtomicOperations::loadSeqCst((uint8_t*)view->viewData() + offset);
          r.setInt32(v);
          return true;
      }
      case Scalar::Int8: {
-          int8_t v;
-          LOAD(int8_t, (int8_t*)view->viewData() + offset, v);
+          int8_t v = jit::AtomicOperations::loadSeqCst((int8_t*)view->viewData() + offset);
          r.setInt32(v);
          return true;
      }
      case Scalar::Int16: {
-          int16_t v;
-          LOAD(int16_t, (int16_t*)view->viewData() + offset, v);
+          int16_t v = jit::AtomicOperations::loadSeqCst((int16_t*)view->viewData() + offset);
          r.setInt32(v);
          return true;
      }
      case Scalar::Uint16: {
-          uint16_t v;
-          LOAD(uint16_t, (uint16_t*)view->viewData() + offset, v);
+          uint16_t v = jit::AtomicOperations::loadSeqCst((uint16_t*)view->viewData() + offset);
          r.setInt32(v);
          return true;
      }
      case Scalar::Int32: {
-          int32_t v;
-          LOAD(int32_t, (int32_t*)view->viewData() + offset, v);
+          int32_t v = jit::AtomicOperations::loadSeqCst((int32_t*)view->viewData() + offset);
          r.setInt32(v);
          return true;
      }
      case Scalar::Uint32: {
-          uint32_t v;
-          LOAD(uint32_t, (uint32_t*)view->viewData() + offset, v);
+          uint32_t v = jit::AtomicOperations::loadSeqCst((uint32_t*)view->viewData() + offset);
          r.setNumber(v);
          return true;
      }
      default:
          return ReportBadArrayType(cx);
    }
-
-#undef LOAD
-
 }

 bool
@ -437,73 +306,52 @@ js::atomics_store(JSContext* cx, unsigned argc, Value* vp)
        return true;
    }

-    // STORE stores value in *addr
-    // addr must be a T*, and value should be of type T
-
-#if defined(CXX11_ATOMICS)
-# define STORE(T, addr, value)                                           \
-    do {                                                                \
-        std::atomic_store(reinterpret_cast<std::atomic<T>*>(addr), (T)value); \
-} while(0)
-#elif defined(GNU_ATOMICS)
-# define STORE(T, addr, value)                   \
-    do {                                        \
-        __sync_synchronize();                   \
-        *(addr) = value;                        \
-        __sync_synchronize();                   \
-    } while(0)
-#else
-# define STORE(a, b, c)  (void)0
-#endif
-
    switch (view->type()) {
      case Scalar::Int8: {
          int8_t value = (int8_t)numberValue;
-          STORE(int8_t, (int8_t*)view->viewData() + offset, value);
+          jit::AtomicOperations::storeSeqCst((int8_t*)view->viewData() + offset, value);
          r.setInt32(value);
          return true;
      }
      case Scalar::Uint8: {
          uint8_t value = (uint8_t)numberValue;
-          STORE(uint8_t, (uint8_t*)view->viewData() + offset, value);
+          jit::AtomicOperations::storeSeqCst((uint8_t*)view->viewData() + offset, value);
          r.setInt32(value);
          return true;
      }
      case Scalar::Uint8Clamped: {
          uint8_t value = ClampIntForUint8Array(numberValue);
-          STORE(uint8_t, (uint8_t*)view->viewData() + offset, value);
+          jit::AtomicOperations::storeSeqCst((uint8_t*)view->viewData() + offset, value);
          r.setInt32(value);
          return true;
      }
      case Scalar::Int16: {
          int16_t value = (int16_t)numberValue;
-          STORE(int16_t, (int16_t*)view->viewData() + offset, value);
+          jit::AtomicOperations::storeSeqCst((int16_t*)view->viewData() + offset, value);
          r.setInt32(value);
          return true;
      }
      case Scalar::Uint16: {
          uint16_t value = (uint16_t)numberValue;
-          STORE(uint16_t, (uint16_t*)view->viewData() + offset, value);
+          jit::AtomicOperations::storeSeqCst((uint16_t*)view->viewData() + offset, value);
          r.setInt32(value);
          return true;
      }
      case Scalar::Int32: {
          int32_t value = numberValue;
-          STORE(int32_t, (int32_t*)view->viewData() + offset, value);
+          jit::AtomicOperations::storeSeqCst((int32_t*)view->viewData() + offset, value);
          r.setInt32(value);
          return true;
      }
      case Scalar::Uint32: {
          uint32_t value = (uint32_t)numberValue;
-          STORE(uint32_t, (uint32_t*)view->viewData() + offset, value);
+          jit::AtomicOperations::storeSeqCst((uint32_t*)view->viewData() + offset, value);
          r.setNumber((double)value);
          return true;
      }
      default:
        return ReportBadArrayType(cx);
    }
-
-#undef STORE
 }

 template<typename T>
@ -548,8 +396,7 @@ atomics_binop_impl(JSContext* cx, HandleValue objv, HandleValue idxv, HandleValu
          for (;;) {
              uint8_t old = *loc;
              uint8_t result = (uint8_t)ClampIntForUint8Array(T::perform(old, value));
-              uint8_t tmp = old;  // tmp is overwritten by CAS
-              CAS(uint8_t, loc, tmp, result);
+              uint8_t tmp = jit::AtomicOperations::compareExchangeSeqCst(loc, old, result);
              if (tmp == old) {
                  r.setInt32(old);
                  break;
@ -582,28 +429,18 @@ atomics_binop_impl(JSContext* cx, HandleValue objv, HandleValue idxv, HandleValu
    }
 }

-#define INTEGRAL_TYPES_FOR_EACH(NAME, TRANSFORM) \
-    static int8_t operate(int8_t* addr, int8_t v) { return NAME(TRANSFORM(int8_t, addr), v); } \
-    static uint8_t operate(uint8_t* addr, uint8_t v) { return NAME(TRANSFORM(uint8_t, addr), v); } \
-    static int16_t operate(int16_t* addr, int16_t v) { return NAME(TRANSFORM(int16_t, addr), v); } \
-    static uint16_t operate(uint16_t* addr, uint16_t v) { return NAME(TRANSFORM(uint16_t, addr), v); } \
-    static int32_t operate(int32_t* addr, int32_t v) { return NAME(TRANSFORM(int32_t, addr), v); } \
-    static uint32_t operate(uint32_t* addr, uint32_t v) { return NAME(TRANSFORM(uint32_t, addr), v); }
-
-#define CAST_ATOMIC(t, v) reinterpret_cast<std::atomic<t>*>(v)
-#define DO_NOTHING(t, v) v
-#define ZERO(t, v) 0
+#define INTEGRAL_TYPES_FOR_EACH(NAME) \
+    static int8_t operate(int8_t* addr, int8_t v) { return NAME(addr, v); } \
+    static uint8_t operate(uint8_t* addr, uint8_t v) { return NAME(addr, v); } \
+    static int16_t operate(int16_t* addr, int16_t v) { return NAME(addr, v); } \
+    static uint16_t operate(uint16_t* addr, uint16_t v) { return NAME(addr, v); } \
+    static int32_t operate(int32_t* addr, int32_t v) { return NAME(addr, v); } \
+    static uint32_t operate(uint32_t* addr, uint32_t v) { return NAME(addr, v); }

 class do_add
 {
 public:
-#if defined(CXX11_ATOMICS)
-    INTEGRAL_TYPES_FOR_EACH(std::atomic_fetch_add, CAST_ATOMIC)
-#elif defined(GNU_ATOMICS)
-    INTEGRAL_TYPES_FOR_EACH(__sync_fetch_and_add, DO_NOTHING)
-#else
-    INTEGRAL_TYPES_FOR_EACH(ZERO, DO_NOTHING)
-#endif
+    INTEGRAL_TYPES_FOR_EACH(jit::AtomicOperations::fetchAddSeqCst)
    static int32_t perform(int32_t x, int32_t y) { return x + y; }
 };

@ -617,13 +454,7 @@ js::atomics_add(JSContext* cx, unsigned argc, Value* vp)
 class do_sub
 {
 public:
-#if defined(CXX11_ATOMICS)
-    INTEGRAL_TYPES_FOR_EACH(std::atomic_fetch_sub, CAST_ATOMIC)
-#elif defined(GNU_ATOMICS)
-    INTEGRAL_TYPES_FOR_EACH(__sync_fetch_and_sub, DO_NOTHING)
-#else
-    INTEGRAL_TYPES_FOR_EACH(ZERO, DO_NOTHING)
-#endif
+    INTEGRAL_TYPES_FOR_EACH(jit::AtomicOperations::fetchSubSeqCst)
    static int32_t perform(int32_t x, int32_t y) { return x - y; }
 };

@ -637,13 +468,7 @@ js::atomics_sub(JSContext* cx, unsigned argc, Value* vp)
 class do_and
 {
 public:
-#if defined(CXX11_ATOMICS)
-    INTEGRAL_TYPES_FOR_EACH(std::atomic_fetch_and, CAST_ATOMIC)
-#elif defined(GNU_ATOMICS)
-    INTEGRAL_TYPES_FOR_EACH(__sync_fetch_and_and, DO_NOTHING)
-#else
-    INTEGRAL_TYPES_FOR_EACH(ZERO, DO_NOTHING)
-#endif
+    INTEGRAL_TYPES_FOR_EACH(jit::AtomicOperations::fetchAndSeqCst)
    static int32_t perform(int32_t x, int32_t y) { return x & y; }
 };

@ -657,13 +482,7 @@ js::atomics_and(JSContext* cx, unsigned argc, Value* vp)
 class do_or
 {
 public:
-#if defined(CXX11_ATOMICS)
-    INTEGRAL_TYPES_FOR_EACH(std::atomic_fetch_or, CAST_ATOMIC)
-#elif defined(GNU_ATOMICS)
-    INTEGRAL_TYPES_FOR_EACH(__sync_fetch_and_or, DO_NOTHING)
-#else
-    INTEGRAL_TYPES_FOR_EACH(ZERO, DO_NOTHING)
-#endif
+    INTEGRAL_TYPES_FOR_EACH(jit::AtomicOperations::fetchOrSeqCst)
    static int32_t perform(int32_t x, int32_t y) { return x | y; }
 };

@ -677,13 +496,7 @@ js::atomics_or(JSContext* cx, unsigned argc, Value* vp)
 class do_xor
 {
 public:
-#if defined(CXX11_ATOMICS)
-    INTEGRAL_TYPES_FOR_EACH(std::atomic_fetch_xor, CAST_ATOMIC)
-#elif defined(GNU_ATOMICS)
-    INTEGRAL_TYPES_FOR_EACH(__sync_fetch_and_xor, DO_NOTHING)
-#else
-    INTEGRAL_TYPES_FOR_EACH(ZERO, DO_NOTHING)
-#endif
+    INTEGRAL_TYPES_FOR_EACH(jit::AtomicOperations::fetchXorSeqCst)
    static int32_t perform(int32_t x, int32_t y) { return x ^ y; }
 };

@ -694,11 +507,8 @@ js::atomics_xor(JSContext* cx, unsigned argc, Value* vp)
    return atomics_binop_impl<do_xor>(cx, args.get(0), args.get(1), args.get(2), args.rval());
 }

 #undef INTEGRAL_TYPES_FOR_EACH
-#undef CAST_ATOMIC
-#undef DO_NOTHING
-#undef ZERO

 // asm.js callouts for platforms that do not have non-word-sized
 // atomics where we don't want to inline the logic for the atomics.
 //
--- a/js/src/jit/AtomicOperations-inl.h
+++ b/js/src/jit/AtomicOperations-inl.h
@ -0,0 +1,22 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_AtomicOperations_inl_h
+#define jit_AtomicOperations_inl_h
+
+#if defined(JS_CODEGEN_ARM)
+# include "jit/arm/AtomicOperations-arm.h"
+#elif defined(JS_CODEGEN_MIPS)
+# include "jit/mips/AtomicOperations-mips.h"
+#elif defined(JS_CODEGEN_NONE)
+# include "jit/none/AtomicOperations-none.h"
+#elif defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
+# include "jit/x86-shared/AtomicOperations-x86-shared.h"
+#else
+# error "Atomic operations must be defined for this platform"
+#endif
+
+#endif //  jit_AtomicOperations_inl_h
--- a/js/src/jit/AtomicOperations.h
+++ b/js/src/jit/AtomicOperations.h
@ -0,0 +1,121 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef jit_AtomicOperations_h
+#define jit_AtomicOperations_h
+
+namespace js {
+namespace jit {
+
+/*
+ * The atomic operations layer defines the following types and
+ * functions.  The "SeqCst" suffix on operations means "sequentially
+ * consistent" and means such a function's operation must have
+ * "sequentially consistent" memory ordering.  See mfbt/Atomics.h for
+ * an explanation of this memory ordering.
+ *
+ * To make use of the functions you generally have to include
+ * AtomicOperations-inl.h.
+ *
+ * The fundamental constraint on all these primitives is that their
+ * realization by the C++ compiler MUST be compatible with code the
+ * JIT generates for its Atomics operations, so that an atomic access
+ * from the interpreter really is atomic relative to a concurrent
+ * access from jitted code.
+ *
+ * It's not a requirement that these methods be inlined; performance
+ * is not a great concern.  On some platforms these methods may call
+ * out to code that's generated at run time.
+ */
+
+class AtomicOperations
+{
+  public:
+
+    // Execute a full memory barrier (LoadLoad+LoadStore+StoreLoad+StoreStore).
+    static inline void fenceSeqCst();
+
+    // If the return value is true then a call to the 64-bit (8-byte)
+    // routines below will work; otherwise those functions will assert in
+    // debug builds and may crash in release builds.  (See the code in
+    // ../arm for an example.)  The value of this call does not change
+    // during a run.
+    static inline bool isLockfree8();
+
+    // The following functions are defined for T = int8_t, uint8_t,
+    // int16_t, uint16_t, int32_t, uint32_t, int64_t, and uint64_t
+
+    // Atomically read *addr.
+    template<typename T>
+    static inline T loadSeqCst(T* addr);
+
+    // Atomically store val in *addr.
+    template<typename T>
+    static inline void storeSeqCst(T* addr, T val);
+
+    // Atomically store val in *addr and return the old value of *addr.
+    template<typename T>
+    static inline T exchangeSeqCst(T* addr, T val);
+
+    // Atomically check that *addr contains oldval and if so replace it
+    // with newval; in any case, return the old contents of *addr.
+    template<typename T>
+    static inline T compareExchangeSeqCst(T* addr, T oldval, T newval);
+
+    // The following functions are defined for T = int8_t, uint8_t,
+    // int16_t, uint16_t, int32_t, uint32_t only.
+
+    // Atomically add, subtract, bitwise-AND, bitwise-OR, or bitwise-XOR
+    // val into *addr and return the old value of *addr.
+    template<typename T>
+    static inline T fetchAddSeqCst(T* addr, T val);
+
+    template<typename T>
+    static inline T fetchSubSeqCst(T* addr, T val);
+
+    template<typename T>
+    static inline T fetchAndSeqCst(T* addr, T val);
+
+    template<typename T>
+    static inline T fetchOrSeqCst(T* addr, T val);
+
+    template<typename T>
+    static inline T fetchXorSeqCst(T* addr, T val);
+};
+
+/* A data type representing a lock on some region of a
+ * SharedArrayRawBuffer's memory, to be used only when the hardware
+ * does not provide necessary atomicity (eg, float64 access on ARMv6
+ * and some ARMv7 systems).
+ */
+struct RegionLock
+{
+  public:
+    RegionLock() : spinlock(0) {}
+
+    /* Addr is the address to be locked, nbytes the number of bytes we
+     * need to lock.  The lock that is taken may cover a larger range
+     * of bytes.
+     */
+    template<size_t nbytes>
+    void acquire(void* addr);
+
+    /* Addr is the address to be unlocked, nbytes the number of bytes
+     * we need to unlock.  The lock must be held by the calling thread,
+     * at the given address and for the number of bytes.
+     */
+    template<size_t nbytes>
+    void release(void* addr);
+
+  private:
+    /* For now, a simple spinlock that covers the entire buffer. */
+    uint32_t spinlock;
+};
+
+} // namespace jit
+} // namespace js
+
+#endif // jit_AtomicOperations_h
--- a/js/src/jit/arm/AtomicOperations-arm.h
+++ b/js/src/jit/arm/AtomicOperations-arm.h
@ -0,0 +1,220 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* For documentation, see jit/AtomicOperations.h */
+
+#ifndef jit_arm_AtomicOperations_arm_h
+#define jit_arm_AtomicOperations_arm_h
+
+#include "jit/arm/Architecture-arm.h"
+#include "jit/AtomicOperations.h"
+
+#if defined(__clang__) || defined(__GNUC__)
+
+// The default implementation tactic for gcc/clang is to use the newer
+// __atomic intrinsics added for use in C++11 <atomic>.  Where that
+// isn't available, we use GCC's older __sync functions instead.
+//
+// ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS is kept as a backward
+// compatible option for older compilers: enable this to use GCC's old
+// __sync functions instead of the newer __atomic functions.  This
+// will be required for GCC 4.6.x and earlier, and probably for Clang
+// 3.1, should we need to use those versions.
+
+//#define ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+
+inline bool
+js::jit::AtomicOperations::isLockfree8()
+{
+    // The JIT and the C++ compiler must agree on whether to use atomics
+    // for 64-bit accesses.  There are two ways to do this: either the
+    // JIT defers to the C++ compiler (so if the C++ code is compiled
+    // for ARMv6, say, and __atomic_always_lock_free(8) is false, then the
+    // JIT ignores the fact that the program is running on ARMv7 or newer);
+    // or the C++ code in this file calls out to run-time generated code
+    // to do whatever the JIT does.
+    //
+    // For now, make the JIT defer to the C++ compiler when we know what
+    // the C++ compiler will do, otherwise assume a lock is needed.
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int8_t), 0));
+    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int16_t), 0));
+    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int32_t), 0));
+    return HasLDSTREXBHD() && __atomic_always_lock_free(sizeof(int64_t), 0);
+# else
+    return false;
+# endif
+}
+
+inline void
+js::jit::AtomicOperations::fenceSeqCst()
+{
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    __sync_synchronize();
+# else
+    __atomic_thread_fence(__ATOMIC_SEQ_CST);
+# endif
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::loadSeqCst(T* addr)
+{
+    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    __sync_synchronize();
+    T v = *addr;
+    __sync_synchronize();
+# else
+    T v;
+    __atomic_load(addr, &v, __ATOMIC_SEQ_CST);
+# endif
+    return v;
+}
+
+template<typename T>
+inline void
+js::jit::AtomicOperations::storeSeqCst(T* addr, T val)
+{
+    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    __sync_synchronize();
+    *addr = val;
+    __sync_synchronize();
+# else
+    __atomic_store(addr, &val, __ATOMIC_SEQ_CST);
+# endif
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val)
+{
+    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    T v;
+    __sync_synchronize();
+    do {
+        v = *addr;
+    } while (__sync_val_compare_and_swap(addr, v, val) != v);
+    return v;
+# else
+    T v;
+    __atomic_exchange(addr, &val, &v, __ATOMIC_SEQ_CST);
+    return v;
+# endif
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval)
+{
+    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __sync_val_compare_and_swap(addr, oldval, newval);
+# else
+    __atomic_compare_exchange(addr, &oldval, &newval, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+    return oldval;
+# endif
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val)
+{
+    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __sync_fetch_and_add(addr, val);
+# else
+    return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST);
+# endif
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val)
+{
+    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __sync_fetch_and_sub(addr, val);
+# else
+    return __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST);
+# endif
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val)
+{
+    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __sync_fetch_and_and(addr, val);
+# else
+    return __atomic_fetch_and(addr, val, __ATOMIC_SEQ_CST);
+# endif
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val)
+{
+    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __sync_fetch_and_or(addr, val);
+# else
+    return __atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST);
+# endif
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val)
+{
+    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __sync_fetch_and_xor(addr, val);
+# else
+    return __atomic_fetch_xor(addr, val, __ATOMIC_SEQ_CST);
+# endif
+}
+
+template<size_t nbytes>
+inline void
+js::jit::RegionLock::acquire(void* addr)
+{
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    while (!__sync_bool_compare_and_swap(&spinlock, 0, 1))
+        ;
+# else
+    uint32_t zero = 0;
+    uint32_t one = 1;
+    while (!__atomic_compare_exchange(&spinlock, &zero, &one, false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE))
+        zero = 0;  // Failed CAS overwrote |zero| with the observed value; reset it.
+# endif
+}
+
+template<size_t nbytes>
+inline void
+js::jit::RegionLock::release(void* addr)
+{
+    MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock");
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    __sync_sub_and_fetch(&spinlock, 1);
+# else
+    uint32_t zero = 0;
+    __atomic_store(&spinlock, &zero, __ATOMIC_SEQ_CST);
+# endif
+}
+
+# undef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+
+#elif defined(ENABLE_SHARED_ARRAY_BUFFER)
+
+# error "Either disable JS shared memory, use GCC or Clang, or add code here"
+
+#endif
+
+#endif // jit_arm_AtomicOperations_arm_h
--- a/js/src/jit/mips/AtomicOperations-mips.h
+++ b/js/src/jit/mips/AtomicOperations-mips.h
@ -0,0 +1,105 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* For documentation, see jit/AtomicOperations.h */
+
+#ifndef jit_mips_AtomicOperations_mips_h
+#define jit_mips_AtomicOperations_mips_h
+
+#include "jit/AtomicOperations.h"
+
+inline bool
+js::jit::AtomicOperations::isLockfree8()
+{
+    // Don't crash this one, since it may be read during
+    // initialization, to cache the value.
+    return false;
+}
+
+inline void
+js::jit::AtomicOperations::fenceSeqCst()
+{
+    MOZ_CRASH();
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::loadSeqCst(T* addr)
+{
+    MOZ_CRASH();
+}
+
+template<typename T>
+inline void
+js::jit::AtomicOperations::storeSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval)
+{
+    MOZ_CRASH();
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+template<typename T>
+inline T
+js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+template<size_t nbytes>
+inline void
+js::jit::RegionLock::acquire(void* addr)
+{
+    MOZ_CRASH();
+}
+
+template<size_t nbytes>
+inline void
+js::jit::RegionLock::release(void* addr)
+{
+    MOZ_CRASH();
+}
+
+#endif // jit_mips_AtomicOperations_mips_h
--- a/js/src/jit/none/AtomicOperations-none.h
+++ b/js/src/jit/none/AtomicOperations-none.h
@ -0,0 +1,105 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* For documentation, see jit/AtomicOperations.h */
+
+#ifndef jit_none_AtomicOperations_none_h
+#define jit_none_AtomicOperations_none_h
+
+#include "jit/AtomicOperations.h"
+
+// Reports whether 8-byte atomic accesses are lock-free.  The "none"
+// platform placeholder always answers "no".
+inline bool
+js::jit::AtomicOperations::isLockfree8()
+{
+    // Don't crash this one, since it may be read during
+    // initialization, to cache the value.
+    return false;
+}
+
+// "none" platform placeholder for fenceSeqCst(): not implemented, so
+// any call crashes.  The contract is described in jit/AtomicOperations.h.
+inline void
+js::jit::AtomicOperations::fenceSeqCst()
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for loadSeqCst(): not implemented, so
+// any call crashes instead of returning a value.
+template<typename T>
+inline T
+js::jit::AtomicOperations::loadSeqCst(T* addr)
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for storeSeqCst(): not implemented, so
+// any call crashes without touching *addr.
+template<typename T>
+inline void
+js::jit::AtomicOperations::storeSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for compareExchangeSeqCst(): not
+// implemented, so any call crashes instead of returning a value.
+template<typename T>
+inline T
+js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval)
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for fetchAddSeqCst(): not implemented,
+// so any call crashes instead of returning a value.
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for fetchSubSeqCst(): not implemented,
+// so any call crashes instead of returning a value.
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for fetchAndSeqCst(): not implemented,
+// so any call crashes instead of returning a value.
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for fetchOrSeqCst(): not implemented,
+// so any call crashes instead of returning a value.
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for fetchXorSeqCst(): not implemented,
+// so any call crashes instead of returning a value.
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for exchangeSeqCst(): not implemented,
+// so any call crashes instead of returning a value.
+template<typename T>
+inline T
+js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val)
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for RegionLock::acquire(): not
+// implemented, so any call crashes and no lock is taken.
+template<size_t nbytes>
+inline void
+js::jit::RegionLock::acquire(void* addr)
+{
+    MOZ_CRASH();
+}
+
+// "none" platform placeholder for RegionLock::release(): not
+// implemented, so any call crashes.
+template<size_t nbytes>
+inline void
+js::jit::RegionLock::release(void* addr)
+{
+    MOZ_CRASH();
+}
+
+#endif // jit_none_AtomicOperations_none_h
--- a/js/src/jit/x86-shared/AtomicOperations-x86-shared.h
+++ b/js/src/jit/x86-shared/AtomicOperations-x86-shared.h
@ -0,0 +1,547 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* For overall documentation, see jit/AtomicOperations.h */
+
+#ifndef jit_shared_AtomicOperations_x86_shared_h
+#define jit_shared_AtomicOperations_x86_shared_h
+
+#include "jit/AtomicOperations.h"
+
+// Lock-freedom on x86 and x64:
+//
+// On x86 and x64 there are atomic instructions for 8-byte accesses:
+//
+// Loads and stores:
+// - Loads and stores are single-copy atomic for up to 8 bytes
+//   starting with the Pentium; the store requires a post-fence for
+//   sequential consistency
+//
+// CompareExchange:
+// - On x64 CMPXCHGQ can always be used
+// - On x86 CMPXCHG8B can be used starting with the first Pentium
+//
+// Exchange:
+// - On x64 XCHGQ can always be used
+// - On x86 one has to use a CompareExchange loop
+//
+// Observe also that the JIT will not be enabled unless we have SSE2,
+// which was introduced with the Pentium 4.  Ergo the JIT will be able
+// to use atomic instructions for up to 8 bytes on all x86 platforms
+// for the primitives we care about.
+//
+// However, C++ compilers and libraries may not provide access to
+// those 8-byte instructions directly.  Clang in 32-bit mode does not
+// provide 8-byte atomic primitives at all (even with eg -arch i686
+// specified).  On Windows 32-bit, MSVC does not provide
+// _InterlockedExchange64 since it does not map directly to an
+// instruction.
+//
+// There are thus sundry workarounds below to handle known corner
+// cases.
+
+#if defined(__clang__) || defined(__GNUC__)
+
+// The default implementation tactic for gcc/clang is to use the newer
+// __atomic intrinsics added for use in C++11 <atomic>.  Where that
+// isn't available, we use GCC's older __sync functions instead.
+//
+// ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS is kept as a backward
+// compatible option for older compilers: enable this to use GCC's old
+// __sync functions instead of the newer __atomic functions.  This
+// will be required for GCC 4.6.x and earlier, and probably for Clang
+// 3.1, should we need to use those versions.
+
+// #define ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+
+// LOCKFREE8 is defined when 8-byte atomics are treated as lock-free.
+// That is the assumption on x86, with one known exception handled
+// here; isLockfree8() reports the resulting setting.
+//
+// The exception is Clang compiling with -m32: in that case the 64-bit
+// __atomic builtins are not available (observed on various Mac OS X
+// versions with Apple Clang and on Linux with Clang 3.5).
+//
+// For now just punt: leave lock-free 8-byte data disabled in that
+// configuration.  The JIT will call isLockfree8() to determine what
+// to do and will stay in sync.  (Bug 1146817 tracks the work to
+// improve on this.)
+
+# if !(defined(__clang__) && defined(__i386))
+#  define LOCKFREE8
+# endif
+
+// Reports whether 8-byte atomics are lock-free in this build, i.e.
+// whether LOCKFREE8 is defined.  When the __atomic builtins are in use
+// it also asserts that every size this file relies on is lock-free.
+inline bool
+js::jit::AtomicOperations::isLockfree8()
+{
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int8_t), 0));
+    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int16_t), 0));
+    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int32_t), 0));
+#  ifdef LOCKFREE8
+    MOZ_ASSERT(__atomic_always_lock_free(sizeof(int64_t), 0));
+#  endif
+# endif
+
+# ifdef LOCKFREE8
+    return true;
+# else
+    return false;
+# endif
+}
+
+// Issues a sequentially consistent memory fence, using whichever
+// family of compiler builtins this build is configured for.
+inline void
+js::jit::AtomicOperations::fenceSeqCst()
+{
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    __atomic_thread_fence(__ATOMIC_SEQ_CST);
+# else
+    __sync_synchronize();
+# endif
+}
+
+// Sequentially consistent load of *addr.  An 8-byte T requires
+// isLockfree8(), which is asserted.
+template<typename T>
+inline T
+js::jit::AtomicOperations::loadSeqCst(T* addr)
+{
+    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    T result;
+    __atomic_load(addr, &result, __ATOMIC_SEQ_CST);
+    return result;
+# else
+    // A volatile access stops the compiler from reordering the load.
+    // No ordering barrier is needed on top of that: the x86 does not
+    // reorder loads with respect to subsequent loads or stores.  The
+    // comments in storeSeqCst have the longer discussion.
+    T result = *static_cast<T volatile*>(addr);
+    return result;
+# endif
+}
+
+// When lock-free 8-byte atomics are disabled, the 64-bit loads are
+// replaced by these specializations, which crash if ever called.
+# ifndef LOCKFREE8
+template<>
+inline int64_t
+js::jit::AtomicOperations::loadSeqCst(int64_t* addr)
+{
+    MOZ_CRASH();
+}
+
+template<>
+inline uint64_t
+js::jit::AtomicOperations::loadSeqCst(uint64_t* addr)
+{
+    MOZ_CRASH();
+}
+# endif // LOCKFREE8
+
+// Sequentially consistent store of val to *addr.  An 8-byte T
+// requires isLockfree8(), which is asserted.
+template<typename T>
+inline void
+js::jit::AtomicOperations::storeSeqCst(T* addr, T val)
+{
+    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    __atomic_store(addr, &val, __ATOMIC_SEQ_CST);
+# else
+    // The volatile store below keeps the compiler from reordering the
+    // access, but that alone is not enough: the x86 may reorder a
+    // store with respect to a subsequent load from a different
+    // location, so an ordering barrier follows the store.
+    //
+    // By way of background, look to eg
+    // http://bartoszmilewski.com/2008/11/05/who-ordered-memory-fences-on-an-x86/
+    //
+    // Consider:
+    //
+    //   uint8_t x = 0, y = 0; // to start
+    //
+    // thread1:
+    //   sx: AtomicOperations::storeSeqCst(&x, 1);
+    //   gy: uint8_t obs1 = AtomicOperations::loadSeqCst(&y);
+    //
+    // thread2:
+    //   sy: AtomicOperations::storeSeqCst(&y, 1);
+    //   gx: uint8_t obs2 = AtomicOperations::loadSeqCst(&x);
+    //
+    // Sequential consistency requires a single global order of these
+    // operations that respects each thread's program order:
+    // sx-gy-sy-gx, sx-sy-gx-gy, sx-sy-gy-gx, sy-gx-sx-gy,
+    // sy-sx-gy-gx, or sy-sx-gx-gy.  Every one of them has sx before
+    // gx or sy before gy, so *at least one* of obs1/obs2 is 1.
+    //
+    // If storeSeqCst/loadSeqCst were nothing more than a volatile
+    // store/load, x86 could reorder gy/gx ahead of the same thread's
+    // prior store.  That would permit gx-gy-sx-sy, where both loads
+    // observe 0!  Hence the synchronization after the volatile
+    // store, which makes the store happen before the following load.
+    *static_cast<T volatile*>(addr) = val;
+    __sync_synchronize();
+# endif
+}
+
+// When lock-free 8-byte atomics are disabled, the 64-bit stores are
+// replaced by these specializations, which crash if ever called.
+# ifndef LOCKFREE8
+template<>
+inline void
+js::jit::AtomicOperations::storeSeqCst(int64_t* addr, int64_t val)
+{
+    MOZ_CRASH();
+}
+
+template<>
+inline void
+js::jit::AtomicOperations::storeSeqCst(uint64_t* addr, uint64_t val)
+{
+    MOZ_CRASH();
+}
+# endif // LOCKFREE8
+
+// Atomically replaces *addr with val and returns the value that was
+// stored there before.  An 8-byte T requires isLockfree8(), which is
+// asserted.
+template<typename T>
+inline T
+js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val)
+{
+    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    T previous;
+    __atomic_exchange(addr, &val, &previous, __ATOMIC_SEQ_CST);
+    return previous;
+# else
+    // Built from a compare-and-swap loop: read the current value and
+    // try to swap val in for it, retrying until the swap succeeds.
+    for (;;) {
+        // The compiler is assumed not to hoist this load out of the
+        // loop.  It shouldn't, because the CAS could affect *addr.
+        T previous = *addr;
+        if (__sync_bool_compare_and_swap(addr, previous, val))
+            return previous;
+    }
+# endif
+}
+
+// When lock-free 8-byte atomics are disabled, the 64-bit exchanges
+// are replaced by these specializations, which crash if ever called.
+# ifndef LOCKFREE8
+template<>
+inline int64_t
+js::jit::AtomicOperations::exchangeSeqCst(int64_t* addr, int64_t val)
+{
+    MOZ_CRASH();
+}
+
+template<>
+inline uint64_t
+js::jit::AtomicOperations::exchangeSeqCst(uint64_t* addr, uint64_t val)
+{
+    MOZ_CRASH();
+}
+# endif // LOCKFREE8
+
+// If *addr equals oldval, atomically replaces it with newval.  The
+// value found in *addr is returned in either case.  An 8-byte T
+// requires isLockfree8(), which is asserted.
+template<typename T>
+inline T
+js::jit::AtomicOperations::compareExchangeSeqCst(T* addr, T oldval, T newval)
+{
+    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    // On failure the builtin writes the value it found into oldval; on
+    // success oldval already equals it.  Either way oldval ends up
+    // holding the previous contents of *addr.
+    __atomic_compare_exchange(addr, &oldval, &newval, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+    return oldval;
+# else
+    return __sync_val_compare_and_swap(addr, oldval, newval);
+# endif
+}
+
+// When lock-free 8-byte atomics are disabled, the 64-bit
+// compare-exchanges are replaced by these specializations, which
+// crash if ever called.
+# ifndef LOCKFREE8
+template<>
+inline int64_t
+js::jit::AtomicOperations::compareExchangeSeqCst(int64_t* addr, int64_t oldval, int64_t newval)
+{
+    MOZ_CRASH();
+}
+
+template<>
+inline uint64_t
+js::jit::AtomicOperations::compareExchangeSeqCst(uint64_t* addr, uint64_t oldval, uint64_t newval)
+{
+    MOZ_CRASH();
+}
+# endif // LOCKFREE8
+
+// Atomic fetch-and-add on *addr, forwarded to the compiler builtin.
+// Restricted at compile time to element types of at most 4 bytes.
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchAddSeqCst(T* addr, T val)
+{
+    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST);
+# else
+    return __sync_fetch_and_add(addr, val);
+# endif
+}
+
+// Atomic fetch-and-subtract on *addr, forwarded to the compiler
+// builtin.  Restricted at compile time to element types of at most
+// 4 bytes.
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchSubSeqCst(T* addr, T val)
+{
+    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST);
+# else
+    return __sync_fetch_and_sub(addr, val);
+# endif
+}
+
+// Atomic fetch-and-AND on *addr, forwarded to the compiler builtin.
+// Restricted at compile time to element types of at most 4 bytes.
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchAndSeqCst(T* addr, T val)
+{
+    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __atomic_fetch_and(addr, val, __ATOMIC_SEQ_CST);
+# else
+    return __sync_fetch_and_and(addr, val);
+# endif
+}
+
+// Atomic fetch-and-OR on *addr, forwarded to the compiler builtin.
+// Restricted at compile time to element types of at most 4 bytes.
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchOrSeqCst(T* addr, T val)
+{
+    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST);
+# else
+    return __sync_fetch_and_or(addr, val);
+# endif
+}
+
+// Atomic fetch-and-XOR on *addr, forwarded to the compiler builtin.
+// Restricted at compile time to element types of at most 4 bytes.
+template<typename T>
+inline T
+js::jit::AtomicOperations::fetchXorSeqCst(T* addr, T val)
+{
+    static_assert(sizeof(T) <= 4, "not available for 8-byte values yet");
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    return __atomic_fetch_xor(addr, val, __ATOMIC_SEQ_CST);
+# else
+    return __sync_fetch_and_xor(addr, val);
+# endif
+}
+
+// Acquires the region lock by spinning until the spinlock word can be
+// changed from 0 (free) to 1 (held).  Neither addr nor nbytes is used
+// by this implementation.
+template<size_t nbytes>
+inline void
+js::jit::RegionLock::acquire(void* addr)
+{
+# ifdef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    while (!__sync_bool_compare_and_swap(&spinlock, 0, 1))
+        continue;
+# else
+    uint32_t zero = 0;
+    uint32_t one = 1;
+    while (!__atomic_compare_exchange(&spinlock, &zero, &one, false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE)) {
+        // A failed compare-exchange stores the value it found (1) into
+        // the expected operand.  Reset it before retrying; otherwise
+        // the next attempt would compare against 1 and report success
+        // while the lock is still held.
+        zero = 0;
+    }
+# endif
+}
+
+// Releases the region lock by returning the spinlock word to 0.
+// Asserts that the lock is held (the word is 1) on entry.  Neither
+// addr nor nbytes is used by this implementation.
+template<size_t nbytes>
+inline void
+js::jit::RegionLock::release(void* addr)
+{
+    MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock");
+# ifndef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+    uint32_t unlocked = 0;
+    __atomic_store(&spinlock, &unlocked, __ATOMIC_SEQ_CST);
+# else
+    __sync_sub_and_fetch(&spinlock, 1); // Should turn into LOCK XADD
+# endif
+}
+
+# undef ATOMICS_IMPLEMENTED_WITH_SYNC_INTRINSICS
+# undef LOCKFREE8
+
+#elif defined(_MSC_VER)
+
+// HAVE_EXCHANGE64 is defined when _InterlockedExchange64 can be used.
+//
+// On 32-bit CPUs there is no 64-bit XCHG instruction; one must
+// instead use a loop with CMPXCHG8B.  Since MSVC provides
+// _InterlockedExchange64 only if it maps directly to XCHG, the
+// workaround must be manual, and the macro is defined only for
+// 64-bit Windows builds.
+
+# if _WIN64
+#  define HAVE_EXCHANGE64
+# endif
+
+// Below, _ReadWriteBarrier is a compiler directive, preventing
+// reordering of instructions and reuse of memory values across it.
+
+// Reports whether 8-byte atomic accesses are lock-free.  The MSVC
+// build always answers "yes"; the reasoning is given below.
+inline bool
+js::jit::AtomicOperations::isLockfree8()
+{
+    // See general comments at the start of this file.
+    //
+    // The MSDN docs suggest very strongly that if code is compiled for
+    // Pentium or better the 64-bit primitives will be lock-free, see
+    // eg the "Remarks" section of the page for _InterlockedCompareExchange64,
+    // currently here:
+    // https://msdn.microsoft.com/en-us/library/ttk2z1ws%28v=vs.85%29.aspx
+    //
+    // But I've found no way to assert that at compile time or run time,
+    // there appears to be no WinAPI is_lock_free() test.
+    return true;
+}
+
+// Full memory fence: a compiler barrier followed by a hardware fence
+// chosen according to the word size of the build.
+inline void
+js::jit::AtomicOperations::fenceSeqCst()
+{
+    _ReadWriteBarrier();
+# if JS_BITS_PER_WORD != 32
+    _mm_mfence();
+# else
+    // MFENCE (the _mm_mfence intrinsic) could be used here if the
+    // build were configured for SSE2+, but non-SSE2 systems need
+    // something else.  Linux uses "lock add [esp], 0", so do the same.
+    __asm lock add [esp], 0;
+# endif
+}
+
+// Sequentially consistent load of *addr.  An 8-byte T requires
+// isLockfree8(), which is asserted.
+//
+// The access is bracketed by compiler barriers so the compiler can
+// neither move it nor reuse an earlier value.  No hardware fence is
+// issued for the load.
+template<typename T>
+inline T
+js::jit::AtomicOperations::loadSeqCst(T* addr)
+{
+    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
+    _ReadWriteBarrier();
+    T v = *addr;
+    _ReadWriteBarrier();
+    return v;
+}
+
+# if !_WIN64
+// In a 32-bit build a plain 8-byte read is not guaranteed to be a
+// single access, so the generic version above could observe a torn
+// value.  The 64-bit types are therefore read with a compare-exchange
+// of 0 for 0: it returns the current contents atomically and leaves
+// memory unchanged.  (The location must be writable for this.)
+template<>
+inline int64_t
+js::jit::AtomicOperations::loadSeqCst(int64_t* addr)
+{
+    MOZ_ASSERT(isLockfree8());
+    _ReadWriteBarrier();
+    return (int64_t)_InterlockedCompareExchange64((__int64 volatile*)addr, 0, 0);
+}
+
+template<>
+inline uint64_t
+js::jit::AtomicOperations::loadSeqCst(uint64_t* addr)
+{
+    MOZ_ASSERT(isLockfree8());
+    _ReadWriteBarrier();
+    return (uint64_t)_InterlockedCompareExchange64((__int64 volatile*)addr, 0, 0);
+}
+# endif // !_WIN64
+
+// Sequentially consistent store of val to *addr.  An 8-byte T
+// requires isLockfree8(), which is asserted.
+//
+// A compiler barrier precedes the store and a full fence
+// (fenceSeqCst) follows it.
+template<typename T>
+inline void
+js::jit::AtomicOperations::storeSeqCst(T* addr, T val)
+{
+    MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());
+    _ReadWriteBarrier();
+    *addr = val;
+    fenceSeqCst();
+}
+
+# if !_WIN64
+// In a 32-bit build a plain 8-byte write is not guaranteed to be a
+// single access, so the generic version above could expose a torn
+// value to other threads.  The 64-bit types are therefore stored with
+// a compare-exchange loop.  The initial plain read only supplies a
+// first guess; each failed attempt returns the real contents, which
+// become the next guess.
+template<>
+inline void
+js::jit::AtomicOperations::storeSeqCst(int64_t* addr, int64_t val)
+{
+    MOZ_ASSERT(isLockfree8());
+    _ReadWriteBarrier();
+    int64_t expected = *addr;
+    for (;;) {
+        int64_t seen = (int64_t)_InterlockedCompareExchange64((__int64 volatile*)addr,
+                                                              (__int64)val,
+                                                              (__int64)expected);
+        if (seen == expected)
+            break;
+        expected = seen;
+    }
+    _ReadWriteBarrier();
+}
+
+template<>
+inline void
+js::jit::AtomicOperations::storeSeqCst(uint64_t* addr, uint64_t val)
+{
+    MOZ_ASSERT(isLockfree8());
+    _ReadWriteBarrier();
+    uint64_t expected = *addr;
+    for (;;) {
+        uint64_t seen = (uint64_t)_InterlockedCompareExchange64((__int64 volatile*)addr,
+                                                                (__int64)val,
+                                                                (__int64)expected);
+        if (seen == expected)
+            break;
+        expected = seen;
+    }
+    _ReadWriteBarrier();
+}
+# endif // !_WIN64
+
+// exchangeSeqCst: atomically replace *addr with a new value and
+// return the previous contents.  One explicit specialization is
+// generated per element type.
+//
+// MSC_EXCHANGEOP forwards directly to an _InterlockedExchange*
+// intrinsic; U is the operand type that intrinsic expects.
+# define MSC_EXCHANGEOP(T, U, xchgop)                           \
+    template<> inline T                                         \
+    js::jit::AtomicOperations::exchangeSeqCst(T* addr, T val) { \
+        MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());             \
+        return (T)xchgop((U volatile*)addr, (U)val);            \
+    }
+
+// MSC_EXCHANGEOP_CAS builds the exchange from a compare-exchange loop
+// for configurations without a direct exchange intrinsic.  The
+// compare-exchange intrinsic returns the value it found, not a
+// success flag, so the swap took effect exactly when that value
+// equals the expected one.
+# define MSC_EXCHANGEOP_CAS(T, U, cmpxchg)                                 \
+    template<> inline T                                                    \
+    js::jit::AtomicOperations::exchangeSeqCst(T* addr, T newval) {         \
+        MOZ_ASSERT(sizeof(T) < 8 || isLockfree8());                        \
+        T oldval;                                                          \
+        T seen;                                                            \
+        do {                                                               \
+            _ReadWriteBarrier();                                           \
+            oldval = *addr;                                                \
+            seen = (T)cmpxchg((U volatile*)addr, (U)newval, (U)oldval);    \
+        } while (seen != oldval);                                          \
+        return oldval;                                                     \
+    }
+
+MSC_EXCHANGEOP(int8_t, char, _InterlockedExchange8)
+MSC_EXCHANGEOP(uint8_t, char, _InterlockedExchange8)
+MSC_EXCHANGEOP(int16_t, short, _InterlockedExchange16)
+MSC_EXCHANGEOP(uint16_t, short, _InterlockedExchange16)
+MSC_EXCHANGEOP(int32_t, long, _InterlockedExchange)
+MSC_EXCHANGEOP(uint32_t, long, _InterlockedExchange)
+# ifdef HAVE_EXCHANGE64
+MSC_EXCHANGEOP(int64_t, __int64, _InterlockedExchange64)
+MSC_EXCHANGEOP(uint64_t, __int64, _InterlockedExchange64)
+# else
+MSC_EXCHANGEOP_CAS(int64_t, __int64, _InterlockedCompareExchange64)
+MSC_EXCHANGEOP_CAS(uint64_t, __int64, _InterlockedCompareExchange64)
+# endif
+
+# undef MSC_EXCHANGEOP
+# undef MSC_EXCHANGEOP_CAS
+
+// compareExchangeSeqCst: one explicit specialization per element
+// type, each forwarding to the matching _InterlockedCompareExchange*
+// intrinsic.  Note the intrinsic's argument order: destination, new
+// value, expected value.  RawT is the operand type the intrinsic
+// expects.
+# define MSC_CAS(ElemT, RawT, casIntrinsic)                                                     \
+    template<> inline ElemT                                                                     \
+    js::jit::AtomicOperations::compareExchangeSeqCst(ElemT* addr, ElemT oldval, ElemT newval) { \
+        MOZ_ASSERT(sizeof(ElemT) < 8 || isLockfree8());                                         \
+        return (ElemT)casIntrinsic((RawT volatile*)addr, (RawT)newval, (RawT)oldval);           \
+    }
+
+MSC_CAS(int8_t, char, _InterlockedCompareExchange8)
+MSC_CAS(uint8_t, char, _InterlockedCompareExchange8)
+MSC_CAS(int16_t, short, _InterlockedCompareExchange16)
+MSC_CAS(uint16_t, short, _InterlockedCompareExchange16)
+MSC_CAS(int32_t, long, _InterlockedCompareExchange)
+MSC_CAS(uint32_t, long, _InterlockedCompareExchange)
+MSC_CAS(int64_t, __int64, _InterlockedCompareExchange64)
+MSC_CAS(uint64_t, __int64, _InterlockedCompareExchange64)
+
+# undef MSC_CAS
+
+// fetchAddSeqCst / fetchSubSeqCst: one pair of explicit
+// specializations per element type of at most 4 bytes.  Both forward
+// to the same _InterlockedExchangeAdd* intrinsic; subtraction passes
+// the negated operand.  RawT is the operand type the intrinsic
+// expects.
+# define MSC_FETCHADDOP(ElemT, RawT, addIntrinsic)                                \
+    template<> inline ElemT                                                       \
+    js::jit::AtomicOperations::fetchAddSeqCst(ElemT* addr, ElemT val) {           \
+        static_assert(sizeof(ElemT) <= 4, "not available for 8-byte values yet"); \
+        return (ElemT)addIntrinsic((RawT volatile*)addr, (RawT)val);              \
+    }                                                                             \
+    template<> inline ElemT                                                       \
+    js::jit::AtomicOperations::fetchSubSeqCst(ElemT* addr, ElemT val) {           \
+        static_assert(sizeof(ElemT) <= 4, "not available for 8-byte values yet"); \
+        return (ElemT)addIntrinsic((RawT volatile*)addr, (RawT)-val);             \
+    }
+
+MSC_FETCHADDOP(int8_t, char, _InterlockedExchangeAdd8)
+MSC_FETCHADDOP(uint8_t, char, _InterlockedExchangeAdd8)
+MSC_FETCHADDOP(int16_t, short, _InterlockedExchangeAdd16)
+MSC_FETCHADDOP(uint16_t, short, _InterlockedExchangeAdd16)
+MSC_FETCHADDOP(int32_t, long, _InterlockedExchangeAdd)
+MSC_FETCHADDOP(uint32_t, long, _InterlockedExchangeAdd)
+
+# undef MSC_FETCHADDOP
+
+// fetchAndSeqCst / fetchOrSeqCst / fetchXorSeqCst: one trio of
+// explicit specializations per element type of at most 4 bytes, each
+// forwarding to the corresponding _InterlockedAnd* / _InterlockedOr* /
+// _InterlockedXor* intrinsic.  RawT is the operand type the
+// intrinsics expect.
+# define MSC_FETCHBITOP(ElemT, RawT, andIntrinsic, orIntrinsic, xorIntrinsic)     \
+    template<> inline ElemT                                                       \
+    js::jit::AtomicOperations::fetchAndSeqCst(ElemT* addr, ElemT val) {           \
+        static_assert(sizeof(ElemT) <= 4, "not available for 8-byte values yet"); \
+        return (ElemT)andIntrinsic((RawT volatile*)addr, (RawT)val);              \
+    }                                                                             \
+    template<> inline ElemT                                                       \
+    js::jit::AtomicOperations::fetchOrSeqCst(ElemT* addr, ElemT val) {            \
+        static_assert(sizeof(ElemT) <= 4, "not available for 8-byte values yet"); \
+        return (ElemT)orIntrinsic((RawT volatile*)addr, (RawT)val);               \
+    }                                                                             \
+    template<> inline ElemT                                                       \
+    js::jit::AtomicOperations::fetchXorSeqCst(ElemT* addr, ElemT val) {           \
+        static_assert(sizeof(ElemT) <= 4, "not available for 8-byte values yet"); \
+        return (ElemT)xorIntrinsic((RawT volatile*)addr, (RawT)val);              \
+    }
+
+MSC_FETCHBITOP(int8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8)
+MSC_FETCHBITOP(uint8_t, char, _InterlockedAnd8, _InterlockedOr8, _InterlockedXor8)
+MSC_FETCHBITOP(int16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16)
+MSC_FETCHBITOP(uint16_t, short, _InterlockedAnd16, _InterlockedOr16, _InterlockedXor16)
+MSC_FETCHBITOP(int32_t, long,  _InterlockedAnd, _InterlockedOr, _InterlockedXor)
+MSC_FETCHBITOP(uint32_t, long, _InterlockedAnd, _InterlockedOr, _InterlockedXor)
+
+# undef MSC_FETCHBITOP
+
+// Acquires the region lock by spinning until the spinlock word can be
+// changed from 0 to 1.  Neither addr nor nbytes is used by this
+// implementation.
+template<size_t nbytes>
+inline void
+js::jit::RegionLock::acquire(void* addr)
+{
+    // The intrinsic returns the value it found in the spinlock word.
+    // Finding 1 means the lock was already taken, so try again.
+    long seen;
+    do {
+        seen = _InterlockedCompareExchange((long*)&spinlock, /*newval=*/1, /*oldval=*/0);
+    } while (seen == 1);
+}
+
+// Releases the region lock by atomically writing 0 to the spinlock
+// word.  Asserts that the lock is held (the word is 1) on entry.
+// Neither addr nor nbytes is used by this implementation.
+template<size_t nbytes>
+inline void
+js::jit::RegionLock::release(void* addr)
+{
+    MOZ_ASSERT(AtomicOperations::loadSeqCst(&spinlock) == 1, "releasing unlocked region lock");
+    long* lockWord = (long*)&spinlock;
+    _InterlockedExchange(lockWord, 0);
+}
+
+# undef HAVE_EXCHANGE64
+
+#elif defined(ENABLE_SHARED_ARRAY_BUFFER)
+
+# error "Either disable JS shared memory, use GCC, Clang, or MSVC, or add code here"
+
+#endif // platform
+
+#endif // jit_shared_AtomicOperations_x86_shared_h