diff --git a/js/src/jit/CacheIRCompiler.cpp b/js/src/jit/CacheIRCompiler.cpp
index f5d632151b23..f3ec8ad845b2 100644
--- a/js/src/jit/CacheIRCompiler.cpp
+++ b/js/src/jit/CacheIRCompiler.cpp
@@ -8118,6 +8118,8 @@ bool CacheIRCompiler::emitAtomicsLoadResult(ObjOperandId objId,
   // Load the value.
   BaseIndex source(scratch, index, ScaleFromScalarType(elementType));
 
+  // NOTE: the generated code must match the assembly code in gen_load in
+  // GenerateAtomicOperations.py
   auto sync = Synchronization::Load();
 
   masm.memoryBarrierBefore(sync);
@@ -8168,6 +8170,8 @@ bool CacheIRCompiler::emitAtomicsStoreResult(ObjOperandId objId,
   // Store the value.
   BaseIndex dest(scratch, index, ScaleFromScalarType(elementType));
 
+  // NOTE: the generated code must match the assembly code in gen_store in
+  // GenerateAtomicOperations.py
   auto sync = Synchronization::Store();
 
   masm.memoryBarrierBefore(sync);
diff --git a/js/src/jit/GenerateAtomicOperations.py b/js/src/jit/GenerateAtomicOperations.py
index 69a7e232f413..dbde8d80ba3e 100644
--- a/js/src/jit/GenerateAtomicOperations.py
+++ b/js/src/jit/GenerateAtomicOperations.py
@@ -42,6 +42,11 @@ def gen_seqcst(fun_name):
 
 
 def gen_load(fun_name, cpp_type, size, barrier):
+    # NOTE: the assembly code must match the generated code in:
+    # - CacheIRCompiler::emitAtomicsLoadResult
+    # - LIRGenerator::visitLoadUnboxedScalar
+    # - CodeGenerator::visitAtomicLoad64 (on 64-bit platforms)
+    # - MacroAssembler::wasmLoad
     if cpu_arch in ("x86", "x86_64"):
         insns = ""
         if barrier:
@@ -128,6 +133,11 @@ def gen_load(fun_name, cpp_type, size, barrier):
 
 
 def gen_store(fun_name, cpp_type, size, barrier):
+    # NOTE: the assembly code must match the generated code in:
+    # - CacheIRCompiler::emitAtomicsStoreResult
+    # - LIRGenerator::visitStoreUnboxedScalar
+    # - CodeGenerator::visitAtomicStore64 (on 64-bit platforms)
+    # - MacroAssembler::wasmStore
     if cpu_arch in ("x86", "x86_64"):
         insns = ""
         if barrier:
@@ -208,6 +218,9 @@ def gen_store(fun_name, cpp_type, size, barrier):
 
 
 def gen_exchange(fun_name, cpp_type, size):
+    # NOTE: the assembly code must match the generated code in:
+    # - MacroAssembler::atomicExchange
+    # - MacroAssembler::atomicExchange64 (on 64-bit platforms)
     if cpu_arch in ("x86", "x86_64"):
         # Request an input/output register for `val` so that we can simply XCHG it
         # with *addr.
@@ -301,6 +314,9 @@ def gen_exchange(fun_name, cpp_type, size):
 
 
 def gen_cmpxchg(fun_name, cpp_type, size):
+    # NOTE: the assembly code must match the generated code in:
+    # - MacroAssembler::compareExchange
+    # - MacroAssembler::compareExchange64
     if cpu_arch == "x86" and size == 64:
         # Use a +A constraint to load `oldval` into EDX:EAX as input/output.
         # `newval` is loaded into ECX:EBX.
@@ -468,6 +484,9 @@ def gen_cmpxchg(fun_name, cpp_type, size):
 
 
 def gen_fetchop(fun_name, cpp_type, size, op):
+    # NOTE: the assembly code must match the generated code in:
+    # - MacroAssembler::atomicFetchOp
+    # - MacroAssembler::atomicFetchOp64 (on 64-bit platforms)
     if cpu_arch in ("x86", "x86_64"):
         # The `add` operation can be optimized with XADD.
         if op == "add":
diff --git a/js/src/jit/Lowering.cpp b/js/src/jit/Lowering.cpp
index e52b28b3e1bf..6ec2e7f8a7c2 100644
--- a/js/src/jit/Lowering.cpp
+++ b/js/src/jit/Lowering.cpp
@@ -3764,6 +3764,8 @@ void LIRGenerator::visitLoadUnboxedScalar(MLoadUnboxedScalar* ins) {
   const LAllocation index = useRegisterOrIndexConstant(
       ins->index(), ins->storageType(), ins->offsetAdjustment());
 
+  // NOTE: the generated code must match the assembly code in gen_load in
+  // GenerateAtomicOperations.py
   Synchronization sync = Synchronization::Load();
   if (ins->requiresMemoryBarrier()) {
     LMemoryBarrier* fence = new (alloc()) LMemoryBarrier(sync.barrierBefore);
@@ -3951,6 +3953,9 @@ void LIRGenerator::visitStoreUnboxedScalar(MStoreUnboxedScalar* ins) {
   // is a store instruction that incorporates the necessary
   // barriers, and we could use that instead of separate barrier and
   // store instructions. See bug #1077027.
+  //
+  // NOTE: the generated code must match the assembly code in gen_store in
+  // GenerateAtomicOperations.py
   Synchronization sync = Synchronization::Store();
   if (ins->requiresMemoryBarrier()) {
     LMemoryBarrier* fence = new (alloc()) LMemoryBarrier(sync.barrierBefore);
diff --git a/js/src/jit/arm/MacroAssembler-arm.cpp b/js/src/jit/arm/MacroAssembler-arm.cpp
index 4050e5dfcb6f..aa1a8373b69b 100644
--- a/js/src/jit/arm/MacroAssembler-arm.cpp
+++ b/js/src/jit/arm/MacroAssembler-arm.cpp
@@ -4935,6 +4935,8 @@ static void CompareExchange(MacroAssembler& masm,
 
   ScratchRegisterScope scratch(masm);
 
+  // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+  // GenerateAtomicOperations.py
   masm.memoryBarrierBefore(sync);
 
   masm.bind(&again);
@@ -5038,6 +5040,8 @@ static void AtomicExchange(MacroAssembler& masm,
 
   ScratchRegisterScope scratch(masm);
 
+  // NOTE: the generated code must match the assembly code in gen_exchange in
+  // GenerateAtomicOperations.py
   masm.memoryBarrierBefore(sync);
 
   masm.bind(&again);
@@ -5139,6 +5143,8 @@ static void AtomicFetchOp(MacroAssembler& masm,
   SecondScratchRegisterScope scratch2(masm);
   Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
 
+  // NOTE: the generated code must match the assembly code in gen_fetchop in
+  // GenerateAtomicOperations.py
   masm.memoryBarrierBefore(sync);
 
   ScratchRegisterScope scratch(masm);
@@ -5394,6 +5400,8 @@ static void CompareExchange64(MacroAssembler& masm,
   SecondScratchRegisterScope scratch2(masm);
   Register ptr = ComputePointerForAtomic(masm, mem, scratch2);
 
+  // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+  // GenerateAtomicOperations.py
   masm.memoryBarrierBefore(sync);
 
   masm.bind(&again);
@@ -6152,6 +6160,8 @@ void MacroAssemblerARM::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
                   type == Scalar::Int32 || type == Scalar::Int64;
   unsigned byteSize = access.byteSize();
 
+  // NOTE: the generated code must match the assembly code in gen_load in
+  // GenerateAtomicOperations.py
   asMasm().memoryBarrierBefore(access.sync());
 
   BufferOffset load;
@@ -6267,6 +6277,8 @@ void MacroAssemblerARM::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
     }
   }
 
+  // NOTE: the generated code must match the assembly code in gen_store in
+  // GenerateAtomicOperations.py
   asMasm().memoryBarrierAfter(access.sync());
 
   BufferOffset store;
diff --git a/js/src/jit/arm64/CodeGenerator-arm64.cpp b/js/src/jit/arm64/CodeGenerator-arm64.cpp
index 1667ba5b925f..920e409f6572 100644
--- a/js/src/jit/arm64/CodeGenerator-arm64.cpp
+++ b/js/src/jit/arm64/CodeGenerator-arm64.cpp
@@ -1985,6 +1985,8 @@ void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) {
 
   Scalar::Type storageType = mir->storageType();
 
+  // NOTE: the generated code must match the assembly code in gen_load in
+  // GenerateAtomicOperations.py
   auto sync = Synchronization::Load();
 
   masm.memoryBarrierBefore(sync);
@@ -2011,6 +2013,8 @@ void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) {
 
   masm.loadBigInt64(value, temp1);
 
+  // NOTE: the generated code must match the assembly code in gen_store in
+  // GenerateAtomicOperations.py
   auto sync = Synchronization::Store();
 
   masm.memoryBarrierBefore(sync);
diff --git a/js/src/jit/arm64/MacroAssembler-arm64.cpp b/js/src/jit/arm64/MacroAssembler-arm64.cpp
index c8c8c75a94fe..7bde4672bf8f 100644
--- a/js/src/jit/arm64/MacroAssembler-arm64.cpp
+++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp
@@ -474,6 +474,8 @@ void MacroAssemblerCompat::wasmLoadImpl(const wasm::MemoryAccessDesc& access,
     instructionsExpected++;
   }
 
+  // NOTE: the generated code must match the assembly code in gen_load in
+  // GenerateAtomicOperations.py
   asMasm().memoryBarrierBefore(access.sync());
 
   {
@@ -625,6 +627,8 @@ void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
 void MacroAssemblerCompat::wasmStoreImpl(const wasm::MemoryAccessDesc& access,
                                          MemOperand dstAddr, AnyRegister valany,
                                          Register64 val64) {
+  // NOTE: the generated code must match the assembly code in gen_store in
+  // GenerateAtomicOperations.py
   asMasm().memoryBarrierBefore(access.sync());
 
   {
@@ -2334,6 +2338,8 @@ static void CompareExchange(MacroAssembler& masm,
 
   MOZ_ASSERT(ptr.base().asUnsized() != output);
 
+  // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+  // GenerateAtomicOperations.py
   masm.memoryBarrierBefore(sync);
 
   Register scratch = temps.AcquireX().asUnsized();
@@ -2365,6 +2371,8 @@ static void AtomicExchange(MacroAssembler& masm,
   Register scratch2 = temps.AcquireX().asUnsized();
   MemOperand ptr = ComputePointerForAtomic(masm, mem, scratch2);
 
+  // NOTE: the generated code must match the assembly code in gen_exchange in
+  // GenerateAtomicOperations.py
   masm.memoryBarrierBefore(sync);
 
   Register scratch = temps.AcquireX().asUnsized();
@@ -2395,6 +2403,8 @@ static void AtomicFetchOp(MacroAssembler& masm,
   Register scratch2 = temps.AcquireX().asUnsized();
   MemOperand ptr = ComputePointerForAtomic(masm, mem, scratch2);
 
+  // NOTE: the generated code must match the assembly code in gen_fetchop in
+  // GenerateAtomicOperations.py
   masm.memoryBarrierBefore(sync);
 
   Register scratch = temps.AcquireX().asUnsized();
diff --git a/js/src/jit/x64/CodeGenerator-x64.cpp b/js/src/jit/x64/CodeGenerator-x64.cpp
index 96b575d01b73..ab2bbb86cf67 100644
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -325,6 +325,8 @@ void CodeGenerator::visitAtomicLoad64(LAtomicLoad64* lir) {
 
   Scalar::Type storageType = mir->storageType();
 
+  // NOTE: the generated code must match the assembly code in gen_load in
+  // GenerateAtomicOperations.py
   auto sync = Synchronization::Load();
 
   masm.memoryBarrierBefore(sync);
@@ -351,6 +353,8 @@ void CodeGenerator::visitAtomicStore64(LAtomicStore64* lir) {
 
   masm.loadBigInt64(value, temp1);
 
+  // NOTE: the generated code must match the assembly code in gen_store in
+  // GenerateAtomicOperations.py
   auto sync = Synchronization::Store();
 
   masm.memoryBarrierBefore(sync);
diff --git a/js/src/jit/x64/MacroAssembler-x64.cpp b/js/src/jit/x64/MacroAssembler-x64.cpp
index 6c49627c7c72..cb8d5fb86129 100644
--- a/js/src/jit/x64/MacroAssembler-x64.cpp
+++ b/js/src/jit/x64/MacroAssembler-x64.cpp
@@ -922,6 +922,8 @@ void MacroAssembler::PushBoxed(FloatRegister reg) {
 
 void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
                               Operand srcAddr, AnyRegister out) {
+  // NOTE: the generated code must match the assembly code in gen_load in
+  // GenerateAtomicOperations.py
   memoryBarrierBefore(access.sync());
 
   MOZ_ASSERT_IF(
@@ -1015,6 +1017,8 @@ void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
 
 void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
                                  Operand srcAddr, Register64 out) {
+  // NOTE: the generated code must match the assembly code in gen_load in
+  // GenerateAtomicOperations.py
   memoryBarrierBefore(access.sync());
 
   append(access, size());
@@ -1057,6 +1061,8 @@ void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
 
 void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
                                AnyRegister value, Operand dstAddr) {
+  // NOTE: the generated code must match the assembly code in gen_store in
+  // GenerateAtomicOperations.py
   memoryBarrierBefore(access.sync());
 
   append(access, masm.size());
@@ -1342,6 +1348,8 @@ static void AtomicFetchOp64(MacroAssembler& masm,
                             const wasm::MemoryAccessDesc* access, AtomicOp op,
                             Register value, const T& mem, Register temp,
                             Register output) {
+  // NOTE: the generated code must match the assembly code in gen_fetchop in
+  // GenerateAtomicOperations.py
   if (op == AtomicFetchAddOp) {
     if (value != output) {
       masm.movq(value, output);
     }
@@ -1441,6 +1449,8 @@ void MacroAssembler::compareExchange64(const Synchronization&,
                                        const Address& mem, Register64 expected,
                                        Register64 replacement,
                                        Register64 output) {
+  // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+  // GenerateAtomicOperations.py
   MOZ_ASSERT(output.reg == rax);
   if (expected != output) {
     movq(expected.reg, output.reg);
   }
@@ -1463,6 +1473,8 @@ void MacroAssembler::compareExchange64(const Synchronization&,
 void MacroAssembler::atomicExchange64(const Synchronization&,
                                       const Address& mem, Register64 value,
                                       Register64 output) {
+  // NOTE: the generated code must match the assembly code in gen_exchange in
+  // GenerateAtomicOperations.py
   if (value != output) {
     movq(value.reg, output.reg);
   }
diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
index 59b858e40bcb..4cf983f96157 100644
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp
@@ -1108,6 +1108,8 @@ static void CompareExchange(MacroAssembler& masm,
     masm.append(*access, masm.size());
   }
 
+  // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+  // GenerateAtomicOperations.py
   switch (Scalar::byteSize(type)) {
     case 1:
       CheckBytereg(newval);
@@ -1153,7 +1155,8 @@ static void AtomicExchange(MacroAssembler& masm,
                            const wasm::MemoryAccessDesc* access,
                            Scalar::Type type, const T& mem, Register value,
                            Register output)
-
+// NOTE: the generated code must match the assembly code in gen_exchange in
+// GenerateAtomicOperations.py
 {
   if (value != output) {
     masm.movl(value, output);
   }
@@ -1230,6 +1233,8 @@ static void AtomicFetchOp(MacroAssembler& masm,
                           const T& mem, Register temp, Register output) {
   // Note value can be an Imm or a Register.
 
+  // NOTE: the generated code must match the assembly code in gen_fetchop in
+  // GenerateAtomicOperations.py
 #define ATOMIC_BITOP_BODY(LOAD, OP, LOCK_CMPXCHG) \
   do {                                            \
     MOZ_ASSERT(output != temp);                   \
diff --git a/js/src/jit/x86/MacroAssembler-x86.cpp b/js/src/jit/x86/MacroAssembler-x86.cpp
index 86a254c1f0c0..43d56e769aac 100644
--- a/js/src/jit/x86/MacroAssembler-x86.cpp
+++ b/js/src/jit/x86/MacroAssembler-x86.cpp
@@ -958,6 +958,8 @@ void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
                 access.type() == Scalar::Float32 ||
                     access.type() == Scalar::Float64);
   MOZ_ASSERT_IF(access.isWidenSimd128Load(), access.type() == Scalar::Float64);
+  // NOTE: the generated code must match the assembly code in gen_load in
+  // GenerateAtomicOperations.py
   memoryBarrierBefore(access.sync());
 
   append(access, size());
@@ -1121,6 +1123,8 @@ void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
   MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP ||
              dstAddr.kind() == Operand::MEM_SCALE);
 
+  // NOTE: the generated code must match the assembly code in gen_store in
+  // GenerateAtomicOperations.py
   memoryBarrierBefore(access.sync());
 
   append(access, size());
@@ -1217,6 +1221,8 @@ static void CompareExchange64(MacroAssembler& masm,
   MOZ_ASSERT(replacement.high == ecx);
   MOZ_ASSERT(replacement.low == ebx);
 
+  // NOTE: the generated code must match the assembly code in gen_cmpxchg in
+  // GenerateAtomicOperations.py
   if (access) {
     masm.append(*access, masm.size());
   }
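Reviewer note, not part of the patch: the NOTE comments above pair each hand-written assembly generator in GenerateAtomicOperations.py with the JIT code paths that are expected to emit matching machine code. The sketch below only summarizes that mapping in one place; the CROSS_REFS table is copied from the added comments, while the check() helper and its js/src/jit default search path are illustrative assumptions, not something this patch adds.

#!/usr/bin/env python3
# Illustrative sketch: the gen_* <-> JIT pairings documented by the NOTE comments.
import pathlib
import sys

# Mapping copied from the comments added to GenerateAtomicOperations.py.
CROSS_REFS = {
    "gen_load": [
        "CacheIRCompiler::emitAtomicsLoadResult",
        "LIRGenerator::visitLoadUnboxedScalar",
        "CodeGenerator::visitAtomicLoad64",  # on 64-bit platforms
        "MacroAssembler::wasmLoad",
    ],
    "gen_store": [
        "CacheIRCompiler::emitAtomicsStoreResult",
        "LIRGenerator::visitStoreUnboxedScalar",
        "CodeGenerator::visitAtomicStore64",  # on 64-bit platforms
        "MacroAssembler::wasmStore",
    ],
    "gen_exchange": [
        "MacroAssembler::atomicExchange",
        "MacroAssembler::atomicExchange64",  # on 64-bit platforms
    ],
    "gen_cmpxchg": [
        "MacroAssembler::compareExchange",
        "MacroAssembler::compareExchange64",
    ],
    "gen_fetchop": [
        "MacroAssembler::atomicFetchOp",
        "MacroAssembler::atomicFetchOp64",  # on 64-bit platforms
    ],
}


def check(jit_dir="js/src/jit"):
    # Hypothetical helper: warn when an emitter named in CROSS_REFS can no
    # longer be found in any .cpp file under jit_dir. It only checks that the
    # names still exist; verifying that the emitted instruction sequences
    # actually match remains a manual review step.
    sources = "".join(
        p.read_text(errors="ignore") for p in pathlib.Path(jit_dir).rglob("*.cpp")
    )
    ok = True
    for gen, emitters in CROSS_REFS.items():
        for emitter in emitters:
            if emitter.split("::")[-1] not in sources:
                print(f"{gen}: emitter {emitter} not found", file=sys.stderr)
                ok = False
    return ok


if __name__ == "__main__":
    sys.exit(0 if check(*sys.argv[1:2]) else 1)

Either direction of the cross-reference can go stale on its own, which is why the patch adds the comments both in the Python generator and in each JIT emitter.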