Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1478632 - wasm simd, part 5a: extract porting interfaces and x86/x64 impls. r=bbouvier
Here we're *only* extracting porting APIs from the -SIMD.cpp file and exposing them in MacroAssembler.h, along with a little bit of renaming. Almost all the interesting action happens in the next patch.

Differential Revision: https://phabricator.services.mozilla.com/D60856
Parent: 3c9e1d4b5f
Commit: 168837c2ec
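
The shape of the porting interface can be illustrated with a small hypothetical caller: code generators call the architecture-neutral MacroAssembler entry points declared in the first hunk below, and each DEFINED_ON(...) declaration is backed by a per-architecture inline implementation (only x86/x64 in this patch). The helper name, the registers, and the aliasing handling in this sketch are assumptions for illustration, not part of the patch; it also assumes `rhs` does not alias `dest`.

#include "jit/MacroAssembler.h"

// Hypothetical lowering helper (not in the patch): emits a wasm i32x4.add
// through the new platform-independent entry point instead of reaching into
// MacroAssemblerX86Shared directly.
static void EmitI32x4Add(js::jit::MacroAssembler& masm,
                         js::jit::FloatRegister lhs,
                         js::jit::FloatRegister rhs,
                         js::jit::FloatRegister dest) {
  // Per the header comment added below, on x86/x64 without AVX `lhs` and
  // `dest` must be the same register, so arrange that before the add.
  if (lhs != dest) {
    masm.moveSimd128(lhs, dest);
    lhs = dest;
  }
  masm.addInt32x4(lhs, rhs, dest);  // DEFINED_ON(x86_shared): vpaddd
}
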
@@ -1772,6 +1772,297 @@ class MacroAssembler : public MacroAssemblerSpecific {
  inline void memoryBarrier(MemoryBarrierBits barrier) PER_SHARED_ARCH;

 public:
  // ========================================================================
  // SIMD
  //
  // Naming is "operationSimd128" when operating on the whole vector, otherwise
  // it's "operation<Type><Size>x<Lanes>".
  //
  // For microarchitectural reasons we can in principle get a performance win by
  // using int or float specific instructions in the operationSimd128 case when
  // we know that subsequent operations on the result are int or float oriented.
  // In practice, we don't care about that yet.
  //
  // The order of operations here follows those in the SIMD overview document,
  // https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md.
  //
  // In many cases on x86, unless AVX is present on the chip then `lhs` and
  // `dest` must be the same register.

  // Moves

  inline void moveSimd128(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Constants

  inline void zeroSimd128(FloatRegister dest) DEFINED_ON(x86_shared);

  // Splat

  inline void splatX16(Register src, FloatRegister dest) DEFINED_ON(x86_shared);

  inline void splatX8(Register src, FloatRegister dest) DEFINED_ON(x86_shared);

  inline void splatX4(Register src, FloatRegister dest) DEFINED_ON(x86_shared);

  inline void splatX4(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Extract lane as scalar

  inline void extractLaneInt8x16(FloatRegister src, Register dest,
                                 unsigned lane, SimdSign sign)
      DEFINED_ON(x86_shared);

  inline void extractLaneInt16x8(FloatRegister src, Register dest,
                                 unsigned lane, SimdSign sign)
      DEFINED_ON(x86_shared);

  inline void extractLaneInt32x4(FloatRegister src, Register dest,
                                 unsigned lane) DEFINED_ON(x86_shared);

  inline void extractLaneFloat32x4(FloatRegister src, FloatRegister dest,
                                   unsigned lane, bool canonicalize)
      DEFINED_ON(x86_shared);

  // Replace lane value

  inline void replaceLaneInt8x16(FloatRegister src, Register value,
                                 FloatRegister dest, unsigned lane)
      DEFINED_ON(x86_shared);

  inline void replaceLaneInt16x8(FloatRegister src, Register value,
                                 FloatRegister dest, unsigned lane)
      DEFINED_ON(x86_shared);

  inline void replaceLaneInt32x4(FloatRegister src, Register value,
                                 FloatRegister dest, unsigned lane)
      DEFINED_ON(x86_shared);

  inline void replaceLaneFloat32x4(FloatRegister src, FloatRegister value,
                                   FloatRegister dest, unsigned lane)
      DEFINED_ON(x86_shared);

  // Shuffle - permute with immediate indices

  // Swizzle - permute with variable indices

  // Integer Add

  inline void addInt8x16(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  inline void addInt16x8(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  inline void addInt32x4(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  // Integer Subtract

  inline void subInt8x16(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  inline void subInt16x8(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  inline void subInt32x4(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  // Integer Multiply

  inline void mulInt16x8(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  inline bool mulInt32x4RequiresTemp() DEFINED_ON(x86_shared);

  inline void mulInt32x4(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister temp, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Integer Negate

  inline void negInt8x16(FloatRegister in, FloatRegister out)
      DEFINED_ON(x86_shared);

  inline void negInt16x8(FloatRegister in, FloatRegister out)
      DEFINED_ON(x86_shared);

  inline void negInt32x4(FloatRegister in, FloatRegister out)
      DEFINED_ON(x86_shared);

  // Saturating integer add

  inline void addSatInt8x16(FloatRegister lhs, FloatRegister rhs, SimdSign sign,
                            FloatRegister dest) DEFINED_ON(x86_shared);

  inline void addSatInt16x8(FloatRegister lhs, FloatRegister rhs, SimdSign sign,
                            FloatRegister dest) DEFINED_ON(x86_shared);

  // Saturating integer subtract

  inline void subSatInt8x16(FloatRegister lhs, FloatRegister rhs, SimdSign sign,
                            FloatRegister dest) DEFINED_ON(x86_shared);

  inline void subSatInt16x8(FloatRegister lhs, FloatRegister rhs, SimdSign sign,
                            FloatRegister dest) DEFINED_ON(x86_shared);

  // Lane-wise integer minimum

  // Lane-wise integer maximum

  // Lane-wise integer rounding average

  // Left shift by scalar

  inline void leftShiftInt16x8(FloatRegister src, Register count, Register temp,
                               FloatRegister out) DEFINED_ON(x86_shared);

  inline void leftShiftInt32x4(FloatRegister src, Register count, Register temp,
                               FloatRegister out) DEFINED_ON(x86_shared);

  // Right shift by scalar

  inline void rightShiftInt16x8(FloatRegister src, Register count,
                                Register temp, FloatRegister out)
      DEFINED_ON(x86_shared);

  inline void unsignedRightShiftInt16x8(FloatRegister src, Register count,
                                        Register temp, FloatRegister out)
      DEFINED_ON(x86_shared);

  inline void rightShiftInt32x4(FloatRegister src, Register count,
                                Register temp, FloatRegister out)
      DEFINED_ON(x86_shared);

  inline void unsignedRightShiftInt32x4(FloatRegister src, Register count,
                                        Register temp, FloatRegister out)
      DEFINED_ON(x86_shared);

  // Bitwise and, or, xor, not

  inline void bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) DEFINED_ON(x86_shared);

  inline void bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs,
                               FloatRegister dest) DEFINED_ON(x86_shared);

  inline void bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) DEFINED_ON(x86_shared);

  inline void bitwiseNotSimd128(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Bitwise and-not. Note, this is ~lhs & rhs, which is not what wasm wants
  // but conforms to what the x86 does.

  inline void bitwiseAndNotSimd128(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) DEFINED_ON(x86_shared);

  // Bitwise select

  inline void bitwiseSelectSimd128(FloatRegister mask, FloatRegister onTrue,
                                   FloatRegister onFalse, FloatRegister temp,
                                   FloatRegister dest) DEFINED_ON(x86_shared);

  // Any lane true

  // All lanes true

  // Comparisons (integer and floating-point)

  inline void compareInt8x16(FloatRegister lhs, FloatRegister rhs,
                             Assembler::Condition cond, FloatRegister dest)
      DEFINED_ON(x86_shared);

  inline void compareInt16x8(FloatRegister lhs, FloatRegister rhs,
                             Assembler::Condition cond, FloatRegister dest)
      DEFINED_ON(x86_shared);

  inline void compareInt32x4(FloatRegister lhs, FloatRegister rhs,
                             Assembler::Condition cond, FloatRegister dest)
      DEFINED_ON(x86_shared);

  inline void compareFloat32x4(FloatRegister lhs, FloatRegister rhs,
                               Assembler::Condition cond, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Load

  inline void loadUnalignedSimd128(const Address& src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  inline void loadUnalignedSimd128(const BaseIndex& src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Load and splat

  // Load and extend

  // Store

  inline void storeUnalignedSimd128(FloatRegister src, const Address& dest)
      DEFINED_ON(x86_shared);

  inline void storeUnalignedSimd128(FloatRegister src, const BaseIndex& dest)
      DEFINED_ON(x86_shared);

  // Floating point negation

  inline void negFloat32x4(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Floating point absolute value

  inline void absFloat32x4(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // NaN-propagating minimum

  inline void minFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister dest) DEFINED_ON(x86_shared);

  // NaN-propagating maximum

  inline void maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister temp, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Floating add

  inline void addFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister dest) DEFINED_ON(x86_shared);

  // Floating subtract

  inline void subFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister dest) DEFINED_ON(x86_shared);

  // Floating division

  inline void divFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister dest) DEFINED_ON(x86_shared);

  // Floating Multiply

  inline void mulFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister dest) DEFINED_ON(x86_shared);

  // Floating square root

  inline void sqrtFloat32x4(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Integer to floating point with rounding

  // Floating point to integer with saturation

  // Integer to integer narrowing

  // Integer to integer widening

 public:
  // ========================================================================
  // Truncate floating point.

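One subtlety flagged in the and-not comment above: bitwiseAndNotSimd128 follows x86 PANDN semantics (~lhs & rhs), while wasm's v128.andnot wants a & ~b, so a later lowering would swap the operands. A minimal hypothetical sketch of that swap (the wrapper name and registers are assumptions, not part of this patch):

#include "jit/MacroAssembler.h"

// Hypothetical sketch: produce wasm v128.andnot (a & ~b) from the x86-shaped
// primitive (~lhs & rhs) by swapping the operands, since ~b & a == a & ~b.
static void EmitWasmAndNot(js::jit::MacroAssembler& masm,
                           js::jit::FloatRegister a,
                           js::jit::FloatRegister b,
                           js::jit::FloatRegister dest) {
  // Operand order is deliberate: `b` is passed as `lhs`, so it is the input
  // that gets negated.  On non-AVX x86 the lhs/dest aliasing rule from the
  // header comment then applies to `b`.
  masm.bitwiseAndNotSimd128(b, a, dest);
}
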
@@ -1068,6 +1068,414 @@ void MacroAssembler::memoryBarrier(MemoryBarrierBits barrier) {
  }
}

// ========================================================================
// SIMD
//
// For vector operations of the form "operationSimd128" we currently bias in
// favor of an integer representation on x86, but this is subject to later
// adjustment based on an analysis of use cases and actual programs.
//
// The order of operations here follows the header file.

// Moves

void MacroAssembler::moveSimd128(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::moveSimd128Int(src, dest);
}

// Constants

void MacroAssembler::zeroSimd128(FloatRegister dest) {
  MacroAssemblerX86Shared::zeroSimd128Int(dest);
}

// Splat

void MacroAssembler::splatX16(Register src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX16(src, dest);
}

void MacroAssembler::splatX8(Register src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX8(src, dest);
}

void MacroAssembler::splatX4(Register src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX4(src, dest);
}

void MacroAssembler::splatX4(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX4(src, dest);
}

// Extract lane as scalar

void MacroAssembler::extractLaneInt8x16(FloatRegister src, Register dest,
                                        unsigned lane, SimdSign sign) {
  MacroAssemblerX86Shared::extractLaneInt8x16(src, dest, lane, sign);
}

void MacroAssembler::extractLaneInt16x8(FloatRegister src, Register dest,
                                        unsigned lane, SimdSign sign) {
  MacroAssemblerX86Shared::extractLaneInt16x8(src, dest, lane, sign);
}

void MacroAssembler::extractLaneInt32x4(FloatRegister src, Register dest,
                                        unsigned lane) {
  MacroAssemblerX86Shared::extractLaneInt32x4(src, dest, lane);
}

void MacroAssembler::extractLaneFloat32x4(FloatRegister src, FloatRegister dest,
                                          unsigned lane, bool canonicalize) {
  MacroAssemblerX86Shared::extractLaneFloat32x4(src, dest, lane, canonicalize);
}

// Replace lane value

void MacroAssembler::replaceLaneInt8x16(FloatRegister src, Register value,
                                        FloatRegister dest, unsigned lane) {
  MacroAssemblerX86Shared::insertLaneSimdInt(src, value, dest, lane, 16);
}

void MacroAssembler::replaceLaneInt16x8(FloatRegister src, Register value,
                                        FloatRegister dest, unsigned lane) {
  MacroAssemblerX86Shared::insertLaneSimdInt(src, value, dest, lane, 8);
}

void MacroAssembler::replaceLaneInt32x4(FloatRegister src, Register value,
                                        FloatRegister dest, unsigned lane) {
  MacroAssemblerX86Shared::insertLaneSimdInt(src, value, dest, lane, 4);
}

void MacroAssembler::replaceLaneFloat32x4(FloatRegister src,
                                          FloatRegister value,
                                          FloatRegister dest, unsigned lane) {
  MacroAssemblerX86Shared::insertLaneFloat32x4(src, value, dest, lane);
}

// Shuffle - permute with immediate indices

// Swizzle - permute with variable indices

// Integer Add

void MacroAssembler::addInt8x16(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpaddb(Operand(rhs), lhs, dest);
}

void MacroAssembler::addInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpaddw(Operand(rhs), lhs, dest);
}

void MacroAssembler::addInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpaddd(Operand(rhs), lhs, dest);
}

// Integer subtract

void MacroAssembler::subInt8x16(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpsubb(Operand(rhs), lhs, dest);
}

void MacroAssembler::subInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpsubw(Operand(rhs), lhs, dest);
}

void MacroAssembler::subInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpsubd(Operand(rhs), lhs, dest);
}

// Integer multiply

void MacroAssembler::mulInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpmullw(Operand(rhs), lhs, dest);
}

bool MacroAssembler::mulInt32x4RequiresTemp() {
  return !AssemblerX86Shared::HasSSE41();
}

void MacroAssembler::mulInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister temp, FloatRegister dest) {
  MOZ_ASSERT(temp.isInvalid() == !mulInt32x4RequiresTemp());
  mozilla::Maybe<FloatRegister> theTemp =
      mulInt32x4RequiresTemp() ? mozilla::Some(temp) : mozilla::Nothing();
  MacroAssemblerX86Shared::mulInt32x4(lhs, Operand(rhs), theTemp, dest);
}

// Integer negate

void MacroAssembler::negInt8x16(FloatRegister in, FloatRegister out) {
  zeroSimd128Int(out);
  packedSubInt8(Operand(in), out);
}

void MacroAssembler::negInt16x8(FloatRegister in, FloatRegister out) {
  zeroSimd128Int(out);
  packedSubInt16(Operand(in), out);
}

void MacroAssembler::negInt32x4(FloatRegister in, FloatRegister out) {
  zeroSimd128Int(out);
  packedSubInt32(Operand(in), out);
}

// Saturating integer add

void MacroAssembler::addSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                   SimdSign sign, FloatRegister dest) {
  if (sign == SimdSign::Signed) {
    vpaddsb(Operand(rhs), lhs, dest);
  } else {
    vpaddusb(Operand(rhs), lhs, dest);
  }
}

void MacroAssembler::addSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                   SimdSign sign, FloatRegister dest) {
  if (sign == SimdSign::Signed) {
    vpaddsw(Operand(rhs), lhs, dest);
  } else {
    vpaddusw(Operand(rhs), lhs, dest);
  }
}

// Saturating integer subtract

void MacroAssembler::subSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                   SimdSign sign, FloatRegister dest) {
  if (sign == SimdSign::Signed) {
    vpsubsb(Operand(rhs), lhs, dest);
  } else {
    vpsubusb(Operand(rhs), lhs, dest);
  }
}

void MacroAssembler::subSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                   SimdSign sign, FloatRegister dest) {
  if (sign == SimdSign::Signed) {
    vpsubsw(Operand(rhs), lhs, dest);
  } else {
    vpsubusw(Operand(rhs), lhs, dest);
  }
}

// Lane-wise integer minimum

// Lane-wise integer maximum

// Lane-wise integer rounding average

// Left shift by scalar

void MacroAssembler::leftShiftInt16x8(FloatRegister src, Register count,
                                      Register temp, FloatRegister out) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt16x8(src, count, temp,
                                                          out);
}

void MacroAssembler::leftShiftInt32x4(FloatRegister src, Register count,
                                      Register temp, FloatRegister out) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt32x4(src, count, temp,
                                                          out);
}

// Right shift by scalar

void MacroAssembler::rightShiftInt16x8(FloatRegister src, Register count,
                                       Register temp, FloatRegister out) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt16x8(src, count, temp,
                                                           out);
}

void MacroAssembler::unsignedRightShiftInt16x8(FloatRegister src,
                                               Register count, Register temp,
                                               FloatRegister out) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt16x8(src, count,
                                                                   temp, out);
}

void MacroAssembler::rightShiftInt32x4(FloatRegister src, Register count,
                                       Register temp, FloatRegister out) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt32x4(src, count, temp,
                                                           out);
}

void MacroAssembler::unsignedRightShiftInt32x4(FloatRegister src,
                                               Register count, Register temp,
                                               FloatRegister out) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt32x4(src, count,
                                                                   temp, out);
}

// Bitwise and, or, xor, not

void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  MacroAssemblerX86Shared::bitwiseAndSimdInt(lhs, Operand(rhs), dest);
}

void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  MacroAssemblerX86Shared::bitwiseOrSimdInt(lhs, Operand(rhs), dest);
}

void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  MacroAssemblerX86Shared::bitwiseXorSimdInt(lhs, Operand(rhs), dest);
}

void MacroAssembler::bitwiseNotSimd128(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::notInt8x16(Operand(src), dest);
}

// Bitwise and-not

void MacroAssembler::bitwiseAndNotSimd128(FloatRegister lhs, FloatRegister rhs,
                                          FloatRegister dest) {
  MacroAssemblerX86Shared::bitwiseAndNotSimdInt(lhs, Operand(rhs), dest);
}

// Bitwise select

void MacroAssembler::bitwiseSelectSimd128(FloatRegister mask,
                                          FloatRegister onTrue,
                                          FloatRegister onFalse,
                                          FloatRegister temp,
                                          FloatRegister dest) {
  MacroAssemblerX86Shared::selectSimd128(mask, onTrue, onFalse, temp, dest);
}

// All lanes true

// Comparisons (integer and floating-point)

void MacroAssembler::compareInt8x16(FloatRegister lhs, FloatRegister rhs,
                                    Assembler::Condition cond,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt8x16(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareInt16x8(FloatRegister lhs, FloatRegister rhs,
                                    Assembler::Condition cond,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt16x8(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareInt32x4(FloatRegister lhs, FloatRegister rhs,
                                    Assembler::Condition cond,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt32x4(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                      Assembler::Condition cond,
                                      FloatRegister dest) {
  MacroAssemblerX86Shared::compareFloat32x4(lhs, Operand(rhs), cond, dest);
}

// Load

void MacroAssembler::loadUnalignedSimd128(const Address& src,
                                          FloatRegister dest) {
  vmovups(Operand(src), dest);
}

void MacroAssembler::loadUnalignedSimd128(const BaseIndex& src,
                                          FloatRegister dest) {
  vmovdqu(Operand(src), dest);
}

// Load and splat

// Load and extend

// Store

void MacroAssembler::storeUnalignedSimd128(FloatRegister src,
                                           const Address& dest) {
  vmovups(src, Operand(dest));
}

void MacroAssembler::storeUnalignedSimd128(FloatRegister src,
                                           const BaseIndex& dest) {
  vmovups(src, Operand(dest));
}

// Floating point negation

void MacroAssembler::negFloat32x4(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::negFloat32x4(Operand(src), dest);
}

// Floating point absolute value

void MacroAssembler::absFloat32x4(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::absFloat32x4(Operand(src), dest);
}

// NaN-propagating minimum

void MacroAssembler::minFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  MacroAssemblerX86Shared::minFloat32x4(lhs, Operand(rhs), dest);
}

// NaN-propagating maximum

void MacroAssembler::maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister temp, FloatRegister dest) {
  MacroAssemblerX86Shared::maxFloat32x4(lhs, Operand(rhs), temp, dest);
}

// Floating add

void MacroAssembler::addFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vaddps(Operand(rhs), lhs, dest);
}

// Floating subtract

void MacroAssembler::subFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vsubps(Operand(rhs), lhs, dest);
}

// Floating division

void MacroAssembler::divFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vdivps(Operand(rhs), lhs, dest);
}

// Floating Multiply

void MacroAssembler::mulFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vmulps(Operand(rhs), lhs, dest);
}

// Floating square root

void MacroAssembler::sqrtFloat32x4(FloatRegister src, FloatRegister dest) {
  vsqrtps(Operand(src), dest);
}

// Integer to floating point with rounding

// Floating point to integer with saturation

// Integer to integer narrowing

// Integer to integer widening

// ========================================================================
// Truncate floating point.

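The mulInt32x4 implementation above encodes a small protocol: on chips without SSE4.1 the 32x4 multiply needs a scratch vector register, and the assertion requires callers to pass an invalid register otherwise. A hypothetical caller might look like the sketch below; the helper name and the way the scratch register is obtained are assumptions, not part of the patch.

#include "jit/MacroAssembler.h"

// Hypothetical caller of the temp-register protocol for mulInt32x4.
static void EmitI32x4Mul(js::jit::MacroAssembler& masm,
                         js::jit::FloatRegister lhs,
                         js::jit::FloatRegister rhs,
                         js::jit::FloatRegister scratch,
                         js::jit::FloatRegister dest) {
  // Only pre-SSE4.1 hardware needs the scratch; passing InvalidFloatReg
  // otherwise satisfies the MOZ_ASSERT in mulInt32x4.
  js::jit::FloatRegister temp = masm.mulInt32x4RequiresTemp()
                                    ? scratch
                                    : js::jit::InvalidFloatReg;
  masm.mulInt32x4(lhs, rhs, temp, dest);
}
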
@@ -379,7 +379,7 @@ void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
     } else if (reg.isSingle()) {
       storeFloat32(reg, spillAddress);
     } else if (reg.isSimd128()) {
-      storeUnalignedSimd128Float(reg, spillAddress);
+      storeUnalignedSimd128(reg, spillAddress);
     } else {
       MOZ_CRASH("Unknown register type.");
     }

@@ -417,7 +417,7 @@ void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
     } else if (reg.isSingle()) {
       storeFloat32(reg, dest);
     } else if (reg.isSimd128()) {
-      storeUnalignedSimd128Float(reg, dest);
+      storeUnalignedSimd128(reg, dest);
     } else {
       MOZ_CRASH("Unknown register type.");
     }

@@ -452,7 +452,7 @@ void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
     } else if (reg.isSingle()) {
       loadFloat32(spillAddress, reg);
     } else if (reg.isSimd128()) {
-      loadUnalignedSimd128Float(spillAddress, reg);
+      loadUnalignedSimd128(spillAddress, reg);
     } else {
       MOZ_CRASH("Unknown register type.");
     }

@@ -432,88 +432,10 @@ class MacroAssemblerX86Shared : public Assembler {
   void compareFloat32x4(FloatRegister lhs, Operand rhs,
                         Assembler::Condition cond, FloatRegister output);
 
-  void addInt8x16(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpaddb(rhs, lhs, output);
-  }
-  void addInt16x8(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpaddw(rhs, lhs, output);
-  }
-  void addInt32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpaddd(rhs, lhs, output);
-  }
-  void addFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vaddps(rhs, lhs, output);
-  }
-
-  void addSatInt8x16(FloatRegister lhs, Operand rhs, SimdSign sign,
-                     FloatRegister output) {
-    if (sign == SimdSign::Signed) {
-      vpaddsb(rhs, lhs, output);
-    } else {
-      vpaddusb(rhs, lhs, output);
-    }
-  }
-  void addSatInt16x8(FloatRegister lhs, Operand rhs, SimdSign sign,
-                     FloatRegister output) {
-    if (sign == SimdSign::Signed) {
-      vpaddsw(rhs, lhs, output);
-    } else {
-      vpaddusw(rhs, lhs, output);
-    }
-  }
-
-  void subInt8x16(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpsubb(rhs, lhs, output);
-  }
-  void subInt16x8(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpsubw(rhs, lhs, output);
-  }
-  void subInt32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpsubd(rhs, lhs, output);
-  }
-  void subFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vsubps(rhs, lhs, output);
-  }
-
-  void subSatInt8x16(FloatRegister lhs, Operand rhs, SimdSign sign,
-                     FloatRegister output) {
-    if (sign == SimdSign::Signed) {
-      vpsubsb(rhs, lhs, output);
-    } else {
-      vpsubusb(rhs, lhs, output);
-    }
-  }
-  void subSatInt16x8(FloatRegister lhs, Operand rhs, SimdSign sign,
-                     FloatRegister output) {
-    if (sign == SimdSign::Signed) {
-      vpsubsw(rhs, lhs, output);
-    } else {
-      vpsubusw(rhs, lhs, output);
-    }
-  }
-
-  void mulInt16x8(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpmullw(rhs, lhs, output);
-  }
   void mulInt32x4(FloatRegister lhs, Operand rhs,
                   const mozilla::Maybe<FloatRegister>& temp,
                   FloatRegister output);
-  void mulFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vmulps(rhs, lhs, output);
-  }
 
-  void negInt8x16(Operand in, FloatRegister out) {
-    zeroSimd128Int(out);
-    packedSubInt8(in, out);
-  }
-  void negInt16x8(Operand in, FloatRegister out) {
-    zeroSimd128Int(out);
-    packedSubInt16(in, out);
-  }
-  void negInt32x4(Operand in, FloatRegister out) {
-    zeroSimd128Int(out);
-    packedSubInt32(in, out);
-  }
   void negFloat32x4(Operand in, FloatRegister out);
 
   void notInt8x16(Operand in, FloatRegister out);

@@ -521,9 +443,6 @@ class MacroAssemblerX86Shared : public Assembler {
   void notInt32x4(Operand in, FloatRegister out);
   void notFloat32x4(Operand in, FloatRegister out);
 
-  void divFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vdivps(rhs, lhs, output);
-  }
   void minFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output);
   void maxFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister temp,
                     FloatRegister output);

@@ -630,7 +549,6 @@ class MacroAssemblerX86Shared : public Assembler {
  void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqu(src, dest);
  }

  void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) {
    vmovdqu(src, Operand(dest));
  }

@@ -675,9 +593,6 @@ class MacroAssemblerX86Shared : public Assembler {
     // TODO See comment above. See also bug 1068028.
     vrsqrtps(src, dest);
   }
-  void packedSqrtFloat32x4(const Operand& src, FloatRegister dest) {
-    vsqrtps(src, dest);
-  }
 
  public:
   void packedLeftShiftByScalarInt16x8(FloatRegister in, Register count,

@@ -752,22 +667,10 @@ class MacroAssemblerX86Shared : public Assembler {
     loadAlignedSimd128Float(src, dest);
     return dest;
   }
-  void loadUnalignedSimd128Float(const Address& src, FloatRegister dest) {
-    vmovups(Operand(src), dest);
-  }
-  void loadUnalignedSimd128Float(const BaseIndex& src, FloatRegister dest) {
-    vmovdqu(Operand(src), dest);
-  }
-  void loadUnalignedSimd128Float(const Operand& src, FloatRegister dest) {
+  void loadUnalignedSimd128(const Operand& src, FloatRegister dest) {
     vmovups(src, dest);
   }
-  void storeUnalignedSimd128Float(FloatRegister src, const Address& dest) {
-    vmovups(src, Operand(dest));
-  }
-  void storeUnalignedSimd128Float(FloatRegister src, const BaseIndex& dest) {
-    vmovups(src, Operand(dest));
-  }
-  void storeUnalignedSimd128Float(FloatRegister src, const Operand& dest) {
+  void storeUnalignedSimd128(FloatRegister src, const Operand& dest) {
     vmovups(src, dest);
   }
   void packedAddFloat32(const Operand& src, FloatRegister dest) {

@@ -355,7 +355,7 @@ static void SetupABIArguments(MacroAssembler& masm, const FuncExport& fe,
       break;
     case MIRType::Int8x16:
 #ifdef ENABLE_WASM_SIMD
-      masm.loadUnalignedSimd128Float(src, iter->fpu());
+      masm.loadUnalignedSimd128(src, iter->fpu());
       break;
 #else
       MOZ_CRASH("V128 not supported in SetupABIArguments");

@@ -436,7 +436,7 @@ static void StoreRegisterResult(MacroAssembler& masm, const FuncExport& fe,
       break;
     case ValType::V128:
 #ifdef ENABLE_WASM_SIMD
-      masm.storeUnalignedSimd128Float(result.fpr(), Address(loc, 0));
+      masm.storeUnalignedSimd128(result.fpr(), Address(loc, 0));
       break;
 #else
       MOZ_CRASH("V128 not supported in StoreABIReturn");