Bug 1686001 - Implement i16x8.q15mulr_sat_s. r=jseward

Implement the new i16x8.q15mulr_sat_s instruction in baseline (x86, x64, arm64) and ion (x86, x64). Add basic test cases. Differential Revision: https://phabricator.services.mozilla.com/D102692
2021-01-25 13:35:43 +00:00 · 2021-01-25 13:35:43 +00:00 · 979ff1ff5d
--- a/js/src/jit-test/tests/wasm/binary.js
+++ b/js/src/jit-test/tests/wasm/binary.js
@ -305,7 +305,7 @@ if (!wasmSimdEnabled()) {
    let reservedSimd = [
        0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e,
        0x5f, 0x67, 0x68, 0x69, 0x6a, 0x74, 0x75, 0x7a, 0x7c, 0x7d, 0x7e,
-        0x7f, 0x94, 0x9c, 0xa5, 0xa6, 0xaf,
+        0x7f, 0x94, 0xa5, 0xa6, 0xaf,
        0xb0, 0xb2, 0xb3, 0xb4, 0xbc, 0xc0, 0xc2,
        0xc3, 0xc5, 0xc6, 0xcf, 0xd0,
        0xd4,
--- a/js/src/jit-test/tests/wasm/simd/ad-hack-non-cranelift.js
+++ b/js/src/jit-test/tests/wasm/simd/ad-hack-non-cranelift.js
@ -160,4 +160,20 @@ assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n] >>> 0)));
 ins.exports.widen_high_i32x4_u();
 assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n+2] >>> 0)));

+// Saturating rounding q-format multiplication.
+// This is to be moved into ad-hack.js

+var ins = wasmEvalText(`
+  (module
+    (memory (export "mem") 1 1)
+    (func (export "q15mulr_sat_s")
+      (v128.store (i32.const 0) (i16x8.q15mulr_sat_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`);
+
+var mem16 = new Int16Array(ins.exports.mem.buffer);  // 16-bit view of the exported memory
+for ( let [as, bs] of cross(Int16Array.inputs) ) {
+    set(mem16, 8, as);   // Int16 element 8 is byte offset 16, the first v128.load address
+    set(mem16, 16, bs);  // Int16 element 16 is byte offset 32, the second v128.load address
+    ins.exports.q15mulr_sat_s();
+    assertSame(get(mem16, 0, 8),  // the wasm function stores its result at byte offset 0
+               iota(8).map((i) => signed_saturate((as[i] * bs[i] + 0x4000) >> 15, 16)));  // reference: add 0x4000, shift right 15, saturate to 16 bits
+}
--- a/js/src/jit/MacroAssembler.h
+++ b/js/src/jit/MacroAssembler.h
@ -2282,6 +2282,9 @@ class MacroAssembler : public MacroAssemblerSpecific {
                                        FloatRegister lhsDest)
      DEFINED_ON(x86_shared, arm64);

+  inline void q15MulrSatInt16x8(FloatRegister rhs, FloatRegister lhsDest)
+      DEFINED_ON(x86_shared, arm64);  // Emits wasm i16x8.q15mulr_sat_s.
+
  // Integer Negate

  inline void negInt8x16(FloatRegister src, FloatRegister dest)
--- a/js/src/jit/arm64/MacroAssembler-arm64-inl.h
+++ b/js/src/jit/arm64/MacroAssembler-arm64-inl.h
@ -2274,6 +2274,11 @@ void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
  Umull2(Simd2D(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
 }

+void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
+                                       FloatRegister lhsDest) {
+  Sqrdmulh(Simd8H(lhsDest), Simd8H(lhsDest), Simd8H(rhs));  // arm64: a single Sqrdmulh on 8H lanes.
+}
+
 // Integer Negate

 void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
--- a/js/src/jit/x86-shared/Assembler-x86-shared.h
+++ b/js/src/jit/x86-shared/Assembler-x86-shared.h
@ -2995,6 +2995,20 @@ class AssemblerX86Shared : public AssemblerShared {
        MOZ_CRASH("unexpected operand kind");
    }
  }
+  void vpmulhrsw(const Operand& src1, FloatRegister src0, FloatRegister dest) {
+    MOZ_ASSERT(HasSSSE3());  // PMULHRSW is an SSSE3 instruction, not SSE2.
+    switch (src1.kind()) {
+      case Operand::FPREG:  // src1 is a SIMD register.
+        masm.vpmulhrsw_rr(src1.fpu(), src0.encoding(), dest.encoding());
+        break;
+      case Operand::MEM_REG_DISP:  // src1 is a base register plus displacement.
+        masm.vpmulhrsw_mr(src1.disp(), src1.base(), src0.encoding(),
+                          dest.encoding());
+        break;
+      default:  // No other operand kind has an encoder for this instruction.
+        MOZ_CRASH("unexpected operand kind");
+    }
+  }
  void vpmulld(const Operand& src1, FloatRegister src0, FloatRegister dest) {
    MOZ_ASSERT(HasSSE41());
    switch (src1.kind()) {
--- a/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/x86-shared/BaseAssembler-x86-shared.h
@ -813,6 +813,15 @@ class BaseAssembler : public GenericAssembler {
    threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, address,
                    src0, dst);
  }
+  void vpmulhrsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
+    threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, src1,
+                    src0, dst);  // Form taking src1 as an XMM register.
+  }
+  void vpmulhrsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
+                    XMMRegisterID dst) {
+    threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, offset,
+                    base, src0, dst);  // Form taking the first source as offset + base.
+  }

  void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
    twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst);
--- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp
@ -2612,6 +2612,9 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
    case wasm::SimdOp::I64x2ExtMulHighUI32x4:
      masm.unsignedExtMulHighInt32x4(rhs, lhsDest);
      break;
+    case wasm::SimdOp::I16x8Q15MulrSatS:
+      masm.q15MulrSatInt16x8(rhs, lhsDest);
+      break;
 #  ifdef ENABLE_WASM_SIMD_WORMHOLE
    case wasm::SimdOp::MozWHSELFTEST: {
      static const int8_t mask[16] = {0xD, 0xE, 0xA, 0xD, 0xD, 0,   0,   0xD,
--- a/js/src/jit/x86-shared/Encoding-x86-shared.h
+++ b/js/src/jit/x86-shared/Encoding-x86-shared.h
@ -340,6 +340,7 @@ enum ThreeByteOpcodeID {
  OP3_ROUNDPD_VpdWpd = 0x09,
  OP3_ROUNDSS_VsdWsd = 0x0A,
  OP3_ROUNDSD_VsdWsd = 0x0B,
+  OP3_PMULHRSW_VdqWdq = 0x0B,
  OP3_BLENDPS_VpsWpsIb = 0x0C,
  OP3_PBLENDW_VdqWdqIb = 0x0E,
  OP3_PALIGNR_VdqWdqIb = 0x0F,
--- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
+++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h
@ -1694,6 +1694,15 @@ void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
  vpmuludq(Operand(scratch), lhsDest, lhsDest);
 }

+void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
+                                       FloatRegister lhsDest) {
+  ScratchSimd128Scope scratch(*this);
+  vpmulhrsw(Operand(rhs), lhsDest, lhsDest);  // Multiply; the product replaces lhsDest.
+  vmovdqa(lhsDest, scratch);  // Copy the product so the compare below can overwrite the copy.
+  vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), scratch);  // Compare each 16-bit lane with 0x8000.
+  vpxor(scratch, lhsDest, lhsDest);  // NOTE(review): presumably flips matching 0x8000 lanes to 0x7FFF (the saturated result) - confirm.
+}
+
 // Integer negate

 void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
--- a/js/src/wasm/WasmBaselineCompile.cpp
+++ b/js/src/wasm/WasmBaselineCompile.cpp
@ -13298,6 +13298,10 @@ static void ExtMulHighUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
  masm.unsignedExtMulHighInt32x4(rs, rsd);
 }

+static void Q15MulrSatS(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
+  masm.q15MulrSatInt16x8(rs, rsd);  // rs is passed as rhs, rsd as lhsDest.
+}
+
 static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond,
                     RegV128 rs, RegV128 rsd) {
  masm.compareInt8x16(cond, rs, rsd);
@ -15321,6 +15325,8 @@ bool BaseCompiler::emitBody() {
            CHECK_NEXT(dispatchVectorBinary(ExtMulLowUI32x4));
          case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
            CHECK_NEXT(dispatchVectorBinary(ExtMulHighUI32x4));
+          case uint32_t(SimdOp::I16x8Q15MulrSatS):
+            CHECK_NEXT(dispatchVectorBinary(Q15MulrSatS));
          case uint32_t(SimdOp::I8x16Neg):
            CHECK_NEXT(dispatchVectorUnary(NegI8x16));
          case uint32_t(SimdOp::I16x8Neg):
--- a/js/src/wasm/WasmConstants.h
+++ b/js/src/wasm/WasmConstants.h
@ -602,7 +602,7 @@ enum class SimdOp {
  I16x8MaxU = 0x99,
  I16x8ExtMulLowSI8x16 = 0x9a,
  I16x8AvgrU = 0x9b,
-  // Unused = 0x9c
+  I16x8Q15MulrSatS = 0x9c,
  I16x8ExtMulHighSI8x16 = 0x9d,
  I16x8ExtMulLowUI8x16 = 0x9e,
  I16x8ExtMulHighUI8x16 = 0x9f,
--- a/js/src/wasm/WasmIonCompile.cpp
+++ b/js/src/wasm/WasmIonCompile.cpp
@ -4964,6 +4964,7 @@ static bool EmitBodyExprs(FunctionCompiler& f) {
          case uint32_t(SimdOp::I64x2ExtMulHighSI32x4):
          case uint32_t(SimdOp::I64x2ExtMulLowUI32x4):
          case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
+          case uint32_t(SimdOp::I16x8Q15MulrSatS):
            CHECK(EmitBinarySimd128(f, /* commutative= */ true, SimdOp(op.b1)));
          case uint32_t(SimdOp::V128AndNot):
          case uint32_t(SimdOp::I8x16Sub):
--- a/js/src/wasm/WasmOpIter.cpp
+++ b/js/src/wasm/WasmOpIter.cpp
@ -461,6 +461,7 @@ OpKind wasm::Classify(OpBytes op) {
        case SimdOp::I64x2ExtMulHighSI32x4:
        case SimdOp::I64x2ExtMulLowUI32x4:
        case SimdOp::I64x2ExtMulHighUI32x4:
+        case SimdOp::I16x8Q15MulrSatS:
          WASM_SIMD_OP(OpKind::Binary);
        case SimdOp::I8x16Neg:
        case SimdOp::I16x8Neg:
--- a/js/src/wasm/WasmValidate.cpp
+++ b/js/src/wasm/WasmValidate.cpp
@ -1070,6 +1070,7 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env,
          case uint32_t(SimdOp::I64x2ExtMulHighSI32x4):
          case uint32_t(SimdOp::I64x2ExtMulLowUI32x4):
          case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
+          case uint32_t(SimdOp::I16x8Q15MulrSatS):
            CHECK(iter.readBinary(ValType::V128, &nothing, &nothing));

          case uint32_t(SimdOp::I8x16Neg):