From 463253db7d980c8257a734c67d470e84a179b7ed Mon Sep 17 00:00:00 2001 From: Lukas Bernhard <15350042+bernhl@users.noreply.github.com> Date: Tue, 2 Nov 2021 19:47:21 +0000 Subject: [PATCH] Bug 1709209 - [wasm] Move SIMD masking out of codegen. r=yury Differential Revision: https://phabricator.services.mozilla.com/D129314 --- js/src/jit/MacroAssembler.h | 58 +++++---- js/src/jit/arm64/Lowering-arm64.cpp | 1 - js/src/jit/arm64/MacroAssembler-arm64-inl.h | 20 +-- js/src/jit/arm64/MacroAssembler-arm64.cpp | 70 ++++++----- js/src/jit/shared/LIR-shared.h | 10 +- .../x86-shared/CodeGenerator-x86-shared.cpp | 27 ++-- js/src/jit/x86-shared/Lowering-x86-shared.cpp | 11 +- .../MacroAssembler-x86-shared-SIMD.cpp | 67 ++++------ .../MacroAssembler-x86-shared-inl.h | 69 +++++------ .../x86-shared/MacroAssembler-x86-shared.cpp | 30 +++++ .../x86-shared/MacroAssembler-x86-shared.h | 26 ++-- js/src/wasm/WasmBaselineCompile.cpp | 115 ++++++++++++------ js/src/wasm/WasmIonCompile.cpp | 9 ++ 13 files changed, 279 insertions(+), 234 deletions(-) diff --git a/js/src/jit/MacroAssembler.h b/js/src/jit/MacroAssembler.h index c3f6ad68c727..528e5f3d6b02 100644 --- a/js/src/jit/MacroAssembler.h +++ b/js/src/jit/MacroAssembler.h @@ -365,6 +365,13 @@ class MacroAssembler : public MacroAssemblerSpecific { void Push(RegisterOrSP reg); #endif +#ifdef ENABLE_WASM_SIMD + // `op` should be a shift operation. Return true if a variable-width shift + // operation on this architecture should pre-mask the shift count, and if so, + // return the mask in `*mask`. + static bool MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask); +#endif + private: // The value returned by GetMaxOffsetGuardLimit() in WasmTypes.h uint32_t wasmMaxOffsetGuardLimit_; @@ -2819,12 +2826,11 @@ class MacroAssembler : public MacroAssemblerSpecific { inline void absInt64x2(FloatRegister src, FloatRegister dest) DEFINED_ON(x86_shared, arm64); - // Left shift by scalar. Immediates must have been masked; shifts of zero - // will work but may or may not generate code. + // Left shift by scalar. Immediates and variable shifts must have been + // masked; shifts of zero will work but may or may not generate code. 
inline void leftShiftInt8x16(Register rhs, FloatRegister lhsDest, - Register temp1, FloatRegister temp2) - DEFINED_ON(x86_shared); + FloatRegister temp) DEFINED_ON(x86_shared); inline void leftShiftInt8x16(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); @@ -2833,8 +2839,8 @@ class MacroAssembler : public MacroAssemblerSpecific { FloatRegister dest) DEFINED_ON(x86_shared, arm64); - inline void leftShiftInt16x8(Register rhs, FloatRegister lhsDest, - Register temp) DEFINED_ON(x86_shared); + inline void leftShiftInt16x8(Register rhs, FloatRegister lhsDest) + DEFINED_ON(x86_shared); inline void leftShiftInt16x8(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); @@ -2843,8 +2849,8 @@ class MacroAssembler : public MacroAssemblerSpecific { FloatRegister dest) DEFINED_ON(x86_shared, arm64); - inline void leftShiftInt32x4(Register rhs, FloatRegister lhsDest, - Register temp) DEFINED_ON(x86_shared); + inline void leftShiftInt32x4(Register rhs, FloatRegister lhsDest) + DEFINED_ON(x86_shared); inline void leftShiftInt32x4(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); @@ -2853,8 +2859,8 @@ class MacroAssembler : public MacroAssemblerSpecific { FloatRegister dest) DEFINED_ON(x86_shared, arm64); - inline void leftShiftInt64x2(Register rhs, FloatRegister lhsDest, - Register temp) DEFINED_ON(x86_shared); + inline void leftShiftInt64x2(Register rhs, FloatRegister lhsDest) + DEFINED_ON(x86_shared); inline void leftShiftInt64x2(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); @@ -2863,12 +2869,11 @@ class MacroAssembler : public MacroAssemblerSpecific { FloatRegister dest) DEFINED_ON(x86_shared, arm64); - // Right shift by scalar. Immediates must have been masked; shifts of zero - // will work but may or may not generate code. + // Right shift by scalar. Immediates and variable shifts must have been + // masked; shifts of zero will work but may or may not generate code. 
inline void rightShiftInt8x16(Register rhs, FloatRegister lhsDest, - Register temp1, FloatRegister temp2) - DEFINED_ON(x86_shared); + FloatRegister temp) DEFINED_ON(x86_shared); inline void rightShiftInt8x16(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); @@ -2878,7 +2883,7 @@ class MacroAssembler : public MacroAssemblerSpecific { DEFINED_ON(x86_shared, arm64); inline void unsignedRightShiftInt8x16(Register rhs, FloatRegister lhsDest, - Register temp1, FloatRegister temp2) + FloatRegister temp) DEFINED_ON(x86_shared); inline void unsignedRightShiftInt8x16(FloatRegister lhs, Register rhs, @@ -2888,8 +2893,8 @@ class MacroAssembler : public MacroAssemblerSpecific { FloatRegister dest) DEFINED_ON(x86_shared, arm64); - inline void rightShiftInt16x8(Register rhs, FloatRegister lhsDest, - Register temp) DEFINED_ON(x86_shared); + inline void rightShiftInt16x8(Register rhs, FloatRegister lhsDest) + DEFINED_ON(x86_shared); inline void rightShiftInt16x8(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); @@ -2898,8 +2903,8 @@ class MacroAssembler : public MacroAssemblerSpecific { FloatRegister dest) DEFINED_ON(x86_shared, arm64); - inline void unsignedRightShiftInt16x8(Register rhs, FloatRegister lhsDest, - Register temp) DEFINED_ON(x86_shared); + inline void unsignedRightShiftInt16x8(Register rhs, FloatRegister lhsDest) + DEFINED_ON(x86_shared); inline void unsignedRightShiftInt16x8(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); @@ -2908,8 +2913,8 @@ class MacroAssembler : public MacroAssemblerSpecific { FloatRegister dest) DEFINED_ON(x86_shared, arm64); - inline void rightShiftInt32x4(Register rhs, FloatRegister lhsDest, - Register temp) DEFINED_ON(x86_shared); + inline void rightShiftInt32x4(Register rhs, FloatRegister lhsDest) + DEFINED_ON(x86_shared); inline void rightShiftInt32x4(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); @@ -2918,8 +2923,8 @@ class MacroAssembler : public MacroAssemblerSpecific { FloatRegister dest) DEFINED_ON(x86_shared, arm64); - inline void unsignedRightShiftInt32x4(Register rhs, FloatRegister lhsDest, - Register temp) DEFINED_ON(x86_shared); + inline void unsignedRightShiftInt32x4(Register rhs, FloatRegister lhsDest) + DEFINED_ON(x86_shared); inline void unsignedRightShiftInt32x4(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); @@ -2929,8 +2934,7 @@ class MacroAssembler : public MacroAssemblerSpecific { DEFINED_ON(x86_shared, arm64); inline void rightShiftInt64x2(Register rhs, FloatRegister lhsDest, - Register temp1, FloatRegister temp2) - DEFINED_ON(x86_shared); + FloatRegister temp) DEFINED_ON(x86_shared); inline void rightShiftInt64x2(Imm32 count, FloatRegister src, FloatRegister dest) @@ -2939,8 +2943,8 @@ class MacroAssembler : public MacroAssemblerSpecific { inline void rightShiftInt64x2(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); - inline void unsignedRightShiftInt64x2(Register rhs, FloatRegister lhsDest, - Register temp) DEFINED_ON(x86_shared); + inline void unsignedRightShiftInt64x2(Register rhs, FloatRegister lhsDest) + DEFINED_ON(x86_shared); inline void unsignedRightShiftInt64x2(FloatRegister lhs, Register rhs, FloatRegister dest) DEFINED_ON(arm64); diff --git a/js/src/jit/arm64/Lowering-arm64.cpp b/js/src/jit/arm64/Lowering-arm64.cpp index c57504caa051..bbd88b6cffb7 100644 --- a/js/src/jit/arm64/Lowering-arm64.cpp +++ b/js/src/jit/arm64/Lowering-arm64.cpp @@ -1122,7 +1122,6 @@ void 
LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) { LAllocation lhsDestAlloc = useRegisterAtStart(lhs); LAllocation rhsAlloc = useRegisterAtStart(rhs); auto* lir = new (alloc()) LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, - LDefinition::BogusTemp(), LDefinition::BogusTemp()); define(lir, ins); #else diff --git a/js/src/jit/arm64/MacroAssembler-arm64-inl.h b/js/src/jit/arm64/MacroAssembler-arm64-inl.h index b568208091d4..0220c6c36891 100644 --- a/js/src/jit/arm64/MacroAssembler-arm64-inl.h +++ b/js/src/jit/arm64/MacroAssembler-arm64-inl.h @@ -3097,11 +3097,8 @@ void MacroAssembler::absInt64x2(FloatRegister src, FloatRegister dest) { void MacroAssembler::leftShiftInt8x16(FloatRegister lhs, Register rhs, FloatRegister dest) { - vixl::UseScratchRegisterScope temps(this); - ARMRegister scratch = temps.AcquireW(); - And(scratch, ARMRegister(rhs, 32), 7); ScratchSimd128Scope vscratch(*this); - Dup(Simd16B(vscratch), scratch); + Dup(Simd16B(vscratch), ARMRegister(rhs, 32)); Sshl(Simd16B(dest), Simd16B(lhs), Simd16B(vscratch)); } @@ -3112,11 +3109,8 @@ void MacroAssembler::leftShiftInt8x16(Imm32 count, FloatRegister src, void MacroAssembler::leftShiftInt16x8(FloatRegister lhs, Register rhs, FloatRegister dest) { - vixl::UseScratchRegisterScope temps(this); - ARMRegister scratch = temps.AcquireW(); - And(scratch, ARMRegister(rhs, 32), 15); ScratchSimd128Scope vscratch(*this); - Dup(Simd8H(vscratch), scratch); + Dup(Simd8H(vscratch), ARMRegister(rhs, 32)); Sshl(Simd8H(dest), Simd8H(lhs), Simd8H(vscratch)); } @@ -3127,11 +3121,8 @@ void MacroAssembler::leftShiftInt16x8(Imm32 count, FloatRegister src, void MacroAssembler::leftShiftInt32x4(FloatRegister lhs, Register rhs, FloatRegister dest) { - vixl::UseScratchRegisterScope temps(this); - ARMRegister scratch = temps.AcquireW(); - And(scratch, ARMRegister(rhs, 32), 31); ScratchSimd128Scope vscratch(*this); - Dup(Simd4S(vscratch), scratch); + Dup(Simd4S(vscratch), ARMRegister(rhs, 32)); Sshl(Simd4S(dest), Simd4S(lhs), Simd4S(vscratch)); } @@ -3142,11 +3133,8 @@ void MacroAssembler::leftShiftInt32x4(Imm32 count, FloatRegister src, void MacroAssembler::leftShiftInt64x2(FloatRegister lhs, Register rhs, FloatRegister dest) { - vixl::UseScratchRegisterScope temps(this); - ARMRegister scratch = temps.AcquireX(); - And(scratch, ARMRegister(rhs, 64), 63); ScratchSimd128Scope vscratch(*this); - Dup(Simd2D(vscratch), scratch); + Dup(Simd2D(vscratch), ARMRegister(rhs, 64)); Sshl(Simd2D(dest), Simd2D(lhs), Simd2D(vscratch)); } diff --git a/js/src/jit/arm64/MacroAssembler-arm64.cpp b/js/src/jit/arm64/MacroAssembler-arm64.cpp index 9e7e6424bcc8..c8c8c75a94fe 100644 --- a/js/src/jit/arm64/MacroAssembler-arm64.cpp +++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp @@ -57,6 +57,36 @@ void MacroAssemblerCompat::boxValue(JSValueType type, Register src, Operand(ImmShiftedTag(type).value)); } +#ifdef ENABLE_WASM_SIMD +bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) { + switch (op) { + case wasm::SimdOp::I8x16Shl: + case wasm::SimdOp::I8x16ShrU: + case wasm::SimdOp::I8x16ShrS: + *mask = 7; + break; + case wasm::SimdOp::I16x8Shl: + case wasm::SimdOp::I16x8ShrU: + case wasm::SimdOp::I16x8ShrS: + *mask = 15; + break; + case wasm::SimdOp::I32x4Shl: + case wasm::SimdOp::I32x4ShrU: + case wasm::SimdOp::I32x4ShrS: + *mask = 31; + break; + case wasm::SimdOp::I64x2Shl: + case wasm::SimdOp::I64x2ShrU: + case wasm::SimdOp::I64x2ShrS: + *mask = 63; + break; + default: + MOZ_CRASH("Unexpected shift operation"); + } + return true; +} +#endif + 
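The masks chosen here (7, 15, 31, 63) encode the wasm SIMD rule that a variable shift count is interpreted modulo the lane width in bits, so the effective count is count & (laneBits - 1). A minimal standalone sketch of that semantics follows; the helper name is illustrative only and is not part of this patch.

#include <cstdint>

// Illustrative only: the effective wasm SIMD shift count for a lane of
// laneBits bits (8, 16, 32 or 64). laneBits - 1 is exactly the mask that
// MustMaskShiftCountSimd128 reports for the corresponding SimdOp.
static inline int32_t EffectiveSimdShiftCount(int32_t count, int32_t laneBits) {
  return count & (laneBits - 1);
}

// Example: EffectiveSimdShiftCount(20, 16) == 4, so an i16x8 shift by 20
// behaves exactly like a shift by 4; that is the behaviour the pre-masking
// added in the compilers below preserves.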
void MacroAssembler::clampDoubleToUint8(FloatRegister input, Register output) { ARMRegister dest(output, 32); Fcvtns(dest, ARMFPRegister(input, 64)); @@ -737,14 +767,8 @@ void MacroAssemblerCompat::rightShiftInt8x16(FloatRegister lhs, Register rhs, ScratchSimd128Scope scratch_(asMasm()); ARMFPRegister shift = Simd16B(scratch_); - // Compute -(shift & 7) in all 8-bit lanes - { - vixl::UseScratchRegisterScope temps(this); - ARMRegister scratch = temps.AcquireW(); - And(scratch, ARMRegister(rhs, 32), 7); - Neg(scratch, scratch); - Dup(shift, scratch); - } + Dup(shift, ARMRegister(rhs, 32)); + Neg(shift, shift); if (isUnsigned) { Ushl(Simd16B(dest), Simd16B(lhs), shift); @@ -759,14 +783,8 @@ void MacroAssemblerCompat::rightShiftInt16x8(FloatRegister lhs, Register rhs, ScratchSimd128Scope scratch_(asMasm()); ARMFPRegister shift = Simd8H(scratch_); - // Compute -(shift & 15) in all 16-bit lanes - { - vixl::UseScratchRegisterScope temps(this); - ARMRegister scratch = temps.AcquireW(); - And(scratch, ARMRegister(rhs, 32), 15); - Neg(scratch, scratch); - Dup(shift, scratch); - } + Dup(shift, ARMRegister(rhs, 32)); + Neg(shift, shift); if (isUnsigned) { Ushl(Simd8H(dest), Simd8H(lhs), shift); @@ -781,14 +799,8 @@ void MacroAssemblerCompat::rightShiftInt32x4(FloatRegister lhs, Register rhs, ScratchSimd128Scope scratch_(asMasm()); ARMFPRegister shift = Simd4S(scratch_); - // Compute -(shift & 31) in all 32-bit lanes - { - vixl::UseScratchRegisterScope temps(this); - ARMRegister scratch = temps.AcquireW(); - And(scratch, ARMRegister(rhs, 32), 31); - Neg(scratch, scratch); - Dup(shift, scratch); - } + Dup(shift, ARMRegister(rhs, 32)); + Neg(shift, shift); if (isUnsigned) { Ushl(Simd4S(dest), Simd4S(lhs), shift); @@ -803,14 +815,8 @@ void MacroAssemblerCompat::rightShiftInt64x2(FloatRegister lhs, Register rhs, ScratchSimd128Scope scratch_(asMasm()); ARMFPRegister shift = Simd2D(scratch_); - // Compute -(shift & 63) - { - vixl::UseScratchRegisterScope temps(this); - ARMRegister scratch = temps.AcquireX(); - And(scratch, ARMRegister(rhs, 64), 63); - Neg(scratch, scratch); - Dup(shift, scratch); - } + Dup(shift, ARMRegister(rhs, 64)); + Neg(shift, shift); if (isUnsigned) { Ushl(Simd2D(dest), Simd2D(lhs), shift); diff --git a/js/src/jit/shared/LIR-shared.h b/js/src/jit/shared/LIR-shared.h index 6442df04c388..36b8700846d6 100644 --- a/js/src/jit/shared/LIR-shared.h +++ b/js/src/jit/shared/LIR-shared.h @@ -3547,9 +3547,8 @@ class LWasmBinarySimd128WithConstant : public LInstructionHelper<1, 1, 0> { // (v128, i32) -> v128 effect-free variable-width shift operations // lhs and dest are the same. -// temp0 is a GPR (if in use). -// temp1 is an FPR (if in use). -class LWasmVariableShiftSimd128 : public LInstructionHelper<1, 2, 2> { +// temp is an FPR (if in use). 
+class LWasmVariableShiftSimd128 : public LInstructionHelper<1, 2, 1> { public: LIR_HEADER(WasmVariableShiftSimd128) @@ -3558,12 +3557,11 @@ class LWasmVariableShiftSimd128 : public LInstructionHelper<1, 2, 2> { static constexpr uint32_t Rhs = 1; LWasmVariableShiftSimd128(const LAllocation& lhs, const LAllocation& rhs, - const LDefinition& temp0, const LDefinition& temp1) + const LDefinition& temp) : LInstructionHelper(classOpcode) { setOperand(Lhs, lhs); setOperand(Rhs, rhs); - setTemp(0, temp0); - setTemp(1, temp1); + setTemp(0, temp); } const LAllocation* lhs() { return getOperand(Lhs); } diff --git a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp index 59e85c1ba5ba..6ba2b07c269d 100644 --- a/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp +++ b/js/src/jit/x86-shared/CodeGenerator-x86-shared.cpp @@ -2942,47 +2942,46 @@ void CodeGenerator::visitWasmVariableShiftSimd128( #ifdef ENABLE_WASM_SIMD FloatRegister lhsDest = ToFloatRegister(ins->lhsDest()); Register rhs = ToRegister(ins->rhs()); - Register temp1 = ToTempRegisterOrInvalid(ins->getTemp(0)); - FloatRegister temp2 = ToTempFloatRegisterOrInvalid(ins->getTemp(1)); + FloatRegister temp = ToTempFloatRegisterOrInvalid(ins->getTemp(0)); MOZ_ASSERT(ToFloatRegister(ins->output()) == lhsDest); switch (ins->simdOp()) { case wasm::SimdOp::I8x16Shl: - masm.leftShiftInt8x16(rhs, lhsDest, temp1, temp2); + masm.leftShiftInt8x16(rhs, lhsDest, temp); break; case wasm::SimdOp::I8x16ShrS: - masm.rightShiftInt8x16(rhs, lhsDest, temp1, temp2); + masm.rightShiftInt8x16(rhs, lhsDest, temp); break; case wasm::SimdOp::I8x16ShrU: - masm.unsignedRightShiftInt8x16(rhs, lhsDest, temp1, temp2); + masm.unsignedRightShiftInt8x16(rhs, lhsDest, temp); break; case wasm::SimdOp::I16x8Shl: - masm.leftShiftInt16x8(rhs, lhsDest, temp1); + masm.leftShiftInt16x8(rhs, lhsDest); break; case wasm::SimdOp::I16x8ShrS: - masm.rightShiftInt16x8(rhs, lhsDest, temp1); + masm.rightShiftInt16x8(rhs, lhsDest); break; case wasm::SimdOp::I16x8ShrU: - masm.unsignedRightShiftInt16x8(rhs, lhsDest, temp1); + masm.unsignedRightShiftInt16x8(rhs, lhsDest); break; case wasm::SimdOp::I32x4Shl: - masm.leftShiftInt32x4(rhs, lhsDest, temp1); + masm.leftShiftInt32x4(rhs, lhsDest); break; case wasm::SimdOp::I32x4ShrS: - masm.rightShiftInt32x4(rhs, lhsDest, temp1); + masm.rightShiftInt32x4(rhs, lhsDest); break; case wasm::SimdOp::I32x4ShrU: - masm.unsignedRightShiftInt32x4(rhs, lhsDest, temp1); + masm.unsignedRightShiftInt32x4(rhs, lhsDest); break; case wasm::SimdOp::I64x2Shl: - masm.leftShiftInt64x2(rhs, lhsDest, temp1); + masm.leftShiftInt64x2(rhs, lhsDest); break; case wasm::SimdOp::I64x2ShrS: - masm.rightShiftInt64x2(rhs, lhsDest, temp1, temp2); + masm.rightShiftInt64x2(rhs, lhsDest, temp); break; case wasm::SimdOp::I64x2ShrU: - masm.unsignedRightShiftInt64x2(rhs, lhsDest, temp1); + masm.unsignedRightShiftInt64x2(rhs, lhsDest); break; default: MOZ_CRASH("Shift SimdOp not implemented"); diff --git a/js/src/jit/x86-shared/Lowering-x86-shared.cpp b/js/src/jit/x86-shared/Lowering-x86-shared.cpp index 95eb7d0235ae..05b2058a349e 100644 --- a/js/src/jit/x86-shared/Lowering-x86-shared.cpp +++ b/js/src/jit/x86-shared/Lowering-x86-shared.cpp @@ -1205,26 +1205,23 @@ void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) { js::wasm::ReportSimdAnalysis("shift -> variable shift"); # endif - LDefinition tempReg0 = LDefinition::BogusTemp(); - LDefinition tempReg1 = LDefinition::BogusTemp(); + LDefinition tempReg = LDefinition::BogusTemp(); 
switch (ins->simdOp()) { case wasm::SimdOp::I8x16Shl: case wasm::SimdOp::I8x16ShrS: case wasm::SimdOp::I8x16ShrU: case wasm::SimdOp::I64x2ShrS: - tempReg0 = temp(); - tempReg1 = tempSimd128(); + tempReg = tempSimd128(); break; default: - tempReg0 = temp(); break; } // Reusing the input if possible is never detrimental. LAllocation lhsDestAlloc = useRegisterAtStart(lhs); LAllocation rhsAlloc = useRegisterAtStart(rhs); - auto* lir = new (alloc()) - LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg0, tempReg1); + auto* lir = + new (alloc()) LWasmVariableShiftSimd128(lhsDestAlloc, rhsAlloc, tempReg); defineReuseInput(lir, ins, LWasmVariableShiftSimd128::LhsDest); #else MOZ_CRASH("No SIMD"); diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp b/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp index 52997852a3e4..39ed29a415ab 100644 --- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-SIMD.cpp @@ -902,22 +902,13 @@ void MacroAssemblerX86Shared::maxFloat64x2(FloatRegister lhs, Operand rhs, minMaxFloat64x2(/*isMin=*/false, lhs, rhs, temp1, temp2, output); } -static inline void MaskSimdShiftCount(MacroAssembler& masm, unsigned shiftmask, - Register count, Register temp, - FloatRegister dest) { - masm.mov(count, temp); - masm.andl(Imm32(shiftmask), temp); - masm.vmovd(temp, dest); -} - void MacroAssemblerX86Shared::packedShiftByScalarInt8x16( - FloatRegister in, Register count, Register temp, FloatRegister xtmp, - FloatRegister dest, + FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest, void (MacroAssemblerX86Shared::*shift)(FloatRegister, FloatRegister, FloatRegister), void (MacroAssemblerX86Shared::*extend)(const Operand&, FloatRegister)) { ScratchSimd128Scope scratch(asMasm()); - MaskSimdShiftCount(asMasm(), 7, count, temp, scratch); + vmovd(count, scratch); // High bytes vpalignr(Operand(in), xtmp, 8); @@ -938,9 +929,8 @@ void MacroAssemblerX86Shared::packedShiftByScalarInt8x16( } void MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16( - FloatRegister in, Register count, Register temp, FloatRegister xtmp, - FloatRegister dest) { - packedShiftByScalarInt8x16(in, count, temp, xtmp, dest, + FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest) { + packedShiftByScalarInt8x16(in, count, xtmp, dest, &MacroAssemblerX86Shared::vpsllw, &MacroAssemblerX86Shared::vpmovzxbw); } @@ -964,9 +954,8 @@ void MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16( } void MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16( - FloatRegister in, Register count, Register temp, FloatRegister xtmp, - FloatRegister dest) { - packedShiftByScalarInt8x16(in, count, temp, xtmp, dest, + FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest) { + packedShiftByScalarInt8x16(in, count, xtmp, dest, &MacroAssemblerX86Shared::vpsraw, &MacroAssemblerX86Shared::vpmovsxbw); } @@ -984,9 +973,8 @@ void MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16( } void MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16( - FloatRegister in, Register count, Register temp, FloatRegister xtmp, - FloatRegister dest) { - packedShiftByScalarInt8x16(in, count, temp, xtmp, dest, + FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest) { + packedShiftByScalarInt8x16(in, count, xtmp, dest, &MacroAssemblerX86Shared::vpsrlw, &MacroAssemblerX86Shared::vpmovzxbw); } @@ -1001,71 +989,70 @@ void 
MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16( } void MacroAssemblerX86Shared::packedLeftShiftByScalarInt16x8( - FloatRegister in, Register count, Register temp, FloatRegister dest) { + FloatRegister in, Register count, FloatRegister dest) { ScratchSimd128Scope scratch(asMasm()); - MaskSimdShiftCount(asMasm(), 15, count, temp, scratch); + vmovd(count, scratch); vpsllw(scratch, in, dest); } void MacroAssemblerX86Shared::packedRightShiftByScalarInt16x8( - FloatRegister in, Register count, Register temp, FloatRegister dest) { + FloatRegister in, Register count, FloatRegister dest) { ScratchSimd128Scope scratch(asMasm()); - MaskSimdShiftCount(asMasm(), 15, count, temp, scratch); + vmovd(count, scratch); vpsraw(scratch, in, dest); } void MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt16x8( - FloatRegister in, Register count, Register temp, FloatRegister dest) { + FloatRegister in, Register count, FloatRegister dest) { ScratchSimd128Scope scratch(asMasm()); - MaskSimdShiftCount(asMasm(), 15, count, temp, scratch); + vmovd(count, scratch); vpsrlw(scratch, in, dest); } void MacroAssemblerX86Shared::packedLeftShiftByScalarInt32x4( - FloatRegister in, Register count, Register temp, FloatRegister dest) { + FloatRegister in, Register count, FloatRegister dest) { ScratchSimd128Scope scratch(asMasm()); - MaskSimdShiftCount(asMasm(), 31, count, temp, scratch); + vmovd(count, scratch); vpslld(scratch, in, dest); } void MacroAssemblerX86Shared::packedRightShiftByScalarInt32x4( - FloatRegister in, Register count, Register temp, FloatRegister dest) { + FloatRegister in, Register count, FloatRegister dest) { ScratchSimd128Scope scratch(asMasm()); - MaskSimdShiftCount(asMasm(), 31, count, temp, scratch); + vmovd(count, scratch); vpsrad(scratch, in, dest); } void MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt32x4( - FloatRegister in, Register count, Register temp, FloatRegister dest) { + FloatRegister in, Register count, FloatRegister dest) { ScratchSimd128Scope scratch(asMasm()); - MaskSimdShiftCount(asMasm(), 31, count, temp, scratch); + vmovd(count, scratch); vpsrld(scratch, in, dest); } void MacroAssemblerX86Shared::packedLeftShiftByScalarInt64x2( - FloatRegister in, Register count, Register temp, FloatRegister dest) { + FloatRegister in, Register count, FloatRegister dest) { ScratchSimd128Scope scratch(asMasm()); - MaskSimdShiftCount(asMasm(), 63, count, temp, scratch); + vmovd(count, scratch); vpsllq(scratch, in, dest); } void MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2( - FloatRegister in, Register count, Register temp1, FloatRegister temp2, - FloatRegister dest) { + FloatRegister in, Register count, FloatRegister temp, FloatRegister dest) { ScratchSimd128Scope scratch(asMasm()); - MaskSimdShiftCount(asMasm(), 63, count, temp1, temp2); + vmovd(count, temp); asMasm().moveSimd128(in, dest); asMasm().signReplicationInt64x2(in, scratch); // Invert if negative, shift all, invert back if negative. 
vpxor(Operand(scratch), dest, dest); - vpsrlq(temp2, dest, dest); + vpsrlq(temp, dest, dest); vpxor(Operand(scratch), dest, dest); } void MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt64x2( - FloatRegister in, Register count, Register temp, FloatRegister dest) { + FloatRegister in, Register count, FloatRegister dest) { ScratchSimd128Scope scratch(asMasm()); - MaskSimdShiftCount(asMasm(), 63, count, temp, scratch); + vmovd(count, scratch); vpsrlq(scratch, in, dest); } diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h index 6e92cb167a1a..44a0ff430ba2 100644 --- a/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared-inl.h @@ -2119,9 +2119,9 @@ void MacroAssembler::absInt64x2(FloatRegister src, FloatRegister dest) { // Left shift by scalar void MacroAssembler::leftShiftInt8x16(Register rhs, FloatRegister lhsDest, - Register temp1, FloatRegister temp2) { - MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(lhsDest, rhs, temp1, - temp2, lhsDest); + FloatRegister temp) { + MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(lhsDest, rhs, temp, + lhsDest); } void MacroAssembler::leftShiftInt8x16(Imm32 count, FloatRegister src, @@ -2129,9 +2129,8 @@ void MacroAssembler::leftShiftInt8x16(Imm32 count, FloatRegister src, MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(count, src, dest); } -void MacroAssembler::leftShiftInt16x8(Register rhs, FloatRegister lhsDest, - Register temp) { - MacroAssemblerX86Shared::packedLeftShiftByScalarInt16x8(lhsDest, rhs, temp, +void MacroAssembler::leftShiftInt16x8(Register rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedLeftShiftByScalarInt16x8(lhsDest, rhs, lhsDest); } @@ -2141,9 +2140,8 @@ void MacroAssembler::leftShiftInt16x8(Imm32 count, FloatRegister src, vpsllw(count, src, dest); } -void MacroAssembler::leftShiftInt32x4(Register rhs, FloatRegister lhsDest, - Register temp) { - MacroAssemblerX86Shared::packedLeftShiftByScalarInt32x4(lhsDest, rhs, temp, +void MacroAssembler::leftShiftInt32x4(Register rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedLeftShiftByScalarInt32x4(lhsDest, rhs, lhsDest); } @@ -2153,9 +2151,8 @@ void MacroAssembler::leftShiftInt32x4(Imm32 count, FloatRegister src, vpslld(count, src, dest); } -void MacroAssembler::leftShiftInt64x2(Register rhs, FloatRegister lhsDest, - Register temp) { - MacroAssemblerX86Shared::packedLeftShiftByScalarInt64x2(lhsDest, rhs, temp, +void MacroAssembler::leftShiftInt64x2(Register rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedLeftShiftByScalarInt64x2(lhsDest, rhs, lhsDest); } @@ -2168,9 +2165,9 @@ void MacroAssembler::leftShiftInt64x2(Imm32 count, FloatRegister src, // Right shift by scalar void MacroAssembler::rightShiftInt8x16(Register rhs, FloatRegister lhsDest, - Register temp1, FloatRegister temp2) { - MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16(lhsDest, rhs, temp1, - temp2, lhsDest); + FloatRegister temp) { + MacroAssemblerX86Shared::packedRightShiftByScalarInt8x16(lhsDest, rhs, temp, + lhsDest); } void MacroAssembler::rightShiftInt8x16(Imm32 count, FloatRegister src, @@ -2180,10 +2177,9 @@ void MacroAssembler::rightShiftInt8x16(Imm32 count, FloatRegister src, void MacroAssembler::unsignedRightShiftInt8x16(Register rhs, FloatRegister lhsDest, - Register temp1, - FloatRegister temp2) { + FloatRegister temp) { MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt8x16( - lhsDest, rhs, 
temp1, temp2, lhsDest); + lhsDest, rhs, temp, lhsDest); } void MacroAssembler::unsignedRightShiftInt8x16(Imm32 count, FloatRegister src, @@ -2192,9 +2188,8 @@ void MacroAssembler::unsignedRightShiftInt8x16(Imm32 count, FloatRegister src, dest); } -void MacroAssembler::rightShiftInt16x8(Register rhs, FloatRegister lhsDest, - Register temp) { - MacroAssemblerX86Shared::packedRightShiftByScalarInt16x8(lhsDest, rhs, temp, +void MacroAssembler::rightShiftInt16x8(Register rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedRightShiftByScalarInt16x8(lhsDest, rhs, lhsDest); } @@ -2205,10 +2200,9 @@ void MacroAssembler::rightShiftInt16x8(Imm32 count, FloatRegister src, } void MacroAssembler::unsignedRightShiftInt16x8(Register rhs, - FloatRegister lhsDest, - Register temp) { - MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt16x8( - lhsDest, rhs, temp, lhsDest); + FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt16x8(lhsDest, rhs, + lhsDest); } void MacroAssembler::unsignedRightShiftInt16x8(Imm32 count, FloatRegister src, @@ -2217,9 +2211,8 @@ void MacroAssembler::unsignedRightShiftInt16x8(Imm32 count, FloatRegister src, vpsrlw(count, src, dest); } -void MacroAssembler::rightShiftInt32x4(Register rhs, FloatRegister lhsDest, - Register temp) { - MacroAssemblerX86Shared::packedRightShiftByScalarInt32x4(lhsDest, rhs, temp, +void MacroAssembler::rightShiftInt32x4(Register rhs, FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedRightShiftByScalarInt32x4(lhsDest, rhs, lhsDest); } @@ -2230,10 +2223,9 @@ void MacroAssembler::rightShiftInt32x4(Imm32 count, FloatRegister src, } void MacroAssembler::unsignedRightShiftInt32x4(Register rhs, - FloatRegister lhsDest, - Register temp) { - MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt32x4( - lhsDest, rhs, temp, lhsDest); + FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt32x4(lhsDest, rhs, + lhsDest); } void MacroAssembler::unsignedRightShiftInt32x4(Imm32 count, FloatRegister src, @@ -2243,9 +2235,9 @@ void MacroAssembler::unsignedRightShiftInt32x4(Imm32 count, FloatRegister src, } void MacroAssembler::rightShiftInt64x2(Register rhs, FloatRegister lhsDest, - Register temp1, FloatRegister temp2) { - MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2(lhsDest, rhs, temp1, - temp2, lhsDest); + FloatRegister temp) { + MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2(lhsDest, rhs, temp, + lhsDest); } void MacroAssembler::rightShiftInt64x2(Imm32 count, FloatRegister src, @@ -2254,10 +2246,9 @@ void MacroAssembler::rightShiftInt64x2(Imm32 count, FloatRegister src, } void MacroAssembler::unsignedRightShiftInt64x2(Register rhs, - FloatRegister lhsDest, - Register temp) { - MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt64x2( - lhsDest, rhs, temp, lhsDest); + FloatRegister lhsDest) { + MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt64x2(lhsDest, rhs, + lhsDest); } void MacroAssembler::unsignedRightShiftInt64x2(Imm32 count, FloatRegister src, diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp index 9cc48042da7d..da0e79d5d0ac 100644 --- a/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.cpp @@ -284,6 +284,36 @@ void MacroAssemblerX86Shared::minMaxFloat32(FloatRegister first, bind(&done); } +#ifdef ENABLE_WASM_SIMD +bool MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp op, int32_t* mask) { + 
switch (op) { + case wasm::SimdOp::I8x16Shl: + case wasm::SimdOp::I8x16ShrU: + case wasm::SimdOp::I8x16ShrS: + *mask = 7; + break; + case wasm::SimdOp::I16x8Shl: + case wasm::SimdOp::I16x8ShrU: + case wasm::SimdOp::I16x8ShrS: + *mask = 15; + break; + case wasm::SimdOp::I32x4Shl: + case wasm::SimdOp::I32x4ShrU: + case wasm::SimdOp::I32x4ShrS: + *mask = 31; + break; + case wasm::SimdOp::I64x2Shl: + case wasm::SimdOp::I64x2ShrU: + case wasm::SimdOp::I64x2ShrS: + *mask = 63; + break; + default: + MOZ_CRASH("Unexpected shift operation"); + } + return true; +} +#endif + //{{{ check_macroassembler_style // =============================================================== // MacroAssembler high-level usage. diff --git a/js/src/jit/x86-shared/MacroAssembler-x86-shared.h b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h index c83b1481788a..a0f8331598bc 100644 --- a/js/src/jit/x86-shared/MacroAssembler-x86-shared.h +++ b/js/src/jit/x86-shared/MacroAssembler-x86-shared.h @@ -499,53 +499,45 @@ class MacroAssemblerX86Shared : public Assembler { FloatRegister temp2, FloatRegister output); void packedShiftByScalarInt8x16( - FloatRegister in, Register count, Register temp, FloatRegister xtmp, - FloatRegister dest, + FloatRegister in, Register count, FloatRegister xtmp, FloatRegister dest, void (MacroAssemblerX86Shared::*shift)(FloatRegister, FloatRegister, FloatRegister), void (MacroAssemblerX86Shared::*extend)(const Operand&, FloatRegister)); void packedLeftShiftByScalarInt8x16(FloatRegister in, Register count, - Register temp, FloatRegister xtmp, - FloatRegister dest); + FloatRegister xtmp, FloatRegister dest); void packedLeftShiftByScalarInt8x16(Imm32 count, FloatRegister src, FloatRegister dest); void packedRightShiftByScalarInt8x16(FloatRegister in, Register count, - Register temp, FloatRegister xtmp, - FloatRegister dest); + FloatRegister xtmp, FloatRegister dest); void packedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src, FloatRegister dest); void packedUnsignedRightShiftByScalarInt8x16(FloatRegister in, Register count, - Register temp, FloatRegister xtmp, FloatRegister dest); void packedUnsignedRightShiftByScalarInt8x16(Imm32 count, FloatRegister src, FloatRegister dest); void packedLeftShiftByScalarInt16x8(FloatRegister in, Register count, - Register temp, FloatRegister dest); + FloatRegister dest); void packedRightShiftByScalarInt16x8(FloatRegister in, Register count, - Register temp, FloatRegister dest); + FloatRegister dest); void packedUnsignedRightShiftByScalarInt16x8(FloatRegister in, Register count, - Register temp, FloatRegister dest); void packedLeftShiftByScalarInt32x4(FloatRegister in, Register count, - Register temp, FloatRegister dest); + FloatRegister dest); void packedRightShiftByScalarInt32x4(FloatRegister in, Register count, - Register temp, FloatRegister dest); + FloatRegister dest); void packedUnsignedRightShiftByScalarInt32x4(FloatRegister in, Register count, - Register temp, FloatRegister dest); void packedLeftShiftByScalarInt64x2(FloatRegister in, Register count, - Register temp, FloatRegister dest); + FloatRegister dest); void packedRightShiftByScalarInt64x2(FloatRegister in, Register count, - Register temp1, FloatRegister temp2, - FloatRegister dest); + FloatRegister temp, FloatRegister dest); void packedRightShiftByScalarInt64x2(Imm32 count, FloatRegister src, FloatRegister dest); void packedUnsignedRightShiftByScalarInt64x2(FloatRegister in, Register count, - Register temp, FloatRegister dest); void selectSimd128(FloatRegister mask, FloatRegister onTrue, 
FloatRegister onFalse, FloatRegister temp, diff --git a/js/src/wasm/WasmBaselineCompile.cpp b/js/src/wasm/WasmBaselineCompile.cpp index 165cd2a9929e..3d29490bb81d 100644 --- a/js/src/wasm/WasmBaselineCompile.cpp +++ b/js/src/wasm/WasmBaselineCompile.cpp @@ -6964,108 +6964,153 @@ static void ExtAddPairwiseUI16x8(MacroAssembler& masm, RegV128 rs, masm.unsignedExtAddPairwiseInt16x8(rs, rsd); } +static void ShiftOpMask(MacroAssembler& masm, SimdOp op, RegI32 in, + RegI32 out) { + int32_t maskBits; + + masm.mov(in, out); + if (MacroAssembler::MustMaskShiftCountSimd128(op, &maskBits)) { + masm.and32(Imm32(maskBits), out); + } +} + # if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64) static void ShiftLeftI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp1, RegV128 temp2) { - masm.leftShiftInt8x16(rs, rsd, temp1, temp2); + ShiftOpMask(masm, SimdOp::I8x16Shl, rs, temp1); + masm.leftShiftInt8x16(temp1, rsd, temp2); } static void ShiftLeftI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { - masm.leftShiftInt16x8(rs, rsd, temp); + ShiftOpMask(masm, SimdOp::I16x8Shl, rs, temp); + masm.leftShiftInt16x8(temp, rsd); } static void ShiftLeftI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { - masm.leftShiftInt32x4(rs, rsd, temp); + ShiftOpMask(masm, SimdOp::I32x4Shl, rs, temp); + masm.leftShiftInt32x4(temp, rsd); } static void ShiftLeftI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { - masm.leftShiftInt64x2(rs, rsd, temp); + ShiftOpMask(masm, SimdOp::I64x2Shl, rs, temp); + masm.leftShiftInt64x2(temp, rsd); } static void ShiftRightI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp1, RegV128 temp2) { - masm.rightShiftInt8x16(rs, rsd, temp1, temp2); + ShiftOpMask(masm, SimdOp::I8x16ShrS, rs, temp1); + masm.rightShiftInt8x16(temp1, rsd, temp2); } static void ShiftRightUI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp1, RegV128 temp2) { - masm.unsignedRightShiftInt8x16(rs, rsd, temp1, temp2); + ShiftOpMask(masm, SimdOp::I8x16ShrU, rs, temp1); + masm.unsignedRightShiftInt8x16(temp1, rsd, temp2); } static void ShiftRightI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { - masm.rightShiftInt16x8(rs, rsd, temp); + ShiftOpMask(masm, SimdOp::I16x8ShrS, rs, temp); + masm.rightShiftInt16x8(temp, rsd); } static void ShiftRightUI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { - masm.unsignedRightShiftInt16x8(rs, rsd, temp); + ShiftOpMask(masm, SimdOp::I16x8ShrU, rs, temp); + masm.unsignedRightShiftInt16x8(temp, rsd); } static void ShiftRightI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { - masm.rightShiftInt32x4(rs, rsd, temp); + ShiftOpMask(masm, SimdOp::I32x4ShrS, rs, temp); + masm.rightShiftInt32x4(temp, rsd); } static void ShiftRightUI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { - masm.unsignedRightShiftInt32x4(rs, rsd, temp); + ShiftOpMask(masm, SimdOp::I32x4ShrU, rs, temp); + masm.unsignedRightShiftInt32x4(temp, rsd); } static void ShiftRightUI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd, RegI32 temp) { - masm.unsignedRightShiftInt64x2(rs, rsd, temp); + ShiftOpMask(masm, SimdOp::I64x2ShrU, rs, temp); + masm.unsignedRightShiftInt64x2(temp, rsd); } # elif defined(JS_CODEGEN_ARM64) -static void ShiftLeftI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.leftShiftInt8x16(rsd, rs, rsd); +static void ShiftLeftI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I8x16Shl, rs, temp); + 
masm.leftShiftInt8x16(rsd, temp, rsd); } -static void ShiftLeftI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.leftShiftInt16x8(rsd, rs, rsd); +static void ShiftLeftI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I16x8Shl, rs, temp); + masm.leftShiftInt16x8(rsd, temp, rsd); } -static void ShiftLeftI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.leftShiftInt32x4(rsd, rs, rsd); +static void ShiftLeftI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I32x4Shl, rs, temp); + masm.leftShiftInt32x4(rsd, temp, rsd); } -static void ShiftLeftI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.leftShiftInt64x2(rsd, rs, rsd); +static void ShiftLeftI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I64x2Shl, rs, temp); + masm.leftShiftInt64x2(rsd, temp, rsd); } -static void ShiftRightI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.rightShiftInt8x16(rsd, rs, rsd); +static void ShiftRightI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I8x16ShrS, rs, temp); + masm.rightShiftInt8x16(rsd, temp, rsd); } -static void ShiftRightUI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.unsignedRightShiftInt8x16(rsd, rs, rsd); +static void ShiftRightUI8x16(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I8x16ShrU, rs, temp); + masm.unsignedRightShiftInt8x16(rsd, temp, rsd); } -static void ShiftRightI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.rightShiftInt16x8(rsd, rs, rsd); +static void ShiftRightI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I16x8ShrS, rs, temp); + masm.rightShiftInt16x8(rsd, temp, rsd); } -static void ShiftRightUI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.unsignedRightShiftInt16x8(rsd, rs, rsd); +static void ShiftRightUI16x8(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I16x8ShrU, rs, temp); + masm.unsignedRightShiftInt16x8(rsd, temp, rsd); } -static void ShiftRightI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.rightShiftInt32x4(rsd, rs, rsd); +static void ShiftRightI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I32x4ShrS, rs, temp); + masm.rightShiftInt32x4(rsd, temp, rsd); } -static void ShiftRightUI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.unsignedRightShiftInt32x4(rsd, rs, rsd); +static void ShiftRightUI32x4(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I32x4ShrU, rs, temp); + masm.unsignedRightShiftInt32x4(rsd, temp, rsd); } -static void ShiftRightI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.rightShiftInt64x2(rsd, rs, rsd); +static void ShiftRightI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I64x2ShrS, rs, temp); + masm.rightShiftInt64x2(rsd, temp, rsd); } -static void ShiftRightUI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd) { - masm.unsignedRightShiftInt64x2(rsd, rs, rsd); +static void ShiftRightUI64x2(MacroAssembler& masm, RegI32 rs, RegV128 rsd, + RegI32 temp) { + ShiftOpMask(masm, SimdOp::I64x2ShrU, rs, temp); + masm.unsignedRightShiftInt64x2(rsd, temp, rsd); } # endif diff --git a/js/src/wasm/WasmIonCompile.cpp b/js/src/wasm/WasmIonCompile.cpp index f1ecb5dce266..f52bdac0dbbb 
100644 --- a/js/src/wasm/WasmIonCompile.cpp +++ b/js/src/wasm/WasmIonCompile.cpp @@ -726,6 +726,15 @@ class FunctionCompiler { MOZ_ASSERT(lhs->type() == MIRType::Simd128 && rhs->type() == MIRType::Int32); + int32_t maskBits; + if (MacroAssembler::MustMaskShiftCountSimd128(op, &maskBits)) { + MConstant* mask = MConstant::New(alloc(), Int32Value(maskBits)); + curBlock_->add(mask); + auto* rhs2 = MBitAnd::New(alloc(), rhs, mask, MIRType::Int32); + curBlock_->add(rhs2); + rhs = rhs2; + } + auto* ins = MWasmShiftSimd128::New(alloc(), lhs, rhs, op); curBlock_->add(ins); return ins;
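Taken together, the patch moves the masking into the two compiler front ends: the baseline compiler masks through ShiftOpMask above, Ion masks through the MBitAnd inserted here, and the MacroAssembler shift helpers now assume a pre-masked count. A condensed sketch of the resulting calling convention is below; MaskedLeftShiftInt32x4 is a hypothetical wrapper written only to show the contract, and the includes are abbreviated.

#include "jit/MacroAssembler.h"

using namespace js;
using namespace js::jit;

// Hypothetical helper, for illustration only: mask a variable shift count in
// a scratch GPR, then hand it to one of the MacroAssembler helpers that now
// require a pre-masked count (here the x86-shared i32x4 left shift).
static void MaskedLeftShiftInt32x4(MacroAssembler& masm, Register count,
                                   Register scratch, FloatRegister lhsDest) {
  int32_t mask;
  masm.mov(count, scratch);
  if (MacroAssembler::MustMaskShiftCountSimd128(wasm::SimdOp::I32x4Shl,
                                                &mask)) {
    masm.and32(Imm32(mask), scratch);  // effective count is now 0..31
  }
  masm.leftShiftInt32x4(scratch, lhsDest);  // no GPR/FPR temps needed any more
}

Centralizing the lane-width knowledge in MustMaskShiftCountSimd128 keeps it in one place per architecture, and expressing the mask as ordinary MIR (the MBitAnd above) presumably lets Ion's later passes simplify or share redundant masks instead of re-emitting them during codegen.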