diff --git a/js/src/jit/shared/Assembler-x86-shared.h b/js/src/jit/shared/Assembler-x86-shared.h
index e9d1ef50c916..fde408157288 100644
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -2234,6 +2234,23 @@ class AssemblerX86Shared : public AssemblerShared
             MOZ_CRASH("unexpected operand kind");
         }
     }
+    void vblendvps(FloatRegister mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE41());
+        masm.vblendvps_rr(mask.code(), src1.code(), src0.code(), dest.code());
+    }
+    void vblendvps(FloatRegister mask, const Operand &src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE41());
+        switch (src1.kind()) {
+          case Operand::FPREG:
+            masm.vblendvps_rr(mask.code(), src1.fpu(), src0.code(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.vblendvps_mr(mask.code(), src1.disp(), src1.base(), src0.code(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
     void movsldup(FloatRegister src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE3());
         masm.movsldup_rr(src.code(), dest.code());
diff --git a/js/src/jit/shared/BaseAssembler-x86-shared.h b/js/src/jit/shared/BaseAssembler-x86-shared.h
index a0a5047f2d4e..d8aefa19b4c7 100644
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -3788,6 +3788,9 @@ public:
     void vblendvps_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
         vblendvOpSimd(mask, src1, src0, dst);
     }
+    void vblendvps_mr(XMMRegisterID mask, int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst) {
+        vblendvOpSimd(mask, offset, base, src0, dst);
+    }
 
     void movsldup_rr(XMMRegisterID src, XMMRegisterID dst)
     {
@@ -4397,6 +4400,25 @@ private:
                          mask, (RegisterID)rm, src0, dst);
     }
 
+    void vblendvOpSimd(XMMRegisterID mask, int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        if (useLegacySSEEncodingForVblendv(mask, src0, dst)) {
+            spew("blendvps   %s0x%x(%s), %s",
+                 PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(src0));
+            // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix.
+            m_formatter.legacySSEPrefix(VEX_PD);
+            m_formatter.threeByteOp(OP3_BLENDVPS_VdqWdq, ESCAPE_BLENDVPS, offset, base, src0);
+            return;
+        }
+
+        spew("vblendvps  %s, %s0x%x(%s), %s, %s",
+             nameFPReg(mask), PRETTY_PRINT_OFFSET(offset), nameIReg(base),
+             nameFPReg(src0), nameFPReg(dst));
+        // Even though a "ps" instruction, vblendv is encoded with the "pd" prefix.
+        m_formatter.vblendvOpVex(VEX_PD, OP3_VBLENDVPS_VdqWdq, ESCAPE_VBLENDVPS,
+                                 mask, offset, base, src0, dst);
+    }
+
 #ifdef JS_CODEGEN_X64
     void twoByteOpSimd64(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
                          XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
@@ -4733,6 +4755,21 @@
         immediate8(mask << 4);
     }
 
+    void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape,
+                      XMMRegisterID mask, int offset, RegisterID base, XMMRegisterID src0, int reg)
+    {
+        int r = (reg >> 3), x = 0, b = (base >> 3);
+        int m = 0, w = 0, v = src0, l = 0;
+        switch (escape) {
+          case 0x38: m = 2; break; // 0x0F 0x38
+          case 0x3A: m = 3; break; // 0x0F 0x3A
+          default: MOZ_CRASH("unexpected escape");
+        }
+        threeOpVex(ty, r, x, b, m, w, v, l, opcode);
+        memoryModRM(offset, base, reg);
+        immediate8(mask << 4);
+    }
+
 #ifdef JS_CODEGEN_X64
     // Quad-word-sized operands:
     //
diff --git a/js/src/jit/shared/CodeGenerator-x86-shared.cpp b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
index e721e0f2bf0b..667e569d643b 100644
--- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp
+++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp
@@ -2704,12 +2704,18 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
         FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
         masm.vcmpneqps(rhs, rhsCopy, mask);
 
-        // Emulates blendv
-        if (lhs != output)
-            masm.movaps(lhs, output);
-        masm.andps(Operand(mask), output);
-        masm.andnps(Operand(tmp), mask);
-        masm.orps(Operand(mask), output);
+        if (AssemblerX86Shared::HasAVX()) {
+            masm.vblendvps(mask, lhs, tmp, output);
+        } else {
+            // Emulate vblendvps.
+            // With SSE.4.1 we could use blendvps, however it's awkward since
+            // it requires the mask to be in xmm0.
+            if (lhs != output)
+                masm.movaps(lhs, output);
+            masm.andps(Operand(mask), output);
+            masm.andnps(Operand(tmp), mask);
+            masm.orps(Operand(mask), output);
+        }
         return;
       }
       case MSimdBinaryArith::MaxNum: {
@@ -2732,12 +2738,18 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
         FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
         masm.vcmpneqps(rhs, rhsCopy, mask);
 
-        // Emulates blendv
-        if (lhs != output)
-            masm.movaps(lhs, output);
-        masm.andps(Operand(mask), output);
-        masm.andnps(Operand(tmp), mask);
-        masm.orps(Operand(mask), output);
+        if (AssemblerX86Shared::HasAVX()) {
+            masm.vblendvps(mask, lhs, tmp, output);
+        } else {
+            // Emulate vblendvps.
+            // With SSE.4.1 we could use blendvps, however it's awkward since
+            // it requires the mask to be in xmm0.
+            if (lhs != output)
+                masm.movaps(lhs, output);
+            masm.andps(Operand(mask), output);
+            masm.andnps(Operand(tmp), mask);
+            masm.orps(Operand(mask), output);
+        }
         return;
       }
     }