From 04e387afbb4e07594dbd621385529b4664bc06b3 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Sun, 28 Dec 2014 07:04:12 -0800
Subject: [PATCH] Bug 1115752 - SpiderMonkey: VEX encodings for moves, shifts,
 and multiplies r=jandem

---
 js/src/jit/shared/Assembler-x86-shared.h        | 107 +++---
 js/src/jit/shared/BaseAssembler-x86-shared.h    | 336 ++++++++----------
 .../jit/shared/CodeGenerator-x86-shared.cpp     |  34 +-
 .../jit/shared/MacroAssembler-x86-shared.cpp    |   2 +-
 js/src/jit/shared/MacroAssembler-x86-shared.h   |  42 +--
 js/src/jit/x64/Assembler-x64.h                  |  10 +-
 js/src/jit/x64/CodeGenerator-x64.cpp            |   2 +-
 js/src/jit/x64/MacroAssembler-x64.h             |   4 +-
 js/src/jit/x86/Assembler-x86.h                  |  42 +--
 js/src/jit/x86/CodeGenerator-x86.cpp            |  16 +-
 js/src/jit/x86/MacroAssembler-x86.cpp           |   8 +-
 11 files changed, 293 insertions(+), 310 deletions(-)

diff --git a/js/src/jit/shared/Assembler-x86-shared.h b/js/src/jit/shared/Assembler-x86-shared.h
index bfb0429a2537..5269fdad50bf 100644
--- a/js/src/jit/shared/Assembler-x86-shared.h
+++ b/js/src/jit/shared/Assembler-x86-shared.h
@@ -474,26 +474,26 @@ class AssemblerX86Shared : public AssemblerShared
     // Eventually movapd should be overloaded to support loads and
     // stores too.
-    void movapd(FloatRegister src, FloatRegister dest) {
+    void vmovapd(FloatRegister src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movapd_rr(src.code(), dest.code());
+        masm.vmovapd_rr(src.code(), dest.code());
     }
-    void movaps(FloatRegister src, FloatRegister dest) {
+    void vmovaps(FloatRegister src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movaps_rr(src.code(), dest.code());
+        masm.vmovaps_rr(src.code(), dest.code());
     }
-    void movaps(const Operand &src, FloatRegister dest) {
+    void vmovaps(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         switch (src.kind()) {
          case Operand::MEM_REG_DISP:
-            masm.movaps_mr(src.disp(), src.base(), dest.code());
+            masm.vmovaps_mr(src.disp(), src.base(), dest.code());
            break;
          case Operand::MEM_SCALE:
-            masm.movaps_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
+            masm.vmovaps_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
            break;
          case Operand::FPREG:
-            masm.movaps_rr(src.fpu(), dest.code());
+            masm.vmovaps_rr(src.fpu(), dest.code());
            break;
          default:
            MOZ_CRASH("unexpected operand kind");
@@ -512,14 +512,14 @@ class AssemblerX86Shared : public AssemblerShared
            MOZ_CRASH("unexpected operand kind");
        }
     }
-    void movups(const Operand &src, FloatRegister dest) {
+    void vmovups(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         switch (src.kind()) {
          case Operand::MEM_REG_DISP:
-            masm.movups_mr(src.disp(), src.base(), dest.code());
+            masm.vmovups_mr(src.disp(), src.base(), dest.code());
            break;
          case Operand::MEM_SCALE:
-            masm.movups_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
+            masm.vmovups_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
            break;
          default:
            MOZ_CRASH("unexpected operand kind");
@@ -539,14 +539,14 @@ class AssemblerX86Shared : public AssemblerShared
        }
     }

-    // movsd is only provided in load/store form since the
+    // vmovsd is only provided in load/store form since the
     // register-to-register form has different semantics (it doesn't clobber
     // the whole output register) and isn't needed currently.
-    void movsd(const Address &src, FloatRegister dest) {
-        masm.movsd_mr(src.offset, src.base.code(), dest.code());
+    void vmovsd(const Address &src, FloatRegister dest) {
+        masm.vmovsd_mr(src.offset, src.base.code(), dest.code());
     }
-    void movsd(const BaseIndex &src, FloatRegister dest) {
-        masm.movsd_mr(src.offset, src.base.code(), src.index.code(), src.scale, dest.code());
+    void vmovsd(const BaseIndex &src, FloatRegister dest) {
+        masm.vmovsd_mr(src.offset, src.base.code(), src.index.code(), src.scale, dest.code());
     }
     void movsd(FloatRegister src, const Address &dest) {
         masm.movsd_rm(src.code(), dest.offset, dest.base.code());
@@ -554,15 +554,15 @@
     void movsd(FloatRegister src, const BaseIndex &dest) {
         masm.movsd_rm(src.code(), dest.offset, dest.base.code(), dest.index.code(), dest.scale);
     }
-    // Although movss is not only provided in load/store form (for the same
-    // reasons as movsd above), the register to register form should be only
+    // Although vmovss is also provided in a register-to-register form (for
+    // the same reasons as vmovsd above), that form should only be
     // used in contexts where we care about not clearing the higher lanes of
     // the FloatRegister.
-    void movss(const Address &src, FloatRegister dest) {
-        masm.movss_mr(src.offset, src.base.code(), dest.code());
+    void vmovss(const Address &src, FloatRegister dest) {
+        masm.vmovss_mr(src.offset, src.base.code(), dest.code());
     }
-    void movss(const BaseIndex &src, FloatRegister dest) {
-        masm.movss_mr(src.offset, src.base.code(), src.index.code(), src.scale, dest.code());
+    void vmovss(const BaseIndex &src, FloatRegister dest) {
+        masm.vmovss_mr(src.offset, src.base.code(), src.index.code(), src.scale, dest.code());
     }
     void movss(FloatRegister src, const Address &dest) {
         masm.movss_rm(src.code(), dest.offset, dest.base.code());
@@ -570,17 +570,17 @@
     void movss(FloatRegister src, const BaseIndex &dest) {
         masm.movss_rm(src.code(), dest.offset, dest.base.code(), dest.index.code(), dest.scale);
     }
-    void movss(FloatRegister src, const FloatRegister &dest) {
-        masm.movss_rr(src.code(), dest.code());
+    void vmovss(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
+        masm.vmovss_rr(src1.code(), src0.code(), dest.code());
     }
-    void movdqu(const Operand &src, FloatRegister dest) {
+    void vmovdqu(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         switch (src.kind()) {
          case Operand::MEM_REG_DISP:
-            masm.movdqu_mr(src.disp(), src.base(), dest.code());
+            masm.vmovdqu_mr(src.disp(), src.base(), dest.code());
            break;
          case Operand::MEM_SCALE:
-            masm.movdqu_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
+            masm.vmovdqu_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
            break;
          default:
            MOZ_CRASH("unexpected operand kind");
@@ -599,17 +599,17 @@
            MOZ_CRASH("unexpected operand kind");
        }
     }
-    void movdqa(const Operand &src, FloatRegister dest) {
+    void vmovdqa(const Operand &src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         switch (src.kind()) {
          case Operand::FPREG:
-            masm.movdqa_rr(src.fpu(), dest.code());
+            masm.vmovdqa_rr(src.fpu(), dest.code());
            break;
          case Operand::MEM_REG_DISP:
-            masm.movdqa_mr(src.disp(), src.base(), dest.code());
+            masm.vmovdqa_mr(src.disp(), src.base(), dest.code());
            break;
          case Operand::MEM_SCALE:
-            masm.movdqa_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
+            masm.vmovdqa_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
            break;
          default:
            MOZ_CRASH("unexpected operand kind");
@@ -628,9 +628,9 @@
            MOZ_CRASH("unexpected operand kind");
        }
     }
-    void movdqa(FloatRegister src, FloatRegister dest) {
+    void vmovdqa(FloatRegister src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movdqa_rr(src.code(), dest.code());
+        masm.vmovdqa_rr(src.code(), dest.code());
     }
     void vcvtss2sd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
@@ -1542,25 +1542,25 @@
         MOZ_ASSERT(HasSSE2());
         masm.psrlq_ir(shift.value, dest.code());
     }
-    void pslld(FloatRegister src, FloatRegister dest) {
+    void vpslld(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.pslld_rr(src.code(), dest.code());
+        masm.vpslld_rr(src1.code(), src0.code(), dest.code());
     }
     void pslld(Imm32 count, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.pslld_ir(count.value, dest.code());
     }
-    void psrad(FloatRegister src, FloatRegister dest) {
+    void vpsrad(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.psrad_rr(src.code(), dest.code());
+        masm.vpsrad_rr(src1.code(), src0.code(), dest.code());
     }
     void psrad(Imm32 count, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.psrad_ir(count.value, dest.code());
     }
-    void psrld(FloatRegister src, FloatRegister dest) {
+    void vpsrld(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.psrld_rr(src.code(), dest.code());
+        masm.vpsrld_rr(src1.code(), src0.code(), dest.code());
     }
     void psrld(Imm32 count, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
@@ -1798,21 +1798,34 @@
            MOZ_CRASH("unexpected operand kind");
        }
     }
-    void pmuludq(FloatRegister src, FloatRegister dest) {
+    void vpmuludq(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.pmuludq_rr(src.code(), dest.code());
+        masm.vpmuludq_rr(src1.code(), src0.code(), dest.code());
     }
-    void pmulld(const Operand &src, FloatRegister dest) {
-        MOZ_ASSERT(HasSSE41());
-        switch (src.kind()) {
+    void vpmuludq(const Operand &src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src1.kind()) {
          case Operand::FPREG:
-            masm.pmulld_rr(src.fpu(), dest.code());
+            masm.vpmuludq_rr(src1.fpu(), src0.code(), dest.code());
            break;
          case Operand::MEM_REG_DISP:
-            masm.pmulld_mr(src.disp(), src.base(), dest.code());
+            masm.vpmuludq_mr(src1.disp(), src1.base(), src0.code(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void vpmulld(const Operand &src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE41());
+        switch (src1.kind()) {
+          case Operand::FPREG:
+            masm.vpmulld_rr(src1.fpu(), src0.code(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.vpmulld_mr(src1.disp(), src1.base(), src0.code(), dest.code());
            break;
          case Operand::MEM_ADDRESS32:
-            masm.pmulld_mr(src.address(), dest.code());
+            masm.vpmulld_mr(src1.address(), src0.code(), dest.code());
            break;
          default:
            MOZ_CRASH("unexpected operand kind");

diff --git a/js/src/jit/shared/BaseAssembler-x86-shared.h b/js/src/jit/shared/BaseAssembler-x86-shared.h
index 93d02aea2036..8fcd658e89f3 100644
--- a/js/src/jit/shared/BaseAssembler-x86-shared.h
+++ b/js/src/jit/shared/BaseAssembler-x86-shared.h
@@ -829,30 +829,26 @@ public:
         twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, address, src0, dst);
     }

-    void pmuludq_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vpmuludq_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("pmuludq %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PMULUDQ_VdqWdq, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vpmuludq", VEX_PD, OP2_PMULUDQ_VdqWdq, src1, src0, dst);
+    }
+    void vpmuludq_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpmuludq", VEX_PD, OP2_PMULUDQ_VdqWdq, offset, base, src0, dst);
     }

-    void pmulld_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vpmulld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("pmulld %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.threeByteOp(OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, (RegisterID)src, (RegisterID)dst);
+        threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, src1, src0, dst);
     }
-    void pmulld_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vpmulld_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("pmulld " MEM_ob ", %s", ADDR_ob(offset, base), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.threeByteOp(OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, offset, base, (RegisterID)dst);
+        threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, offset, base, src0, dst);
     }
-    void pmulld_mr(const void* address, XMMRegisterID dst)
+    void vpmulld_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("pmulld %p, %s", address, nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.threeByteOp(OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, address, (RegisterID)dst);
+        threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, address, src0, dst);
     }

     void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
@@ -2911,11 +2907,6 @@ public:
         twoByteOpSimd("vunpckhps", VEX_PS, OP2_UNPCKHPS_VsdWsd, src1, src0, dst);
     }

-    void vmovd_rr(RegisterID src, XMMRegisterID dst)
-    {
-        movdOpSimd(src, dst);
-    }
-
     void vpand_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
         twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, src1, src0, dst);
@@ -3048,11 +3039,9 @@ public:
         m_formatter.immediate8(shift);
     }

-    void pslld_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vpslld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("pslld %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PSLLD_VdqWdq, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vpslld", VEX_PD, OP2_PSLLD_VdqWdq, src1, src0, dst);
     }

     void pslld_ir(int32_t count, XMMRegisterID dst)
     {
@@ -3063,11 +3052,9 @@
         m_formatter.immediate8(int8_t(count));
     }

-    void psrad_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vpsrad_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("psrad %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PSRAD_VdqWdq, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vpsrad", VEX_PD, OP2_PSRAD_VdqWdq, src1, src0, dst);
     }

     void psrad_ir(int32_t count, XMMRegisterID dst)
     {
@@ -3078,11 +3065,9 @@
         m_formatter.immediate8(int8_t(count));
     }

-    void psrld_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vpsrld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("psrld %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PSRLD_VdqWdq, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vpsrld", VEX_PD, OP2_PSRLD_VdqWdq, src1, src0, dst);
     }

     void psrld_ir(int32_t count, XMMRegisterID dst)
     {
@@ -3109,22 +3094,23 @@
     void vmovd_rr(XMMRegisterID src, RegisterID dst)
     {
-        movdOpSimd(src, dst);
+        twoByteOpSimdInt32("vmovd", VEX_PD, OP2_MOVD_EdVd, (XMMRegisterID)dst, (RegisterID)src);
+    }
+
+    void vmovd_rr(RegisterID src, XMMRegisterID dst)
+    {
+        twoByteOpInt32Simd("vmovd", VEX_PD, OP2_MOVD_VdEd, src, X86Registers::invalid_xmm, dst);
     }

 #ifdef JS_CODEGEN_X64
-    void movq_rr(XMMRegisterID src, RegisterID dst)
+    void vmovq_rr(XMMRegisterID src, RegisterID dst)
     {
-        spew("movq %s, %s", nameFPReg(src), nameIReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp64(OP2_MOVD_EdVd, dst, (RegisterID)src);
+        twoByteOpSimdInt64("vmovq", VEX_PD, OP2_MOVD_EdVd, (XMMRegisterID)dst, (RegisterID)src);
     }

-    void movq_rr(RegisterID src, XMMRegisterID dst)
+    void vmovq_rr(RegisterID src, XMMRegisterID dst)
     {
-        spew("movq %s, %s", nameIReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp64(OP2_MOVD_VdEd, src, (RegisterID)dst);
+        twoByteOpInt64Simd("vmovq", VEX_PD, OP2_MOVD_VdEd, src, X86Registers::invalid_xmm, dst);
     }
 #endif

@@ -3156,18 +3142,14 @@
         m_formatter.twoByteOp_disp32(OP2_MOVSD_WsdVsd, offset, base, (RegisterID)src);
     }

-    void movss_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vmovss_mr(int offset, RegisterID base, XMMRegisterID dst)
     {
-        spew("movss " MEM_ob ", %s", ADDR_ob(offset, base), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F3);
-        m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, offset, base, (RegisterID)dst);
+        twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, X86Registers::invalid_xmm, dst);
     }

-    void movss_mr_disp32(int offset, RegisterID base, XMMRegisterID dst)
+    void vmovss_mr_disp32(int offset, RegisterID base, XMMRegisterID dst)
     {
-        spew("movss " MEM_o32b ", %s", ADDR_o32b(offset, base), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F3);
-        m_formatter.twoByteOp_disp32(OP2_MOVSD_VsdWsd, offset, base, (RegisterID)dst);
+        twoByteOpSimd_disp32("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, X86Registers::invalid_xmm, dst);
     }

     void movsd_rm(XMMRegisterID src, int offset, RegisterID base, RegisterID index, int scale)
     {
@@ -3184,78 +3166,59 @@
         m_formatter.twoByteOp(OP2_MOVSD_WsdVsd, offset, base, index, scale, (RegisterID)src);
     }

-    void movss_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
+    void vmovss_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
     {
-        spew("movss " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F3);
-        m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, offset, base, index, scale, (RegisterID)dst);
+        twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, offset, base, index, scale, X86Registers::invalid_xmm, dst);
     }

-    void movsd_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vmovsd_mr(int offset, RegisterID base, XMMRegisterID dst)
     {
-        spew("movsd " MEM_ob ", %s", ADDR_ob(offset, base), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F2);
-        m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, offset, base, (RegisterID)dst);
+        twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, X86Registers::invalid_xmm, dst);
     }

-    void movsd_mr_disp32(int offset, RegisterID base, XMMRegisterID dst)
+    void vmovsd_mr_disp32(int offset, RegisterID base, XMMRegisterID dst)
     {
-        spew("movsd " MEM_o32b ", %s", ADDR_o32b(offset, base), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F2);
-        m_formatter.twoByteOp_disp32(OP2_MOVSD_VsdWsd, offset, base, (RegisterID)dst);
+        twoByteOpSimd_disp32("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, X86Registers::invalid_xmm, dst);
     }

-    void movsd_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
+    void vmovsd_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
     {
-        spew("movsd " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F2);
-        m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, offset, base, index, scale, (RegisterID)dst);
+        twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, offset, base, index, scale, X86Registers::invalid_xmm, dst);
     }

     // Note that the register-to-register form of movsd does not write to the
     // entire output register. For general-purpose register-to-register moves,
-    // use movaps instead.
-    void movsd_rr(XMMRegisterID src, XMMRegisterID dst)
+    // use movapd instead.
+    void vmovsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("movsd %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F2);
-        m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, src1, src0, dst);
     }

     // The register-to-register form of movss has the same problem as movsd
-    // above. Prefer movapd for register-to-register moves.
-    void movss_rr(XMMRegisterID src, XMMRegisterID dst)
+    // above. Prefer movaps for register-to-register moves.
+    void vmovss_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("movss %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F3);
-        m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, src1, src0, dst);
     }

-    void movsd_mr(const void* address, XMMRegisterID dst)
+    void vmovsd_mr(const void* address, XMMRegisterID dst)
     {
-        spew("movsd %p, %s", address, nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F2);
-        m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, address, (RegisterID)dst);
+        twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, address, X86Registers::invalid_xmm, dst);
     }

-    void movss_mr(const void* address, XMMRegisterID dst)
+    void vmovss_mr(const void* address, XMMRegisterID dst)
     {
-        spew("movss %p, %s", address, nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F3);
-        m_formatter.twoByteOp(OP2_MOVSD_VsdWsd, address, (RegisterID)dst);
+        twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, address, X86Registers::invalid_xmm, dst);
     }

-    void movups_mr(const void* address, XMMRegisterID dst)
+    void vmovups_mr(const void* address, XMMRegisterID dst)
     {
-        spew("movups %p, %s", address, nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_MOVPS_VpsWps, address, (RegisterID)dst);
+        twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, address, X86Registers::invalid_xmm, dst);
     }

-    void movdqu_mr(const void* address, XMMRegisterID dst)
+    void vmovdqu_mr(const void* address, XMMRegisterID dst)
     {
-        spew("movdqu %p, %s", address, nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_F3);
-        m_formatter.twoByteOp(OP2_MOVDQ_VdqWdq, address, (RegisterID)dst);
+        twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, address, X86Registers::invalid_xmm, dst);
     }

     void movsd_rm(XMMRegisterID src, const void* address)
     {
@@ -3341,10 +3304,9 @@
     }
 #endif

-    void movaps_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vmovaps_rr(XMMRegisterID src, XMMRegisterID dst)
     {
-        spew("movaps %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_MOVAPS_VsdWsd, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, src, X86Registers::invalid_xmm, dst);
     }
     void movaps_rm(XMMRegisterID src, int offset, RegisterID base)
     {
@@ -3356,18 +3318,15 @@
         spew("movaps %s, " MEM_obs, nameFPReg(src), ADDR_obs(offset, base, index, scale));
         m_formatter.twoByteOp(OP2_MOVAPS_WsdVsd, offset, base, index, scale, (RegisterID)src);
     }
-    void movaps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vmovaps_mr(int offset, RegisterID base, XMMRegisterID dst)
     {
-        spew("movaps " MEM_ob ", %s", ADDR_ob(offset, base), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_MOVAPS_VsdWsd, offset, base, (RegisterID)dst);
+        twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, offset, base, X86Registers::invalid_xmm, dst);
     }
-    void movaps_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
+    void vmovaps_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
     {
-        spew("movaps " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_MOVAPS_VsdWsd, offset, base, index, scale, (RegisterID)dst);
+        twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, offset, base, index, scale, X86Registers::invalid_xmm, dst);
     }
-
     void movups_rm(XMMRegisterID src, int offset, RegisterID base)
     {
         spew("movups %s, " MEM_ob, nameFPReg(src), ADDR_ob(offset, base));
@@ -3383,27 +3342,22 @@
         spew("movups %s, " MEM_obs, nameFPReg(src), ADDR_obs(offset, base, index, scale));
         m_formatter.twoByteOp(OP2_MOVPS_WpsVps, offset, base, index, scale, (RegisterID)src);
     }
-    void movups_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vmovups_mr(int offset, RegisterID base, XMMRegisterID dst)
     {
-        spew("movups " MEM_ob ", %s", ADDR_ob(offset, base), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_MOVPS_VpsWps, offset, base, (RegisterID)dst);
+        twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base, X86Registers::invalid_xmm, dst);
     }
-    void movups_mr_disp32(int offset, RegisterID base, XMMRegisterID dst)
+    void vmovups_mr_disp32(int offset, RegisterID base, XMMRegisterID dst)
     {
-        spew("movups " MEM_o32b ", %s", ADDR_o32b(offset, base), nameFPReg(dst));
-        m_formatter.twoByteOp_disp32(OP2_MOVPS_VpsWps, offset, base, (RegisterID)dst);
+        twoByteOpSimd_disp32("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base, X86Registers::invalid_xmm, dst);
     }
-    void movups_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
+    void vmovups_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst)
     {
-        spew("movups " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_MOVPS_VpsWps, offset, base, index, scale, (RegisterID)dst);
+        twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, offset, base, index, scale, X86Registers::invalid_xmm, dst);
     }

-    void movapd_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vmovapd_rr(XMMRegisterID src, XMMRegisterID dst)
     {
-        spew("movapd %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_MOVAPD_VsdWsd, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vmovapd", VEX_PD, OP2_MOVAPD_VsdWsd, src, X86Registers::invalid_xmm, dst);
     }

 #ifdef JS_CODEGEN_X64
@@ -3422,17 +3376,14 @@
         return JmpSrc(m_formatter.size());
     }
 #else
-    void movaps_mr(const void* address, XMMRegisterID dst)
+    void vmovaps_mr(const void* address, XMMRegisterID dst)
     {
- spew("movaps %p, %s", address, nameFPReg(dst)); - m_formatter.twoByteOp(OP2_MOVAPS_VsdWsd, address, (RegisterID)dst); + twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, address, X86Registers::invalid_xmm, dst); } - void movdqa_mr(const void* address, XMMRegisterID dst) + void vmovdqa_mr(const void* address, XMMRegisterID dst) { - spew("movdqa %p, %s", address, nameFPReg(dst)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp(OP2_MOVDQ_VdqWdq, address, (RegisterID)dst); + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, address, X86Registers::invalid_xmm, dst); } #endif // JS_CODEGEN_X64 @@ -3457,32 +3408,24 @@ public: m_formatter.twoByteOp(OP2_MOVDQ_WdqVdq, offset, base, index, scale, (RegisterID)src); } - void movdqu_mr(int offset, RegisterID base, XMMRegisterID dst) + void vmovdqu_mr(int offset, RegisterID base, XMMRegisterID dst) { - spew("movdqu " MEM_ob ", %s", ADDR_ob(offset, base), nameFPReg(dst)); - m_formatter.prefix(PRE_SSE_F3); - m_formatter.twoByteOp(OP2_MOVDQ_VdqWdq, offset, base, (RegisterID)dst); + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base, X86Registers::invalid_xmm, dst); } - void movdqu_mr_disp32(int offset, RegisterID base, XMMRegisterID dst) + void vmovdqu_mr_disp32(int offset, RegisterID base, XMMRegisterID dst) { - spew("movdqu " MEM_o32b ", %s", ADDR_o32b(offset, base), nameFPReg(dst)); - m_formatter.prefix(PRE_SSE_F3); - m_formatter.twoByteOp_disp32(OP2_MOVDQ_VdqWdq, offset, base, (RegisterID)dst); + twoByteOpSimd_disp32("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base, X86Registers::invalid_xmm, dst); } - void movdqu_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst) + void vmovdqu_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst) { - spew("movdqu " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), nameFPReg(dst)); - m_formatter.prefix(PRE_SSE_F3); - m_formatter.twoByteOp(OP2_MOVDQ_VdqWdq, offset, base, index, scale, (RegisterID)dst); + twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, offset, base, index, scale, X86Registers::invalid_xmm, dst); } - void movdqa_rr(XMMRegisterID src, XMMRegisterID dst) + void vmovdqa_rr(XMMRegisterID src, XMMRegisterID dst) { - spew("movdqa %s, %s", nameFPReg(src), nameFPReg(dst)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp(OP2_MOVDQ_VdqWdq, (RegisterID)src, (RegisterID)dst); + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, src, X86Registers::invalid_xmm, dst); } void movdqa_rm(XMMRegisterID src, int offset, RegisterID base) @@ -3499,18 +3442,15 @@ public: m_formatter.twoByteOp(OP2_MOVDQ_WdqVdq, offset, base, index, scale, (RegisterID)src); } - void movdqa_mr(int offset, RegisterID base, XMMRegisterID dst) + void vmovdqa_mr(int offset, RegisterID base, XMMRegisterID dst) { - spew("movdqa " MEM_ob ", %s", ADDR_ob(offset, base), nameFPReg(dst)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp(OP2_MOVDQ_VdqWdq, offset, base, (RegisterID)dst); + + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, offset, base, X86Registers::invalid_xmm, dst); } - void movdqa_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst) + void vmovdqa_mr(int offset, RegisterID base, RegisterID index, int scale, XMMRegisterID dst) { - spew("movdqa " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), nameFPReg(dst)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp(OP2_MOVDQ_VdqWdq, offset, base, index, scale, (RegisterID)dst); + twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, offset, base, 
index, scale, X86Registers::invalid_xmm, dst); } void vmulsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) @@ -4250,8 +4190,12 @@ private: return; } - spew("%-11s" MEM_ob ", %s, %s", name, - ADDR_ob(offset, base), nameFPReg(src0), nameFPReg(dst)); + if (src0 == X86Registers::invalid_xmm) { + spew("%-11s" MEM_ob ", %s", name, ADDR_ob(offset, base), nameFPReg(dst)); + } else { + spew("%-11s" MEM_ob ", %s, %s", name, + ADDR_ob(offset, base), nameFPReg(src0), nameFPReg(dst)); + } m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst); } @@ -4265,7 +4209,12 @@ private: return; } - spew("%-11s" MEM_o32b ", %s, %s", name, ADDR_o32b(offset, base), nameFPReg(src0), nameFPReg(dst)); + if (src0 == X86Registers::invalid_xmm) { + spew("%-11s" MEM_o32b ", %s", name, ADDR_o32b(offset, base), nameFPReg(dst)); + } else { + spew("%-11s" MEM_o32b ", %s, %s", name, + ADDR_o32b(offset, base), nameFPReg(src0), nameFPReg(dst)); + } m_formatter.twoByteOpVex_disp32(ty, opcode, offset, base, src0, dst); } @@ -4281,8 +4230,13 @@ private: return; } - spew("%-11s" MEM_obs ", %s, %s", name, ADDR_obs(offset, base, index, scale), - nameFPReg(src0), nameFPReg(dst)); + if (src0 == X86Registers::invalid_xmm) { + spew("%-11s" MEM_obs ", %s", name, ADDR_obs(offset, base, index, scale), + nameFPReg(dst)); + } else { + spew("%-11s" MEM_obs ", %s, %s", name, ADDR_obs(offset, base, index, scale), + nameFPReg(src0), nameFPReg(dst)); + } m_formatter.twoByteOpVex(ty, opcode, offset, base, index, scale, src0, dst); } @@ -4296,7 +4250,10 @@ private: return; } - spew("%-11s%p, %s, %s", name, address, nameFPReg(src0), nameFPReg(dst)); + if (src0 == X86Registers::invalid_xmm) + spew("%-11s%p, %s", name, address, nameFPReg(dst)); + else + spew("%-11s%p, %s, %s", name, address, nameFPReg(src0), nameFPReg(dst)); m_formatter.twoByteOpVex(ty, opcode, address, src0, dst); } @@ -4334,13 +4291,19 @@ private: XMMRegisterID rm, RegisterID dst) { if (useLegacySSEEncodingForOtherOutput()) { - spew("%-11s%s, %s", legacySSEOpName(name), nameFPReg(rm), nameIReg(4, dst)); + if (opcode == OP2_MOVD_EdVd) + spew("%-11s%s, %s", legacySSEOpName(name), nameFPReg((XMMRegisterID)dst), nameIReg(4, (RegisterID)rm)); + else + spew("%-11s%s, %s", legacySSEOpName(name), nameFPReg(rm), nameIReg(4, dst)); m_formatter.legacySSEPrefix(ty); m_formatter.twoByteOp(opcode, (RegisterID)rm, dst); return; } - spew("%-11s%s, %s", name, nameFPReg(rm), nameIReg(4, dst)); + if (opcode == OP2_MOVD_EdVd) + spew("%-11s%s, %s", name, nameFPReg((XMMRegisterID)dst), nameIReg(4, (RegisterID)rm)); + else + spew("%-11s%s, %s", name, nameFPReg(rm), nameIReg(4, dst)); m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, X86Registers::invalid_xmm, dst); } @@ -4349,13 +4312,19 @@ private: XMMRegisterID rm, RegisterID dst) { if (useLegacySSEEncodingForOtherOutput()) { - spew("%-11s%s, %s", legacySSEOpName(name), nameFPReg(rm), nameIReg(dst)); + if (opcode == OP2_MOVD_EdVd) + spew("%-11s%s, %s", legacySSEOpName(name), nameFPReg((XMMRegisterID)dst), nameIReg((RegisterID)rm)); + else + spew("%-11s%s, %s", legacySSEOpName(name), nameFPReg(rm), nameIReg(dst)); m_formatter.legacySSEPrefix(ty); m_formatter.twoByteOp64(opcode, (RegisterID)rm, dst); return; } - spew("%-11s%s, %s", name, nameFPReg(rm), nameIReg(dst)); + if (opcode == OP2_MOVD_EdVd) + spew("%-11s%s, %s", name, nameFPReg((XMMRegisterID)dst), nameIReg((RegisterID)rm)); + else + spew("%-11s%s, %s", name, nameFPReg(rm), nameIReg(dst)); m_formatter.twoByteOpVex64(ty, opcode, (RegisterID)rm, X86Registers::invalid_xmm, 
(XMMRegisterID)dst); } #endif @@ -4422,6 +4391,21 @@ private: m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst); } + void threeByteOpSimd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode, + ThreeByteEscape escape, + const void *address, XMMRegisterID src0, XMMRegisterID dst) + { + if (useLegacySSEEncoding(src0, dst)) { + spew("%-11s%p, %s", legacySSEOpName(name), address, nameFPReg(dst)); + m_formatter.legacySSEPrefix(ty); + m_formatter.threeByteOp(opcode, escape, address, dst); + return; + } + + spew("%-11s%p, %s, %s", name, address, nameFPReg(src0), nameFPReg(dst)); + m_formatter.threeByteOpVex(ty, opcode, escape, address, src0, dst); + } + // Blendv is a three-byte op, but the VEX encoding has a different opcode // than the SSE encoding, so we handle it specially. void vblendvOpSimd(XMMRegisterID mask, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst) @@ -4458,34 +4442,6 @@ private: mask, offset, base, src0, dst); } - // XMM-to-GPR movd is a two-byte op, but the operands are encoded in reverse - // order, so we handle it specially. - void movdOpSimd(XMMRegisterID src, RegisterID dst) { - if (useLegacySSEEncodingForOtherOutput()) { - spew("movd %s, %s", nameFPReg(src), nameIReg(4, dst)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp(OP2_MOVD_EdVd, dst, (RegisterID)src); - return; - } - - spew("vmovd %s, %s", nameFPReg(src), nameIReg(4, dst)); - m_formatter.twoByteOpVex(VEX_PD, OP2_MOVD_EdVd, dst, X86Registers::invalid_xmm, src); - } - - // GPR-to-XMM movd is a two-byte op, but it doesn't have an extra XMM - // input, so we handle it specially. - void movdOpSimd(RegisterID src, XMMRegisterID dst) { - if (useLegacySSEEncodingForOtherOutput()) { - spew("movd %s, %s", nameIReg(4, src), nameFPReg(dst)); - m_formatter.prefix(PRE_SSE_66); - m_formatter.twoByteOp(OP2_MOVD_VdEd, src, dst); - return; - } - - spew("vmovd %s, %s", nameIReg(4, src), nameFPReg(dst)); - m_formatter.twoByteOpVex(VEX_PD, OP2_MOVD_VdEd, src, X86Registers::invalid_xmm, dst); - } - static int32_t getInt32(void* where) { return reinterpret_cast(where)[-1]; @@ -4801,6 +4757,20 @@ private: memoryModRM(address, reg); } + void threeByteOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape, + const void *address, XMMRegisterID src0, int reg) + { + int r = (reg >> 3), x = 0, b = 0; + int m = 0, w = 0, v = src0, l = 0; + switch (escape) { + case 0x38: m = 2; break; // 0x0F 0x38 + case 0x3A: m = 3; break; // 0x0F 0x3A + default: MOZ_CRASH("unexpected escape"); + } + threeOpVex(ty, r, x, b, m, w, v, l, opcode); + memoryModRM(address, reg); + } + void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape, XMMRegisterID mask, RegisterID rm, XMMRegisterID src0, int reg) { diff --git a/js/src/jit/shared/CodeGenerator-x86-shared.cpp b/js/src/jit/shared/CodeGenerator-x86-shared.cpp index 92d189c03936..4c1338053079 100644 --- a/js/src/jit/shared/CodeGenerator-x86-shared.cpp +++ b/js/src/jit/shared/CodeGenerator-x86-shared.cpp @@ -2146,7 +2146,7 @@ CodeGeneratorX86Shared::visitSimdSplatX4(LSimdSplatX4 *ins) case MIRType_Float32x4: { FloatRegister r = ToFloatRegister(ins->getOperand(0)); if (r != output) - masm.movaps(r, output); + masm.moveFloat32x4(r, output); masm.shufps(0, output, output); break; } @@ -2231,7 +2231,7 @@ CodeGeneratorX86Shared::visitSimdInsertElementF(LSimdInsertElementF *ins) // As both operands are registers, movss doesn't modify the upper bits // of the destination operand. 
         if (value != output)
-            masm.movss(value, output);
+            masm.vmovss(value, vector, output);
         return;
     }

@@ -2372,7 +2372,7 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)

     // register-register movss preserves the high lanes.
     if (ins->lanesMatch(4, 1, 2, 3)) {
-        masm.movss(rhs, out);
+        masm.vmovss(rhs, lhs, out);
         return;
     }

@@ -2441,7 +2441,7 @@
         masm.shufps(firstMask, lhs, rhsCopy);
         masm.shufps(secondMask, lhs, rhsCopy);
-        masm.movaps(rhsCopy, out);
+        masm.moveFloat32x4(rhsCopy, out);
         return;
     }

@@ -2454,9 +2454,9 @@
         if (AssemblerX86Shared::HasAVX()) {
             masm.vmovhlps(lhs, rhs, out);
         } else {
-            masm.movaps(rhs, ScratchSimdReg);
+            masm.moveFloat32x4(rhs, ScratchSimdReg);
             masm.vmovhlps(lhs, ScratchSimdReg, ScratchSimdReg);
-            masm.movaps(ScratchSimdReg, out);
+            masm.moveFloat32x4(ScratchSimdReg, out);
         }
         return;
     }
@@ -2476,9 +2476,9 @@
         if (AssemblerX86Shared::HasAVX()) {
             masm.vunpcklps(lhs, rhs, out);
         } else {
-            masm.movaps(rhs, ScratchSimdReg);
+            masm.moveFloat32x4(rhs, ScratchSimdReg);
             masm.vunpcklps(lhs, ScratchSimdReg, ScratchSimdReg);
-            masm.movaps(ScratchSimdReg, out);
+            masm.moveFloat32x4(ScratchSimdReg, out);
         }
         return;
     }
@@ -2493,9 +2493,9 @@
         if (AssemblerX86Shared::HasAVX()) {
             masm.vunpckhps(lhs, rhs, out);
         } else {
-            masm.movaps(rhs, ScratchSimdReg);
+            masm.moveFloat32x4(rhs, ScratchSimdReg);
             masm.vunpckhps(lhs, ScratchSimdReg, ScratchSimdReg);
-            masm.movaps(ScratchSimdReg, out);
+            masm.moveFloat32x4(ScratchSimdReg, out);
         }
         return;
     }
@@ -2644,18 +2644,18 @@ CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *ins)
         return;
       case MSimdBinaryArith::Mul: {
         if (AssemblerX86Shared::HasSSE41()) {
-            masm.pmulld(rhs, lhs);
+            masm.vpmulld(rhs, lhs, output);
            return;
        }

         masm.loadAlignedInt32x4(rhs, ScratchSimdReg);
-        masm.pmuludq(lhs, ScratchSimdReg);
+        masm.vpmuludq(lhs, ScratchSimdReg, ScratchSimdReg);
         // ScratchSimdReg contains (Rx, _, Rz, _) where R is the resulting vector.

         FloatRegister temp = ToFloatRegister(ins->temp());
         masm.pshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), lhs, lhs);
         masm.pshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), rhs, temp);
-        masm.pmuludq(temp, lhs);
+        masm.vpmuludq(temp, lhs, lhs);
         // lhs contains (Ry, _, Rw, _) where R is the resulting vector.

         masm.shufps(MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, LaneX, LaneZ), ScratchSimdReg, lhs);
@@ -2745,7 +2745,7 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
         // With SSE.4.1 we could use blendvps, however it's awkward since
         // it requires the mask to be in xmm0.
         if (lhs != output)
-            masm.movaps(lhs, output);
+            masm.moveFloat32x4(lhs, output);
         masm.vandps(Operand(mask), output, output);
         masm.vandnps(Operand(tmp), mask, mask);
         masm.vorps(Operand(mask), output, output);
@@ -2779,7 +2779,7 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
         // With SSE.4.1 we could use blendvps, however it's awkward since
         // it requires the mask to be in xmm0.
         if (lhs != output)
-            masm.movaps(lhs, output);
+            masm.moveFloat32x4(lhs, output);
         masm.vandps(Operand(mask), output, output);
         masm.vandnps(Operand(tmp), mask, mask);
         masm.vorps(Operand(mask), output, output);
@@ -2947,9 +2947,9 @@ CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect *ins)
     FloatRegister temp = ToFloatRegister(ins->temp());

     if (onTrue != output)
-        masm.movaps(onTrue, output);
+        masm.vmovaps(onTrue, output);
     if (mask != temp)
-        masm.movaps(mask, temp);
+        masm.vmovaps(mask, temp);

     masm.bitwiseAndX4(Operand(mask), output);
     masm.bitwiseAndNotX4(Operand(onFalse), temp);

diff --git a/js/src/jit/shared/MacroAssembler-x86-shared.cpp b/js/src/jit/shared/MacroAssembler-x86-shared.cpp
index f73636cdb60a..8b8a32013c03 100644
--- a/js/src/jit/shared/MacroAssembler-x86-shared.cpp
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.cpp
@@ -231,7 +231,7 @@ MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg,

     bind(&nonZero);
 #elif defined(JS_CODEGEN_X64)
-    movq(reg, scratch);
+    vmovq(reg, scratch);
     cmpq(Imm32(1), scratch);
     j(Overflow, label);
 #endif

diff --git a/js/src/jit/shared/MacroAssembler-x86-shared.h b/js/src/jit/shared/MacroAssembler-x86-shared.h
index ab22921f4168..efc58832c515 100644
--- a/js/src/jit/shared/MacroAssembler-x86-shared.h
+++ b/js/src/jit/shared/MacroAssembler-x86-shared.h
@@ -741,10 +741,10 @@ class MacroAssemblerX86Shared : public Assembler
         store32(src, dest);
     }
     void loadDouble(const Address &src, FloatRegister dest) {
-        movsd(src, dest);
+        vmovsd(src, dest);
     }
     void loadDouble(const BaseIndex &src, FloatRegister dest) {
-        movsd(src, dest);
+        vmovsd(src, dest);
     }
     void loadDouble(const Operand &src, FloatRegister dest) {
         switch (src.kind()) {
@@ -778,7 +778,7 @@
     }
     void moveDouble(FloatRegister src, FloatRegister dest) {
         // Use movapd instead of movsd to avoid dependencies.
-        movapd(src, dest);
+        vmovapd(src, dest);
     }
     void zeroDouble(FloatRegister reg) {
         vxorpd(reg, reg, reg);
@@ -857,16 +857,16 @@
     }

     void loadAlignedInt32x4(const Address &src, FloatRegister dest) {
-        movdqa(Operand(src), dest);
+        vmovdqa(Operand(src), dest);
     }
     void loadAlignedInt32x4(const Operand &src, FloatRegister dest) {
-        movdqa(src, dest);
+        vmovdqa(src, dest);
     }
     void storeAlignedInt32x4(FloatRegister src, const Address &dest) {
         movdqa(src, Operand(dest));
     }
     void moveInt32x4(FloatRegister src, FloatRegister dest) {
-        movdqa(src, dest);
+        vmovdqa(src, dest);
     }
     FloatRegister reusedInputInt32x4(FloatRegister src, FloatRegister dest) {
         if (HasAVX())
@@ -881,10 +881,10 @@
         return dest;
     }
     void loadUnalignedInt32x4(const Address &src, FloatRegister dest) {
-        movdqu(Operand(src), dest);
+        vmovdqu(Operand(src), dest);
     }
     void loadUnalignedInt32x4(const Operand &src, FloatRegister dest) {
-        movdqu(src, dest);
+        vmovdqu(src, dest);
     }
     void storeUnalignedInt32x4(FloatRegister src, const Address &dest) {
         movdqu(src, Operand(dest));
@@ -919,35 +919,35 @@
     }
     void packedLeftShiftByScalar(FloatRegister src, FloatRegister dest) {
-        pslld(src, dest);
+        vpslld(src, dest, dest);
     }
     void packedLeftShiftByScalar(Imm32 count, FloatRegister dest) {
         pslld(count, dest);
     }
     void packedRightShiftByScalar(FloatRegister src, FloatRegister dest) {
-        psrad(src, dest);
+        vpsrad(src, dest, dest);
     }
     void packedRightShiftByScalar(Imm32 count, FloatRegister dest) {
         psrad(count, dest);
     }
     void packedUnsignedRightShiftByScalar(FloatRegister src, FloatRegister dest) {
-        psrld(src, dest);
+        vpsrld(src, dest, dest);
     }
     void packedUnsignedRightShiftByScalar(Imm32 count, FloatRegister dest) {
         psrld(count, dest);
     }

     void loadAlignedFloat32x4(const Address &src, FloatRegister dest) {
-        movaps(Operand(src), dest);
+        vmovaps(Operand(src), dest);
     }
     void loadAlignedFloat32x4(const Operand &src, FloatRegister dest) {
-        movaps(src, dest);
+        vmovaps(src, dest);
     }
     void storeAlignedFloat32x4(FloatRegister src, const Address &dest) {
         movaps(src, Operand(dest));
     }
     void moveFloat32x4(FloatRegister src, FloatRegister dest) {
-        movaps(src, dest);
+        vmovaps(src, dest);
     }
     FloatRegister reusedInputFloat32x4(FloatRegister src, FloatRegister dest) {
         if (HasAVX())
@@ -962,10 +962,10 @@
         return dest;
     }
     void loadUnalignedFloat32x4(const Address &src, FloatRegister dest) {
-        movups(Operand(src), dest);
+        vmovups(Operand(src), dest);
     }
     void loadUnalignedFloat32x4(const Operand &src, FloatRegister dest) {
-        movups(src, dest);
+        vmovups(src, dest);
     }
     void storeUnalignedFloat32x4(FloatRegister src, const Address &dest) {
         movups(src, Operand(dest));
@@ -1026,11 +1026,11 @@
         vcvtss2sd(dest, dest, dest);
     }
     void loadFloatAsDouble(const Address &src, FloatRegister dest) {
-        movss(src, dest);
+        vmovss(src, dest);
         vcvtss2sd(dest, dest, dest);
     }
     void loadFloatAsDouble(const BaseIndex &src, FloatRegister dest) {
-        movss(src, dest);
+        vmovss(src, dest);
         vcvtss2sd(dest, dest, dest);
     }
     void loadFloatAsDouble(const Operand &src, FloatRegister dest) {
@@ -1038,10 +1038,10 @@
         vcvtss2sd(dest, dest, dest);
     }
     void loadFloat32(const Address &src, FloatRegister dest) {
-        movss(src, dest);
+        vmovss(src, dest);
     }
     void loadFloat32(const BaseIndex &src, FloatRegister dest) {
-        movss(src, dest);
+        vmovss(src, dest);
     }
     void loadFloat32(const Operand &src, FloatRegister dest) {
         switch (src.kind()) {
@@ -1075,7 +1075,7 @@
     }
     void moveFloat32(FloatRegister src, FloatRegister dest) {
         // Use movaps instead of movss to avoid dependencies.
-        movaps(src, dest);
+        vmovaps(src, dest);
     }

     // Checks whether a double is representable as a 32-bit integer. If so, the

diff --git a/js/src/jit/x64/Assembler-x64.h b/js/src/jit/x64/Assembler-x64.h
index d5266facffa6..0904145c2c80 100644
--- a/js/src/jit/x64/Assembler-x64.h
+++ b/js/src/jit/x64/Assembler-x64.h
@@ -303,7 +303,7 @@ class Assembler : public AssemblerX86Shared
     }
     void pop(FloatRegister src) {
-        movsd(Address(StackPointer, 0), src);
+        vmovsd(Address(StackPointer, 0), src);
         addq(Imm32(sizeof(double)), StackPointer);
     }

@@ -394,11 +394,11 @@
            MOZ_CRASH("unexpected operand kind");
        }
     }
-    void movq(Register src, FloatRegister dest) {
-        masm.movq_rr(src.code(), dest.code());
+    void vmovq(Register src, FloatRegister dest) {
+        masm.vmovq_rr(src.code(), dest.code());
     }
-    void movq(FloatRegister src, Register dest) {
-        masm.movq_rr(src.code(), dest.code());
+    void vmovq(FloatRegister src, Register dest) {
+        masm.vmovq_rr(src.code(), dest.code());
     }
     void movq(Register src, Register dest) {
         masm.movq_rr(src.code(), dest.code());

diff --git a/js/src/jit/x64/CodeGenerator-x64.cpp b/js/src/jit/x64/CodeGenerator-x64.cpp
index 3ff45268d4ec..76666e49226b 100644
--- a/js/src/jit/x64/CodeGenerator-x64.cpp
+++ b/js/src/jit/x64/CodeGenerator-x64.cpp
@@ -76,7 +76,7 @@ CodeGeneratorX64::visitBox(LBox *box)
             masm.convertFloat32ToDouble(reg, ScratchDoubleReg);
             reg = ScratchDoubleReg;
         }
-        masm.movq(reg, ToRegister(result));
+        masm.vmovq(reg, ToRegister(result));
     } else {
         masm.boxValue(ValueTypeFromMIRType(box->type()), ToRegister(in), ToRegister(result));
     }

diff --git a/js/src/jit/x64/MacroAssembler-x64.h b/js/src/jit/x64/MacroAssembler-x64.h
index 3d599aa4b3fb..87ae115ac59d 100644
--- a/js/src/jit/x64/MacroAssembler-x64.h
+++ b/js/src/jit/x64/MacroAssembler-x64.h
@@ -1104,7 +1104,7 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared
     }

     void boxDouble(FloatRegister src, const ValueOperand &dest) {
-        movq(src, dest.valueReg());
+        vmovq(src, dest.valueReg());
     }
     void boxNonDouble(JSValueType type, Register src, const ValueOperand &dest) {
         MOZ_ASSERT(src != dest.valueReg());
@@ -1151,7 +1151,7 @@
     }
     void unboxDouble(const ValueOperand &src, FloatRegister dest) {
-        movq(src.valueReg(), dest);
+        vmovq(src.valueReg(), dest);
     }
     void unboxPrivate(const ValueOperand &src, const Register dest) {
         movq(src.valueReg(), dest);

diff --git a/js/src/jit/x86/Assembler-x86.h b/js/src/jit/x86/Assembler-x86.h
index 1275f3753aa4..521d3325e93f 100644
--- a/js/src/jit/x86/Assembler-x86.h
+++ b/js/src/jit/x86/Assembler-x86.h
@@ -227,7 +227,7 @@ class Assembler : public AssemblerX86Shared
     }
     void pop(FloatRegister src) {
-        movsd(Address(StackPointer, 0), src);
+        vmovsd(Address(StackPointer, 0), src);
         addl(Imm32(sizeof(double)), StackPointer);
     }

@@ -459,24 +459,24 @@
         masm.movl_mr_disp32(src.offset, src.base.code(), dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movssWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovssWithPatch(Address src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movss_mr_disp32(src.offset, src.base.code(), dest.code());
+        masm.vmovss_mr_disp32(src.offset, src.base.code(), dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movsdWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovsdWithPatch(Address src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movsd_mr_disp32(src.offset, src.base.code(), dest.code());
+        masm.vmovsd_mr_disp32(src.offset, src.base.code(), dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movupsWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovupsWithPatch(Address src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movups_mr_disp32(src.offset, src.base.code(), dest.code());
+        masm.vmovups_mr_disp32(src.offset, src.base.code(), dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movdquWithPatch(Address src, FloatRegister dest) {
+    CodeOffsetLabel vmovdquWithPatch(Address src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movdqu_mr_disp32(src.offset, src.base.code(), dest.code());
+        masm.vmovdqu_mr_disp32(src.offset, src.base.code(), dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }

@@ -543,34 +543,34 @@
         masm.movl_mr(src.addr, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movssWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
+    CodeOffsetLabel vmovssWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movss_mr(src.addr, dest.code());
+        masm.vmovss_mr(src.addr, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movsdWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
+    CodeOffsetLabel vmovsdWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movsd_mr(src.addr, dest.code());
+        masm.vmovsd_mr(src.addr, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movdqaWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
+    CodeOffsetLabel vmovdqaWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movdqa_mr(src.addr, dest.code());
+        masm.vmovdqa_mr(src.addr, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movdquWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
+    CodeOffsetLabel vmovdquWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movdqu_mr(src.addr, dest.code());
+        masm.vmovdqu_mr(src.addr, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movapsWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
+    CodeOffsetLabel vmovapsWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movaps_mr(src.addr, dest.code());
+        masm.vmovaps_mr(src.addr, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }
-    CodeOffsetLabel movupsWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
+    CodeOffsetLabel vmovupsWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.movups_mr(src.addr, dest.code());
+        masm.vmovups_mr(src.addr, dest.code());
         return CodeOffsetLabel(masm.currentOffset());
     }

diff --git a/js/src/jit/x86/CodeGenerator-x86.cpp b/js/src/jit/x86/CodeGenerator-x86.cpp
index 10d800104a2d..afab7a985397 100644
--- a/js/src/jit/x86/CodeGenerator-x86.cpp
+++ b/js/src/jit/x86/CodeGenerator-x86.cpp
@@ -265,10 +265,10 @@ CodeGeneratorX86::load(Scalar::Type vt, const T &srcAddr, const LDefinition *out
       case Scalar::Uint16:    masm.movzwlWithPatch(srcAddr, ToRegister(out));      break;
      case Scalar::Int32:
      case Scalar::Uint32:    masm.movlWithPatch(srcAddr, ToRegister(out));        break;
-      case Scalar::Float32:   masm.movssWithPatch(srcAddr, ToFloatRegister(out));  break;
-      case Scalar::Float64:   masm.movsdWithPatch(srcAddr, ToFloatRegister(out));  break;
-      case Scalar::Float32x4: masm.movupsWithPatch(srcAddr, ToFloatRegister(out)); break;
-      case Scalar::Int32x4:   masm.movdquWithPatch(srcAddr, ToFloatRegister(out)); break;
+      case Scalar::Float32:   masm.vmovssWithPatch(srcAddr, ToFloatRegister(out));  break;
+      case Scalar::Float64:   masm.vmovsdWithPatch(srcAddr, ToFloatRegister(out));  break;
+      case Scalar::Float32x4: masm.vmovupsWithPatch(srcAddr, ToFloatRegister(out)); break;
+      case Scalar::Int32x4:   masm.vmovdquWithPatch(srcAddr, ToFloatRegister(out)); break;
       case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected type");
     }
 }
@@ -607,18 +607,18 @@ CodeGeneratorX86::visitAsmJSLoadGlobalVar(LAsmJSLoadGlobalVar *ins)
         label = masm.movlWithPatch(PatchedAbsoluteAddress(), ToRegister(ins->output()));
         break;
       case MIRType_Float32:
-        label = masm.movssWithPatch(PatchedAbsoluteAddress(), ToFloatRegister(ins->output()));
+        label = masm.vmovssWithPatch(PatchedAbsoluteAddress(), ToFloatRegister(ins->output()));
         break;
       case MIRType_Double:
-        label = masm.movsdWithPatch(PatchedAbsoluteAddress(), ToFloatRegister(ins->output()));
+        label = masm.vmovsdWithPatch(PatchedAbsoluteAddress(), ToFloatRegister(ins->output()));
         break;
       // Aligned access: code is aligned on PageSize + there is padding
       // before the global data section.
       case MIRType_Int32x4:
-        label = masm.movdqaWithPatch(PatchedAbsoluteAddress(), ToFloatRegister(ins->output()));
+        label = masm.vmovdqaWithPatch(PatchedAbsoluteAddress(), ToFloatRegister(ins->output()));
         break;
       case MIRType_Float32x4:
-        label = masm.movapsWithPatch(PatchedAbsoluteAddress(), ToFloatRegister(ins->output()));
+        label = masm.vmovapsWithPatch(PatchedAbsoluteAddress(), ToFloatRegister(ins->output()));
         break;
       default:
         MOZ_CRASH("unexpected type in visitAsmJSLoadGlobalVar");

diff --git a/js/src/jit/x86/MacroAssembler-x86.cpp b/js/src/jit/x86/MacroAssembler-x86.cpp
index dcc350faee24..5a56ee76bcc0 100644
--- a/js/src/jit/x86/MacroAssembler-x86.cpp
+++ b/js/src/jit/x86/MacroAssembler-x86.cpp
@@ -50,7 +50,7 @@ MacroAssemblerX86::loadConstantDouble(double d, FloatRegister dest)
     Double *dbl = getDouble(d);
     if (!dbl)
         return;
-    masm.movsd_mr(reinterpret_cast<const void *>(dbl->uses.prev()), dest.code());
+    masm.vmovsd_mr(reinterpret_cast<const void *>(dbl->uses.prev()), dest.code());
     dbl->uses.setPrev(masm.size());
 }

@@ -96,7 +96,7 @@ MacroAssemblerX86::loadConstantFloat32(float f, FloatRegister dest)
     Float *flt = getFloat(f);
     if (!flt)
         return;
-    masm.movss_mr(reinterpret_cast<const void *>(flt->uses.prev()), dest.code());
+    masm.vmovss_mr(reinterpret_cast<const void *>(flt->uses.prev()), dest.code());
     flt->uses.setPrev(masm.size());
 }

@@ -144,7 +144,7 @@ MacroAssemblerX86::loadConstantInt32x4(const SimdConstant &v, FloatRegister dest
     if (!i4)
         return;
     MOZ_ASSERT(i4->type() == SimdConstant::Int32x4);
-    masm.movdqa_mr(reinterpret_cast<const void *>(i4->uses.prev()), dest.code());
+    masm.vmovdqa_mr(reinterpret_cast<const void *>(i4->uses.prev()), dest.code());
     i4->uses.setPrev(masm.size());
 }

@@ -158,7 +158,7 @@ MacroAssemblerX86::loadConstantFloat32x4(const SimdConstant &v, FloatRegister de
     if (!f4)
         return;
     MOZ_ASSERT(f4->type() == SimdConstant::Float32x4);
-    masm.movaps_mr(reinterpret_cast<const void *>(f4->uses.prev()), dest.code());
+    masm.vmovaps_mr(reinterpret_cast<const void *>(f4->uses.prev()), dest.code());
     f4->uses.setPrev(masm.size());
 }
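
A minimal sketch of the dual-encoding pattern every vFOO helper in this patch
follows, kept outside the diff itself. With AVX enabled the helpers emit the
three-operand VEX form (dst = src0 op src1); a plain move has no second source,
so src0 is passed as X86Registers::invalid_xmm and the VEX.vvvv field goes
unused. Without AVX they fall back to the legacy two-operand SSE encoding,
which only works when src0 is absent or aliases dst, since SSE instructions
overwrite their first operand in place. The code below is an illustration under
those assumptions, not SpiderMonkey API: emitSimdOp and its enum are simplified,
hypothetical stand-ins for twoByteOpSimd() and XMMRegisterID, and it prints
assembly text where the real helpers encode machine code.

    #include <cassert>
    #include <cstdio>

    // Simplified stand-in for X86Registers::XMMRegisterID and invalid_xmm.
    enum XMMRegisterID { xmm0, xmm1, xmm2, invalid_xmm = -1 };

    void emitSimdOp(const char *name, XMMRegisterID src1, XMMRegisterID src0,
                    XMMRegisterID dst, bool hasAVX)
    {
        if (!hasAVX) {
            // Legacy SSE form: representable only when the output doubles as
            // src0 (cf. the useLegacySSEEncoding() checks in the patch).
            assert(src0 == invalid_xmm || src0 == dst);
            std::printf("%-11s%%xmm%d, %%xmm%d\n", name, src1, dst);
            return;
        }
        if (src0 == invalid_xmm) {
            // Move-style op: no second source, VEX.vvvv carries no register.
            std::printf("v%-10s%%xmm%d, %%xmm%d\n", name, src1, dst);
        } else {
            // Full three-operand AVX form: dst = src0 op src1.
            std::printf("v%-10s%%xmm%d, %%xmm%d, %%xmm%d\n", name, src1, src0, dst);
        }
    }

    int main()
    {
        emitSimdOp("pmuludq", xmm1, xmm2, xmm0, true);        // vpmuludq %xmm1, %xmm2, %xmm0
        emitSimdOp("movaps", xmm1, invalid_xmm, xmm0, true);  // vmovaps %xmm1, %xmm0
        emitSimdOp("pmuludq", xmm1, xmm0, xmm0, false);       // pmuludq %xmm1, %xmm0
        return 0;
    }

The third call is only legal because src0 aliases dst; that alias requirement
is exactly why the pre-AVX code paths above (e.g. in visitSimdBinaryArithIx4)
first copy an operand into the output or a scratch register before operating
on it.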