зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1115752 - SpiderMonkey: VEX encodings for instructions with leading immediates r=jandem
This commit is contained in:
Родитель
ba261c06cc
Коммит
e8c913ac57
|
@ -1706,7 +1706,7 @@ class MSimdShuffle : public MBinaryInstruction, public MSimdShuffleBase
|
|||
{
|
||||
// Swap operands so that new lanes come from LHS in majority.
|
||||
// In the balanced case, swap operands if needs be, in order to be able
|
||||
// to do only one shufps on x86.
|
||||
// to do only one vshufps on x86.
|
||||
unsigned lanesFromLHS = (laneX < 4) + (laneY < 4) + (laneZ < 4) + (laneW < 4);
|
||||
if (lanesFromLHS < 2 || (lanesFromLHS == 2 && laneX >= 4 && laneY >=4)) {
|
||||
laneX = (laneX + 4) % 8;
|
||||
|
|
|
@ -1496,75 +1496,75 @@ class AssemblerX86Shared : public AssemblerShared
|
|||
masm.divl_r(divisor.code());
|
||||
}
|
||||
|
||||
void pinsrd(unsigned lane, Register src, FloatRegister dest) {
|
||||
void vpinsrd(unsigned lane, Register src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
masm.pinsrd_irr(lane, src.code(), dest.code());
|
||||
masm.vpinsrd_irr(lane, src1.code(), src0.code(), dest.code());
|
||||
}
|
||||
void pinsrd(unsigned lane, const Operand &src, FloatRegister dest) {
|
||||
void vpinsrd(unsigned lane, const Operand &src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
switch (src.kind()) {
|
||||
switch (src1.kind()) {
|
||||
case Operand::REG:
|
||||
masm.pinsrd_irr(lane, src.reg(), dest.code());
|
||||
masm.vpinsrd_irr(lane, src1.reg(), src0.code(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.pinsrd_imr(lane, src.disp(), src.base(), dest.code());
|
||||
masm.vpinsrd_imr(lane, src1.disp(), src1.base(), src0.code(), dest.code());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
void pextrd(unsigned lane, FloatRegister src, Register dest) {
|
||||
void vpextrd(unsigned lane, FloatRegister src, Register dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
masm.pextrd_irr(lane, src.code(), dest.code());
|
||||
masm.vpextrd_irr(lane, src.code(), dest.code());
|
||||
}
|
||||
void pextrd(unsigned lane, FloatRegister src, const Operand &dest) {
|
||||
void vpextrd(unsigned lane, FloatRegister src, const Operand &dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
switch (dest.kind()) {
|
||||
case Operand::REG:
|
||||
masm.pextrd_irr(lane, src.code(), dest.reg());
|
||||
masm.vpextrd_irr(lane, src.code(), dest.reg());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.pextrd_imr(lane, src.code(), dest.disp(), dest.base());
|
||||
masm.vpextrd_irm(lane, src.code(), dest.disp(), dest.base());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
void psrldq(Imm32 shift, FloatRegister dest) {
|
||||
void vpsrldq(Imm32 shift, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.psrldq_ir(shift.value, dest.code());
|
||||
masm.vpsrldq_ir(shift.value, src0.code(), dest.code());
|
||||
}
|
||||
void psllq(Imm32 shift, FloatRegister dest) {
|
||||
void vpsllq(Imm32 shift, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.psllq_ir(shift.value, dest.code());
|
||||
masm.vpsllq_ir(shift.value, src0.code(), dest.code());
|
||||
}
|
||||
void psrlq(Imm32 shift, FloatRegister dest) {
|
||||
void vpsrlq(Imm32 shift, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.psrlq_ir(shift.value, dest.code());
|
||||
masm.vpsrlq_ir(shift.value, src0.code(), dest.code());
|
||||
}
|
||||
void vpslld(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.vpslld_rr(src1.code(), src0.code(), dest.code());
|
||||
}
|
||||
void pslld(Imm32 count, FloatRegister dest) {
|
||||
void vpslld(Imm32 count, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.pslld_ir(count.value, dest.code());
|
||||
masm.vpslld_ir(count.value, src0.code(), dest.code());
|
||||
}
|
||||
void vpsrad(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.vpsrad_rr(src1.code(), src0.code(), dest.code());
|
||||
}
|
||||
void psrad(Imm32 count, FloatRegister dest) {
|
||||
void vpsrad(Imm32 count, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.psrad_ir(count.value, dest.code());
|
||||
masm.vpsrad_ir(count.value, src0.code(), dest.code());
|
||||
}
|
||||
void vpsrld(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.vpsrld_rr(src1.code(), src0.code(), dest.code());
|
||||
}
|
||||
void psrld(Imm32 count, FloatRegister dest) {
|
||||
void vpsrld(Imm32 count, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.psrld_ir(count.value, dest.code());
|
||||
masm.vpsrld_ir(count.value, src0.code(), dest.code());
|
||||
}
|
||||
|
||||
void vcvtsi2sd(const Operand &src1, FloatRegister src0, FloatRegister dest) {
|
||||
|
@ -2073,21 +2073,21 @@ class AssemblerX86Shared : public AssemblerShared
|
|||
}
|
||||
}
|
||||
|
||||
void pshufd(uint32_t mask, FloatRegister src, FloatRegister dest) {
|
||||
void vpshufd(uint32_t mask, FloatRegister src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.pshufd_irr(mask, src.code(), dest.code());
|
||||
masm.vpshufd_irr(mask, src.code(), dest.code());
|
||||
}
|
||||
void pshufd(uint32_t mask, const Operand &src, FloatRegister dest) {
|
||||
void vpshufd(uint32_t mask, const Operand &src1, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src.kind()) {
|
||||
switch (src1.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.pshufd_irr(mask, src.fpu(), dest.code());
|
||||
masm.vpshufd_irr(mask, src1.fpu(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.pshufd_imr(mask, src.disp(), src.base(), dest.code());
|
||||
masm.vpshufd_imr(mask, src1.disp(), src1.base(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_ADDRESS32:
|
||||
masm.pshufd_imr(mask, src.address(), dest.code());
|
||||
masm.vpshufd_imr(mask, src1.address(), dest.code());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
|
@ -2109,21 +2109,21 @@ class AssemblerX86Shared : public AssemblerShared
|
|||
MOZ_ASSERT(HasSSE2());
|
||||
masm.vunpckhps_rr(src1.code(), src0.code(), dest.code());
|
||||
}
|
||||
void shufps(uint32_t mask, FloatRegister src, FloatRegister dest) {
|
||||
void vshufps(uint32_t mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.shufps_irr(mask, src.code(), dest.code());
|
||||
masm.vshufps_irr(mask, src1.code(), src0.code(), dest.code());
|
||||
}
|
||||
void shufps(uint32_t mask, const Operand &src, FloatRegister dest) {
|
||||
void vshufps(uint32_t mask, const Operand &src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src.kind()) {
|
||||
switch (src1.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.shufps_irr(mask, src.fpu(), dest.code());
|
||||
masm.vshufps_irr(mask, src1.fpu(), src0.code(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.shufps_imr(mask, src.disp(), src.base(), dest.code());
|
||||
masm.vshufps_imr(mask, src1.disp(), src1.base(), src0.code(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_ADDRESS32:
|
||||
masm.shufps_imr(mask, src.address(), dest.code());
|
||||
masm.vshufps_imr(mask, src1.address(), src0.code(), dest.code());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
|
@ -2303,15 +2303,15 @@ class AssemblerX86Shared : public AssemblerShared
|
|||
MOZ_ASSERT(HasSSE2());
|
||||
masm.vsqrtss_rr(src1.code(), src0.code(), dest.code());
|
||||
}
|
||||
void roundsd(X86Assembler::RoundingMode mode, FloatRegister src, FloatRegister dest) {
|
||||
void vroundsd(X86Assembler::RoundingMode mode, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
masm.roundsd_rr(mode, src.code(), dest.code());
|
||||
masm.vroundsd_irr(mode, src1.code(), src0.code(), dest.code());
|
||||
}
|
||||
void roundss(X86Assembler::RoundingMode mode, FloatRegister src, FloatRegister dest) {
|
||||
void vroundss(X86Assembler::RoundingMode mode, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
masm.roundss_rr(mode, src.code(), dest.code());
|
||||
masm.vroundss_irr(mode, src1.code(), src0.code(), dest.code());
|
||||
}
|
||||
unsigned insertpsMask(SimdLane sourceLane, SimdLane destLane, unsigned zeroMask = 0)
|
||||
unsigned vinsertpsMask(SimdLane sourceLane, SimdLane destLane, unsigned zeroMask = 0)
|
||||
{
|
||||
// Note that the sourceLane bits are ignored in the case of a source
|
||||
// memory operand, and the source is the given 32-bits memory location.
|
||||
|
@ -2322,9 +2322,9 @@ class AssemblerX86Shared : public AssemblerShared
|
|||
MOZ_ASSERT(ret < 256);
|
||||
return ret;
|
||||
}
|
||||
void insertps(FloatRegister src, FloatRegister dest, unsigned mask) {
|
||||
void vinsertps(uint32_t mask, FloatRegister src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
masm.insertps_irr(mask, src.code(), dest.code());
|
||||
masm.vinsertps_irr(mask, src1.code(), src0.code(), dest.code());
|
||||
}
|
||||
unsigned blendpsMask(bool x, bool y, bool z, bool w) {
|
||||
return x | (y << 1) | (z << 2) | (w << 3);
|
||||
|
|
|
@ -332,6 +332,15 @@ private:
|
|||
OP_GROUP5_Ev = 0xFF
|
||||
};
|
||||
|
||||
enum ShiftID {
|
||||
Shift_vpsrld = 2,
|
||||
Shift_vpsrlq = 2,
|
||||
Shift_vpsrldq = 3,
|
||||
Shift_vpsrad = 4,
|
||||
Shift_vpslld = 6,
|
||||
Shift_vpsllq = 6
|
||||
};
|
||||
|
||||
enum TwoByteOpcodeID {
|
||||
OP2_UD2 = 0x0B,
|
||||
OP2_MOVSD_VsdWsd = 0x10,
|
||||
|
@ -428,6 +437,7 @@ private:
|
|||
case OP2_MOVSD_WsdVsd: // also OP2_MOVPS_WpsVps
|
||||
case OP2_MOVAPS_WsdVsd:
|
||||
case OP2_MOVDQ_WdqVdq:
|
||||
case OP3_PEXTRD_EdVdqIb:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
|
@ -2755,18 +2765,15 @@ public:
|
|||
|
||||
void vcmpps_rr(uint8_t order, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, src1, src0, dst);
|
||||
m_formatter.immediate8s(order);
|
||||
twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, src1, src0, dst);
|
||||
}
|
||||
void vcmpps_mr(uint8_t order, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, offset, base, src0, dst);
|
||||
m_formatter.immediate8s(order);
|
||||
twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, offset, base, src0, dst);
|
||||
}
|
||||
void vcmpps_mr(uint8_t order, const void* address, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, address, src0, dst);
|
||||
m_formatter.immediate8s(order);
|
||||
twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, address, src0, dst);
|
||||
}
|
||||
|
||||
void vrcpps_rr(XMMRegisterID src, XMMRegisterID dst) {
|
||||
|
@ -2977,50 +2984,30 @@ public:
|
|||
twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, address, src0, dst);
|
||||
}
|
||||
|
||||
void pshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
|
||||
void vpshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("pshufd $0x%x, %s, %s", mask, nameFPReg(src), nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, (RegisterID)src, (RegisterID)dst);
|
||||
m_formatter.immediate8u(mask);
|
||||
twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, src, X86Registers::invalid_xmm, dst);
|
||||
}
|
||||
void vpshufd_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, offset, base, X86Registers::invalid_xmm, dst);
|
||||
}
|
||||
void vpshufd_imr(uint32_t mask, const void* address, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, address, X86Registers::invalid_xmm, dst);
|
||||
}
|
||||
|
||||
void pshufd_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID dst)
|
||||
void vshufps_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(mask < 256);
|
||||
spew("pshufd $0x%x, " MEM_ob ", %s", mask, ADDR_ob(offset, base), nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, offset, base, (RegisterID)dst);
|
||||
m_formatter.immediate8u(mask);
|
||||
twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, src1, src0, dst);
|
||||
}
|
||||
|
||||
void pshufd_imr(uint32_t mask, const void* address, XMMRegisterID dst)
|
||||
void vshufps_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
spew("pshufd $0x%x, %p, %s", mask, address, nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, address, (RegisterID)dst);
|
||||
m_formatter.immediate8u(mask);
|
||||
twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, offset, base, src0, dst);
|
||||
}
|
||||
|
||||
void shufps_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
|
||||
void vshufps_imr(uint32_t mask, const void* address, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
spew("shufps $0x%x, %s, %s", mask, nameFPReg(src), nameFPReg(dst));
|
||||
m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, (RegisterID)src, (RegisterID)dst);
|
||||
m_formatter.immediate8u(mask);
|
||||
}
|
||||
|
||||
void shufps_imr(uint32_t mask, int32_t offset, RegisterID base, XMMRegisterID dst)
|
||||
{
|
||||
spew("shufps $0x%x, " MEM_ob ", %s", mask, ADDR_ob(offset, base), nameFPReg(dst));
|
||||
m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, offset, base, (RegisterID)dst);
|
||||
m_formatter.immediate8u(mask);
|
||||
}
|
||||
|
||||
void shufps_imr(uint32_t mask, const void* address, XMMRegisterID dst)
|
||||
{
|
||||
spew("shufps $0x%x, %p, %s", mask, address, nameFPReg(dst));
|
||||
m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, address, (RegisterID)dst);
|
||||
m_formatter.immediate8u(mask);
|
||||
twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, address, src0, dst);
|
||||
}
|
||||
|
||||
void vmovhlps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
|
@ -3033,28 +3020,22 @@ public:
|
|||
twoByteOpSimd("vmovlhps", VEX_PS, OP2_MOVLHPS_VqUq, src1, src0, dst);
|
||||
}
|
||||
|
||||
void psrldq_ir(int shift, XMMRegisterID dest)
|
||||
void vpsrldq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("psrldq $%d, %s", shift, nameFPReg(dest));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSRLDQ_Vd, (RegisterID)dest, (RegisterID)3);
|
||||
m_formatter.immediate8s(shift);
|
||||
MOZ_ASSERT(count < 16);
|
||||
shiftOpImmSimd("vpsrldq", OP2_PSRLDQ_Vd, Shift_vpsrldq, count, src, dst);
|
||||
}
|
||||
|
||||
void psllq_ir(int shift, XMMRegisterID dest)
|
||||
void vpsllq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("psllq $%d, %s", shift, nameFPReg(dest));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSRLDQ_Vd, (RegisterID)dest, (RegisterID)6);
|
||||
m_formatter.immediate8s(shift);
|
||||
MOZ_ASSERT(count < 64);
|
||||
shiftOpImmSimd("vpsllq", OP2_PSRLDQ_Vd, Shift_vpsllq, count, src, dst);
|
||||
}
|
||||
|
||||
void psrlq_ir(int shift, XMMRegisterID dest)
|
||||
void vpsrlq_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("psrlq $%d, %s", shift, nameFPReg(dest));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSRLDQ_Vd, (RegisterID)dest, (RegisterID)2);
|
||||
m_formatter.immediate8s(shift);
|
||||
MOZ_ASSERT(count < 64);
|
||||
shiftOpImmSimd("vpsrlq", OP2_PSRLDQ_Vd, Shift_vpsrlq, count, src, dst);
|
||||
}
|
||||
|
||||
void vpslld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
|
@ -3062,12 +3043,10 @@ public:
|
|||
twoByteOpSimd("vpslld", VEX_PD, OP2_PSLLD_VdqWdq, src1, src0, dst);
|
||||
}
|
||||
|
||||
void pslld_ir(int32_t count, XMMRegisterID dst)
|
||||
void vpslld_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("pslld $%d, %s", count, nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSLLD_UdqIb, (RegisterID)dst, (RegisterID)6);
|
||||
m_formatter.immediate8s(int8_t(count));
|
||||
MOZ_ASSERT(count < 32);
|
||||
shiftOpImmSimd("vpslld", OP2_PSLLD_UdqIb, Shift_vpslld, count, src, dst);
|
||||
}
|
||||
|
||||
void vpsrad_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
|
@ -3075,12 +3054,10 @@ public:
|
|||
twoByteOpSimd("vpsrad", VEX_PD, OP2_PSRAD_VdqWdq, src1, src0, dst);
|
||||
}
|
||||
|
||||
void psrad_ir(int32_t count, XMMRegisterID dst)
|
||||
void vpsrad_ir(int32_t count, XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("psrad $%d, %s", count, nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSRAD_UdqIb, (RegisterID)dst, (RegisterID)4);
|
||||
m_formatter.immediate8s(int8_t(count));
|
||||
MOZ_ASSERT(count < 32);
|
||||
shiftOpImmSimd("vpsrad", OP2_PSRAD_UdqIb, Shift_vpsrad, count, src, dst);
|
||||
}
|
||||
|
||||
void vpsrld_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
|
@ -3088,12 +3065,10 @@ public:
|
|||
twoByteOpSimd("vpsrld", VEX_PD, OP2_PSRLD_VdqWdq, src1, src0, dst);
|
||||
}
|
||||
|
||||
void psrld_ir(int32_t count, XMMRegisterID dst)
|
||||
void vpsrld_ir(uint32_t count, XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("psrld $%d, %s", count, nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSRLD_UdqIb, (RegisterID)dst, (RegisterID)2);
|
||||
m_formatter.immediate8s(int8_t(count));
|
||||
MOZ_ASSERT(count < 32);
|
||||
shiftOpImmSimd("vpsrld", OP2_PSRLD_UdqIb, Shift_vpsrld, count, src, dst);
|
||||
}
|
||||
|
||||
void vmovmskpd_rr(XMMRegisterID src, RegisterID dst)
|
||||
|
@ -3432,12 +3407,10 @@ public:
|
|||
twoByteOpSimd("vmulss", VEX_SS, OP2_MULSD_VsdWsd, offset, base, src0, dst);
|
||||
}
|
||||
|
||||
void pextrw_irr(int whichWord, XMMRegisterID src, RegisterID dst)
|
||||
void vpextrw_irr(uint32_t whichWord, XMMRegisterID src, RegisterID dst)
|
||||
{
|
||||
FIXME_INSN_PRINTING;
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PEXTRW_GdUdIb, (RegisterID)src, (RegisterID)dst);
|
||||
m_formatter.immediate8(whichWord);
|
||||
MOZ_ASSERT(whichWord < 8);
|
||||
twoByteOpImmSimdInt32("vpextrw", VEX_PD, OP2_PEXTRW_GdUdIb, whichWord, src, dst);
|
||||
}
|
||||
|
||||
void vsubsd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
|
@ -3580,58 +3553,40 @@ public:
|
|||
twoByteOpSimd("vsqrtss", VEX_SS, OP2_SQRTSS_VssWss, src1, src0, dst);
|
||||
}
|
||||
|
||||
void roundsd_rr(RoundingMode mode, XMMRegisterID src, XMMRegisterID dst)
|
||||
void vroundsd_irr(RoundingMode mode, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
spew("roundsd $%d, %s, %s", (int)mode, nameFPReg(src), nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.threeByteOp(OP3_ROUNDSD_VsdWsd, ESCAPE_ROUNDSD, (RegisterID)src, (RegisterID)dst);
|
||||
m_formatter.immediate8u(mode);
|
||||
threeByteOpImmSimd("vroundsd", VEX_PD, OP3_ROUNDSD_VsdWsd, ESCAPE_ROUNDSD, mode, src1, src0, dst);
|
||||
}
|
||||
|
||||
void roundss_rr(RoundingMode mode, XMMRegisterID src, XMMRegisterID dst)
|
||||
void vroundss_irr(RoundingMode mode, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
spew("roundss $%d, %s, %s", (int)mode, nameFPReg(src), nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.threeByteOp(OP3_ROUNDSS_VsdWsd, ESCAPE_ROUNDSD, (RegisterID)src, (RegisterID)dst);
|
||||
m_formatter.immediate8(mode); // modes are the same for roundsd and roundss
|
||||
threeByteOpImmSimd("vroundss", VEX_PD, OP3_ROUNDSS_VsdWsd, ESCAPE_ROUNDSD, mode, src1, src0, dst);
|
||||
}
|
||||
|
||||
void insertps_irr(unsigned mask, XMMRegisterID src, XMMRegisterID dst)
|
||||
void vinsertps_irr(uint32_t mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
spew("insertps $0x%x, %s, %s", mask, nameFPReg(src), nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.threeByteOp(OP3_INSERTPS_VpsUps, ESCAPE_INSERTPS, (RegisterID)src, (RegisterID)dst);
|
||||
m_formatter.immediate8u(mask);
|
||||
threeByteOpImmSimd("vinsertps", VEX_PD, OP3_INSERTPS_VpsUps, ESCAPE_INSERTPS, mask, src1, src0, dst);
|
||||
}
|
||||
|
||||
void pinsrd_irr(unsigned lane, RegisterID src, XMMRegisterID dst)
|
||||
void vpinsrd_irr(unsigned lane, RegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(lane < 4);
|
||||
spew("pinsrd $0x%x, %s, %s", lane, nameIReg(4, src), nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.threeByteOp(OP3_PINSRD_VdqEdIb, ESCAPE_PINSRD, (RegisterID)src, (RegisterID)dst);
|
||||
m_formatter.immediate8u(lane);
|
||||
threeByteOpImmInt32Simd("vpinsrd", VEX_PD, OP3_PINSRD_VdqEdIb, ESCAPE_PINSRD, lane, src1, src0, dst);
|
||||
}
|
||||
|
||||
void pinsrd_imr(unsigned lane, int32_t offset, RegisterID base, XMMRegisterID dst)
|
||||
void vpinsrd_imr(unsigned lane, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(lane < 4);
|
||||
spew("pinsrd $0x%x, " MEM_ob ", %s", lane, ADDR_ob(offset, base), nameFPReg(dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.threeByteOp(OP3_PINSRD_VdqEdIb, ESCAPE_PINSRD, offset, base, (RegisterID)dst);
|
||||
m_formatter.immediate8u(lane);
|
||||
threeByteOpImmInt32Simd("vpinsrd", VEX_PD, OP3_PINSRD_VdqEdIb, ESCAPE_PINSRD, lane, offset, base, src0, dst);
|
||||
}
|
||||
|
||||
void pextrd_irr(unsigned lane, XMMRegisterID src, RegisterID dst)
|
||||
void vpextrd_irr(unsigned lane, XMMRegisterID src, RegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(lane < 4);
|
||||
spew("pextrd $0x%x, %s, %s", lane, nameFPReg(src), nameIReg(4, dst));
|
||||
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.threeByteOp(OP3_PEXTRD_EdVdqIb, ESCAPE_PEXTRD, (RegisterID)dst, (RegisterID)src);
|
||||
m_formatter.immediate8u(lane);
|
||||
threeByteOpImmSimdInt32("vpextrd", VEX_PD, OP3_PEXTRD_EdVdqIb, ESCAPE_PEXTRD, lane, (XMMRegisterID)dst, (RegisterID)src);
|
||||
}
|
||||
|
||||
void pextrd_imr(unsigned lane, XMMRegisterID src, int32_t offset, RegisterID base)
|
||||
void vpextrd_irm(unsigned lane, XMMRegisterID src, int32_t offset, RegisterID base)
|
||||
{
|
||||
MOZ_ASSERT(lane < 4);
|
||||
spew("pextrd $0x%x, %s, " MEM_ob, lane, nameFPReg(src), ADDR_ob(offset, base));
|
||||
|
@ -3644,16 +3599,14 @@ public:
|
|||
{
|
||||
MOZ_ASSERT(imm < 16);
|
||||
// Despite being a "ps" instruction, vblendps is encoded with the "pd" prefix.
|
||||
threeByteOpSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, src1, src0, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, imm, src1, src0, dst);
|
||||
}
|
||||
|
||||
void vblendps_imr(unsigned imm, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(imm < 16);
|
||||
// Despite being a "ps" instruction, vblendps is encoded with the "pd" prefix.
|
||||
threeByteOpSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, offset, base, src0, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_BLENDPS, imm, offset, base, src0, dst);
|
||||
}
|
||||
|
||||
void vblendvps_rr(XMMRegisterID mask, XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
|
||||
|
@ -4160,6 +4113,25 @@ private:
|
|||
m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, src0, dst);
|
||||
}
|
||||
|
||||
void twoByteOpImmSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
|
||||
uint32_t imm, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncoding(src0, dst)) {
|
||||
spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, nameFPReg(rm), nameFPReg(dst));
|
||||
m_formatter.legacySSEPrefix(ty);
|
||||
m_formatter.twoByteOp(opcode, (RegisterID)rm, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
if (src0 == X86Registers::invalid_xmm)
|
||||
spew("%-11s$0x%x, %s, %s", name, imm, nameFPReg(rm), nameFPReg(dst));
|
||||
else
|
||||
spew("%-11s$0x%x, %s, %s, %s", name, imm, nameFPReg(rm), nameFPReg(src0), nameFPReg(dst));
|
||||
m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, src0, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
void twoByteOpSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
|
||||
int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
|
@ -4215,6 +4187,24 @@ private:
|
|||
m_formatter.twoByteOpVex_disp32(ty, opcode, offset, base, src0, dst);
|
||||
}
|
||||
|
||||
void twoByteOpImmSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
|
||||
uint32_t imm, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncoding(src0, dst)) {
|
||||
spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm,
|
||||
ADDR_ob(offset, base), nameFPReg(dst));
|
||||
m_formatter.legacySSEPrefix(ty);
|
||||
m_formatter.twoByteOp(opcode, offset, base, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base),
|
||||
nameFPReg(src0), nameFPReg(dst));
|
||||
m_formatter.twoByteOpVex(ty, opcode, offset, base, src0, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
void twoByteOpSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
|
||||
int32_t offset, RegisterID base, RegisterID index, int scale,
|
||||
XMMRegisterID src0, XMMRegisterID dst)
|
||||
|
@ -4271,6 +4261,22 @@ private:
|
|||
m_formatter.twoByteOpVex(ty, opcode, address, src0, dst);
|
||||
}
|
||||
|
||||
void twoByteOpImmSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
|
||||
uint32_t imm, const void *address, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncoding(src0, dst)) {
|
||||
spew("%-11s$0x%x, %p, %s", legacySSEOpName(name), imm, address, nameFPReg(dst));
|
||||
m_formatter.legacySSEPrefix(ty);
|
||||
m_formatter.twoByteOp(opcode, address, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
spew("%-11s$0x%x, %p, %s, %s", name, imm, address, nameFPReg(src0), nameFPReg(dst));
|
||||
m_formatter.twoByteOpVex(ty, opcode, address, src0, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
void twoByteOpInt32Simd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
|
||||
RegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
|
@ -4345,6 +4351,22 @@ private:
|
|||
m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, X86Registers::invalid_xmm, dst);
|
||||
}
|
||||
|
||||
void twoByteOpImmSimdInt32(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
|
||||
uint32_t imm, XMMRegisterID rm, RegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncodingForOtherOutput()) {
|
||||
spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, nameFPReg(rm), nameIReg(4, dst));
|
||||
m_formatter.legacySSEPrefix(ty);
|
||||
m_formatter.twoByteOp(opcode, (RegisterID)rm, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
spew("%-11s$0x%x, %s, %s", name, imm, nameFPReg(rm), nameIReg(4, dst));
|
||||
m_formatter.twoByteOpVex(ty, opcode, (RegisterID)rm, X86Registers::invalid_xmm, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
#ifdef JS_CODEGEN_X64
|
||||
void twoByteOpSimdInt64(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
|
||||
XMMRegisterID rm, RegisterID dst)
|
||||
|
@ -4416,6 +4438,23 @@ private:
|
|||
m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)rm, src0, dst);
|
||||
}
|
||||
|
||||
void threeByteOpImmSimd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
|
||||
ThreeByteEscape escape,
|
||||
uint32_t imm, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncoding(src0, dst)) {
|
||||
spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, nameFPReg(rm), nameFPReg(dst));
|
||||
m_formatter.legacySSEPrefix(ty);
|
||||
m_formatter.threeByteOp(opcode, escape, (RegisterID)rm, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
spew("%-11s$0x%x, %s, %s, %s", name, imm, nameFPReg(rm), nameFPReg(src0), nameFPReg(dst));
|
||||
m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)rm, src0, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
void threeByteOpSimd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
|
||||
ThreeByteEscape escape,
|
||||
int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
|
@ -4433,6 +4472,25 @@ private:
|
|||
m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst);
|
||||
}
|
||||
|
||||
void threeByteOpImmSimd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
|
||||
ThreeByteEscape escape,
|
||||
uint32_t imm, int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncoding(src0, dst)) {
|
||||
spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm,
|
||||
ADDR_ob(offset, base), nameFPReg(dst));
|
||||
m_formatter.legacySSEPrefix(ty);
|
||||
m_formatter.threeByteOp(opcode, escape, offset, base, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base),
|
||||
nameFPReg(src0), nameFPReg(dst));
|
||||
m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
void threeByteOpSimd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
|
||||
ThreeByteEscape escape,
|
||||
const void *address, XMMRegisterID src0, XMMRegisterID dst)
|
||||
|
@ -4448,6 +4506,77 @@ private:
|
|||
m_formatter.threeByteOpVex(ty, opcode, escape, address, src0, dst);
|
||||
}
|
||||
|
||||
void threeByteOpImmInt32Simd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
|
||||
ThreeByteEscape escape, uint32_t imm,
|
||||
RegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncoding(src0, dst)) {
|
||||
spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, nameIReg(4, src1), nameFPReg(dst));
|
||||
m_formatter.legacySSEPrefix(ty);
|
||||
m_formatter.threeByteOp(opcode, escape, src1, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
spew("%-11s$0x%x, %s, %s, %s", name, imm, nameIReg(4, src1), nameFPReg(src0), nameFPReg(dst));
|
||||
m_formatter.threeByteOpVex(ty, opcode, escape, src1, src0, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
void threeByteOpImmInt32Simd(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
|
||||
ThreeByteEscape escape, uint32_t imm,
|
||||
int32_t offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncoding(src0, dst)) {
|
||||
spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, ADDR_ob(offset, base), nameFPReg(dst));
|
||||
m_formatter.legacySSEPrefix(ty);
|
||||
m_formatter.threeByteOp(opcode, escape, offset, base, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
spew("%-11s$0x%x, " MEM_ob ", %s, %s", name, imm, ADDR_ob(offset, base), nameFPReg(src0), nameFPReg(dst));
|
||||
m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, src0, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
void threeByteOpImmSimdInt32(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
|
||||
ThreeByteEscape escape, uint32_t imm,
|
||||
XMMRegisterID src, RegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncodingForOtherOutput()) {
|
||||
spew("%-11s$0x%x, %s, %s", legacySSEOpName(name), imm, nameFPReg(src), nameIReg(4, dst));
|
||||
m_formatter.legacySSEPrefix(ty);
|
||||
m_formatter.threeByteOp(opcode, escape, (RegisterID)src, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode == OP3_PEXTRD_EdVdqIb)
|
||||
spew("%-11s$0x%x, %s, %s", name, imm, nameFPReg((XMMRegisterID)dst), nameIReg(4, (RegisterID)src));
|
||||
else
|
||||
spew("%-11s$0x%x, %s, %s", name, imm, nameFPReg(src), nameIReg(4, dst));
|
||||
m_formatter.threeByteOpVex(ty, opcode, escape, (RegisterID)src, X86Registers::invalid_xmm, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
void threeByteOpImmSimdInt32(const char *name, VexOperandType ty, ThreeByteOpcodeID opcode,
|
||||
ThreeByteEscape escape, uint32_t imm,
|
||||
int32_t offset, RegisterID base, RegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncodingForOtherOutput()) {
|
||||
spew("%-11s$0x%x, " MEM_ob ", %s", legacySSEOpName(name), imm, ADDR_ob(offset, base), nameIReg(4, dst));
|
||||
m_formatter.legacySSEPrefix(ty);
|
||||
m_formatter.threeByteOp(opcode, escape, offset, base, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
spew("%-11s$0x%x, " MEM_ob ", %s", name, imm, ADDR_ob(offset, base), nameIReg(4, dst));
|
||||
m_formatter.threeByteOpVex(ty, opcode, escape, offset, base, X86Registers::invalid_xmm, dst);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
// Blendv is a three-byte op, but the VEX encoding has a different opcode
|
||||
// than the SSE encoding, so we handle it specially.
|
||||
void vblendvOpSimd(XMMRegisterID mask, XMMRegisterID rm, XMMRegisterID src0, XMMRegisterID dst)
|
||||
|
@ -4484,6 +4613,22 @@ private:
|
|||
mask, offset, base, src0, dst);
|
||||
}
|
||||
|
||||
void shiftOpImmSimd(const char *name, TwoByteOpcodeID opcode, ShiftID shiftKind,
|
||||
uint32_t imm, XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
if (useLegacySSEEncoding(src, dst)) {
|
||||
spew("%-11s$%d, %s", legacySSEOpName(name), imm, nameFPReg(dst));
|
||||
m_formatter.legacySSEPrefix(VEX_PD);
|
||||
m_formatter.twoByteOp(opcode, (RegisterID)dst, (int)shiftKind);
|
||||
m_formatter.immediate8u(imm);
|
||||
return;
|
||||
}
|
||||
|
||||
spew("%-11s$%d, %s, %s", name, imm, nameFPReg(src), nameFPReg(dst));
|
||||
m_formatter.twoByteOpVex(VEX_PD, opcode, (RegisterID)dst, src, (int)shiftKind);
|
||||
m_formatter.immediate8u(imm);
|
||||
}
|
||||
|
||||
static int32_t getInt32(void* where)
|
||||
{
|
||||
return reinterpret_cast<int32_t*>(where)[-1];
|
||||
|
|
|
@ -1596,7 +1596,7 @@ CodeGeneratorX86Shared::visitFloor(LFloor *lir)
|
|||
bailoutFrom(&bailout, lir->snapshot());
|
||||
|
||||
// Round toward -Infinity.
|
||||
masm.roundsd(X86Assembler::RoundDown, input, scratch);
|
||||
masm.vroundsd(X86Assembler::RoundDown, input, scratch, scratch);
|
||||
|
||||
bailoutCvttsd2si(scratch, output, lir->snapshot());
|
||||
} else {
|
||||
|
@ -1653,7 +1653,7 @@ CodeGeneratorX86Shared::visitFloorF(LFloorF *lir)
|
|||
bailoutFrom(&bailout, lir->snapshot());
|
||||
|
||||
// Round toward -Infinity.
|
||||
masm.roundss(X86Assembler::RoundDown, input, scratch);
|
||||
masm.vroundss(X86Assembler::RoundDown, input, scratch, scratch);
|
||||
|
||||
bailoutCvttss2si(scratch, output, lir->snapshot());
|
||||
} else {
|
||||
|
@ -1718,7 +1718,7 @@ CodeGeneratorX86Shared::visitCeil(LCeil *lir)
|
|||
// x <= -1 or x > -0
|
||||
masm.bind(&lessThanMinusOne);
|
||||
// Round toward +Infinity.
|
||||
masm.roundsd(X86Assembler::RoundUp, input, scratch);
|
||||
masm.vroundsd(X86Assembler::RoundUp, input, scratch, scratch);
|
||||
bailoutCvttsd2si(scratch, output, lir->snapshot());
|
||||
return;
|
||||
}
|
||||
|
@ -1770,7 +1770,7 @@ CodeGeneratorX86Shared::visitCeilF(LCeilF *lir)
|
|||
// x <= -1 or x > -0
|
||||
masm.bind(&lessThanMinusOne);
|
||||
// Round toward +Infinity.
|
||||
masm.roundss(X86Assembler::RoundUp, input, scratch);
|
||||
masm.vroundss(X86Assembler::RoundUp, input, scratch, scratch);
|
||||
bailoutCvttss2si(scratch, output, lir->snapshot());
|
||||
return;
|
||||
}
|
||||
|
@ -1845,7 +1845,7 @@ CodeGeneratorX86Shared::visitRound(LRound *lir)
|
|||
// Add 0.5 and round toward -Infinity. The result is stored in the temp
|
||||
// register (currently contains 0.5).
|
||||
masm.addDouble(input, temp);
|
||||
masm.roundsd(X86Assembler::RoundDown, temp, scratch);
|
||||
masm.vroundsd(X86Assembler::RoundDown, temp, scratch, scratch);
|
||||
|
||||
// Truncate.
|
||||
bailoutCvttsd2si(scratch, output, lir->snapshot());
|
||||
|
@ -1928,7 +1928,7 @@ CodeGeneratorX86Shared::visitRoundF(LRoundF *lir)
|
|||
// Add 0.5 and round toward -Infinity. The result is stored in the temp
|
||||
// register (currently contains 0.5).
|
||||
masm.addFloat32(input, temp);
|
||||
masm.roundss(X86Assembler::RoundDown, temp, scratch);
|
||||
masm.vroundss(X86Assembler::RoundDown, temp, scratch, scratch);
|
||||
|
||||
// Truncate.
|
||||
bailoutCvttss2si(scratch, output, lir->snapshot());
|
||||
|
@ -2093,7 +2093,7 @@ CodeGeneratorX86Shared::visitSimdValueInt32x4(LSimdValueInt32x4 *ins)
|
|||
masm.vmovd(ToRegister(ins->getOperand(0)), output);
|
||||
for (size_t i = 1; i < 4; ++i) {
|
||||
Register r = ToRegister(ins->getOperand(i));
|
||||
masm.pinsrd(i, r, output);
|
||||
masm.vpinsrd(i, r, output, output);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -2140,14 +2140,14 @@ CodeGeneratorX86Shared::visitSimdSplatX4(LSimdSplatX4 *ins)
|
|||
case MIRType_Int32x4: {
|
||||
Register r = ToRegister(ins->getOperand(0));
|
||||
masm.vmovd(r, output);
|
||||
masm.pshufd(0, output, output);
|
||||
masm.vpshufd(0, output, output);
|
||||
break;
|
||||
}
|
||||
case MIRType_Float32x4: {
|
||||
FloatRegister r = ToFloatRegister(ins->getOperand(0));
|
||||
if (r != output)
|
||||
masm.moveFloat32x4(r, output);
|
||||
masm.shufps(0, output, output);
|
||||
masm.vshufps(0, output, output, output);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -2166,7 +2166,7 @@ CodeGeneratorX86Shared::visitSimdExtractElementI(LSimdExtractElementI *ins)
|
|||
// The value we want to extract is in the low double-word
|
||||
masm.moveLowInt32(input, output);
|
||||
} else if (AssemblerX86Shared::HasSSE41()) {
|
||||
masm.pextrd(lane, input, output);
|
||||
masm.vpextrd(lane, input, output);
|
||||
} else {
|
||||
uint32_t mask = MacroAssembler::ComputeShuffleMask(lane);
|
||||
masm.shuffleInt32(mask, input, ScratchSimdReg);
|
||||
|
@ -2208,7 +2208,8 @@ CodeGeneratorX86Shared::visitSimdInsertElementI(LSimdInsertElementI *ins)
|
|||
// value goes into the first component, as vmovd clears out the higher lanes
|
||||
// of the output.
|
||||
if (AssemblerX86Shared::HasSSE41()) {
|
||||
masm.pinsrd(component, value, output);
|
||||
// TODO: Teach Lowering that we don't need defineReuseInput if we have AVX.
|
||||
masm.vpinsrd(component, value, vector, output);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2237,7 +2238,7 @@ CodeGeneratorX86Shared::visitSimdInsertElementF(LSimdInsertElementF *ins)
|
|||
|
||||
if (AssemblerX86Shared::HasSSE41()) {
|
||||
// The input value is in the low float32 of the 'value' FloatRegister.
|
||||
masm.insertps(value, output, masm.insertpsMask(SimdLane::LaneX, ins->lane()));
|
||||
masm.vinsertps(masm.vinsertpsMask(SimdLane::LaneX, ins->lane()), value, output, output);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2343,7 +2344,7 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
|
|||
unsigned numLanesFromLHS = (x < 4) + (y < 4) + (z < 4) + (w < 4);
|
||||
MOZ_ASSERT(numLanesFromLHS >= 2);
|
||||
|
||||
// When reading this method, remember that shufps takes the two first
|
||||
// When reading this method, remember that vshufps takes the two first
|
||||
// inputs of the destination operand (right operand) and the two last
|
||||
// inputs of the source operand (left operand).
|
||||
//
|
||||
|
@ -2376,7 +2377,7 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
|
|||
return;
|
||||
}
|
||||
|
||||
// SSE4.1 insertps can handle any single element.
|
||||
// SSE4.1 vinsertps can handle any single element.
|
||||
unsigned numLanesUnchanged = (x == 0) + (y == 1) + (z == 2) + (w == 3);
|
||||
if (AssemblerX86Shared::HasSSE41() && numLanesUnchanged == 3) {
|
||||
SimdLane srcLane;
|
||||
|
@ -2395,7 +2396,7 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
|
|||
srcLane = SimdLane(w - 4);
|
||||
dstLane = LaneW;
|
||||
}
|
||||
masm.insertps(rhs, out, masm.insertpsMask(srcLane, dstLane));
|
||||
masm.vinsertps(masm.vinsertpsMask(srcLane, dstLane), rhs, out, out);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2404,21 +2405,21 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
|
|||
if (x < 4 && y < 4) {
|
||||
if (w >= 4) {
|
||||
w %= 4;
|
||||
// T = (Rw Rw Lz Lz) = shufps(firstMask, lhs, rhs)
|
||||
// T = (Rw Rw Lz Lz) = vshufps(firstMask, lhs, rhs, rhs)
|
||||
firstMask = MacroAssembler::ComputeShuffleMask(w, w, z, z);
|
||||
// (Lx Ly Lz Rw) = (Lx Ly Tz Tx) = shufps(secondMask, T, lhs)
|
||||
// (Lx Ly Lz Rw) = (Lx Ly Tz Tx) = vshufps(secondMask, T, lhs, lhs)
|
||||
secondMask = MacroAssembler::ComputeShuffleMask(x, y, LaneZ, LaneX);
|
||||
} else {
|
||||
MOZ_ASSERT(z >= 4);
|
||||
z %= 4;
|
||||
// T = (Rz Rz Lw Lw) = shufps(firstMask, lhs, rhs)
|
||||
// T = (Rz Rz Lw Lw) = vshufps(firstMask, lhs, rhs, rhs)
|
||||
firstMask = MacroAssembler::ComputeShuffleMask(z, z, w, w);
|
||||
// (Lx Ly Rz Lw) = (Lx Ly Tx Tz) = shufps(secondMask, T, lhs)
|
||||
// (Lx Ly Rz Lw) = (Lx Ly Tx Tz) = vshufps(secondMask, T, lhs, lhs)
|
||||
secondMask = MacroAssembler::ComputeShuffleMask(x, y, LaneX, LaneZ);
|
||||
}
|
||||
|
||||
masm.shufps(firstMask, lhs, rhsCopy);
|
||||
masm.shufps(secondMask, rhsCopy, lhs);
|
||||
masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy);
|
||||
masm.vshufps(secondMask, rhsCopy, lhs, lhs);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2426,21 +2427,21 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
|
|||
|
||||
if (y >= 4) {
|
||||
y %= 4;
|
||||
// T = (Ry Ry Lx Lx) = shufps(firstMask, lhs, rhs)
|
||||
// T = (Ry Ry Lx Lx) = vshufps(firstMask, lhs, rhs, rhs)
|
||||
firstMask = MacroAssembler::ComputeShuffleMask(y, y, x, x);
|
||||
// (Lx Ry Lz Lw) = (Tz Tx Lz Lw) = shufps(secondMask, lhs, T)
|
||||
// (Lx Ry Lz Lw) = (Tz Tx Lz Lw) = vshufps(secondMask, lhs, T, T)
|
||||
secondMask = MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, z, w);
|
||||
} else {
|
||||
MOZ_ASSERT(x >= 4);
|
||||
x %= 4;
|
||||
// T = (Rx Rx Ly Ly) = shufps(firstMask, lhs, rhs)
|
||||
// T = (Rx Rx Ly Ly) = vshufps(firstMask, lhs, rhs, rhs)
|
||||
firstMask = MacroAssembler::ComputeShuffleMask(x, x, y, y);
|
||||
// (Rx Ly Lz Lw) = (Tx Tz Lz Lw) = shufps(secondMask, lhs, T)
|
||||
// (Rx Ly Lz Lw) = (Tx Tz Lz Lw) = vshufps(secondMask, lhs, T, T)
|
||||
secondMask = MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, z, w);
|
||||
}
|
||||
|
||||
masm.shufps(firstMask, lhs, rhsCopy);
|
||||
masm.shufps(secondMask, lhs, rhsCopy);
|
||||
masm.vshufps(firstMask, lhs, rhsCopy, rhsCopy);
|
||||
masm.vshufps(secondMask, lhs, rhsCopy, rhsCopy);
|
||||
masm.moveFloat32x4(rhsCopy, out);
|
||||
return;
|
||||
}
|
||||
|
@ -2500,17 +2501,17 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
|
|||
return;
|
||||
}
|
||||
|
||||
// In one shufps
|
||||
// In one vshufps
|
||||
if (x < 4 && y < 4) {
|
||||
mask = MacroAssembler::ComputeShuffleMask(x, y, z % 4, w % 4);
|
||||
masm.shufps(mask, rhs, out);
|
||||
masm.vshufps(mask, rhs, out, out);
|
||||
return;
|
||||
}
|
||||
|
||||
// At creation, we should have explicitly swapped in this case.
|
||||
MOZ_ASSERT(!(z >= 4 && w >= 4));
|
||||
|
||||
// In two shufps, for the most generic case:
|
||||
// In two vshufps, for the most generic case:
|
||||
uint32_t firstMask[4], secondMask[4];
|
||||
unsigned i = 0, j = 2, k = 0;
|
||||
|
||||
|
@ -2533,11 +2534,11 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
|
|||
|
||||
mask = MacroAssembler::ComputeShuffleMask(firstMask[0], firstMask[1],
|
||||
firstMask[2], firstMask[3]);
|
||||
masm.shufps(mask, rhs, lhs);
|
||||
masm.vshufps(mask, rhs, lhs, lhs);
|
||||
|
||||
mask = MacroAssembler::ComputeShuffleMask(secondMask[0], secondMask[1],
|
||||
secondMask[2], secondMask[3]);
|
||||
masm.shufps(mask, lhs, lhs);
|
||||
masm.vshufps(mask, lhs, lhs, lhs);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -2653,14 +2654,14 @@ CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *ins)
|
|||
// ScratchSimdReg contains (Rx, _, Rz, _) where R is the resulting vector.
|
||||
|
||||
FloatRegister temp = ToFloatRegister(ins->temp());
|
||||
masm.pshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), lhs, lhs);
|
||||
masm.pshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), rhs, temp);
|
||||
masm.vpshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), lhs, lhs);
|
||||
masm.vpshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), rhs, temp);
|
||||
masm.vpmuludq(temp, lhs, lhs);
|
||||
// lhs contains (Ry, _, Rw, _) where R is the resulting vector.
|
||||
|
||||
masm.shufps(MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, LaneX, LaneZ), ScratchSimdReg, lhs);
|
||||
masm.vshufps(MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, LaneX, LaneZ), ScratchSimdReg, lhs, lhs);
|
||||
// lhs contains (Ry, Rw, Rx, Rz)
|
||||
masm.shufps(MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, LaneW, LaneY), lhs, lhs);
|
||||
masm.vshufps(MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, LaneW, LaneY), lhs, lhs, lhs);
|
||||
return;
|
||||
}
|
||||
case MSimdBinaryArith::Div:
|
||||
|
@ -2904,7 +2905,19 @@ CodeGeneratorX86Shared::visitSimdShift(LSimdShift *ins)
|
|||
// 1068028.
|
||||
const LAllocation *val = ins->value();
|
||||
if (val->isConstant()) {
|
||||
Imm32 count(ToInt32(val));
|
||||
int32_t c = ToInt32(val);
|
||||
if (c > 31) {
|
||||
switch (ins->operation()) {
|
||||
case MSimdShift::lsh:
|
||||
case MSimdShift::ursh:
|
||||
masm.zeroInt32x4(out);
|
||||
return;
|
||||
default:
|
||||
c = 31;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Imm32 count(c);
|
||||
switch (ins->operation()) {
|
||||
case MSimdShift::lsh:
|
||||
masm.packedLeftShiftByScalar(count, out);
|
||||
|
|
|
@ -789,14 +789,14 @@ class MacroAssemblerX86Shared : public Assembler
|
|||
void negateDouble(FloatRegister reg) {
|
||||
// From MacroAssemblerX86Shared::maybeInlineDouble
|
||||
vpcmpeqw(ScratchDoubleReg, ScratchDoubleReg, ScratchDoubleReg);
|
||||
psllq(Imm32(63), ScratchDoubleReg);
|
||||
vpsllq(Imm32(63), ScratchDoubleReg, ScratchDoubleReg);
|
||||
|
||||
// XOR the float in a float register with -0.0.
|
||||
vxorpd(ScratchDoubleReg, reg, reg); // s ^ 0x8000000000000000
|
||||
}
|
||||
void negateFloat(FloatRegister reg) {
|
||||
vpcmpeqw(ScratchFloat32Reg, ScratchFloat32Reg, ScratchFloat32Reg);
|
||||
psllq(Imm32(31), ScratchFloat32Reg);
|
||||
vpsllq(Imm32(31), ScratchFloat32Reg, ScratchFloat32Reg);
|
||||
|
||||
// XOR the float in a float register with -0.0.
|
||||
vxorps(ScratchFloat32Reg, reg, reg); // s ^ 0x80000000
|
||||
|
@ -922,19 +922,19 @@ class MacroAssemblerX86Shared : public Assembler
|
|||
vpslld(src, dest, dest);
|
||||
}
|
||||
void packedLeftShiftByScalar(Imm32 count, FloatRegister dest) {
|
||||
pslld(count, dest);
|
||||
vpslld(count, dest, dest);
|
||||
}
|
||||
void packedRightShiftByScalar(FloatRegister src, FloatRegister dest) {
|
||||
vpsrad(src, dest, dest);
|
||||
}
|
||||
void packedRightShiftByScalar(Imm32 count, FloatRegister dest) {
|
||||
psrad(count, dest);
|
||||
vpsrad(count, dest, dest);
|
||||
}
|
||||
void packedUnsignedRightShiftByScalar(FloatRegister src, FloatRegister dest) {
|
||||
vpsrld(src, dest, dest);
|
||||
}
|
||||
void packedUnsignedRightShiftByScalar(Imm32 count, FloatRegister dest) {
|
||||
psrld(count, dest);
|
||||
vpsrld(count, dest, dest);
|
||||
}
|
||||
|
||||
void loadAlignedFloat32x4(const Address &src, FloatRegister dest) {
|
||||
|
@ -996,7 +996,7 @@ class MacroAssemblerX86Shared : public Assembler
|
|||
}
|
||||
|
||||
void shuffleInt32(uint32_t mask, FloatRegister src, FloatRegister dest) {
|
||||
pshufd(mask, src, dest);
|
||||
vpshufd(mask, src, dest);
|
||||
}
|
||||
void moveLowInt32(FloatRegister src, Register dest) {
|
||||
vmovd(src, dest);
|
||||
|
@ -1013,12 +1013,12 @@ class MacroAssemblerX86Shared : public Assembler
|
|||
// Note: this is useAtStart-safe because src isn't read afterwards.
|
||||
if (src != dest)
|
||||
moveFloat32x4(src, dest);
|
||||
shufps(mask, dest, dest);
|
||||
vshufps(mask, dest, dest, dest);
|
||||
}
|
||||
void shuffleMix(uint32_t mask, const Operand &src, FloatRegister dest) {
|
||||
// Note this uses shufps, which is a cross-domain penalty on CPU where it
|
||||
// Note this uses vshufps, which is a cross-domain penalty on CPU where it
|
||||
// applies, but that's the way clang and gcc do it.
|
||||
shufps(mask, src, dest);
|
||||
vshufps(mask, src, dest, dest);
|
||||
}
|
||||
|
||||
void moveFloatAsDouble(Register src, FloatRegister dest) {
|
||||
|
@ -1134,7 +1134,7 @@ class MacroAssemblerX86Shared : public Assembler
|
|||
}
|
||||
|
||||
// It is also possible to load several common constants using vpcmpeqw
|
||||
// to get all ones and then psllq and psrlq to get zeros at the ends,
|
||||
// to get all ones and then vpsllq and vpsrlq to get zeros at the ends,
|
||||
// as described in "13.4 Generating constants" of
|
||||
// "2. Optimizing subroutines in assembly language" by Agner Fog, and as
|
||||
// previously implemented here. However, with x86 and x64 both using
|
||||
|
|
|
@ -868,10 +868,10 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
|
|||
void boxDouble(FloatRegister src, const ValueOperand &dest) {
|
||||
if (Assembler::HasSSE41()) {
|
||||
vmovd(src, dest.payloadReg());
|
||||
pextrd(1, src, dest.typeReg());
|
||||
vpextrd(1, src, dest.typeReg());
|
||||
} else {
|
||||
vmovd(src, dest.payloadReg());
|
||||
psrldq(Imm32(4), src);
|
||||
vpsrldq(Imm32(4), src, src);
|
||||
vmovd(src, dest.typeReg());
|
||||
}
|
||||
}
|
||||
|
@ -905,7 +905,7 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
|
|||
MOZ_ASSERT(dest != ScratchDoubleReg);
|
||||
if (Assembler::HasSSE41()) {
|
||||
vmovd(src.payloadReg(), dest);
|
||||
pinsrd(1, src.typeReg(), dest);
|
||||
vpinsrd(1, src.typeReg(), dest, dest);
|
||||
} else {
|
||||
vmovd(src.payloadReg(), dest);
|
||||
vmovd(src.typeReg(), ScratchDoubleReg);
|
||||
|
@ -919,7 +919,7 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared
|
|||
movl(payload, scratch);
|
||||
vmovd(scratch, dest);
|
||||
movl(type, scratch);
|
||||
pinsrd(1, scratch, dest);
|
||||
vpinsrd(1, scratch, dest, dest);
|
||||
} else {
|
||||
movl(payload, scratch);
|
||||
vmovd(scratch, dest);
|
||||
|
|
Загрузка…
Ссылка в новой задаче