Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1111241 - SpiderMonkey: Use VEX encodings for add, sub, and, or, xor, and andn r=jandem
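This commit moves the add, sub, and, or, xor, and andn SIMD helpers from the destructive two-operand SSE encodings (dest = dest op src) to the three-operand VEX encodings, which carry the extra source register in the VEX prefix's vvvv field so the output no longer has to alias an input. At the macro-assembler call sites, visible throughout the diff below, the change adds an explicit output operand:

    masm.paddd(rhs, lhs);           // old: lhs = lhs + rhs; lhs doubles as the output
    masm.vpaddd(rhs, lhs, output);  // new: output = lhs + rhs

When AVX is unavailable, the lowering change near the end of the diff keeps reusing the first input as the output, so the renamed helpers can still fall back to the legacy encodings.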
Parent: 0f58581e92
Commit: 2911f76be6
@@ -447,6 +447,9 @@ class LSimdBinaryBitwiseX4 : public LInstructionHelper<1, 2, 0>
     MSimdBinaryBitwise::Operation operation() const {
         return mir_->toSimdBinaryBitwise()->operation();
     }
+    MIRType type() const {
+        return mir_->type();
+    }
 };
 
 class LSimdShift : public LInstructionHelper<1, 2, 0>
@@ -1784,33 +1784,33 @@ class AssemblerX86Shared : public AssemblerShared
         MOZ_ASSERT(HasSSE2());
         masm.movd_rr(src.code(), dest.code());
     }
-    void paddd(const Operand &src, FloatRegister dest) {
+    void vpaddd(const Operand &src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        switch (src.kind()) {
+        switch (src1.kind()) {
           case Operand::FPREG:
-            masm.paddd_rr(src.fpu(), dest.code());
+            masm.vpaddd_rr(src1.fpu(), src0.code(), dest.code());
             break;
           case Operand::MEM_REG_DISP:
-            masm.paddd_mr(src.disp(), src.base(), dest.code());
+            masm.vpaddd_mr(src1.disp(), src1.base(), src0.code(), dest.code());
             break;
           case Operand::MEM_ADDRESS32:
-            masm.paddd_mr(src.address(), dest.code());
+            masm.vpaddd_mr(src1.address(), src0.code(), dest.code());
             break;
           default:
            MOZ_CRASH("unexpected operand kind");
         }
     }
-    void psubd(const Operand &src, FloatRegister dest) {
+    void vpsubd(const Operand &src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        switch (src.kind()) {
+        switch (src1.kind()) {
           case Operand::FPREG:
-            masm.psubd_rr(src.fpu(), dest.code());
+            masm.vpsubd_rr(src1.fpu(), src0.code(), dest.code());
             break;
           case Operand::MEM_REG_DISP:
-            masm.psubd_mr(src.disp(), src.base(), dest.code());
+            masm.vpsubd_mr(src1.disp(), src1.base(), src0.code(), dest.code());
             break;
           case Operand::MEM_ADDRESS32:
-            masm.psubd_mr(src.address(), dest.code());
+            masm.vpsubd_mr(src1.address(), src0.code(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
@@ -1932,75 +1932,152 @@ class AssemblerX86Shared : public AssemblerShared
             MOZ_CRASH("unexpected operand kind");
         }
     }
-    void andps(const Operand &src, FloatRegister dest) {
+    void vandps(const Operand &src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        switch (src.kind()) {
+        switch (src1.kind()) {
           case Operand::FPREG:
-            masm.andps_rr(src.fpu(), dest.code());
+            masm.vandps_rr(src1.fpu(), src0.code(), dest.code());
             break;
           case Operand::MEM_REG_DISP:
-            masm.andps_mr(src.disp(), src.base(), dest.code());
+            masm.vandps_mr(src1.disp(), src1.base(), src0.code(), dest.code());
             break;
           case Operand::MEM_ADDRESS32:
-            masm.andps_mr(src.address(), dest.code());
+            masm.vandps_mr(src1.address(), src0.code(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
-    void andnps(const Operand &src, FloatRegister dest) {
+    void vandnps(const Operand &src1, FloatRegister src0, FloatRegister dest) {
         // Negates bits of dest and then applies AND
         MOZ_ASSERT(HasSSE2());
-        switch (src.kind()) {
+        switch (src1.kind()) {
           case Operand::FPREG:
-            masm.andnps_rr(src.fpu(), dest.code());
+            masm.vandnps_rr(src1.fpu(), src0.code(), dest.code());
             break;
           case Operand::MEM_REG_DISP:
-            masm.andnps_mr(src.disp(), src.base(), dest.code());
+            masm.vandnps_mr(src1.disp(), src1.base(), src0.code(), dest.code());
             break;
           case Operand::MEM_ADDRESS32:
-            masm.andnps_mr(src.address(), dest.code());
+            masm.vandnps_mr(src1.address(), src0.code(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
-    void orps(const Operand &src, FloatRegister dest) {
+    void vorps(const Operand &src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        switch (src.kind()) {
+        switch (src1.kind()) {
          case Operand::FPREG:
-            masm.orps_rr(src.fpu(), dest.code());
+            masm.vorps_rr(src1.fpu(), src0.code(), dest.code());
             break;
           case Operand::MEM_REG_DISP:
-            masm.orps_mr(src.disp(), src.base(), dest.code());
+            masm.vorps_mr(src1.disp(), src1.base(), src0.code(), dest.code());
             break;
           case Operand::MEM_ADDRESS32:
-            masm.orps_mr(src.address(), dest.code());
+            masm.vorps_mr(src1.address(), src0.code(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
-    void xorps(const Operand &src, FloatRegister dest) {
+    void vxorps(const Operand &src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        switch (src.kind()) {
+        switch (src1.kind()) {
           case Operand::FPREG:
-            masm.xorps_rr(src.fpu(), dest.code());
+            masm.vxorps_rr(src1.fpu(), src0.code(), dest.code());
             break;
           case Operand::MEM_REG_DISP:
-            masm.xorps_mr(src.disp(), src.base(), dest.code());
+            masm.vxorps_mr(src1.disp(), src1.base(), src0.code(), dest.code());
             break;
           case Operand::MEM_ADDRESS32:
-            masm.xorps_mr(src.address(), dest.code());
+            masm.vxorps_mr(src1.address(), src0.code(), dest.code());
             break;
           default:
             MOZ_CRASH("unexpected operand kind");
         }
     }
-    void pxor(FloatRegister src, FloatRegister dest) {
+    void vpand(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.pxor_rr(src.code(), dest.code());
+        masm.vpand_rr(src1.code(), src0.code(), dest.code());
     }
+    void vpand(const Operand &src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src1.kind()) {
+          case Operand::FPREG:
+            masm.vpand_rr(src1.fpu(), src0.code(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.vpand_mr(src1.disp(), src1.base(), src0.code(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vpand_mr(src1.address(), src0.code(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void vpor(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        masm.vpor_rr(src1.code(), src0.code(), dest.code());
+    }
+    void vpor(const Operand &src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src1.kind()) {
+          case Operand::FPREG:
+            masm.vpor_rr(src1.fpu(), src0.code(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.vpor_mr(src1.disp(), src1.base(), src0.code(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vpor_mr(src1.address(), src0.code(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void vpxor(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        masm.vpxor_rr(src1.code(), src0.code(), dest.code());
+    }
+    void vpxor(const Operand &src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src1.kind()) {
+          case Operand::FPREG:
+            masm.vpxor_rr(src1.fpu(), src0.code(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.vpxor_mr(src1.disp(), src1.base(), src0.code(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vpxor_mr(src1.address(), src0.code(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
+    void vpandn(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        masm.vpandn_rr(src1.code(), src0.code(), dest.code());
+    }
+    void vpandn(const Operand &src1, FloatRegister src0, FloatRegister dest) {
+        MOZ_ASSERT(HasSSE2());
+        switch (src1.kind()) {
+          case Operand::FPREG:
+            masm.vpandn_rr(src1.fpu(), src0.code(), dest.code());
+            break;
+          case Operand::MEM_REG_DISP:
+            masm.vpandn_mr(src1.disp(), src1.base(), src0.code(), dest.code());
+            break;
+          case Operand::MEM_ADDRESS32:
+            masm.vpandn_mr(src1.address(), src0.code(), dest.code());
+            break;
+          default:
+            MOZ_CRASH("unexpected operand kind");
+        }
+    }
 
     void pshufd(uint32_t mask, FloatRegister src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
         masm.pshufd_irr(mask, src.code(), dest.code());
@@ -2199,29 +2276,29 @@ class AssemblerX86Shared : public AssemblerShared
             MOZ_CRASH("unexpected operand kind");
         }
     }
-    void xorpd(FloatRegister src, FloatRegister dest) {
+    void vxorpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.xorpd_rr(src.code(), dest.code());
+        masm.vxorpd_rr(src1.code(), src0.code(), dest.code());
     }
-    void xorps(FloatRegister src, FloatRegister dest) {
+    void vxorps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.xorps_rr(src.code(), dest.code());
+        masm.vxorps_rr(src1.code(), src0.code(), dest.code());
     }
-    void orpd(FloatRegister src, FloatRegister dest) {
+    void vorpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.orpd_rr(src.code(), dest.code());
+        masm.vorpd_rr(src1.code(), src0.code(), dest.code());
     }
-    void orps(FloatRegister src, FloatRegister dest) {
+    void vorps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.orps_rr(src.code(), dest.code());
+        masm.vorps_rr(src1.code(), src0.code(), dest.code());
     }
-    void andpd(FloatRegister src, FloatRegister dest) {
+    void vandpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.andpd_rr(src.code(), dest.code());
+        masm.vandpd_rr(src1.code(), src0.code(), dest.code());
     }
-    void andps(FloatRegister src, FloatRegister dest) {
+    void vandps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
-        masm.andps_rr(src.code(), dest.code());
+        masm.vandps_rr(src1.code(), src0.code(), dest.code());
     }
     void sqrtsd(FloatRegister src, FloatRegister dest) {
         MOZ_ASSERT(HasSSE2());
@@ -386,7 +386,10 @@ private:
         OP2_PEXTRW_GdUdIb   = 0xC5,
         OP2_SHUFPS_VpsWpsIb = 0xC6,
         OP2_PSRLD_VdqWdq    = 0xD2,
+        OP2_PANDDQ_VdqWdq   = 0xDB,
+        OP2_PANDNDQ_VdqWdq  = 0xDF,
         OP2_PSRAD_VdqWdq    = 0xE2,
+        OP2_PORDQ_VdqWdq    = 0xEB,
         OP2_PXORDQ_VdqWdq   = 0xEF,
         OP2_PSLLD_VdqWdq    = 0xF2,
         OP2_PMULUDQ_VdqWdq  = 0xF4,
@@ -779,44 +782,30 @@ public:
         m_formatter.twoByteOp(OP2_XADD_EvGv, offset, base, index, scale, srcdest);
     }
 
-    void paddd_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vpaddd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("paddd %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PADDD_VdqWdq, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, src1, src0, dst);
     }
-    void paddd_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vpaddd_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("paddd %s0x%x(%s), %s",
-             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PADDD_VdqWdq, offset, base, (RegisterID)dst);
+        twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, offset, base, src0, dst);
     }
-    void paddd_mr(const void* address, XMMRegisterID dst)
+    void vpaddd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("paddd %p, %s", address, nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PADDD_VdqWdq, address, (RegisterID)dst);
+        twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, address, src0, dst);
     }
 
-    void psubd_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vpsubd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("psubd %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PSUBD_VdqWdq, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, src1, src0, dst);
     }
-    void psubd_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vpsubd_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
    {
-        spew("psubd %s0x%x(%s), %s",
-             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PSUBD_VdqWdq, offset, base, (RegisterID)dst);
+        twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, offset, base, src0, dst);
     }
-    void psubd_mr(const void* address, XMMRegisterID dst)
+    void vpsubd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("psubd %p, %s", address, nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PSUBD_VdqWdq, address, (RegisterID)dst);
+        twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, address, src0, dst);
     }
 
     void pmuludq_rr(XMMRegisterID src, XMMRegisterID dst)
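In the emitter, each spew/prefix/twoByteOp triple collapses into a single twoByteOpSimd call that carries the operand-type prefix (VEX_PD or VEX_PS) and the extra src0 register, leaving the choice between the legacy and VEX byte sequences to emission time. For intuition about what the three-operand form does at the byte level, here is a small, self-contained sketch (not the patch's emitter) of the two-byte VEX prefix for registers xmm0 through xmm7:

    #include <cstdint>
    #include <cstdio>

    // Build "vaddps xmm<dst>, xmm<src0>, xmm<src1>" (128-bit, 0F map, no
    // mandatory prefix). The second source rides in the inverted vvvv field,
    // which is what lets the destination differ from both sources.
    static void encodeVaddps(uint8_t dst, uint8_t src0, uint8_t src1, uint8_t out[4])
    {
        out[0] = 0xC5;                              // two-byte VEX escape
        uint8_t vvvv = uint8_t(~src0) & 0xF;        // src0, stored inverted
        out[1] = 0x80 | uint8_t(vvvv << 3);         // R=0 (regs 0-7), L=0, pp=00
        out[2] = 0x58;                              // the ADDPS/VADDPS opcode
        out[3] = 0xC0 | uint8_t(dst << 3) | src1;   // ModRM: mod=11, reg=dst, rm=src1
    }

    int main()
    {
        uint8_t buf[4];
        encodeVaddps(0, 1, 2, buf);                 // vaddps xmm0, xmm1, xmm2
        for (uint8_t b : buf)
            printf("%02x ", b);                     // prints: c5 f0 58 c2
        printf("\n");
    }

The legacy addps encoding has no field for a third register at all, which is why the pre-VEX helpers above took only a source and a destination.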
@@ -3016,11 +3005,53 @@ public:
         m_formatter.twoByteOp(OP2_MOVD_VdEd, src, (RegisterID)dst);
     }
 
-    void pxor_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vpand_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("pxor %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_PXORDQ_VdqWdq, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, src1, src0, dst);
     }
+    void vpand_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, offset, base, src0, dst);
+    }
+    void vpand_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, address, src0, dst);
+    }
+    void vpor_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, src1, src0, dst);
+    }
+    void vpor_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, offset, base, src0, dst);
+    }
+    void vpor_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, address, src0, dst);
+    }
+    void vpxor_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, src1, src0, dst);
+    }
+    void vpxor_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, offset, base, src0, dst);
+    }
+    void vpxor_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, address, src0, dst);
+    }
+    void vpandn_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, src1, src0, dst);
+    }
+    void vpandn_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, offset, base, src0, dst);
+    }
+    void vpandn_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
+    {
+        twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, address, src0, dst);
+    }
 
     void pshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
@@ -3703,101 +3734,79 @@ public:
         twoByteOpSimd("vdivss", VEX_SS, OP2_DIVSD_VsdWsd, offset, base, src0, dst);
     }
 
-    void xorpd_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vxorpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("xorpd %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_XORPD_VpdWpd, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vxorpd", VEX_PD, OP2_XORPD_VpdWpd, src1, src0, dst);
     }
 
-    void orpd_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vorpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("orpd %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_ORPD_VpdWpd, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vorpd", VEX_PD, OP2_ORPD_VpdWpd, src1, src0, dst);
     }
 
-    void andpd_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vandpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("andpd %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.prefix(PRE_SSE_66);
-        m_formatter.twoByteOp(OP2_ANDPD_VpdWpd, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vandpd", VEX_PD, OP2_ANDPD_VpdWpd, src1, src0, dst);
     }
 
-    void andps_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vandps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("andps %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_ANDPS_VpsWps, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, src1, src0, dst);
     }
 
-    void andps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vandps_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("andps %s0x%x(%s), %s",
-             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_ANDPS_VpsWps, offset, base, (RegisterID)dst);
+        twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, offset, base, src0, dst);
     }
 
-    void andps_mr(const void* address, XMMRegisterID dst)
+    void vandps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("andps %p, %s", address, nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_ANDPS_VpsWps, address, (RegisterID)dst);
+        twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, address, src0, dst);
     }
 
-    void andnps_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vandnps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("andnps %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_ANDNPS_VpsWps, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, src1, src0, dst);
     }
 
-    void andnps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vandnps_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("andnps %s0x%x(%s), %s",
-             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_ANDNPS_VpsWps, offset, base, (RegisterID)dst);
+        twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, offset, base, src0, dst);
     }
 
-    void andnps_mr(const void* address, XMMRegisterID dst)
+    void vandnps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("andnps %p, %s", address, nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_ANDPS_VpsWps, address, (RegisterID)dst);
+        twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, address, src0, dst);
     }
 
-    void orps_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vorps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("orps %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_ORPS_VpsWps, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, src1, src0, dst);
     }
 
-    void orps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vorps_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("orps %s0x%x(%s), %s",
-             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_ORPS_VpsWps, offset, base, (RegisterID)dst);
+        twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, offset, base, src0, dst);
     }
 
-    void orps_mr(const void* address, XMMRegisterID dst)
+    void vorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("orps %p, %s", address, nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_ORPS_VpsWps, address, (RegisterID)dst);
+        twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, address, src0, dst);
     }
 
-    void xorps_rr(XMMRegisterID src, XMMRegisterID dst)
+    void vxorps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("xorps %s, %s", nameFPReg(src), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_XORPS_VpsWps, (RegisterID)src, (RegisterID)dst);
+        twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, src1, src0, dst);
     }
 
-    void xorps_mr(int offset, RegisterID base, XMMRegisterID dst)
+    void vxorps_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("xorps %s0x%x(%s), %s",
-             PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_XORPS_VpsWps, offset, base, (RegisterID)dst);
+        twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, offset, base, src0, dst);
     }
 
-    void xorps_mr(const void* address, XMMRegisterID dst)
+    void vxorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
     {
-        spew("xorps %p, %s", address, nameFPReg(dst));
-        m_formatter.twoByteOp(OP2_XORPS_VpsWps, address, (RegisterID)dst);
+        twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, address, src0, dst);
     }
 
     void sqrtsd_rr(XMMRegisterID src, XMMRegisterID dst)
@@ -506,9 +506,9 @@ CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD *ins)
     // and negative zero. These instructions merge the sign bits in that
     // case, and are no-ops otherwise.
     if (ins->mir()->isMax())
-        masm.andpd(second, first);
+        masm.vandpd(second, first, first);
     else
-        masm.orpd(second, first);
+        masm.vorpd(second, first, first);
     masm.jump(&done);
 
     // x86's min/max are not symmetric; if either operand is a NaN, they return
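The comment's ±0 fix-up works because AND and OR merge the sign bits: ANDing +0.0 (all zero bits) with -0.0 (sign bit only) clears the sign, so max(+0, -0) comes out +0, while ORing keeps it, so min(+0, -0) comes out -0; for any other equal inputs both are no-ops. The same bit trick in scalar form, as a standalone sketch:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static uint64_t bits(double d) { uint64_t u; memcpy(&u, &d, sizeof u); return u; }
    static double fromBits(uint64_t u) { double d; memcpy(&d, &u, sizeof d); return d; }

    int main()
    {
        double pz = 0.0, nz = -0.0;
        // max case (vandpd): the sign bit survives only if both inputs carry it.
        printf("and: %g\n", fromBits(bits(pz) & bits(nz)));  // prints 0
        // min case (vorpd): the sign bit survives if either input carries it.
        printf("or:  %g\n", fromBits(bits(pz) | bits(nz)));  // prints -0
    }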
@@ -557,9 +557,9 @@ CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF *ins)
     // and negative zero. These instructions merge the sign bits in that
     // case, and are no-ops otherwise.
     if (ins->mir()->isMax())
-        masm.andps(second, first);
+        masm.vandps(second, first, first);
     else
-        masm.orps(second, first);
+        masm.vorps(second, first, first);
     masm.jump(&done);
 
     // x86's min/max are not symmetric; if either operand is a NaN, they return
@@ -590,7 +590,7 @@ CodeGeneratorX86Shared::visitAbsD(LAbsD *ins)
     // Load a value which is all ones except for the sign bit.
     masm.loadConstantDouble(SpecificNaN<double>(0, FloatingPoint<double>::kSignificandBits),
                             ScratchDoubleReg);
-    masm.andpd(ScratchDoubleReg, input);
+    masm.vandpd(ScratchDoubleReg, input, input);
 }
 
 void
@@ -601,7 +601,7 @@ CodeGeneratorX86Shared::visitAbsF(LAbsF *ins)
     // Same trick as visitAbsD above.
     masm.loadConstantFloat32(SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits),
                              ScratchFloat32Reg);
-    masm.andps(ScratchFloat32Reg, input);
+    masm.vandps(ScratchFloat32Reg, input, input);
 }
 
 void
@@ -2616,15 +2616,15 @@ CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *ins)
 {
     FloatRegister lhs = ToFloatRegister(ins->lhs());
     Operand rhs = ToOperand(ins->rhs());
-    MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
+    FloatRegister output = ToFloatRegister(ins->output());
 
     MSimdBinaryArith::Operation op = ins->operation();
     switch (op) {
       case MSimdBinaryArith::Add:
-        masm.packedAddInt32(rhs, lhs);
+        masm.vpaddd(rhs, lhs, output);
         return;
       case MSimdBinaryArith::Sub:
-        masm.packedSubInt32(rhs, lhs);
+        masm.vpsubd(rhs, lhs, output);
         return;
       case MSimdBinaryArith::Mul: {
         if (AssemblerX86Shared::HasSSE41()) {
@@ -2695,15 +2695,15 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
         masm.vmaxps(Operand(lhs), rhsCopy, tmp);
         masm.vmaxps(rhs, lhs, output);
 
-        masm.andps(tmp, output);
-        masm.orps(ScratchSimdReg, output); // or in the all-ones NaNs
+        masm.vandps(tmp, output, output);
+        masm.vorps(ScratchSimdReg, output, output); // or in the all-ones NaNs
         return;
       }
       case MSimdBinaryArith::Min: {
         FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, ScratchSimdReg);
         masm.vminps(Operand(lhs), rhsCopy, ScratchSimdReg);
         masm.vminps(rhs, lhs, output);
-        masm.orps(ScratchSimdReg, output); // NaN or'd with arbitrary bits is NaN
+        masm.vorps(ScratchSimdReg, output, output); // NaN or'd with arbitrary bits is NaN
         return;
       }
       case MSimdBinaryArith::MinNum: {
@@ -2713,11 +2713,11 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
 
         FloatRegister mask = ScratchSimdReg;
         masm.pcmpeqd(Operand(lhs), mask);
-        masm.andps(tmp, mask);
+        masm.vandps(tmp, mask, mask);
 
         FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp);
         masm.vminps(rhs, lhsCopy, tmp);
-        masm.orps(mask, tmp);
+        masm.vorps(mask, tmp, tmp);
 
         FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
         masm.vcmpneqps(rhs, rhsCopy, mask);
@@ -2730,9 +2730,9 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
             // it requires the mask to be in xmm0.
             if (lhs != output)
                 masm.movaps(lhs, output);
-            masm.andps(Operand(mask), output);
-            masm.andnps(Operand(tmp), mask);
-            masm.orps(Operand(mask), output);
+            masm.vandps(Operand(mask), output, output);
+            masm.vandnps(Operand(tmp), mask, mask);
+            masm.vorps(Operand(mask), output, output);
         }
         return;
       }
@@ -2743,11 +2743,11 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
 
         FloatRegister tmp = ToFloatRegister(ins->temp());
         masm.loadConstantInt32x4(SimdConstant::SplatX4(int32_t(0x80000000)), tmp);
-        masm.andps(tmp, mask);
+        masm.vandps(tmp, mask, mask);
 
         FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp);
         masm.vmaxps(rhs, lhsCopy, tmp);
-        masm.andnps(Operand(tmp), mask);
+        masm.vandnps(Operand(tmp), mask, mask);
 
         // Ensure tmp always contains the temporary result
         mask = tmp;
@@ -2764,9 +2764,9 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
             // it requires the mask to be in xmm0.
             if (lhs != output)
                 masm.movaps(lhs, output);
-            masm.andps(Operand(mask), output);
-            masm.andnps(Operand(tmp), mask);
-            masm.orps(Operand(mask), output);
+            masm.vandps(Operand(mask), output, output);
+            masm.vandnps(Operand(tmp), mask, mask);
+            masm.vorps(Operand(mask), output, output);
         }
         return;
       }
@@ -2784,7 +2784,7 @@ CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4 *ins)
 
     switch (ins->operation()) {
       case MSimdUnaryArith::neg:
-        masm.pxor(out, out);
+        masm.zeroInt32x4(out);
         masm.packedSubInt32(in, out);
         return;
       case MSimdUnaryArith::not_:
@@ -2848,18 +2848,27 @@ CodeGeneratorX86Shared::visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *ins)
 {
     FloatRegister lhs = ToFloatRegister(ins->lhs());
     Operand rhs = ToOperand(ins->rhs());
-    MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
+    FloatRegister output = ToFloatRegister(ins->output());
 
     MSimdBinaryBitwise::Operation op = ins->operation();
     switch (op) {
       case MSimdBinaryBitwise::and_:
-        masm.bitwiseAndX4(rhs, lhs);
+        if (ins->type() == MIRType_Float32x4)
+            masm.vandps(rhs, lhs, output);
+        else
+            masm.vpand(rhs, lhs, output);
         return;
       case MSimdBinaryBitwise::or_:
-        masm.bitwiseOrX4(rhs, lhs);
+        if (ins->type() == MIRType_Float32x4)
+            masm.vorps(rhs, lhs, output);
+        else
+            masm.vpor(rhs, lhs, output);
         return;
       case MSimdBinaryBitwise::xor_:
-        masm.bitwiseXorX4(rhs, lhs);
+        if (ins->type() == MIRType_Float32x4)
+            masm.vxorps(rhs, lhs, output);
+        else
+            masm.vpxor(rhs, lhs, output);
         return;
     }
     MOZ_CRASH("unexpected SIMD bitwise op");
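Note the dispatch on ins->type() introduced here: Float32x4 values use the float-domain forms (vandps, vorps, vxorps) while integer vectors use the integer-domain forms (vpand, vpor, vpxor). The results are bit-identical, but keeping each value in its own execution domain sidesteps the bypass delay some microarchitectures charge when data crosses between the integer and floating-point SIMD domains, which is the cost the bitwiseAndX4 TODO later in this diff refers to.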
@@ -96,29 +96,13 @@ LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0> *ins, MDefinition
     defineReuseInput(ins, mir, 0);
 }
 
-static bool
-UseAVXEncoding(MIRType type)
-{
-    if (!Assembler::HasAVX())
-        return false;
-
-    // TODO: For now, we just do this for floating-point types, until the rest
-    // of the assembler support is done.
-    if (IsFloatingPointType(type))
-        return true;
-    if (IsSimdType(type) && IsFloatingPointType(SimdTypeToScalarType(type)))
-        return true;
-
-    return false;
-}
-
 template<size_t Temps>
 void
 LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs)
 {
     // Without AVX, we'll need to use the x86 encodings where one of the
     // inputs must be the same location as the output.
-    if (!UseAVXEncoding(mir->type())) {
+    if (!Assembler::HasAVX()) {
         ins->setOperand(0, useRegisterAtStart(lhs));
         ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
         defineReuseInput(ins, mir, 0);
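With the TODO-gated helper gone, the AVX check reduces to Assembler::HasAVX(), and the interesting difference is the register-allocation policy: a destructive encoding forces the output to reuse operand 0, while the VEX form can define a fresh register. A condensed sketch of the two branches, where the non-AVX branch is taken verbatim from the hunk above and the AVX branch is an assumed shape shown only for contrast:

    if (!Assembler::HasAVX()) {
        // Legacy SSE: the output must alias the first input.
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
        defineReuseInput(ins, mir, 0);
    } else {
        // VEX (assumed shape): inputs and output may all differ.
        ins->setOperand(0, useRegisterAtStart(lhs));
        ins->setOperand(1, useRegisterAtStart(rhs));
        define(ins, mir);
    }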
@@ -217,7 +217,7 @@ MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg,
     // if not already compared to zero
     if (maybeNonZero) {
         // Compare to zero. Lets through {0, -0}.
-        xorpd(ScratchDoubleReg, ScratchDoubleReg);
+        zeroDouble(ScratchDoubleReg);
 
         // If reg is non-zero, jump to nonZero.
         branchDouble(DoubleNotEqual, reg, ScratchDoubleReg, &nonZero);
@@ -595,7 +595,7 @@ class MacroAssemblerX86Shared : public Assembler
     void convertInt32ToDouble(Register src, FloatRegister dest) {
         // cvtsi2sd and friends write only part of their output register, which
         // causes slowdowns on out-of-order processors. Explicitly break
-        // dependencies with xorpd (and xorps elsewhere), which are handled
+        // dependencies with vxorpd (and vxorps elsewhere), which are handled
         // specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
         // 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
         // document.
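The renamed comment still describes the same trick: cvtsi2sd writes only the low lanes of its xmm destination, leaving a false dependency on whatever the register held before, and zeroing idioms such as vxorpd reg, reg, reg are recognized by the register renamer and execute without reading their inputs, so issuing one first severs that dependency chain.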
@@ -763,10 +763,10 @@ class MacroAssemblerX86Shared : public Assembler
         movapd(src, dest);
     }
     void zeroDouble(FloatRegister reg) {
-        xorpd(reg, reg);
+        vxorpd(reg, reg, reg);
     }
     void zeroFloat32(FloatRegister reg) {
-        xorps(reg, reg);
+        vxorps(reg, reg, reg);
     }
     void negateDouble(FloatRegister reg) {
         // From MacroAssemblerX86Shared::maybeInlineDouble
@@ -774,14 +774,14 @@ class MacroAssemblerX86Shared : public Assembler
         psllq(Imm32(63), ScratchDoubleReg);
 
         // XOR the float in a float register with -0.0.
-        xorpd(ScratchDoubleReg, reg); // s ^ 0x80000000000000
+        vxorpd(ScratchDoubleReg, reg, reg); // s ^ 0x80000000000000
     }
     void negateFloat(FloatRegister reg) {
         pcmpeqw(ScratchFloat32Reg, ScratchFloat32Reg);
         psllq(Imm32(31), ScratchFloat32Reg);
 
         // XOR the float in a float register with -0.0.
-        xorps(ScratchFloat32Reg, reg); // s ^ 0x80000000
+        vxorps(ScratchFloat32Reg, reg, reg); // s ^ 0x80000000
     }
     void addDouble(FloatRegister src, FloatRegister dest) {
         vaddsd(src, dest, dest);
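negateDouble and negateFloat manufacture the -0.0 mask in a scratch register (pcmpeqw of a register with itself produces all ones; psllq then shifts everything out except the sign bit of each lane) and XOR it into the value. The scalar equivalent, as a standalone sketch:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        // pcmpeqw reg, reg -> all ones; psllq $63 -> only the sign bit remains.
        uint64_t mask = ~uint64_t(0) << 63;   // 0x8000000000000000
        double d = 1.5;
        uint64_t u;
        memcpy(&u, &d, sizeof u);
        u ^= mask;                            // vxorpd: flip the sign bit
        memcpy(&d, &u, sizeof d);
        printf("%g\n", d);                    // prints -1.5
    }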
@@ -820,16 +820,22 @@ class MacroAssemblerX86Shared : public Assembler
     void bitwiseAndX4(const Operand &src, FloatRegister dest) {
         // TODO Using the "ps" variant for all types incurs a domain crossing
         // penalty for integer types and double.
-        andps(src, dest);
+        vandps(src, dest, dest);
     }
     void bitwiseAndNotX4(const Operand &src, FloatRegister dest) {
-        andnps(src, dest);
+        vandnps(src, dest, dest);
     }
     void bitwiseOrX4(const Operand &src, FloatRegister dest) {
-        orps(src, dest);
+        vorps(src, dest, dest);
     }
     void bitwiseXorX4(const Operand &src, FloatRegister dest) {
-        xorps(src, dest);
+        vxorps(src, dest, dest);
     }
+    void zeroFloat32x4(FloatRegister dest) {
+        vxorps(dest, dest, dest);
+    }
+    void zeroInt32x4(FloatRegister dest) {
+        vpxor(dest, dest, dest);
+    }
 
     void loadAlignedInt32x4(const Address &src, FloatRegister dest) {
@@ -875,10 +881,10 @@ class MacroAssemblerX86Shared : public Assembler
         pcmpgtd(src, dest);
     }
     void packedAddInt32(const Operand &src, FloatRegister dest) {
-        paddd(src, dest);
+        vpaddd(src, dest, dest);
     }
     void packedSubInt32(const Operand &src, FloatRegister dest) {
-        psubd(src, dest);
+        vpsubd(src, dest, dest);
     }
     void packedReciprocalFloat32x4(const Operand &src, FloatRegister dest) {
         // This function is an approximation of the result, this might need
@@ -1105,7 +1111,7 @@ class MacroAssemblerX86Shared : public Assembler
 
         // Loading zero with xor is specially optimized in hardware.
         if (u == 0) {
-            xorpd(dest, dest);
+            zeroDouble(dest);
             return true;
         }
 
@@ -1125,7 +1131,7 @@ class MacroAssemblerX86Shared : public Assembler
 
         // See comment above
         if (u == 0) {
-            xorps(dest, dest);
+            zeroFloat32(dest);
             return true;
         }
         return false;
@@ -1135,7 +1141,7 @@ class MacroAssemblerX86Shared : public Assembler
         static const SimdConstant zero = SimdConstant::CreateX4(0, 0, 0, 0);
         static const SimdConstant minusOne = SimdConstant::CreateX4(-1, -1, -1, -1);
         if (v == zero) {
-            pxor(dest, dest);
+            zeroInt32x4(dest);
             return true;
         }
         if (v == minusOne) {
@@ -1149,7 +1155,7 @@ class MacroAssemblerX86Shared : public Assembler
         if (v == zero) {
             // This won't get inlined if the SimdConstant v contains -0 in any
             // lane, as operator== here does a memcmp.
-            xorps(dest, dest);
+            zeroFloat32x4(dest);
             return true;
         }
         return false;
@@ -85,9 +85,9 @@ MoveEmitterX86::maybeEmitOptimizedCycle(const MoveResolver &moves, size_t i,
         // it's cheap to do an XOR swap.
         FloatRegister a = moves.getMove(i).to().floatReg();
         FloatRegister b = moves.getMove(i + 1).to().floatReg();
-        masm.xorpd(a, b);
-        masm.xorpd(b, a);
-        masm.xorpd(a, b);
+        masm.vxorpd(a, b, b);
+        masm.vxorpd(b, a, a);
+        masm.vxorpd(a, b, b);
         return true;
     }
 
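The three vxorpd instructions are the classic XOR swap in non-destructive spelling. With vxorpd(src1, src0, dest) computing dest = src0 ^ src1, the sequence runs b = b ^ a, then a = a ^ b (which is now the original b), then b = b ^ a (recovering the original a), exchanging the two registers without a spare temporary.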