Bug 1111241 - SpiderMonkey: Use VEX encodings for add, sub, and, or, xor, and andn r=jandem

Dan Gohman 2014-12-15 20:53:59 -08:00
Parent 0f58581e92
Commit 2911f76be6
8 changed files with 277 additions and 189 deletions
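The pattern repeated through this commit: each destructive two-operand SSE wrapper (dest = dest op src) becomes a three-operand VEX-style wrapper (dest = src0 op src1), letting AVX hardware write a fresh destination instead of clobbering an input. A minimal standalone sketch of that operand-convention difference, using portable intrinsics rather than the patch's assembler API:

#include <immintrin.h>
#include <cstdio>

int main()
{
    __m128i x = _mm_set1_epi32(3), y = _mm_set1_epi32(4);
    // _mm_add_epi32 assembles to "paddd" without AVX (overwriting an input)
    // or "vpaddd dst, x, y" with AVX (three operands, inputs preserved),
    // mirroring the paddd -> vpaddd renames below.
    __m128i z = _mm_add_epi32(x, y);
    printf("%d\n", _mm_cvtsi128_si32(z)); // 7
}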

View file

@@ -447,6 +447,9 @@ class LSimdBinaryBitwiseX4 : public LInstructionHelper<1, 2, 0>
MSimdBinaryBitwise::Operation operation() const {
return mir_->toSimdBinaryBitwise()->operation();
}
MIRType type() const {
return mir_->type();
}
};
class LSimdShift : public LInstructionHelper<1, 2, 0>

View file

@@ -1784,33 +1784,33 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_ASSERT(HasSSE2());
masm.movd_rr(src.code(), dest.code());
}
void paddd(const Operand &src, FloatRegister dest) {
void vpaddd(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src.kind()) {
switch (src1.kind()) {
case Operand::FPREG:
masm.paddd_rr(src.fpu(), dest.code());
masm.vpaddd_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.paddd_mr(src.disp(), src.base(), dest.code());
masm.vpaddd_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.paddd_mr(src.address(), dest.code());
masm.vpaddd_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void psubd(const Operand &src, FloatRegister dest) {
void vpsubd(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src.kind()) {
switch (src1.kind()) {
case Operand::FPREG:
masm.psubd_rr(src.fpu(), dest.code());
masm.vpsubd_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.psubd_mr(src.disp(), src.base(), dest.code());
masm.vpsubd_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.psubd_mr(src.address(), dest.code());
masm.vpsubd_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
@@ -1932,75 +1932,152 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_CRASH("unexpected operand kind");
}
}
void andps(const Operand &src, FloatRegister dest) {
void vandps(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src.kind()) {
switch (src1.kind()) {
case Operand::FPREG:
masm.andps_rr(src.fpu(), dest.code());
masm.vandps_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.andps_mr(src.disp(), src.base(), dest.code());
masm.vandps_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.andps_mr(src.address(), dest.code());
masm.vandps_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void andnps(const Operand &src, FloatRegister dest) {
void vandnps(const Operand &src1, FloatRegister src0, FloatRegister dest) {
// Negates the bits of src0 and then ANDs with src1
MOZ_ASSERT(HasSSE2());
switch (src.kind()) {
switch (src1.kind()) {
case Operand::FPREG:
masm.andnps_rr(src.fpu(), dest.code());
masm.vandnps_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.andnps_mr(src.disp(), src.base(), dest.code());
masm.vandnps_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.andnps_mr(src.address(), dest.code());
masm.vandnps_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void orps(const Operand &src, FloatRegister dest) {
void vorps(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src.kind()) {
switch (src1.kind()) {
case Operand::FPREG:
masm.orps_rr(src.fpu(), dest.code());
masm.vorps_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.orps_mr(src.disp(), src.base(), dest.code());
masm.vorps_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.orps_mr(src.address(), dest.code());
masm.vorps_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void xorps(const Operand &src, FloatRegister dest) {
void vxorps(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src.kind()) {
switch (src1.kind()) {
case Operand::FPREG:
masm.xorps_rr(src.fpu(), dest.code());
masm.vxorps_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.xorps_mr(src.disp(), src.base(), dest.code());
masm.vxorps_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.xorps_mr(src.address(), dest.code());
masm.vxorps_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void pxor(FloatRegister src, FloatRegister dest) {
void vpand(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.pxor_rr(src.code(), dest.code());
masm.vpand_rr(src1.code(), src0.code(), dest.code());
}
void vpand(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src1.kind()) {
case Operand::FPREG:
masm.vpand_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.vpand_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.vpand_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void vpor(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vpor_rr(src1.code(), src0.code(), dest.code());
}
void vpor(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src1.kind()) {
case Operand::FPREG:
masm.vpor_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.vpor_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.vpor_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void vpxor(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vpxor_rr(src1.code(), src0.code(), dest.code());
}
void vpxor(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src1.kind()) {
case Operand::FPREG:
masm.vpxor_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.vpxor_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.vpxor_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void vpandn(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vpandn_rr(src1.code(), src0.code(), dest.code());
}
void vpandn(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src1.kind()) {
case Operand::FPREG:
masm.vpandn_rr(src1.fpu(), src0.code(), dest.code());
break;
case Operand::MEM_REG_DISP:
masm.vpandn_mr(src1.disp(), src1.base(), src0.code(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.vpandn_mr(src1.address(), src0.code(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void pshufd(uint32_t mask, FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.pshufd_irr(mask, src.code(), dest.code());
@@ -2199,29 +2276,29 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_CRASH("unexpected operand kind");
}
}
void xorpd(FloatRegister src, FloatRegister dest) {
void vxorpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.xorpd_rr(src.code(), dest.code());
masm.vxorpd_rr(src1.code(), src0.code(), dest.code());
}
void xorps(FloatRegister src, FloatRegister dest) {
void vxorps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.xorps_rr(src.code(), dest.code());
masm.vxorps_rr(src1.code(), src0.code(), dest.code());
}
void orpd(FloatRegister src, FloatRegister dest) {
void vorpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.orpd_rr(src.code(), dest.code());
masm.vorpd_rr(src1.code(), src0.code(), dest.code());
}
void orps(FloatRegister src, FloatRegister dest) {
void vorps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.orps_rr(src.code(), dest.code());
masm.vorps_rr(src1.code(), src0.code(), dest.code());
}
void andpd(FloatRegister src, FloatRegister dest) {
void vandpd(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.andpd_rr(src.code(), dest.code());
masm.vandpd_rr(src1.code(), src0.code(), dest.code());
}
void andps(FloatRegister src, FloatRegister dest) {
void vandps(FloatRegister src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.andps_rr(src.code(), dest.code());
masm.vandps_rr(src1.code(), src0.code(), dest.code());
}
void sqrtsd(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());

View file

@@ -386,7 +386,10 @@ private:
OP2_PEXTRW_GdUdIb = 0xC5,
OP2_SHUFPS_VpsWpsIb = 0xC6,
OP2_PSRLD_VdqWdq = 0xD2,
OP2_PANDDQ_VdqWdq = 0xDB,
OP2_PANDNDQ_VdqWdq = 0xDF,
OP2_PSRAD_VdqWdq = 0xE2,
OP2_PORDQ_VdqWdq = 0xEB,
OP2_PXORDQ_VdqWdq = 0xEF,
OP2_PSLLD_VdqWdq = 0xF2,
OP2_PMULUDQ_VdqWdq = 0xF4,
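The new OP2_PAND/PANDN/POR/PXOR entries are the same opcode bytes as the legacy SSE forms; under VEX only the prefix changes. As background (general x86 encoding, not code from this patch; vex2 is a hypothetical helper), the two-byte VEX prefix a VEX_PD op such as vpaddd carries looks like this:

#include <cstdint>
#include <cstdio>

// byte0 = 0xC5, byte1 = ~R | ~vvvv | L | pp  (two-byte VEX form)
static uint8_t vex2(bool rExt, unsigned vvvv, bool l256, unsigned pp)
{
    uint8_t b = 0;
    b |= (rExt ? 0u : 1u) << 7; // ModRM.reg extension, stored inverted
    b |= (~vvvv & 0xFu) << 3;   // second source register, stored inverted
    b |= (l256 ? 1u : 0u) << 2; // vector length: 0 = 128-bit
    b |= pp & 3u;               // implied prefix: 01 = 0x66 (VEX_PD)
    return b;
}

int main()
{
    // vpaddd xmm2, xmm1, xmm0 encodes as C5 F1 FE D0.
    printf("C5 %02X FE D0\n", vex2(false, /*vvvv=xmm1*/ 1, false, /*pp=66*/ 1));
}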
@@ -779,44 +782,30 @@ public:
m_formatter.twoByteOp(OP2_XADD_EvGv, offset, base, index, scale, srcdest);
}
void paddd_rr(XMMRegisterID src, XMMRegisterID dst)
void vpaddd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("paddd %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_PADDD_VdqWdq, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, src1, src0, dst);
}
void paddd_mr(int offset, RegisterID base, XMMRegisterID dst)
void vpaddd_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
spew("paddd %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_PADDD_VdqWdq, offset, base, (RegisterID)dst);
twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, offset, base, src0, dst);
}
void paddd_mr(const void* address, XMMRegisterID dst)
void vpaddd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
{
spew("paddd %p, %s", address, nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_PADDD_VdqWdq, address, (RegisterID)dst);
twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, address, src0, dst);
}
void psubd_rr(XMMRegisterID src, XMMRegisterID dst)
void vpsubd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("psubd %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_PSUBD_VdqWdq, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, src1, src0, dst);
}
void psubd_mr(int offset, RegisterID base, XMMRegisterID dst)
void vpsubd_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
spew("psubd %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_PSUBD_VdqWdq, offset, base, (RegisterID)dst);
twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, offset, base, src0, dst);
}
void psubd_mr(const void* address, XMMRegisterID dst)
void vpsubd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
{
spew("psubd %p, %s", address, nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_PSUBD_VdqWdq, address, (RegisterID)dst);
twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, address, src0, dst);
}
void pmuludq_rr(XMMRegisterID src, XMMRegisterID dst)
@@ -3016,11 +3005,53 @@ public:
m_formatter.twoByteOp(OP2_MOVD_VdEd, src, (RegisterID)dst);
}
void pxor_rr(XMMRegisterID src, XMMRegisterID dst)
void vpand_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("pxor %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_PXORDQ_VdqWdq, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, src1, src0, dst);
}
void vpand_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, offset, base, src0, dst);
}
void vpand_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpand", VEX_PD, OP2_PANDDQ_VdqWdq, address, src0, dst);
}
void vpor_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, src1, src0, dst);
}
void vpor_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, offset, base, src0, dst);
}
void vpor_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpor", VEX_PD, OP2_PORDQ_VdqWdq, address, src0, dst);
}
void vpxor_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, src1, src0, dst);
}
void vpxor_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, offset, base, src0, dst);
}
void vpxor_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpxor", VEX_PD, OP2_PXORDQ_VdqWdq, address, src0, dst);
}
void vpandn_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, src1, src0, dst);
}
void vpandn_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, offset, base, src0, dst);
}
void vpandn_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpandn", VEX_PD, OP2_PANDNDQ_VdqWdq, address, src0, dst);
}
void pshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
@@ -3703,101 +3734,79 @@ public:
twoByteOpSimd("vdivss", VEX_SS, OP2_DIVSD_VsdWsd, offset, base, src0, dst);
}
void xorpd_rr(XMMRegisterID src, XMMRegisterID dst)
void vxorpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("xorpd %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_XORPD_VpdWpd, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vxorpd", VEX_PD, OP2_XORPD_VpdWpd, src1, src0, dst);
}
void orpd_rr(XMMRegisterID src, XMMRegisterID dst)
void vorpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("orpd %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_ORPD_VpdWpd, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vorpd", VEX_PD, OP2_ORPD_VpdWpd, src1, src0, dst);
}
void andpd_rr(XMMRegisterID src, XMMRegisterID dst)
void vandpd_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("andpd %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_ANDPD_VpdWpd, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vandpd", VEX_PD, OP2_ANDPD_VpdWpd, src1, src0, dst);
}
void andps_rr(XMMRegisterID src, XMMRegisterID dst)
void vandps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("andps %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.twoByteOp(OP2_ANDPS_VpsWps, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, src1, src0, dst);
}
void andps_mr(int offset, RegisterID base, XMMRegisterID dst)
void vandps_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
spew("andps %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
m_formatter.twoByteOp(OP2_ANDPS_VpsWps, offset, base, (RegisterID)dst);
twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, offset, base, src0, dst);
}
void andps_mr(const void* address, XMMRegisterID dst)
void vandps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
{
spew("andps %p, %s", address, nameFPReg(dst));
m_formatter.twoByteOp(OP2_ANDPS_VpsWps, address, (RegisterID)dst);
twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, address, src0, dst);
}
void andnps_rr(XMMRegisterID src, XMMRegisterID dst)
void vandnps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("andnps %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.twoByteOp(OP2_ANDNPS_VpsWps, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, src1, src0, dst);
}
void andnps_mr(int offset, RegisterID base, XMMRegisterID dst)
void vandnps_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
spew("andnps %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
m_formatter.twoByteOp(OP2_ANDNPS_VpsWps, offset, base, (RegisterID)dst);
twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, offset, base, src0, dst);
}
void andnps_mr(const void* address, XMMRegisterID dst)
void vandnps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
{
spew("andnps %p, %s", address, nameFPReg(dst));
m_formatter.twoByteOp(OP2_ANDPS_VpsWps, address, (RegisterID)dst);
twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, address, src0, dst);
}
void orps_rr(XMMRegisterID src, XMMRegisterID dst)
void vorps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("orps %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.twoByteOp(OP2_ORPS_VpsWps, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, src1, src0, dst);
}
void orps_mr(int offset, RegisterID base, XMMRegisterID dst)
void vorps_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
spew("orps %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
m_formatter.twoByteOp(OP2_ORPS_VpsWps, offset, base, (RegisterID)dst);
twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, offset, base, src0, dst);
}
void orps_mr(const void* address, XMMRegisterID dst)
void vorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
{
spew("orps %p, %s", address, nameFPReg(dst));
m_formatter.twoByteOp(OP2_ORPS_VpsWps, address, (RegisterID)dst);
twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, address, src0, dst);
}
void xorps_rr(XMMRegisterID src, XMMRegisterID dst)
void vxorps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
{
spew("xorps %s, %s", nameFPReg(src), nameFPReg(dst));
m_formatter.twoByteOp(OP2_XORPS_VpsWps, (RegisterID)src, (RegisterID)dst);
twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, src1, src0, dst);
}
void xorps_mr(int offset, RegisterID base, XMMRegisterID dst)
void vxorps_mr(int offset, RegisterID base, XMMRegisterID src0, XMMRegisterID dst)
{
spew("xorps %s0x%x(%s), %s",
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
m_formatter.twoByteOp(OP2_XORPS_VpsWps, offset, base, (RegisterID)dst);
twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, offset, base, src0, dst);
}
void xorps_mr(const void* address, XMMRegisterID dst)
void vxorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
{
spew("xorps %p, %s", address, nameFPReg(dst));
m_formatter.twoByteOp(OP2_XORPS_VpsWps, address, (RegisterID)dst);
twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, address, src0, dst);
}
void sqrtsd_rr(XMMRegisterID src, XMMRegisterID dst)

View file

@@ -506,9 +506,9 @@ CodeGeneratorX86Shared::visitMinMaxD(LMinMaxD *ins)
// and negative zero. These instructions merge the sign bits in that
// case, and are no-ops otherwise.
if (ins->mir()->isMax())
masm.andpd(second, first);
masm.vandpd(second, first, first);
else
masm.orpd(second, first);
masm.vorpd(second, first, first);
masm.jump(&done);
// x86's min/max are not symmetric; if either operand is a NaN, they return
@@ -557,9 +557,9 @@ CodeGeneratorX86Shared::visitMinMaxF(LMinMaxF *ins)
// and negative zero. These instructions merge the sign bits in that
// case, and are no-ops otherwise.
if (ins->mir()->isMax())
masm.andps(second, first);
masm.vandps(second, first, first);
else
masm.orps(second, first);
masm.vorps(second, first, first);
masm.jump(&done);
// x86's min/max are not symmetric; if either operand is a NaN, they return
@@ -590,7 +590,7 @@ CodeGeneratorX86Shared::visitAbsD(LAbsD *ins)
// Load a value which is all ones except for the sign bit.
masm.loadConstantDouble(SpecificNaN<double>(0, FloatingPoint<double>::kSignificandBits),
ScratchDoubleReg);
masm.andpd(ScratchDoubleReg, input);
masm.vandpd(ScratchDoubleReg, input, input);
}
void
@@ -601,7 +601,7 @@ CodeGeneratorX86Shared::visitAbsF(LAbsF *ins)
// Same trick as visitAbsD above.
masm.loadConstantFloat32(SpecificNaN<float>(0, FloatingPoint<float>::kSignificandBits),
ScratchFloat32Reg);
masm.andps(ScratchFloat32Reg, input);
masm.vandps(ScratchFloat32Reg, input, input);
}
void
@@ -2616,15 +2616,15 @@ CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *ins)
{
FloatRegister lhs = ToFloatRegister(ins->lhs());
Operand rhs = ToOperand(ins->rhs());
MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
FloatRegister output = ToFloatRegister(ins->output());
MSimdBinaryArith::Operation op = ins->operation();
switch (op) {
case MSimdBinaryArith::Add:
masm.packedAddInt32(rhs, lhs);
masm.vpaddd(rhs, lhs, output);
return;
case MSimdBinaryArith::Sub:
masm.packedSubInt32(rhs, lhs);
masm.vpsubd(rhs, lhs, output);
return;
case MSimdBinaryArith::Mul: {
if (AssemblerX86Shared::HasSSE41()) {
@@ -2695,15 +2695,15 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
masm.vmaxps(Operand(lhs), rhsCopy, tmp);
masm.vmaxps(rhs, lhs, output);
masm.andps(tmp, output);
masm.orps(ScratchSimdReg, output); // or in the all-ones NaNs
masm.vandps(tmp, output, output);
masm.vorps(ScratchSimdReg, output, output); // or in the all-ones NaNs
return;
}
case MSimdBinaryArith::Min: {
FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, ScratchSimdReg);
masm.vminps(Operand(lhs), rhsCopy, ScratchSimdReg);
masm.vminps(rhs, lhs, output);
masm.orps(ScratchSimdReg, output); // NaN or'd with arbitrary bits is NaN
masm.vorps(ScratchSimdReg, output, output); // NaN or'd with arbitrary bits is NaN
return;
}
case MSimdBinaryArith::MinNum: {
@@ -2713,11 +2713,11 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
FloatRegister mask = ScratchSimdReg;
masm.pcmpeqd(Operand(lhs), mask);
masm.andps(tmp, mask);
masm.vandps(tmp, mask, mask);
FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp);
masm.vminps(rhs, lhsCopy, tmp);
masm.orps(mask, tmp);
masm.vorps(mask, tmp, tmp);
FloatRegister rhsCopy = masm.reusedInputAlignedFloat32x4(rhs, mask);
masm.vcmpneqps(rhs, rhsCopy, mask);
@@ -2730,9 +2730,9 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
// it requires the mask to be in xmm0.
if (lhs != output)
masm.movaps(lhs, output);
masm.andps(Operand(mask), output);
masm.andnps(Operand(tmp), mask);
masm.orps(Operand(mask), output);
masm.vandps(Operand(mask), output, output);
masm.vandnps(Operand(tmp), mask, mask);
masm.vorps(Operand(mask), output, output);
}
return;
}
@@ -2743,11 +2743,11 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
FloatRegister tmp = ToFloatRegister(ins->temp());
masm.loadConstantInt32x4(SimdConstant::SplatX4(int32_t(0x80000000)), tmp);
masm.andps(tmp, mask);
masm.vandps(tmp, mask, mask);
FloatRegister lhsCopy = masm.reusedInputFloat32x4(lhs, tmp);
masm.vmaxps(rhs, lhsCopy, tmp);
masm.andnps(Operand(tmp), mask);
masm.vandnps(Operand(tmp), mask, mask);
// Ensure tmp always contains the temporary result
mask = tmp;
@@ -2764,9 +2764,9 @@ CodeGeneratorX86Shared::visitSimdBinaryArithFx4(LSimdBinaryArithFx4 *ins)
// it requires the mask to be in xmm0.
if (lhs != output)
masm.movaps(lhs, output);
masm.andps(Operand(mask), output);
masm.andnps(Operand(tmp), mask);
masm.orps(Operand(mask), output);
masm.vandps(Operand(mask), output, output);
masm.vandnps(Operand(tmp), mask, mask);
masm.vorps(Operand(mask), output, output);
}
return;
}
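The vandps/vandnps/vorps triple just above is a bitwise select, output = (mask & output) | (~mask & tmp). The same computation with intrinsics, as a standalone sanity check (values are illustrative):

#include <immintrin.h>
#include <cstdio>

int main()
{
    __m128 a = _mm_set1_ps(1.0f), b = _mm_set1_ps(2.0f);
    __m128 mask = _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, 0));
    // (mask & a) | (~mask & b): lanes with an all-ones mask take a,
    // the rest take b -- the same select the codegen builds above.
    __m128 out = _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b));
    float r[4];
    _mm_storeu_ps(r, out);
    printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]); // 2 1 2 1
}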
@@ -2784,7 +2784,7 @@ CodeGeneratorX86Shared::visitSimdUnaryArithIx4(LSimdUnaryArithIx4 *ins)
switch (ins->operation()) {
case MSimdUnaryArith::neg:
masm.pxor(out, out);
masm.zeroInt32x4(out);
masm.packedSubInt32(in, out);
return;
case MSimdUnaryArith::not_:
@@ -2848,18 +2848,27 @@ CodeGeneratorX86Shared::visitSimdBinaryBitwiseX4(LSimdBinaryBitwiseX4 *ins)
{
FloatRegister lhs = ToFloatRegister(ins->lhs());
Operand rhs = ToOperand(ins->rhs());
MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
FloatRegister output = ToFloatRegister(ins->output());
MSimdBinaryBitwise::Operation op = ins->operation();
switch (op) {
case MSimdBinaryBitwise::and_:
masm.bitwiseAndX4(rhs, lhs);
if (ins->type() == MIRType_Float32x4)
masm.vandps(rhs, lhs, output);
else
masm.vpand(rhs, lhs, output);
return;
case MSimdBinaryBitwise::or_:
masm.bitwiseOrX4(rhs, lhs);
if (ins->type() == MIRType_Float32x4)
masm.vorps(rhs, lhs, output);
else
masm.vpor(rhs, lhs, output);
return;
case MSimdBinaryBitwise::xor_:
masm.bitwiseXorX4(rhs, lhs);
if (ins->type() == MIRType_Float32x4)
masm.vxorps(rhs, lhs, output);
else
masm.vpxor(rhs, lhs, output);
return;
}
MOZ_CRASH("unexpected SIMD bitwise op");

View file

@@ -96,29 +96,13 @@ LIRGeneratorX86Shared::lowerForALU(LInstructionHelper<1, 2, 0> *ins, MDefinition
defineReuseInput(ins, mir, 0);
}
static bool
UseAVXEncoding(MIRType type)
{
if (!Assembler::HasAVX())
return false;
// TODO: For now, we just do this for floating-point types, until the rest
// of the assembler support is done.
if (IsFloatingPointType(type))
return true;
if (IsSimdType(type) && IsFloatingPointType(SimdTypeToScalarType(type)))
return true;
return false;
}
template<size_t Temps>
void
LIRGeneratorX86Shared::lowerForFPU(LInstructionHelper<1, 2, Temps> *ins, MDefinition *mir, MDefinition *lhs, MDefinition *rhs)
{
// Without AVX, we'll need to use the x86 encodings where one of the
// inputs must be the same location as the output.
if (!UseAVXEncoding(mir->type())) {
if (!Assembler::HasAVX()) {
ins->setOperand(0, useRegisterAtStart(lhs));
ins->setOperand(1, lhs != rhs ? use(rhs) : useAtStart(rhs));
defineReuseInput(ins, mir, 0);

View file

@@ -217,7 +217,7 @@ MacroAssemblerX86Shared::branchNegativeZero(FloatRegister reg,
// if not already compared to zero
if (maybeNonZero) {
// Compare to zero. Lets through {0, -0}.
xorpd(ScratchDoubleReg, ScratchDoubleReg);
zeroDouble(ScratchDoubleReg);
// If reg is non-zero, jump to nonZero.
branchDouble(DoubleNotEqual, reg, ScratchDoubleReg, &nonZero);

View file

@@ -595,7 +595,7 @@ class MacroAssemblerX86Shared : public Assembler
void convertInt32ToDouble(Register src, FloatRegister dest) {
// cvtsi2sd and friends write only part of their output register, which
// causes slowdowns on out-of-order processors. Explicitly break
// dependencies with xorpd (and xorps elsewhere), which are handled
// dependencies with vxorpd (and vxorps elsewhere), which are handled
// specially in modern CPUs, for this purpose. See sections 8.14, 9.8,
// 10.8, 12.9, 13.16, 14.14, and 15.8 of Agner's Microarchitecture
// document.
@@ -763,10 +763,10 @@ class MacroAssemblerX86Shared : public Assembler
movapd(src, dest);
}
void zeroDouble(FloatRegister reg) {
xorpd(reg, reg);
vxorpd(reg, reg, reg);
}
void zeroFloat32(FloatRegister reg) {
xorps(reg, reg);
vxorps(reg, reg, reg);
}
void negateDouble(FloatRegister reg) {
// From MacroAssemblerX86Shared::maybeInlineDouble
@@ -774,14 +774,14 @@ class MacroAssemblerX86Shared : public Assembler
psllq(Imm32(63), ScratchDoubleReg);
// XOR the float in a float register with -0.0.
xorpd(ScratchDoubleReg, reg); // s ^ 0x8000000000000000
vxorpd(ScratchDoubleReg, reg, reg); // s ^ 0x8000000000000000
}
void negateFloat(FloatRegister reg) {
pcmpeqw(ScratchFloat32Reg, ScratchFloat32Reg);
psllq(Imm32(31), ScratchFloat32Reg);
// XOR the float in a float register with -0.0.
xorps(ScratchFloat32Reg, reg); // s ^ 0x80000000
vxorps(ScratchFloat32Reg, reg, reg); // s ^ 0x80000000
}
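negateDouble builds the sign-only mask in a scratch register (all-ones, then shifted left 63) and XORs it in. The same trick with intrinsics, where the -0.0 literal supplies the 0x8000000000000000 mask directly:

#include <immintrin.h>
#include <cstdio>

int main()
{
    // x ^ 0x8000000000000000 flips only the sign bit of a double.
    __m128d x = _mm_set_sd(1.5);
    __m128d signMask = _mm_set_sd(-0.0); // just the sign bit set
    printf("%g\n", _mm_cvtsd_f64(_mm_xor_pd(x, signMask))); // -1.5
}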
void addDouble(FloatRegister src, FloatRegister dest) {
vaddsd(src, dest, dest);
@@ -820,16 +820,22 @@ class MacroAssemblerX86Shared : public Assembler
void bitwiseAndX4(const Operand &src, FloatRegister dest) {
// TODO Using the "ps" variant for all types incurs a domain crossing
// penalty for integer types and double.
andps(src, dest);
vandps(src, dest, dest);
}
void bitwiseAndNotX4(const Operand &src, FloatRegister dest) {
andnps(src, dest);
vandnps(src, dest, dest);
}
void bitwiseOrX4(const Operand &src, FloatRegister dest) {
orps(src, dest);
vorps(src, dest, dest);
}
void bitwiseXorX4(const Operand &src, FloatRegister dest) {
xorps(src, dest);
vxorps(src, dest, dest);
}
void zeroFloat32x4(FloatRegister dest) {
vxorps(dest, dest, dest);
}
void zeroInt32x4(FloatRegister dest) {
vpxor(dest, dest, dest);
}
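These two helpers canonicalize the xor-with-self idiom that the later hunks route all zeroing through; CPUs recognize it as a dependency-breaking zero rather than a real read of the register. Compilers emit the identical instruction for the setzero intrinsics, e.g.:

#include <immintrin.h>
#include <cstdio>

int main()
{
    // _mm_setzero_si128 assembles to (v)pxor of a register with itself --
    // the same instruction zeroInt32x4 now emits.
    __m128i z = _mm_setzero_si128();
    printf("%d\n", _mm_cvtsi128_si32(z)); // 0
}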
void loadAlignedInt32x4(const Address &src, FloatRegister dest) {
@@ -875,10 +881,10 @@ class MacroAssemblerX86Shared : public Assembler
pcmpgtd(src, dest);
}
void packedAddInt32(const Operand &src, FloatRegister dest) {
paddd(src, dest);
vpaddd(src, dest, dest);
}
void packedSubInt32(const Operand &src, FloatRegister dest) {
psubd(src, dest);
vpsubd(src, dest, dest);
}
void packedReciprocalFloat32x4(const Operand &src, FloatRegister dest) {
// This function is an approximation of the result, this might need
@@ -1105,7 +1111,7 @@ class MacroAssemblerX86Shared : public Assembler
// Loading zero with xor is specially optimized in hardware.
if (u == 0) {
xorpd(dest, dest);
zeroDouble(dest);
return true;
}
@@ -1125,7 +1131,7 @@ class MacroAssemblerX86Shared : public Assembler
// See comment above
if (u == 0) {
xorps(dest, dest);
zeroFloat32(dest);
return true;
}
return false;
@@ -1135,7 +1141,7 @@ class MacroAssemblerX86Shared : public Assembler
static const SimdConstant zero = SimdConstant::CreateX4(0, 0, 0, 0);
static const SimdConstant minusOne = SimdConstant::CreateX4(-1, -1, -1, -1);
if (v == zero) {
pxor(dest, dest);
zeroInt32x4(dest);
return true;
}
if (v == minusOne) {
@@ -1149,7 +1155,7 @@ class MacroAssemblerX86Shared : public Assembler
if (v == zero) {
// This won't get inlined if the SimdConstant v contains -0 in any
// lane, as operator== here does a memcmp.
xorps(dest, dest);
zeroFloat32x4(dest);
return true;
}
return false;

View file

@@ -85,9 +85,9 @@ MoveEmitterX86::maybeEmitOptimizedCycle(const MoveResolver &moves, size_t i,
// it's cheap to do an XOR swap.
FloatRegister a = moves.getMove(i).to().floatReg();
FloatRegister b = moves.getMove(i + 1).to().floatReg();
masm.xorpd(a, b);
masm.xorpd(b, a);
masm.xorpd(a, b);
masm.vxorpd(a, b, b);
masm.vxorpd(b, a, a);
masm.vxorpd(a, b, b);
return true;
}
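This is the classic three-XOR swap applied to a two-register cycle; each VEX call passes its destination as a source, so the sequence behaves exactly like the old destructive xorpd form. On scalars:

#include <cstdio>

int main()
{
    unsigned a = 0x1234, b = 0x5678;
    b ^= a; // b = a0 ^ b0            (vxorpd(a, b, b))
    a ^= b; // a = a0 ^ (a0^b0) = b0  (vxorpd(b, a, a))
    b ^= a; // b = (a0^b0) ^ b0 = a0  (vxorpd(a, b, b))
    printf("%x %x\n", a, b); // 5678 1234
}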