зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1108825: Implement SIMD.int32x4.mul in Odin; r=sunfish
This commit is contained in:
Родитель
ca1995fa93
Коммит
fc140a436a
|
@ -140,7 +140,6 @@
|
|||
_(reciprocalSqrt) \
|
||||
_(fromInt32x4) \
|
||||
_(fromInt32x4Bits) \
|
||||
_(mul) \
|
||||
_(div) \
|
||||
_(max) \
|
||||
_(min) \
|
||||
|
@ -149,6 +148,7 @@
|
|||
#define FOREACH_COMMONX4_SIMD_OP(_) \
|
||||
_(add) \
|
||||
_(sub) \
|
||||
_(mul) \
|
||||
_(lessThan) \
|
||||
_(lessThanOrEqual) \
|
||||
_(equal) \
|
||||
|
|
|
@ -12,6 +12,7 @@ if (!isSimdAvailable() || typeof SIMD === 'undefined') {
|
|||
const I32 = 'var i4 = glob.SIMD.int32x4;'
|
||||
const I32A = 'var i4a = i4.add;'
|
||||
const I32S = 'var i4s = i4.sub;'
|
||||
const I32M = 'var i4m = i4.mul;'
|
||||
const F32 = 'var f4 = glob.SIMD.float32x4;'
|
||||
const F32A = 'var f4a = f4.add;'
|
||||
const F32S = 'var f4s = f4.sub;'
|
||||
|
@ -453,9 +454,19 @@ CheckF4(F32S, 'var x=f4(13.37,2,3,4); var y=f4(4,3,5,2); x=f4s(x,y)', [Math.frou
|
|||
CheckF4(F32S, 'var x=f4(13.37,2,3,4); var y=f4(4,3,5,2); x=f4(f4s(x,y))', [Math.fround(13.37) - 4,-1,-2,2]);
|
||||
|
||||
// 2.3.3. Multiplications / Divisions
|
||||
assertAsmTypeFail('glob', USE_ASM + I32 + "var f4m=i4.mul; function f() {} return f");
|
||||
assertAsmTypeFail('glob', USE_ASM + I32 + "var f4d=i4.div; function f() {} return f");
|
||||
|
||||
CheckI4(I32M, 'var x=i4(1,2,3,4); var y=i4(-1,1,0,2); x=i4m(x,y)', [-1,2,0,8]);
|
||||
CheckI4(I32M, 'var x=i4(5,4,3,2); var y=i4(1,2,3,4); x=i4m(x,y)', [5,8,9,8]);
|
||||
CheckI4(I32M, 'var x=i4(1,2,3,4); x=i4m(x,x)', [1,4,9,16]);
|
||||
// Overflow cases for int32x4 multiplication: lanes holding INT32_MIN and
// INT32_MAX are multiplied by small constants and by each other. The expected
// lane values are computed with Math.imul, so each check passes only if i4m
// agrees with Math.imul lane by lane.
// NOTE(review): INT32_MIN / INT32_MAX / CheckI4 are defined outside this view.
(function() {
|
||||
var m = INT32_MIN, M = INT32_MAX, imul = Math.imul;
|
||||
// Extreme values times small positive and negative factors.
CheckI4(I32M, `var x=i4(${m},${m}, ${M}, ${M}); var y=i4(2,-3,4,-5); x=i4m(x,y)`,
|
||||
[imul(m, 2), imul(m, -3), imul(M, 4), imul(M, -5)]);
|
||||
// Every pairing of the two extreme values.
CheckI4(I32M, `var x=i4(${m},${m}, ${M}, ${M}); var y=i4(${m}, ${M}, ${m}, ${M}); x=i4m(x,y)`,
|
||||
[imul(m, m), imul(m, M), imul(M, m), imul(M, M)]);
|
||||
})();
|
||||
|
||||
CheckF4(F32M, 'var x=f4(1,2,3,4); x=f4m(x,x)', [1,4,9,16]);
|
||||
CheckF4(F32M, 'var x=f4(1,2,3,4); var y=f4(4,3,5,2); x=f4m(x,y)', [4,6,15,8]);
|
||||
CheckF4(F32M, 'var x=f4(13.37,2,3,4); var y=f4(4,3,5,2); x=f4m(x,y)', [Math.fround(13.37) * 4,6,15,8]);
|
||||
|
|
|
@ -366,8 +366,7 @@ class LSimdBinaryCompFx4 : public LSimdBinaryComp
|
|||
};
|
||||
|
||||
// Binary SIMD arithmetic operation between two SIMD operands
|
||||
template<size_t Temps>
|
||||
class LSimdBinaryArith : public LInstructionHelper<1, 2, Temps>
|
||||
class LSimdBinaryArith : public LInstructionHelper<1, 2, 1>
|
||||
{
|
||||
public:
|
||||
LSimdBinaryArith() {}
|
||||
|
@ -378,6 +377,9 @@ class LSimdBinaryArith : public LInstructionHelper<1, 2, Temps>
|
|||
const LAllocation *rhs() {
|
||||
return this->getOperand(1);
|
||||
}
|
||||
const LDefinition *temp() {
|
||||
return getTemp(0);
|
||||
}
|
||||
|
||||
MSimdBinaryArith::Operation operation() const {
|
||||
return this->mir_->toSimdBinaryArith()->operation();
|
||||
|
@ -388,23 +390,19 @@ class LSimdBinaryArith : public LInstructionHelper<1, 2, Temps>
|
|||
};
|
||||
|
||||
// Binary SIMD arithmetic operation between two Int32x4 operands
|
||||
class LSimdBinaryArithIx4 : public LSimdBinaryArith<0>
|
||||
class LSimdBinaryArithIx4 : public LSimdBinaryArith
|
||||
{
|
||||
public:
|
||||
LIR_HEADER(SimdBinaryArithIx4);
|
||||
LSimdBinaryArithIx4() : LSimdBinaryArith<0>() {}
|
||||
LSimdBinaryArithIx4() : LSimdBinaryArith() {}
|
||||
};
|
||||
|
||||
// Binary SIMD arithmetic operation between two Float32x4 operands
|
||||
class LSimdBinaryArithFx4 : public LSimdBinaryArith<1>
|
||||
class LSimdBinaryArithFx4 : public LSimdBinaryArith
|
||||
{
|
||||
public:
|
||||
LIR_HEADER(SimdBinaryArithFx4);
|
||||
LSimdBinaryArithFx4() : LSimdBinaryArith<1>() {}
|
||||
|
||||
const LDefinition *temp() {
|
||||
return getTemp(0);
|
||||
}
|
||||
LSimdBinaryArithFx4() : LSimdBinaryArith() {}
|
||||
};
|
||||
|
||||
// Unary SIMD arithmetic operation on a SIMD operand
|
||||
|
|
|
@ -631,63 +631,6 @@ ReorderComparison(JSOp op, MDefinition **lhsp, MDefinition **rhsp)
|
|||
return op;
|
||||
}
|
||||
|
||||
static bool
|
||||
ShouldReorderCommutative(MDefinition *lhs, MDefinition *rhs, MInstruction *ins)
|
||||
{
|
||||
// lhs and rhs are used by the commutative operator.
|
||||
MOZ_ASSERT(lhs->hasDefUses());
|
||||
MOZ_ASSERT(rhs->hasDefUses());
|
||||
|
||||
// Ensure that if there is a constant, then it is in rhs.
|
||||
if (rhs->isConstant())
|
||||
return false;
|
||||
if (lhs->isConstant())
|
||||
return true;
|
||||
|
||||
// Since clobbering binary operations clobber the left operand, prefer a
|
||||
// non-constant lhs operand with no further uses. To be fully precise, we
|
||||
// should check whether this is the *last* use, but checking hasOneDefUse()
|
||||
// is a decent approximation which doesn't require any extra analysis.
|
||||
bool rhsSingleUse = rhs->hasOneDefUse();
|
||||
bool lhsSingleUse = lhs->hasOneDefUse();
|
||||
if (rhsSingleUse) {
|
||||
if (!lhsSingleUse)
|
||||
return true;
|
||||
} else {
|
||||
if (lhsSingleUse)
|
||||
return false;
|
||||
}
|
||||
|
||||
// If this is a reduction-style computation, such as
|
||||
//
|
||||
// sum = 0;
|
||||
// for (...)
|
||||
// sum += ...;
|
||||
//
|
||||
// put the phi on the left to promote coalescing. This is fairly specific.
|
||||
if (rhsSingleUse &&
|
||||
rhs->isPhi() &&
|
||||
rhs->block()->isLoopHeader() &&
|
||||
ins == rhs->toPhi()->getLoopBackedgeOperand())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
ReorderCommutative(MDefinition **lhsp, MDefinition **rhsp, MInstruction *ins)
|
||||
{
|
||||
MDefinition *lhs = *lhsp;
|
||||
MDefinition *rhs = *rhsp;
|
||||
|
||||
if (ShouldReorderCommutative(lhs, rhs, ins)) {
|
||||
*rhsp = lhs;
|
||||
*lhsp = rhs;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
LIRGenerator::visitTest(MTest *test)
|
||||
{
|
||||
|
@ -4083,34 +4026,6 @@ LIRGenerator::visitSimdBinaryComp(MSimdBinaryComp *ins)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
LIRGenerator::visitSimdBinaryArith(MSimdBinaryArith *ins)
|
||||
{
|
||||
MOZ_ASSERT(IsSimdType(ins->type()));
|
||||
|
||||
MDefinition *lhs = ins->lhs();
|
||||
MDefinition *rhs = ins->rhs();
|
||||
|
||||
if (ins->isCommutative())
|
||||
ReorderCommutative(&lhs, &rhs, ins);
|
||||
|
||||
if (ins->type() == MIRType_Int32x4) {
|
||||
lowerForFPU(new(alloc()) LSimdBinaryArithIx4(), ins, lhs, rhs);
|
||||
return;
|
||||
}
|
||||
|
||||
MOZ_ASSERT(ins->type() == MIRType_Float32x4, "unknown simd type on binary arith operation");
|
||||
|
||||
LSimdBinaryArithFx4 *lir = new(alloc()) LSimdBinaryArithFx4();
|
||||
|
||||
bool needsTemp = ins->operation() == MSimdBinaryArith::Max ||
|
||||
ins->operation() == MSimdBinaryArith::MinNum ||
|
||||
ins->operation() == MSimdBinaryArith::MaxNum;
|
||||
lir->setTemp(0, needsTemp ? temp(LDefinition::FLOAT32X4) : LDefinition::BogusTemp());
|
||||
|
||||
lowerForFPU(lir, ins, lhs, rhs);
|
||||
}
|
||||
|
||||
void
|
||||
LIRGenerator::visitSimdBinaryBitwise(MSimdBinaryBitwise *ins)
|
||||
{
|
||||
|
|
|
@ -283,7 +283,6 @@ class LIRGenerator : public LIRGeneratorSpecific
|
|||
void visitSimdShuffle(MSimdShuffle *ins);
|
||||
void visitSimdUnaryArith(MSimdUnaryArith *ins);
|
||||
void visitSimdBinaryComp(MSimdBinaryComp *ins);
|
||||
void visitSimdBinaryArith(MSimdBinaryArith *ins);
|
||||
void visitSimdBinaryBitwise(MSimdBinaryBitwise *ins);
|
||||
void visitSimdShift(MSimdShift *ins);
|
||||
void visitSimdConstant(MSimdConstant *ins);
|
||||
|
|
|
@ -1883,7 +1883,7 @@ class MSimdBinaryArith : public MBinaryInstruction
|
|||
MSimdBinaryArith(MDefinition *left, MDefinition *right, Operation op, MIRType type)
|
||||
: MBinaryInstruction(left, right), operation_(op)
|
||||
{
|
||||
MOZ_ASSERT_IF(type == MIRType_Int32x4, op == Add || op == Sub);
|
||||
MOZ_ASSERT_IF(type == MIRType_Int32x4, op == Add || op == Sub || op == Mul);
|
||||
MOZ_ASSERT(IsSimdType(type));
|
||||
MOZ_ASSERT(left->type() == right->type());
|
||||
MOZ_ASSERT(left->type() == type);
|
||||
|
|
|
@ -556,6 +556,12 @@ LIRGeneratorARM::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
|
|||
MOZ_CRASH("NYI");
|
||||
}
|
||||
|
||||
// Lowering of binary SIMD arithmetic is not implemented for the ARM backend;
// reaching this visitor crashes with "NYI".
void
|
||||
LIRGeneratorARM::visitSimdBinaryArith(MSimdBinaryArith *ins)
|
||||
{
|
||||
MOZ_CRASH("NYI");
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorARM::visitSimdTernaryBitwise(MSimdTernaryBitwise *ins)
|
||||
{
|
||||
|
|
|
@ -107,6 +107,7 @@ class LIRGeneratorARM : public LIRGeneratorShared
|
|||
void visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins);
|
||||
void visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
|
||||
void visitForkJoinGetSlice(MForkJoinGetSlice *ins);
|
||||
void visitSimdBinaryArith(MSimdBinaryArith *ins);
|
||||
void visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
|
||||
void visitSimdSplatX4(MSimdSplatX4 *ins);
|
||||
void visitSimdValueX4(MSimdValueX4 *ins);
|
||||
|
|
|
@ -546,6 +546,12 @@ LIRGeneratorMIPS::visitForkJoinGetSlice(MForkJoinGetSlice *ins)
|
|||
MOZ_CRASH("NYI");
|
||||
}
|
||||
|
||||
// Lowering of binary SIMD arithmetic is not implemented for the MIPS backend;
// reaching this visitor crashes with "NYI".
void
|
||||
LIRGeneratorMIPS::visitSimdBinaryArith(MSimdBinaryArith *ins)
|
||||
{
|
||||
MOZ_CRASH("NYI");
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorMIPS::visitSimdTernaryBitwise(MSimdTernaryBitwise *ins)
|
||||
{
|
||||
|
|
|
@ -107,6 +107,7 @@ class LIRGeneratorMIPS : public LIRGeneratorShared
|
|||
void visitAsmJSLoadFuncPtr(MAsmJSLoadFuncPtr *ins);
|
||||
void visitStoreTypedArrayElementStatic(MStoreTypedArrayElementStatic *ins);
|
||||
void visitForkJoinGetSlice(MForkJoinGetSlice *ins);
|
||||
void visitSimdBinaryArith(MSimdBinaryArith *ins);
|
||||
void visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
|
||||
void visitSimdSplatX4(MSimdSplatX4 *ins);
|
||||
void visitSimdValueX4(MSimdValueX4 *ins);
|
||||
|
|
|
@ -602,6 +602,9 @@ class AssemblerX86Shared : public AssemblerShared
|
|||
void movdqa(const Operand &src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.movdqa_rr(src.fpu(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.movdqa_mr(src.disp(), src.base(), dest.code());
|
||||
break;
|
||||
|
@ -1812,6 +1815,26 @@ class AssemblerX86Shared : public AssemblerShared
|
|||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
// Emit PMULUDQ with |src| as the source register and |dest| as the
// destination register. Requires SSE2 (asserted). Only a register-register
// form is provided here.
void pmuludq(FloatRegister src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.pmuludq_rr(src.code(), dest.code());
|
||||
}
|
||||
// Emit PMULLD with |dest| as the destination register and |src| as the source
// operand. Requires SSE4.1 (asserted). |src| may be an FP register, a
// base+displacement memory operand, or an absolute 32-bit address; any other
// operand kind crashes.
void pmulld(const Operand &src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
// Dispatch on the operand kind to the matching low-level encoder.
switch (src.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.pmulld_rr(src.fpu(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.pmulld_mr(src.disp(), src.base(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_ADDRESS32:
|
||||
masm.pmulld_mr(src.address(), dest.code());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
void vaddps(const Operand &src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src1.kind()) {
|
||||
|
@ -1981,6 +2004,22 @@ class AssemblerX86Shared : public AssemblerShared
|
|||
MOZ_ASSERT(HasSSE2());
|
||||
masm.pshufd_irr(mask, src.code(), dest.code());
|
||||
}
|
||||
// Emit PSHUFD with immediate |mask|, reading from |src| and writing to |dest|.
// Requires SSE2 (asserted). |src| may be an FP register, a base+displacement
// memory operand, or an absolute 32-bit address; any other operand kind
// crashes.
void pshufd(uint32_t mask, const Operand &src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
// Dispatch on the operand kind to the matching low-level encoder.
switch (src.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.pshufd_irr(mask, src.fpu(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.pshufd_imr(mask, src.disp(), src.base(), dest.code());
|
||||
break;
|
||||
case Operand::MEM_ADDRESS32:
|
||||
masm.pshufd_imr(mask, src.address(), dest.code());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
void movhlps(FloatRegister src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.movhlps_rr(src.code(), dest.code());
|
||||
|
|
|
@ -387,6 +387,7 @@ private:
|
|||
OP2_PSRAD_VdqWdq = 0xE2,
|
||||
OP2_PXORDQ_VdqWdq = 0xEF,
|
||||
OP2_PSLLD_VdqWdq = 0xF2,
|
||||
OP2_PMULUDQ_VdqWdq = 0xF4,
|
||||
OP2_PSUBD_VdqWdq = 0xFA,
|
||||
OP2_PADDD_VdqWdq = 0xFE
|
||||
} TwoByteOpcodeID;
|
||||
|
@ -400,11 +401,13 @@ private:
|
|||
OP3_PTEST_VdVd = 0x17,
|
||||
OP3_INSERTPS_VpsUps = 0x21,
|
||||
OP3_PINSRD_VdqEdIb = 0x22,
|
||||
OP3_PMULLD_VdqWdq = 0x40,
|
||||
OP3_VBLENDVPS_VdqWdq = 0x4A
|
||||
} ThreeByteOpcodeID;
|
||||
|
||||
typedef enum {
|
||||
ESCAPE_BLENDVPS = 0x38,
|
||||
ESCAPE_PMULLD = 0x38,
|
||||
ESCAPE_PTEST = 0x38,
|
||||
ESCAPE_PINSRD = 0x3A,
|
||||
ESCAPE_PEXTRD = 0x3A,
|
||||
|
@ -802,6 +805,33 @@ public:
|
|||
m_formatter.twoByteOp(OP2_PSUBD_VdqWdq, address, (RegisterID)dst);
|
||||
}
|
||||
|
||||
// Encode register-register PMULUDQ: the PRE_SSE_66 prefix followed by the
// two-byte opcode OP2_PMULUDQ_VdqWdq, with |src| as the r/m operand and |dst|
// as the reg operand. Also logs the instruction through spew().
void pmuludq_rr(XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("pmuludq %s, %s", nameFPReg(src), nameFPReg(dst));
|
||||
// The prefix must be emitted before the opcode bytes.
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PMULUDQ_VdqWdq, (RegisterID)src, (RegisterID)dst);
|
||||
}
|
||||
|
||||
// Encode register-register PMULLD: the PRE_SSE_66 prefix followed by the
// three-byte opcode (escape ESCAPE_PMULLD, opcode OP3_PMULLD_VdqWdq), with
// |src| as the r/m operand and |dst| as the reg operand. Also logs the
// instruction through spew().
void pmulld_rr(XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("pmulld %s, %s", nameFPReg(src), nameFPReg(dst));
|
||||
// The prefix must be emitted before the opcode bytes.
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.threeByteOp(OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, (RegisterID)src, (RegisterID)dst);
|
||||
}
|
||||
// Encode PMULLD with a memory source addressed as |offset|(|base|) and |dst|
// as the destination register: the PRE_SSE_66 prefix followed by the
// three-byte opcode (escape ESCAPE_PMULLD, opcode OP3_PMULLD_VdqWdq). Also
// logs the instruction through spew().
void pmulld_mr(int offset, RegisterID base, XMMRegisterID dst)
|
||||
{
|
||||
spew("pmulld %s0x%x(%s), %s",
|
||||
PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
|
||||
// The prefix must be emitted before the opcode bytes.
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.threeByteOp(OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, offset, base, (RegisterID)dst);
|
||||
}
|
||||
// Encode PMULLD with a memory source at the absolute |address| and |dst| as
// the destination register: the PRE_SSE_66 prefix followed by the three-byte
// opcode (escape ESCAPE_PMULLD, opcode OP3_PMULLD_VdqWdq). Also logs the
// instruction through spew().
void pmulld_mr(const void* address, XMMRegisterID dst)
|
||||
{
|
||||
spew("pmulld %p, %s", address, nameFPReg(dst));
|
||||
// The prefix must be emitted before the opcode bytes.
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.threeByteOp(OP3_PMULLD_VdqWdq, ESCAPE_PMULLD, address, (RegisterID)dst);
|
||||
}
|
||||
|
||||
void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst)
|
||||
{
|
||||
twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst);
|
||||
|
@ -2941,6 +2971,24 @@ public:
|
|||
m_formatter.immediate8(uint8_t(mask));
|
||||
}
|
||||
|
||||
// Encode PSHUFD with a memory source addressed as |offset|(|base|), |dst| as
// the destination register and |mask| as the trailing 8-bit immediate.
// |mask| must fit in one byte (asserted). Also logs the instruction through
// spew().
void pshufd_imr(uint32_t mask, int offset, RegisterID base, XMMRegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(mask < 256);
|
||||
spew("pshufd 0x%x, %s0x%x(%s), %s",
|
||||
mask, PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
|
||||
// Emission order: prefix, opcode with its memory operand, then the immediate.
m_formatter.prefix(PRE_SSE_66);
|
||||
m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, offset, base, (RegisterID)dst);
|
||||
m_formatter.immediate8(uint8_t(mask));
|
||||
}
|
||||
|
||||
// Encode PSHUFD with a memory source at the absolute |address|, |dst| as the
// destination register and |mask| as the trailing 8-bit immediate. Also logs
// the instruction through spew().
void pshufd_imr(uint32_t mask, const void* address, XMMRegisterID dst)
{
    // The immediate is emitted as a single byte, so a larger mask would be
    // silently truncated; reject it in debug builds.
    MOZ_ASSERT(mask < 256);
    spew("pshufd %x, %p, %s", mask, address, nameFPReg(dst));
    // Emission order: prefix, opcode with its memory operand, then the immediate.
    m_formatter.prefix(PRE_SSE_66);
    m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, address, (RegisterID)dst);
    m_formatter.immediate8(uint8_t(mask));
}
|
||||
|
||||
void shufps_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
MOZ_ASSERT(mask < 256);
|
||||
|
@ -2961,7 +3009,6 @@ public:
|
|||
// Encode SHUFPS with a memory source at the absolute |address|, |dst| as the
// destination register and |mask| as the trailing 8-bit immediate. Also logs
// the instruction through spew().
void shufps_imr(uint32_t mask, const void* address, XMMRegisterID dst)
{
    // The immediate is emitted as a single byte, so a larger mask would be
    // silently truncated; reject it in debug builds.
    MOZ_ASSERT(mask < 256);
    spew("shufps %x, %p, %s", mask, address, nameFPReg(dst));
    // SHUFPS takes no mandatory prefix (its encoding is 0F C6 /r ib), so no
    // F3 prefix may be emitted ahead of the opcode.
    m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, address, (RegisterID)dst);
    m_formatter.immediate8(uint8_t(mask));
}
|
||||
|
@ -4740,6 +4787,16 @@ private:
|
|||
memoryModRM(offset, base, reg);
|
||||
}
|
||||
|
||||
// Emit a three-byte-opcode instruction whose r/m operand is the absolute
// memory |address| and whose reg field is |reg|. The bytes are written in
// order: optional REX, OP_2BYTE_ESCAPE, |escape|, |opcode|, then the ModRM
// encoding of the address.
void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, const void* address, int reg)
|
||||
{
|
||||
// Reserve room up front; the putByteUnchecked() calls below rely on it.
m_buffer.ensureSpace(maxInstructionSize);
|
||||
emitRexIfNeeded(reg, 0, 0);
|
||||
m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
|
||||
m_buffer.putByteUnchecked(escape);
|
||||
m_buffer.putByteUnchecked(opcode);
|
||||
memoryModRM(address, reg);
|
||||
}
|
||||
|
||||
void vblendvOpVex(VexOperandType ty, ThreeByteOpcodeID opcode, ThreeByteEscape escape,
|
||||
XMMRegisterID mask, RegisterID rm, XMMRegisterID src0, int reg)
|
||||
{
|
||||
|
|
|
@ -2626,9 +2626,27 @@ CodeGeneratorX86Shared::visitSimdBinaryArithIx4(LSimdBinaryArithIx4 *ins)
|
|||
case MSimdBinaryArith::Sub:
|
||||
masm.packedSubInt32(rhs, lhs);
|
||||
return;
|
||||
case MSimdBinaryArith::Mul:
|
||||
// we can do mul with a single instruction only if we have SSE4.1
|
||||
// using the PMULLD instruction.
|
||||
case MSimdBinaryArith::Mul: {
|
||||
if (AssemblerX86Shared::HasSSE41()) {
|
||||
masm.pmulld(rhs, lhs);
|
||||
return;
|
||||
}
|
||||
|
||||
masm.loadAlignedInt32x4(rhs, ScratchSimdReg);
|
||||
masm.pmuludq(lhs, ScratchSimdReg);
|
||||
// ScratchSimdReg contains (Rx, _, Rz, _) where R is the resulting vector.
|
||||
|
||||
FloatRegister temp = ToFloatRegister(ins->temp());
|
||||
masm.pshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), lhs, lhs);
|
||||
masm.pshufd(MacroAssembler::ComputeShuffleMask(LaneY, LaneY, LaneW, LaneW), rhs, temp);
|
||||
masm.pmuludq(temp, lhs);
|
||||
// lhs contains (Ry, _, Rw, _) where R is the resulting vector.
|
||||
|
||||
masm.shufps(MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, LaneX, LaneZ), ScratchSimdReg, lhs);
|
||||
// lhs contains (Ry, Rw, Rx, Rz)
|
||||
masm.shufps(MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, LaneW, LaneY), lhs, lhs);
|
||||
return;
|
||||
}
|
||||
case MSimdBinaryArith::Div:
|
||||
// x86 doesn't have SIMD i32 div.
|
||||
break;
|
||||
|
|
|
@ -14,6 +14,63 @@
|
|||
using namespace js;
|
||||
using namespace jit;
|
||||
|
||||
bool
|
||||
LIRGeneratorShared::ShouldReorderCommutative(MDefinition *lhs, MDefinition *rhs, MInstruction *ins)
|
||||
{
|
||||
// lhs and rhs are used by the commutative operator.
|
||||
MOZ_ASSERT(lhs->hasDefUses());
|
||||
MOZ_ASSERT(rhs->hasDefUses());
|
||||
|
||||
// Ensure that if there is a constant, then it is in rhs.
|
||||
if (rhs->isConstant())
|
||||
return false;
|
||||
if (lhs->isConstant())
|
||||
return true;
|
||||
|
||||
// Since clobbering binary operations clobber the left operand, prefer a
|
||||
// non-constant lhs operand with no further uses. To be fully precise, we
|
||||
// should check whether this is the *last* use, but checking hasOneDefUse()
|
||||
// is a decent approximation which doesn't require any extra analysis.
|
||||
bool rhsSingleUse = rhs->hasOneDefUse();
|
||||
bool lhsSingleUse = lhs->hasOneDefUse();
|
||||
if (rhsSingleUse) {
|
||||
if (!lhsSingleUse)
|
||||
return true;
|
||||
} else {
|
||||
if (lhsSingleUse)
|
||||
return false;
|
||||
}
|
||||
|
||||
// If this is a reduction-style computation, such as
|
||||
//
|
||||
// sum = 0;
|
||||
// for (...)
|
||||
// sum += ...;
|
||||
//
|
||||
// put the phi on the left to promote coalescing. This is fairly specific.
|
||||
if (rhsSingleUse &&
|
||||
rhs->isPhi() &&
|
||||
rhs->block()->isLoopHeader() &&
|
||||
ins == rhs->toPhi()->getLoopBackedgeOperand())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorShared::ReorderCommutative(MDefinition **lhsp, MDefinition **rhsp, MInstruction *ins)
|
||||
{
|
||||
MDefinition *lhs = *lhsp;
|
||||
MDefinition *rhs = *rhsp;
|
||||
|
||||
if (ShouldReorderCommutative(lhs, rhs, ins)) {
|
||||
*rhsp = lhs;
|
||||
*lhsp = rhs;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorShared::visitConstant(MConstant *ins)
|
||||
{
|
||||
|
|
|
@ -50,6 +50,10 @@ class LIRGeneratorShared : public MDefinitionVisitor
|
|||
}
|
||||
|
||||
protected:
|
||||
|
||||
static void ReorderCommutative(MDefinition **lhsp, MDefinition **rhsp, MInstruction *ins);
|
||||
static bool ShouldReorderCommutative(MDefinition *lhs, MDefinition *rhs, MInstruction *ins);
|
||||
|
||||
// A backend can decide that an instruction should be emitted at its uses,
|
||||
// rather than at its definition. To communicate this, set the
|
||||
// instruction's virtual register set to 0. When using the instruction,
|
||||
|
|
|
@ -655,6 +655,34 @@ LIRGeneratorX86Shared::visitAsmJSAtomicBinopHeap(MAsmJSAtomicBinopHeap *ins)
|
|||
defineFixed(lir, ins, LAllocation(AnyRegister(eax)));
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86Shared::visitSimdBinaryArith(MSimdBinaryArith *ins)
|
||||
{
|
||||
MOZ_ASSERT(IsSimdType(ins->type()));
|
||||
|
||||
MDefinition *lhs = ins->lhs();
|
||||
MDefinition *rhs = ins->rhs();
|
||||
|
||||
if (ins->isCommutative())
|
||||
ReorderCommutative(&lhs, &rhs, ins);
|
||||
|
||||
if (ins->type() == MIRType_Int32x4) {
|
||||
lowerForFPU(new(alloc()) LSimdBinaryArithIx4(), ins, lhs, rhs);
|
||||
return;
|
||||
}
|
||||
|
||||
MOZ_ASSERT(ins->type() == MIRType_Float32x4, "unknown simd type on binary arith operation");
|
||||
|
||||
LSimdBinaryArithFx4 *lir = new(alloc()) LSimdBinaryArithFx4();
|
||||
|
||||
bool needsTemp = ins->operation() == MSimdBinaryArith::Max ||
|
||||
ins->operation() == MSimdBinaryArith::MinNum ||
|
||||
ins->operation() == MSimdBinaryArith::MaxNum;
|
||||
lir->setTemp(0, needsTemp ? temp(LDefinition::FLOAT32X4) : LDefinition::BogusTemp());
|
||||
|
||||
lowerForFPU(lir, ins, lhs, rhs);
|
||||
}
|
||||
|
||||
void
|
||||
LIRGeneratorX86Shared::visitSimdTernaryBitwise(MSimdTernaryBitwise *ins)
|
||||
{
|
||||
|
|
|
@ -53,6 +53,7 @@ class LIRGeneratorX86Shared : public LIRGeneratorShared
|
|||
void lowerTruncateDToInt32(MTruncateToInt32 *ins);
|
||||
void lowerTruncateFToInt32(MTruncateToInt32 *ins);
|
||||
void visitForkJoinGetSlice(MForkJoinGetSlice *ins);
|
||||
void visitSimdBinaryArith(MSimdBinaryArith *ins);
|
||||
void visitSimdTernaryBitwise(MSimdTernaryBitwise *ins);
|
||||
void visitSimdSplatX4(MSimdSplatX4 *ins);
|
||||
void visitSimdValueX4(MSimdValueX4 *ins);
|
||||
|
|
Загрузка…
Ссылка в новой задаче