зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1021716: SIMD: Use specific instructions for special cases; r=sunfish
This commit is contained in:
Родитель
fbdb740234
Коммит
2c3d97c4d8
|
@ -242,10 +242,14 @@ class LSimdSwizzleBase : public LInstructionHelper<1, 1, 0>
|
|||
return getOperand(0);
|
||||
}
|
||||
|
||||
int32_t laneX() const { return mir_->toSimdSwizzle()->laneX(); }
|
||||
int32_t laneY() const { return mir_->toSimdSwizzle()->laneY(); }
|
||||
int32_t laneZ() const { return mir_->toSimdSwizzle()->laneZ(); }
|
||||
int32_t laneW() const { return mir_->toSimdSwizzle()->laneW(); }
|
||||
// Lane selectors X, Y, Z and W of this swizzle. Each accessor simply forwards
// to the same-named accessor on the MIR node obtained via
// mir_->toSimdSwizzle() and returns the value as an unsigned 32-bit integer.
uint32_t laneX() const { return mir_->toSimdSwizzle()->laneX(); }
|
||||
uint32_t laneY() const { return mir_->toSimdSwizzle()->laneY(); }
|
||||
uint32_t laneZ() const { return mir_->toSimdSwizzle()->laneZ(); }
|
||||
uint32_t laneW() const { return mir_->toSimdSwizzle()->laneW(); }
|
||||
|
||||
// Returns whether this swizzle's lane selectors are exactly (x, y, z, w).
// The comparison itself is delegated to lanesMatch() on the MIR node obtained
// via mir_->toSimdSwizzle(); this wrapper adds no logic of its own.
bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
|
||||
return mir_->toSimdSwizzle()->lanesMatch(x, y, z, w);
|
||||
}
|
||||
};
|
||||
|
||||
// Shuffles an int32x4 into another int32x4 vector.
|
||||
|
@ -287,10 +291,14 @@ class LSimdShuffle : public LInstructionHelper<1, 2, 1>
|
|||
return getTemp(0);
|
||||
}
|
||||
|
||||
int32_t laneX() const { return mir_->toSimdShuffle()->laneX(); }
|
||||
int32_t laneY() const { return mir_->toSimdShuffle()->laneY(); }
|
||||
int32_t laneZ() const { return mir_->toSimdShuffle()->laneZ(); }
|
||||
int32_t laneW() const { return mir_->toSimdShuffle()->laneW(); }
|
||||
// Lane selectors X, Y, Z and W of this shuffle. Each accessor simply forwards
// to the same-named accessor on the MIR node obtained via
// mir_->toSimdShuffle() and returns the value as an unsigned 32-bit integer.
uint32_t laneX() const { return mir_->toSimdShuffle()->laneX(); }
|
||||
uint32_t laneY() const { return mir_->toSimdShuffle()->laneY(); }
|
||||
uint32_t laneZ() const { return mir_->toSimdShuffle()->laneZ(); }
|
||||
uint32_t laneW() const { return mir_->toSimdShuffle()->laneW(); }
|
||||
|
||||
// Returns whether this shuffle's lane selectors are exactly (x, y, z, w).
// The comparison itself is delegated to lanesMatch() on the MIR node obtained
// via mir_->toSimdShuffle(); this wrapper adds no logic of its own.
bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
|
||||
return mir_->toSimdShuffle()->lanesMatch(x, y, z, w);
|
||||
}
|
||||
};
|
||||
|
||||
// Binary SIMD comparison operation between two SIMD operands
|
||||
|
|
|
@ -819,6 +819,14 @@ MSimdSplatX4::foldsTo(TempAllocator &alloc)
|
|||
return MSimdConstant::New(alloc, cst, type());
|
||||
}
|
||||
|
||||
// Folding hook for MSimdSwizzle.
//
// When the lane selectors are exactly (0, 1, 2, 3) the swizzle is replaced by
// its own input (presumably because that selection is the identity
// permutation, so the instruction is redundant). In every other case the
// instruction is kept and `this` is returned.
//
// The `alloc` parameter is not used by this implementation.
MDefinition *
|
||||
MSimdSwizzle::foldsTo(TempAllocator &alloc)
|
||||
{
|
||||
if (lanesMatch(0, 1, 2, 3))
|
||||
return input();
|
||||
return this;
|
||||
}
|
||||
|
||||
MCloneLiteral *
|
||||
MCloneLiteral::New(TempAllocator &alloc, MDefinition *obj)
|
||||
{
|
||||
|
|
|
@ -1585,7 +1585,7 @@ class MSimdShuffleBase
|
|||
uint32_t laneMask_;
|
||||
uint32_t arity_;
|
||||
|
||||
MSimdShuffleBase(int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW, MIRType type)
|
||||
MSimdShuffleBase(uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW, MIRType type)
|
||||
{
|
||||
MOZ_ASSERT(SimdTypeToLength(type) == 4);
|
||||
MOZ_ASSERT(IsSimdType(type));
|
||||
|
@ -1600,10 +1600,14 @@ class MSimdShuffleBase
|
|||
public:
|
||||
// For now, these formulas are fine for x4 types. They'll need to be
|
||||
// generalized for other SIMD type lengths.
|
||||
int32_t laneX() const { MOZ_ASSERT(arity_ == 4); return laneMask_ & 7; }
|
||||
int32_t laneY() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 3) & 7; }
|
||||
int32_t laneZ() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 6) & 7; }
|
||||
int32_t laneW() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 9) & 7; }
|
||||
// Each selector occupies a 3-bit field of laneMask_: X in bits 0-2, Y in bits
// 3-5, Z in bits 6-8 and W in bits 9-11. Every accessor asserts that arity_
// is 4 before extracting its field, so the result is always in [0, 7].
uint32_t laneX() const { MOZ_ASSERT(arity_ == 4); return laneMask_ & 7; }
|
||||
uint32_t laneY() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 3) & 7; }
|
||||
uint32_t laneZ() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 6) & 7; }
|
||||
uint32_t laneW() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 9) & 7; }
|
||||
|
||||
// Returns true when the stored lane mask equals the mask built from
// (x, y, z, w), packed 3 bits per selector: x at bit 0, y at bit 3, z at bit 6
// and w at bit 9.
//
// The arguments are not masked before being shifted, so a value of 8 or more
// would spill into the neighbouring field; callers are expected to pass
// selectors in [0, 7]. Note that, unlike the per-lane accessors, this method
// does not assert arity_ == 4.
bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
|
||||
return ((x << 0) | (y << 3) | (z << 6) | (w << 9)) == laneMask_;
|
||||
}
|
||||
};
|
||||
|
||||
// Applies a shuffle operation to the input, putting the input lanes as
|
||||
|
@ -1613,7 +1617,7 @@ class MSimdSwizzle : public MUnaryInstruction, public MSimdShuffleBase
|
|||
{
|
||||
protected:
|
||||
MSimdSwizzle(MDefinition *obj, MIRType type,
|
||||
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
|
||||
uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
|
||||
: MUnaryInstruction(obj), MSimdShuffleBase(laneX, laneY, laneZ, laneW, type)
|
||||
{
|
||||
MOZ_ASSERT(laneX < 4 && laneY < 4 && laneZ < 4 && laneW < 4);
|
||||
|
@ -1628,7 +1632,7 @@ class MSimdSwizzle : public MUnaryInstruction, public MSimdShuffleBase
|
|||
INSTRUCTION_HEADER(SimdSwizzle);
|
||||
|
||||
static MSimdSwizzle *NewAsmJS(TempAllocator &alloc, MDefinition *obj, MIRType type,
|
||||
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
|
||||
uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
|
||||
{
|
||||
return new(alloc) MSimdSwizzle(obj, type, laneX, laneY, laneZ, laneW);
|
||||
}
|
||||
|
@ -1644,6 +1648,8 @@ class MSimdSwizzle : public MUnaryInstruction, public MSimdShuffleBase
|
|||
return AliasSet::None();
|
||||
}
|
||||
|
||||
MDefinition *foldsTo(TempAllocator &alloc);
|
||||
|
||||
ALLOW_CLONE(MSimdSwizzle)
|
||||
};
|
||||
|
||||
|
@ -1653,7 +1659,7 @@ class MSimdSwizzle : public MUnaryInstruction, public MSimdShuffleBase
|
|||
class MSimdShuffle : public MBinaryInstruction, public MSimdShuffleBase
|
||||
{
|
||||
MSimdShuffle(MDefinition *lhs, MDefinition *rhs, MIRType type,
|
||||
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
|
||||
uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
|
||||
: MBinaryInstruction(lhs, rhs), MSimdShuffleBase(laneX, laneY, laneZ, laneW, lhs->type())
|
||||
{
|
||||
MOZ_ASSERT(laneX < 8 && laneY < 8 && laneZ < 8 && laneW < 8);
|
||||
|
@ -1670,8 +1676,8 @@ class MSimdShuffle : public MBinaryInstruction, public MSimdShuffleBase
|
|||
INSTRUCTION_HEADER(SimdShuffle);
|
||||
|
||||
static MInstruction *NewAsmJS(TempAllocator &alloc, MDefinition *lhs, MDefinition *rhs,
|
||||
MIRType type, int32_t laneX, int32_t laneY, int32_t laneZ,
|
||||
int32_t laneW)
|
||||
MIRType type, uint32_t laneX, uint32_t laneY, uint32_t laneZ,
|
||||
uint32_t laneW)
|
||||
{
|
||||
// Swap operands so that new lanes come from LHS in majority.
|
||||
// In the balanced case, swap operands if needs be, in order to be able
|
||||
|
|
|
@ -1414,10 +1414,6 @@ class AssemblerX86Shared : public AssemblerShared
|
|||
masm.divl_r(divisor.code());
|
||||
}
|
||||
|
||||
void unpcklps(FloatRegister src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.unpcklps_rr(src.code(), dest.code());
|
||||
}
|
||||
void pinsrd(unsigned lane, Register src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
masm.pinsrd_irr(lane, src.code(), dest.code());
|
||||
|
@ -1860,6 +1856,18 @@ class AssemblerX86Shared : public AssemblerShared
|
|||
MOZ_ASSERT(HasSSE2());
|
||||
masm.movhlps_rr(src.code(), dest.code());
|
||||
}
|
||||
// Emits a register-to-register MOVLHPS by forwarding the register codes of
// `src` and `dest` to masm.movlhps_rr(). Asserts that SSE2 is available first.
// NOTE(review): MOVLHPS is presumably available from SSE1 onwards, so the SSE2
// assertion may be stricter than strictly required — confirm.
void movlhps(FloatRegister src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.movlhps_rr(src.code(), dest.code());
|
||||
}
|
||||
// Emits a register-to-register UNPCKLPS by forwarding the register codes of
// `src` and `dest` to masm.unpcklps_rr(). Asserts that SSE2 is available
// first.
void unpcklps(FloatRegister src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.unpcklps_rr(src.code(), dest.code());
|
||||
}
|
||||
// Emits a register-to-register UNPCKHPS by forwarding the register codes of
// `src` and `dest` to masm.unpckhps_rr(). Asserts that SSE2 is available
// first.
// NOTE(review): UNPCKHPS is presumably available from SSE1 onwards, so the
// SSE2 assertion may be stricter than strictly required — confirm.
void unpckhps(FloatRegister src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.unpckhps_rr(src.code(), dest.code());
|
||||
}
|
||||
void shufps(uint32_t mask, FloatRegister src, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
masm.shufps_irr(mask, src.code(), dest.code());
|
||||
|
|
|
@ -295,6 +295,8 @@ private:
|
|||
OP2_MOVPS_WpsVps = 0x11,
|
||||
OP2_MOVHLPS_VqUq = 0x12,
|
||||
OP2_UNPCKLPS_VsdWsd = 0x14,
|
||||
OP2_UNPCKHPS_VsdWsd = 0x15,
|
||||
OP2_MOVLHPS_VqUq = 0x16,
|
||||
OP2_MOVAPD_VsdWsd = 0x28,
|
||||
OP2_MOVAPS_VsdWsd = 0x28,
|
||||
OP2_MOVAPS_WsdVsd = 0x29,
|
||||
|
@ -2921,6 +2923,13 @@ public:
|
|||
m_formatter.twoByteOp(OP2_UNPCKLPS_VsdWsd, (RegisterID)dst, (RegisterID)src);
|
||||
}
|
||||
|
||||
// Encodes the register-register form of UNPCKHPS.
// First logs the instruction through spew() with the names of both registers,
// then emits the two-byte opcode OP2_UNPCKHPS_VsdWsd (0x15 in the opcode
// table) via m_formatter.twoByteOp(), passing `dst` before `src`.
void unpckhps_rr(XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("unpckhps %s, %s",
|
||||
nameFPReg(src), nameFPReg(dst));
|
||||
m_formatter.twoByteOp(OP2_UNPCKHPS_VsdWsd, (RegisterID)dst, (RegisterID)src);
|
||||
}
|
||||
|
||||
void movd_rr(RegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("movd %s, %s",
|
||||
|
@ -2981,6 +2990,13 @@ public:
|
|||
m_formatter.twoByteOp(OP2_MOVHLPS_VqUq, (RegisterID)dst, (RegisterID)src);
|
||||
}
|
||||
|
||||
// Encodes the register-register form of MOVLHPS.
// First logs the instruction through spew() with the names of both registers,
// then emits the two-byte opcode OP2_MOVLHPS_VqUq (0x16 in the opcode table)
// via m_formatter.twoByteOp(), passing `dst` before `src`.
void movlhps_rr(XMMRegisterID src, XMMRegisterID dst)
|
||||
{
|
||||
spew("movlhps %s, %s",
|
||||
nameFPReg(src), nameFPReg(dst));
|
||||
m_formatter.twoByteOp(OP2_MOVLHPS_VqUq, (RegisterID)dst, (RegisterID)src);
|
||||
}
|
||||
|
||||
void psrldq_ir(int shift, XMMRegisterID dest)
|
||||
{
|
||||
spew("psrldq $%d, %s",
|
||||
|
|
|
@ -2394,8 +2394,12 @@ CodeGeneratorX86Shared::visitSimdSwizzleI(LSimdSwizzleI *ins)
|
|||
FloatRegister input = ToFloatRegister(ins->input());
|
||||
FloatRegister output = ToFloatRegister(ins->output());
|
||||
|
||||
uint32_t mask = MacroAssembler::ComputeShuffleMask(ins->laneX(), ins->laneY(), ins->laneZ(),
|
||||
ins->laneW());
|
||||
uint32_t x = ins->laneX();
|
||||
uint32_t y = ins->laneY();
|
||||
uint32_t z = ins->laneZ();
|
||||
uint32_t w = ins->laneW();
|
||||
|
||||
uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
|
||||
masm.shuffleInt32(mask, input, output);
|
||||
return true;
|
||||
}
|
||||
|
@ -2406,8 +2410,38 @@ CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF *ins)
|
|||
FloatRegister input = ToFloatRegister(ins->input());
|
||||
FloatRegister output = ToFloatRegister(ins->output());
|
||||
|
||||
uint32_t mask = MacroAssembler::ComputeShuffleMask(ins->laneX(), ins->laneY(), ins->laneZ(),
|
||||
ins->laneW());
|
||||
uint32_t x = ins->laneX();
|
||||
uint32_t y = ins->laneY();
|
||||
uint32_t z = ins->laneZ();
|
||||
uint32_t w = ins->laneW();
|
||||
|
||||
// TODO Here and below, arch specific lowering could identify this pattern
|
||||
// and use defineReuseInput to avoid this move (bug 1084404)
|
||||
if (ins->lanesMatch(2, 3, 2, 3)) {
|
||||
masm.movaps(input, output);
|
||||
masm.movhlps(input, output);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ins->lanesMatch(0, 1, 0, 1)) {
|
||||
masm.movaps(input, output);
|
||||
masm.movlhps(input, output);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ins->lanesMatch(0, 0, 1, 1)) {
|
||||
masm.movaps(input, output);
|
||||
masm.unpcklps(input, output);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ins->lanesMatch(2, 2, 3, 3)) {
|
||||
masm.movaps(input, output);
|
||||
masm.unpckhps(input, output);
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
|
||||
masm.shuffleFloat32(mask, input, output);
|
||||
return true;
|
||||
}
|
||||
|
@ -2448,6 +2482,11 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
|
|||
if (numLanesFromLHS == 3) {
|
||||
unsigned firstMask = -1, secondMask = -1;
|
||||
|
||||
if (ins->lanesMatch(4, 1, 2, 3)) {
|
||||
masm.movss(rhs, out);
|
||||
return true;
|
||||
}
|
||||
|
||||
FloatRegister rhsCopy = ToFloatRegister(ins->temp());
|
||||
|
||||
if (x < 4 && y < 4) {
|
||||
|
@ -2497,6 +2536,46 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
|
|||
// Two elements from one vector, two other elements from the other
|
||||
MOZ_ASSERT(numLanesFromLHS == 2);
|
||||
|
||||
// TODO Here and below, symmetric case would be more handy to avoid a move,
|
||||
// but can't be reached because operands would get swapped (bug 1084404).
|
||||
if (ins->lanesMatch(2, 3, 6, 7)) {
|
||||
masm.movaps(rhs, ScratchSimdReg);
|
||||
masm.movhlps(lhs, ScratchSimdReg);
|
||||
masm.movaps(ScratchSimdReg, out);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ins->lanesMatch(0, 1, 4, 5)) {
|
||||
masm.movlhps(rhs, lhs);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ins->lanesMatch(0, 4, 1, 5)) {
|
||||
masm.unpcklps(rhs, lhs);
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO swapped case would be better (bug 1084404)
|
||||
if (ins->lanesMatch(4, 0, 5, 1)) {
|
||||
masm.movaps(rhs, ScratchSimdReg);
|
||||
masm.unpcklps(lhs, ScratchSimdReg);
|
||||
masm.movaps(ScratchSimdReg, out);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ins->lanesMatch(2, 6, 3, 7)) {
|
||||
masm.unpckhps(rhs, lhs);
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO swapped case would be better (bug 1084404)
|
||||
if (ins->lanesMatch(6, 2, 7, 3)) {
|
||||
masm.movaps(rhs, ScratchSimdReg);
|
||||
masm.unpckhps(lhs, ScratchSimdReg);
|
||||
masm.movaps(ScratchSimdReg, out);
|
||||
return true;
|
||||
}
|
||||
|
||||
// In one shufps
|
||||
if (x < 4 && y < 4) {
|
||||
mask = MacroAssembler::ComputeShuffleMask(x, y, z % 4, w % 4);
|
||||
|
|
Загрузка…
Ссылка в новой задаче