Bug 1021716: SIMD: Use specific instructions for special cases; r=sunfish

This commit is contained in:
Benjamin Bouvier 2014-10-21 15:39:31 +02:00
Родитель fbdb740234
Коммит 2c3d97c4d8
6 изменённых файлов: 151 добавлений и 26 удалений

Просмотреть файл

@ -242,10 +242,14 @@ class LSimdSwizzleBase : public LInstructionHelper<1, 1, 0>
return getOperand(0);
}
int32_t laneX() const { return mir_->toSimdSwizzle()->laneX(); }
int32_t laneY() const { return mir_->toSimdSwizzle()->laneY(); }
int32_t laneZ() const { return mir_->toSimdSwizzle()->laneZ(); }
int32_t laneW() const { return mir_->toSimdSwizzle()->laneW(); }
// Lane selectors of this swizzle, one per output lane (X, Y, Z, W). Each
// accessor simply forwards to the MSimdSwizzle node stored in mir_.
uint32_t laneX() const { return mir_->toSimdSwizzle()->laneX(); }
uint32_t laneY() const { return mir_->toSimdSwizzle()->laneY(); }
uint32_t laneZ() const { return mir_->toSimdSwizzle()->laneZ(); }
uint32_t laneW() const { return mir_->toSimdSwizzle()->laneW(); }
// Returns true when the swizzle's lane selectors are exactly (x, y, z, w),
// in that order. The comparison itself is delegated to the MSimdSwizzle node
// stored in mir_.
bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
return mir_->toSimdSwizzle()->lanesMatch(x, y, z, w);
}
};
// Shuffles an int32x4 into another int32x4 vector.
@ -287,10 +291,14 @@ class LSimdShuffle : public LInstructionHelper<1, 2, 1>
return getTemp(0);
}
int32_t laneX() const { return mir_->toSimdShuffle()->laneX(); }
int32_t laneY() const { return mir_->toSimdShuffle()->laneY(); }
int32_t laneZ() const { return mir_->toSimdShuffle()->laneZ(); }
int32_t laneW() const { return mir_->toSimdShuffle()->laneW(); }
// Lane selectors of this shuffle, one per output lane (X, Y, Z, W). Each
// accessor simply forwards to the MSimdShuffle node stored in mir_.
uint32_t laneX() const { return mir_->toSimdShuffle()->laneX(); }
uint32_t laneY() const { return mir_->toSimdShuffle()->laneY(); }
uint32_t laneZ() const { return mir_->toSimdShuffle()->laneZ(); }
uint32_t laneW() const { return mir_->toSimdShuffle()->laneW(); }
// Returns true when the shuffle's lane selectors are exactly (x, y, z, w),
// in that order. The comparison itself is delegated to the MSimdShuffle node
// stored in mir_.
bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
return mir_->toSimdShuffle()->lanesMatch(x, y, z, w);
}
};
// Binary SIMD comparison operation between two SIMD operands

Просмотреть файл

@ -819,6 +819,14 @@ MSimdSplatX4::foldsTo(TempAllocator &alloc)
return MSimdConstant::New(alloc, cst, type());
}
// Folds away a swizzle whose lane selection is (0, 1, 2, 3): in that case the
// instruction is replaced by its own input. Any other lane selection is kept
// as is.
MDefinition *
MSimdSwizzle::foldsTo(TempAllocator &alloc)
{
    const bool keepsLaneOrder = lanesMatch(0, 1, 2, 3);
    if (!keepsLaneOrder)
        return this;

    return input();
}
MCloneLiteral *
MCloneLiteral::New(TempAllocator &alloc, MDefinition *obj)
{

Просмотреть файл

@ -1585,7 +1585,7 @@ class MSimdShuffleBase
uint32_t laneMask_;
uint32_t arity_;
MSimdShuffleBase(int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW, MIRType type)
MSimdShuffleBase(uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW, MIRType type)
{
MOZ_ASSERT(SimdTypeToLength(type) == 4);
MOZ_ASSERT(IsSimdType(type));
@ -1600,10 +1600,14 @@ class MSimdShuffleBase
public:
// For now, these formulas are fine for x4 types. They'll need to be
// generalized for other SIMD type lengths.
int32_t laneX() const { MOZ_ASSERT(arity_ == 4); return laneMask_ & 7; }
int32_t laneY() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 3) & 7; }
int32_t laneZ() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 6) & 7; }
int32_t laneW() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 9) & 7; }
// Decode one lane selector from laneMask_. Every selector is a 3-bit field:
// X in bits 0-2, Y in bits 3-5, Z in bits 6-8 and W in bits 9-11. All four
// accessors assert that the arity is 4.
uint32_t laneX() const { MOZ_ASSERT(arity_ == 4); return laneMask_ & 7; }
uint32_t laneY() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 3) & 7; }
uint32_t laneZ() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 6) & 7; }
uint32_t laneW() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 9) & 7; }
// Returns true when the encoded lane selectors are exactly (x, y, z, w).
// The candidate selectors are packed into 3-bit fields (x in the lowest bits,
// then y, z and w) and compared against laneMask_ in a single operation.
bool lanesMatch(uint32_t x, uint32_t y, uint32_t z, uint32_t w) const {
    // A selector wider than 3 bits would spill into the neighbouring field
    // and could alias a different, valid mask (e.g. x == 8 would look like
    // y == 1), so such a request can never be a genuine match.
    if ((x | y | z | w) > 7)
        return false;

    return ((x << 0) | (y << 3) | (z << 6) | (w << 9)) == laneMask_;
}
};
// Applies a shuffle operation to the input, putting the input lanes as
@ -1613,7 +1617,7 @@ class MSimdSwizzle : public MUnaryInstruction, public MSimdShuffleBase
{
protected:
MSimdSwizzle(MDefinition *obj, MIRType type,
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
: MUnaryInstruction(obj), MSimdShuffleBase(laneX, laneY, laneZ, laneW, type)
{
MOZ_ASSERT(laneX < 4 && laneY < 4 && laneZ < 4 && laneW < 4);
@ -1628,7 +1632,7 @@ class MSimdSwizzle : public MUnaryInstruction, public MSimdShuffleBase
INSTRUCTION_HEADER(SimdSwizzle);
static MSimdSwizzle *NewAsmJS(TempAllocator &alloc, MDefinition *obj, MIRType type,
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
{
return new(alloc) MSimdSwizzle(obj, type, laneX, laneY, laneZ, laneW);
}
@ -1644,6 +1648,8 @@ class MSimdSwizzle : public MUnaryInstruction, public MSimdShuffleBase
return AliasSet::None();
}
MDefinition *foldsTo(TempAllocator &alloc);
ALLOW_CLONE(MSimdSwizzle)
};
@ -1653,7 +1659,7 @@ class MSimdSwizzle : public MUnaryInstruction, public MSimdShuffleBase
class MSimdShuffle : public MBinaryInstruction, public MSimdShuffleBase
{
MSimdShuffle(MDefinition *lhs, MDefinition *rhs, MIRType type,
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
uint32_t laneX, uint32_t laneY, uint32_t laneZ, uint32_t laneW)
: MBinaryInstruction(lhs, rhs), MSimdShuffleBase(laneX, laneY, laneZ, laneW, lhs->type())
{
MOZ_ASSERT(laneX < 8 && laneY < 8 && laneZ < 8 && laneW < 8);
@ -1670,8 +1676,8 @@ class MSimdShuffle : public MBinaryInstruction, public MSimdShuffleBase
INSTRUCTION_HEADER(SimdShuffle);
static MInstruction *NewAsmJS(TempAllocator &alloc, MDefinition *lhs, MDefinition *rhs,
MIRType type, int32_t laneX, int32_t laneY, int32_t laneZ,
int32_t laneW)
MIRType type, uint32_t laneX, uint32_t laneY, uint32_t laneZ,
uint32_t laneW)
{
// Swap operands so that new lanes come from LHS in majority.
// In the balanced case, swap operands if needs be, in order to be able

Просмотреть файл

@ -1414,10 +1414,6 @@ class AssemblerX86Shared : public AssemblerShared
masm.divl_r(divisor.code());
}
void unpcklps(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.unpcklps_rr(src.code(), dest.code());
}
void pinsrd(unsigned lane, Register src, FloatRegister dest) {
MOZ_ASSERT(HasSSE41());
masm.pinsrd_irr(lane, src.code(), dest.code());
@ -1860,6 +1856,18 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_ASSERT(HasSSE2());
masm.movhlps_rr(src.code(), dest.code());
}
// Emits MOVLHPS: copies the low two float lanes of |src| into the high two
// lanes of |dest|; the low two lanes of |dest| are left untouched.
// Asserts that SSE2 is available.
void movlhps(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.movlhps_rr(src.code(), dest.code());
}
// Emits UNPCKLPS: interleaves the low two float lanes of |dest| and |src|,
// leaving dest = (dest0, src0, dest1, src1).
// Asserts that SSE2 is available.
void unpcklps(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.unpcklps_rr(src.code(), dest.code());
}
// Emits UNPCKHPS: interleaves the high two float lanes of |dest| and |src|,
// leaving dest = (dest2, src2, dest3, src3).
// Asserts that SSE2 is available.
void unpckhps(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.unpckhps_rr(src.code(), dest.code());
}
void shufps(uint32_t mask, FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.shufps_irr(mask, src.code(), dest.code());

Просмотреть файл

@ -295,6 +295,8 @@ private:
OP2_MOVPS_WpsVps = 0x11,
OP2_MOVHLPS_VqUq = 0x12,
OP2_UNPCKLPS_VsdWsd = 0x14,
OP2_UNPCKHPS_VsdWsd = 0x15,
OP2_MOVLHPS_VqUq = 0x16,
OP2_MOVAPD_VsdWsd = 0x28,
OP2_MOVAPS_VsdWsd = 0x28,
OP2_MOVAPS_WsdVsd = 0x29,
@ -2921,6 +2923,13 @@ public:
m_formatter.twoByteOp(OP2_UNPCKLPS_VsdWsd, (RegisterID)dst, (RegisterID)src);
}
// Encodes the register-to-register form of UNPCKHPS (two-byte opcode
// OP2_UNPCKHPS_VsdWsd) after logging the instruction through spew().
void unpckhps_rr(XMMRegisterID src, XMMRegisterID dst)
{
    spew("unpckhps %s, %s", nameFPReg(src), nameFPReg(dst));

    // The destination register is passed first, the source register second.
    const RegisterID dstReg = (RegisterID)dst;
    const RegisterID srcReg = (RegisterID)src;
    m_formatter.twoByteOp(OP2_UNPCKHPS_VsdWsd, dstReg, srcReg);
}
void movd_rr(RegisterID src, XMMRegisterID dst)
{
spew("movd %s, %s",
@ -2981,6 +2990,13 @@ public:
m_formatter.twoByteOp(OP2_MOVHLPS_VqUq, (RegisterID)dst, (RegisterID)src);
}
// Encodes the register-to-register form of MOVLHPS (two-byte opcode
// OP2_MOVLHPS_VqUq) after logging the instruction through spew().
void movlhps_rr(XMMRegisterID src, XMMRegisterID dst)
{
    spew("movlhps %s, %s", nameFPReg(src), nameFPReg(dst));

    // The destination register is passed first, the source register second.
    const RegisterID dstReg = (RegisterID)dst;
    const RegisterID srcReg = (RegisterID)src;
    m_formatter.twoByteOp(OP2_MOVLHPS_VqUq, dstReg, srcReg);
}
void psrldq_ir(int shift, XMMRegisterID dest)
{
spew("psrldq $%d, %s",

Просмотреть файл

@ -2394,8 +2394,12 @@ CodeGeneratorX86Shared::visitSimdSwizzleI(LSimdSwizzleI *ins)
FloatRegister input = ToFloatRegister(ins->input());
FloatRegister output = ToFloatRegister(ins->output());
uint32_t mask = MacroAssembler::ComputeShuffleMask(ins->laneX(), ins->laneY(), ins->laneZ(),
ins->laneW());
uint32_t x = ins->laneX();
uint32_t y = ins->laneY();
uint32_t z = ins->laneZ();
uint32_t w = ins->laneW();
uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
masm.shuffleInt32(mask, input, output);
return true;
}
@ -2406,8 +2410,38 @@ CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF *ins)
FloatRegister input = ToFloatRegister(ins->input());
FloatRegister output = ToFloatRegister(ins->output());
uint32_t mask = MacroAssembler::ComputeShuffleMask(ins->laneX(), ins->laneY(), ins->laneZ(),
ins->laneW());
uint32_t x = ins->laneX();
uint32_t y = ins->laneY();
uint32_t z = ins->laneZ();
uint32_t w = ins->laneW();
// TODO Here and below, arch specific lowering could identify this pattern
// and use defineReuseInput to avoid this move (bug 1084404)
if (ins->lanesMatch(2, 3, 2, 3)) {
masm.movaps(input, output);
masm.movhlps(input, output);
return true;
}
if (ins->lanesMatch(0, 1, 0, 1)) {
masm.movaps(input, output);
masm.movlhps(input, output);
return true;
}
if (ins->lanesMatch(0, 0, 1, 1)) {
masm.movaps(input, output);
masm.unpcklps(input, output);
return true;
}
if (ins->lanesMatch(2, 2, 3, 3)) {
masm.movaps(input, output);
masm.unpckhps(input, output);
return true;
}
uint32_t mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
masm.shuffleFloat32(mask, input, output);
return true;
}
@ -2448,6 +2482,11 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
if (numLanesFromLHS == 3) {
unsigned firstMask = -1, secondMask = -1;
if (ins->lanesMatch(4, 1, 2, 3)) {
masm.movss(rhs, out);
return true;
}
FloatRegister rhsCopy = ToFloatRegister(ins->temp());
if (x < 4 && y < 4) {
@ -2497,6 +2536,46 @@ CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
// Two elements from one vector, two other elements from the other
MOZ_ASSERT(numLanesFromLHS == 2);
// TODO Here and below, symmetric case would be more handy to avoid a move,
// but can't be reached because operands would get swapped (bug 1084404).
if (ins->lanesMatch(2, 3, 6, 7)) {
masm.movaps(rhs, ScratchSimdReg);
masm.movhlps(lhs, ScratchSimdReg);
masm.movaps(ScratchSimdReg, out);
return true;
}
if (ins->lanesMatch(0, 1, 4, 5)) {
masm.movlhps(rhs, lhs);
return true;
}
if (ins->lanesMatch(0, 4, 1, 5)) {
masm.unpcklps(rhs, lhs);
return true;
}
// TODO swapped case would be better (bug 1084404)
if (ins->lanesMatch(4, 0, 5, 1)) {
masm.movaps(rhs, ScratchSimdReg);
masm.unpcklps(lhs, ScratchSimdReg);
masm.movaps(ScratchSimdReg, out);
return true;
}
if (ins->lanesMatch(2, 6, 3, 7)) {
masm.unpckhps(rhs, lhs);
return true;
}
// TODO swapped case would be better (bug 1084404)
if (ins->lanesMatch(6, 2, 7, 3)) {
masm.movaps(rhs, ScratchSimdReg);
masm.unpckhps(lhs, ScratchSimdReg);
masm.movaps(ScratchSimdReg, out);
return true;
}
// In one shufps
if (x < 4 && y < 4) {
mask = MacroAssembler::ComputeShuffleMask(x, y, z % 4, w % 4);