Bug 1021716: SIMD x86-x64: Implement MSimdShuffleMix; r=sunfish

This commit is contained in:
Benjamin Bouvier 2014-08-27 19:24:41 +02:00
Родитель 39ac3875bc
Коммит b2b91caa03
12 изменённых файлов: 191 добавлений и 47 удалений

Просмотреть файл

@ -242,10 +242,10 @@ class LSimdSwizzleBase : public LInstructionHelper<1, 1, 0>
return getOperand(0);
}
SimdLane laneX() const { return mir_->toSimdSwizzle()->laneX(); }
SimdLane laneY() const { return mir_->toSimdSwizzle()->laneY(); }
SimdLane laneZ() const { return mir_->toSimdSwizzle()->laneZ(); }
SimdLane laneW() const { return mir_->toSimdSwizzle()->laneW(); }
int32_t laneX() const { return mir_->toSimdSwizzle()->laneX(); }
int32_t laneY() const { return mir_->toSimdSwizzle()->laneY(); }
int32_t laneZ() const { return mir_->toSimdSwizzle()->laneZ(); }
int32_t laneW() const { return mir_->toSimdSwizzle()->laneW(); }
};
// Shuffles an int32x4 into another int32x4 vector.
@ -265,6 +265,27 @@ class LSimdSwizzleF : public LSimdSwizzleBase
{}
};
// LIR instruction for a two-input SIMD shuffle; it is used for both int32x4
// and float32x4 vectors. The lane selectors are not stored in the LIR node:
// every lane accessor reads them back from the attached MSimdShuffle MIR node.
class LSimdShuffle : public LInstructionHelper<1, 2, 0>
{
  public:
    LIR_HEADER(SimdShuffle);

    LSimdShuffle()
    {}

    // First (left-hand) input vector: operand 0.
    const LAllocation *lhs() { return getOperand(0); }

    // Second (right-hand) input vector: operand 1.
    const LAllocation *rhs() { return getOperand(1); }

    // Lane selectors for the four output lanes, forwarded from the MIR node.
    int32_t laneX() const {
        return mir_->toSimdShuffle()->laneX();
    }
    int32_t laneY() const {
        return mir_->toSimdShuffle()->laneY();
    }
    int32_t laneZ() const {
        return mir_->toSimdShuffle()->laneZ();
    }
    int32_t laneW() const {
        return mir_->toSimdShuffle()->laneW();
    }
};
// Binary SIMD comparison operation between two SIMD operands
class LSimdBinaryComp: public LInstructionHelper<1, 2, 0>
{

Просмотреть файл

@ -26,6 +26,7 @@
_(SimdSignMaskX4) \
_(SimdSwizzleI) \
_(SimdSwizzleF) \
_(SimdShuffle) \
_(SimdUnaryArithIx4) \
_(SimdUnaryArithFx4) \
_(SimdBinaryCompIx4) \

Просмотреть файл

@ -3830,6 +3830,24 @@ LIRGenerator::visitSimdSwizzle(MSimdSwizzle *ins)
return false;
}
// Lowers an MSimdShuffle into an LSimdShuffle.
//
// Both inputs and the result must be SIMD typed. Only Int32x4 and Float32x4
// results are handled; any other SIMD type is a hard crash. On the supported
// path the return value is whatever lowerForFPU() reports.
bool
LIRGenerator::visitSimdShuffle(MSimdShuffle *ins)
{
    MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
    MOZ_ASSERT(IsSimdType(ins->type()));

    if (ins->type() != MIRType_Int32x4 && ins->type() != MIRType_Float32x4) {
        // The message names the operation that actually failed, so that crash
        // reports point at the shuffle lowering rather than lane extraction.
        MOZ_CRASH("Unknown SIMD kind when shuffling lanes");
        return false;
    }

    MDefinition *lhs = ins->lhs();
    MDefinition *rhs = ins->rhs();
    LSimdShuffle *lir = new (alloc()) LSimdShuffle;
    return lowerForFPU(lir, ins, lhs, rhs);
}
bool
LIRGenerator::visitSimdUnaryArith(MSimdUnaryArith *ins)
{

Просмотреть файл

@ -273,6 +273,7 @@ class LIRGenerator : public LIRGeneratorSpecific
bool visitSimdInsertElement(MSimdInsertElement *ins);
bool visitSimdSignMask(MSimdSignMask *ins);
bool visitSimdSwizzle(MSimdSwizzle *ins);
bool visitSimdShuffle(MSimdShuffle *ins);
bool visitSimdUnaryArith(MSimdUnaryArith *ins);
bool visitSimdBinaryComp(MSimdBinaryComp *ins);
bool visitSimdBinaryArith(MSimdBinaryArith *ins);

Просмотреть файл

@ -1575,34 +1575,51 @@ class MSimdSignMask : public MUnaryInstruction
ALLOW_CLONE(MSimdSignMask)
};
// Base for the MSimdSwizzle and MSimdShuffle classes.
//
// Holds the four lane selectors of a swizzle/shuffle packed into a single
// word, and exposes them through laneX()/laneY()/laneZ()/laneW().
class MSimdShuffleBase
{
  protected:
    // As of now, there are at most 4 lanes. For each lane, we need to know
    // which input we choose and which of the 4 lanes we choose; that can be
    // packed in 3 bits for each lane, so 12 bits in total.
    uint32_t laneMask_;
    uint32_t arity_;

    // Packs the four selectors into laneMask_. |type| is only used to check
    // that this is a 4-lane SIMD type.
    MSimdShuffleBase(int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW, MIRType type)
    {
        MOZ_ASSERT(IsSimdType(type));
        MOZ_ASSERT(SimdTypeToLength(type) == 4);

        // Every selector must fit in its 3-bit field. A negative or too-large
        // value would otherwise spill into the neighbouring fields and
        // silently change the other lanes.
        MOZ_ASSERT(0 <= laneX && laneX < 8);
        MOZ_ASSERT(0 <= laneY && laneY < 8);
        MOZ_ASSERT(0 <= laneZ && laneZ < 8);
        MOZ_ASSERT(0 <= laneW && laneW < 8);

        // Shift as unsigned: left-shifting a negative signed value is
        // undefined behaviour.
        laneMask_ = (uint32_t(laneX) << 0) |
                    (uint32_t(laneY) << 3) |
                    (uint32_t(laneZ) << 6) |
                    (uint32_t(laneW) << 9);
        arity_ = 4;
    }

    // True when both instructions select exactly the same lanes.
    bool sameLanes(const MSimdShuffleBase *other) const {
        return laneMask_ == other->laneMask_;
    }

  public:
    // For now, these formulas are fine for x4 types. They'll need to be
    // generalized for other SIMD type lengths.
    int32_t laneX() const { MOZ_ASSERT(arity_ == 4); return laneMask_ & 7; }
    int32_t laneY() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 3) & 7; }
    int32_t laneZ() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 6) & 7; }
    int32_t laneW() const { MOZ_ASSERT(arity_ == 4); return (laneMask_ >> 9) & 7; }
};
// Applies a shuffle operation to the input, putting the input lanes as
// indicated in the output register's lanes. This implements the SIMD.js
// "shuffle" function, that takes one vector and one mask.
class MSimdSwizzle : public MUnaryInstruction
class MSimdSwizzle : public MUnaryInstruction, public MSimdShuffleBase
{
protected:
// As of now, there are at most 4 lanes.
SimdLane laneX_;
SimdLane laneY_;
SimdLane laneZ_;
SimdLane laneW_;
MSimdSwizzle(MDefinition *obj, MIRType type,
SimdLane laneX, SimdLane laneY, SimdLane laneZ, SimdLane laneW)
: MUnaryInstruction(obj),
laneX_(laneX), laneY_(laneY), laneZ_(laneZ), laneW_(laneW)
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
: MUnaryInstruction(obj), MSimdShuffleBase(laneX, laneY, laneZ, laneW, type)
{
MOZ_ASSERT(laneX < 4 && laneY < 4 && laneZ < 4 && laneW < 4);
MOZ_ASSERT(IsSimdType(obj->type()));
// Returned value needs to be in a vector too
MOZ_ASSERT(IsSimdType(type));
MOZ_ASSERT(SimdTypeToScalarType(obj->type()) == type);
mozilla::DebugOnly<uint32_t> expectedLength = SimdTypeToLength(obj->type());
MOZ_ASSERT(uint32_t(laneX_) < expectedLength);
MOZ_ASSERT(uint32_t(laneY_) < expectedLength);
MOZ_ASSERT(uint32_t(laneZ_) < expectedLength);
MOZ_ASSERT(uint32_t(laneW_) < expectedLength);
MOZ_ASSERT(obj->type() == type);
setResultType(type);
setMovable();
}
@ -1611,36 +1628,68 @@ class MSimdSwizzle : public MUnaryInstruction
INSTRUCTION_HEADER(SimdSwizzle);
static MSimdSwizzle *NewAsmJS(TempAllocator &alloc, MDefinition *obj, MIRType type,
SimdLane laneX, SimdLane laneY, SimdLane laneZ, SimdLane laneW)
int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
{
return new(alloc) MSimdSwizzle(obj, type, laneX, laneY, laneZ, laneW);
}
SimdLane laneX() const { return laneX_; }
SimdLane laneY() const { return laneY_; }
SimdLane laneZ() const { return laneZ_; }
SimdLane laneW() const { return laneW_; }
AliasSet getAliasSet() const {
return AliasSet::None();
}
bool congruentTo(const MDefinition *ins) const {
if (!ins->isSimdSwizzle())
return false;
const MSimdSwizzle *other = ins->toSimdSwizzle();
if (other->laneX_ != laneX_ ||
other->laneY_ != laneY_ ||
other->laneZ_ != laneZ_ ||
other->laneW_ != laneW_)
{
return false;
}
return congruentIfOperandsEqual(other);
return sameLanes(other) && congruentIfOperandsEqual(other);
}
AliasSet getAliasSet() const {
return AliasSet::None();
}
ALLOW_CLONE(MSimdSwizzle)
};
// Two-input SIMD shuffle. The first two output lanes are taken from lanes of
// the first input and the last two output lanes from lanes of the second
// input. Lane selectors are stored by MSimdShuffleBase; the constructor only
// checks that each one is below 8.
class MSimdShuffle : public MBinaryInstruction, public MSimdShuffleBase
{
    // Private: instances are created through NewAsmJS().
    MSimdShuffle(MDefinition *lhs, MDefinition *rhs, MIRType type,
                 int32_t laneX, int32_t laneY, int32_t laneZ, int32_t laneW)
      : MBinaryInstruction(lhs, rhs),
        MSimdShuffleBase(laneX, laneY, laneZ, laneW, lhs->type())
    {
        MOZ_ASSERT(laneX < 8 && laneY < 8 && laneZ < 8 && laneW < 8);

        // Both inputs and the result are SIMD values of one and the same type.
        MOZ_ASSERT(IsSimdType(lhs->type()));
        MOZ_ASSERT(IsSimdType(rhs->type()));
        MOZ_ASSERT(lhs->type() == rhs->type());
        MOZ_ASSERT(IsSimdType(type));
        MOZ_ASSERT(lhs->type() == type);

        setResultType(type);
        setMovable();
    }

  public:
    INSTRUCTION_HEADER(SimdShuffle);

    // Allocates a new shuffle of |lhs| and |rhs| producing a value of |type|.
    static MSimdShuffle *NewAsmJS(TempAllocator &alloc, MDefinition *lhs, MDefinition *rhs,
                                  MIRType type, int32_t laneX, int32_t laneY, int32_t laneZ,
                                  int32_t laneW)
    {
        return new(alloc) MSimdShuffle(lhs, rhs, type, laneX, laneY, laneZ, laneW);
    }

    // Congruent only to another shuffle that selects the same lanes and is
    // binary-congruent to this one.
    bool congruentTo(const MDefinition *ins) const {
        if (ins->isSimdShuffle()) {
            const MSimdShuffle *other = ins->toSimdShuffle();
            if (sameLanes(other))
                return binaryCongruentTo(other);
        }
        return false;
    }

    AliasSet getAliasSet() const {
        return AliasSet::None();
    }

    ALLOW_CLONE(MSimdShuffle)
};
class MSimdUnaryArith : public MUnaryInstruction
{
public:

Просмотреть файл

@ -21,6 +21,7 @@ namespace jit {
_(SimdInsertElement) \
_(SimdSignMask) \
_(SimdSwizzle) \
_(SimdShuffle) \
_(SimdUnaryArith) \
_(SimdBinaryComp) \
_(SimdBinaryArith) \

Просмотреть файл

@ -120,6 +120,7 @@ class ParallelSafetyVisitor : public MDefinitionVisitor
SAFE_OP(SimdInsertElement)
SAFE_OP(SimdSignMask)
SAFE_OP(SimdSwizzle)
SAFE_OP(SimdShuffle)
SAFE_OP(SimdUnaryArith)
SAFE_OP(SimdBinaryComp)
SAFE_OP(SimdBinaryArith)

Просмотреть файл

@ -1864,6 +1864,22 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_ASSERT(HasSSE2());
masm.shufps_irr(mask, src.code(), dest.code());
}
// Emits shufps with immediate |mask| into |dest|, taking the other source
// from |src|. |src| may be a float register, a base+displacement memory
// operand or an absolute 32-bit address; any other operand kind crashes.
void shufps(uint32_t mask, const Operand &src, FloatRegister dest) {
    MOZ_ASSERT(HasSSE2());
    switch (src.kind()) {
      case Operand::FPREG:
        masm.shufps_irr(mask, src.fpu(), dest.code());
        return;
      case Operand::MEM_REG_DISP:
        masm.shufps_imr(mask, src.disp(), src.base(), dest.code());
        return;
      case Operand::MEM_ADDRESS32:
        masm.shufps_imr(mask, src.address(), dest.code());
        return;
      default:
        break;
    }
    // Only reached for operand kinds that have no shufps encoding above.
    MOZ_CRASH("unexpected operand kind");
}
void addsd(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.addsd_rr(src.code(), dest.code());

Просмотреть файл

@ -2940,7 +2940,7 @@ public:
void pshufd_irr(uint32_t mask, XMMRegisterID src, XMMRegisterID dst)
{
MOZ_ASSERT(mask < 256);
spew("pshufd 0x%x, %s, %s",
spew("pshufd 0x%x, %s, %s",
mask, nameFPReg(src), nameFPReg(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp(OP2_PSHUFD_VdqWdqIb, (RegisterID)dst, (RegisterID)src);
@ -2956,6 +2956,24 @@ public:
m_formatter.immediate8(uint8_t(mask));
}
// Emits shufps with an 8-bit immediate |mask|, a memory source operand
// addressed as |offset|(|base|), and XMM destination register |dst|.
// |mask| must fit in one byte.
void shufps_imr(uint32_t mask, int offset, RegisterID base, XMMRegisterID dst)
{
MOZ_ASSERT(mask < 256);
spew("shufps 0x%x, %s0x%x(%s), %s",
mask, PRETTY_PRINT_OFFSET(offset), nameIReg(base), nameFPReg(dst));
// Two-byte opcode with no SSE prefix, followed by the immediate byte.
m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, (RegisterID)dst, base, offset);
m_formatter.immediate8(uint8_t(mask));
}
void shufps_imr(uint32_t mask, const void* address, XMMRegisterID dst)
{
spew("shufps %x, %p, %s",
mask, address, nameFPReg(dst));
m_formatter.prefix(PRE_SSE_F3);
m_formatter.twoByteOp(OP2_SHUFPS_VpsWpsIb, (RegisterID)dst, address);
m_formatter.immediate8(uint8_t(mask));
}
void movhlps_rr(XMMRegisterID src, XMMRegisterID dst)
{
spew("movhlps %s, %s",

Просмотреть файл

@ -2412,6 +2412,19 @@ CodeGeneratorX86Shared::visitSimdSwizzleF(LSimdSwizzleF *ins)
return true;
}
// Generates code for an LSimdShuffle. The output register must be the same as
// the lhs register; the rhs may be any operand. The Z and W selectors are
// rebased by 4 before the shuffle mask is computed; X and Y are passed through
// unchanged.
bool
CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
{
    FloatRegister lhs = ToFloatRegister(ins->lhs());
    Operand rhs = ToOperand(ins->rhs());
    MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);

    // Selectors for the two last output lanes, rebased by 4.
    int32_t rebasedZ = ins->laneZ() - 4;
    int32_t rebasedW = ins->laneW() - 4;

    uint32_t mask = MacroAssembler::ComputeShuffleMask(ins->laneX(), ins->laneY(),
                                                       rebasedZ, rebasedW);
    masm.shuffleMix(mask, rhs, lhs);
    return true;
}
bool
CodeGeneratorX86Shared::visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *ins)
{

Просмотреть файл

@ -221,6 +221,7 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
bool visitSimdSignMaskX4(LSimdSignMaskX4 *ins);
bool visitSimdSwizzleI(LSimdSwizzleI *lir);
bool visitSimdSwizzleF(LSimdSwizzleF *lir);
bool visitSimdShuffle(LSimdShuffle *lir);
bool visitSimdUnaryArithIx4(LSimdUnaryArithIx4 *lir);
bool visitSimdUnaryArithFx4(LSimdUnaryArithFx4 *lir);
bool visitSimdBinaryCompIx4(LSimdBinaryCompIx4 *lir);

Просмотреть файл

@ -595,13 +595,12 @@ class MacroAssemblerX86Shared : public Assembler
// Packed float32 division: forwards |src| and |dest| unchanged to divps().
void packedDivFloat32(const Operand &src, FloatRegister dest) {
divps(src, dest);
}
static uint32_t ComputeShuffleMask(SimdLane x, SimdLane y = LaneX,
SimdLane z = LaneX, SimdLane w = LaneX)
static uint32_t ComputeShuffleMask(uint32_t x = LaneX, uint32_t y = LaneY,
uint32_t z = LaneZ, uint32_t w = LaneW)
{
uint32_t r = (uint32_t(w) << 6) |
(uint32_t(z) << 4) |
(uint32_t(y) << 2) |
uint32_t(x);
MOZ_ASSERT(x < 4 && y < 4 && z < 4 && w < 4);
uint32_t r = (w << 6) | (z << 4) | (y << 2) | (x << 0);
MOZ_ASSERT(r < 256);
return r;
}
@ -626,6 +625,11 @@ class MacroAssemblerX86Shared : public Assembler
moveAlignedFloat32x4(src, dest);
shufps(mask, dest, dest);
}
// Shuffles |dest| with |src| according to the immediate |mask| by emitting a
// single shufps(mask, src, dest).
void shuffleMix(uint32_t mask, const Operand &src, FloatRegister dest) {
// Note this uses shufps, which incurs a cross-domain penalty on CPUs where
// that applies, but that's the way clang and gcc do it.
shufps(mask, src, dest);
}
void moveFloatAsDouble(Register src, FloatRegister dest) {
movd(src, dest);