Bug 1021716: SIMD x86-x64: Implement generic binary shuffle; r=sunfish

This commit is contained in:
Benjamin Bouvier 2014-10-21 15:39:15 +02:00
Родитель b2b91caa03
Коммит ba86b71604
4 изменённых файлов: 155 добавлений и 16 удалений

Просмотреть файл

@ -266,12 +266,16 @@ class LSimdSwizzleF : public LSimdSwizzleBase
};
// Base class for both int32x4 and float32x4 shuffle instructions.
class LSimdShuffle : public LInstructionHelper<1, 2, 0>
class LSimdShuffle : public LInstructionHelper<1, 2, 1>
{
public:
LIR_HEADER(SimdShuffle);
LSimdShuffle()
{}
LSimdShuffle(const LAllocation &lhs, const LAllocation &rhs, const LDefinition &temp)
{
setOperand(0, lhs);
setOperand(1, rhs);
setTemp(0, temp);
}
const LAllocation *lhs() {
return getOperand(0);
@ -279,6 +283,9 @@ class LSimdShuffle : public LInstructionHelper<1, 2, 0>
const LAllocation *rhs() {
return getOperand(1);
}
const LDefinition *temp() {
return getTemp(0);
}
int32_t laneX() const { return mir_->toSimdShuffle()->laneX(); }
int32_t laneY() const { return mir_->toSimdShuffle()->laneY(); }

Просмотреть файл

@ -3836,16 +3836,20 @@ LIRGenerator::visitSimdShuffle(MSimdShuffle *ins)
MOZ_ASSERT(IsSimdType(ins->lhs()->type()));
MOZ_ASSERT(IsSimdType(ins->rhs()->type()));
MOZ_ASSERT(IsSimdType(ins->type()));
MOZ_ASSERT(ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4);
if (ins->type() == MIRType_Int32x4 || ins->type() == MIRType_Float32x4) {
MDefinition *lhs = ins->lhs();
MDefinition *rhs = ins->rhs();
LSimdShuffle *lir = new (alloc()) LSimdShuffle;
return lowerForFPU(lir, ins, lhs, rhs);
}
bool zFromLHS = ins->laneZ() < 4;
bool wFromLHS = ins->laneW() < 4;
uint32_t lanesFromLHS = (ins->laneX() < 4) + (ins->laneY() < 4) + zFromLHS + wFromLHS;
MOZ_CRASH("Unknown SIMD kind when getting lane");
return false;
LUse lhs = useRegisterAtStart(ins->lhs());
LUse rhs = useRegister(ins->rhs());
// See codegen for requirements details.
LDefinition temp = (lanesFromLHS == 3) ? tempCopy(ins->rhs(), 1) : LDefinition::BogusTemp();
LSimdShuffle *lir = new (alloc()) LSimdShuffle(lhs, rhs, temp);
return defineReuseInput(lir, ins, 0);
}
bool

Просмотреть файл

@ -1673,6 +1673,18 @@ class MSimdShuffle : public MBinaryInstruction, public MSimdShuffleBase
MIRType type, int32_t laneX, int32_t laneY, int32_t laneZ,
int32_t laneW)
{
// Swap operands so that new lanes come from LHS in majority.
// In the balanced case, swap operands if need be, so that a single shufps
// suffices on x86.
unsigned lanesFromLHS = (laneX < 4) + (laneY < 4) + (laneZ < 4) + (laneW < 4);
if (lanesFromLHS < 2 || (lanesFromLHS == 2 && laneX >= 4 && laneY >=4)) {
laneX = (laneX + 4) % 8;
laneY = (laneY + 4) % 8;
laneZ = (laneZ + 4) % 8;
laneW = (laneW + 4) % 8;
mozilla::Swap(lhs, rhs);
}
return new(alloc) MSimdShuffle(lhs, rhs, type, laneX, laneY, laneZ, laneW);
}

Просмотреть файл

@ -2416,12 +2416,128 @@ bool
CodeGeneratorX86Shared::visitSimdShuffle(LSimdShuffle *ins)
{
// Generates shufps-based code for a shuffle of two four-lane vectors.
// Lane selectors below 4 pick a lane of LHS; selectors of 4 and above pick
// lane (selector % 4) of RHS. The output register is the LHS register.
//
// NOTE(review): this listing appears to be a diff with its +/- markers
// stripped, so superseded and new lines are interleaved. The `Operand rhs`
// declaration, the first output assertion, and the `uint32_t mask = ...` /
// `shuffleMix` pair below look like the superseded lines -- confirm against
// the repository before relying on them.
FloatRegister lhs = ToFloatRegister(ins->lhs());
Operand rhs = ToOperand(ins->rhs());
MOZ_ASSERT(ToFloatRegister(ins->output()) == lhs);
FloatRegister rhs = ToFloatRegister(ins->rhs());
FloatRegister out = ToFloatRegister(ins->output());
MOZ_ASSERT(out == lhs); // the output is defined as reusing the LHS input
uint32_t mask = MacroAssembler::ComputeShuffleMask(ins->laneX(), ins->laneY(), ins->laneZ() - 4,
ins->laneW() - 4);
masm.shuffleMix(mask, rhs, lhs);
// Lane selectors, each in [0, 8) presumably -- TODO confirm the range is
// enforced where the MIR node is created.
uint32_t x = ins->laneX();
uint32_t y = ins->laneY();
uint32_t z = ins->laneZ();
uint32_t w = ins->laneW();
// Check that lanes come from LHS in majority:
unsigned numLanesFromLHS = (x < 4) + (y < 4) + (z < 4) + (w < 4);
MOZ_ASSERT(numLanesFromLHS >= 2);
// When reading this method, remember that the first two lanes of a shufps
// result are selected from the destination operand (right operand) and the
// last two lanes are selected from the source operand (left operand).
//
// Legend for explanations:
// - L: LHS
// - R: RHS
// - T: temporary
uint32_t mask;
// Trivial case: all lanes come from LHS only (Lx, Ly, Lz, Lw).
if (numLanesFromLHS == 4) {
mask = MacroAssembler::ComputeShuffleMask(x, y, z, w);
masm.shufps(mask, lhs, out);
return true;
}
// One lane comes from RHS, the three other lanes come from LHS.
if (numLanesFromLHS == 3) {
// Both masks are assigned in every branch below.
unsigned firstMask = -1, secondMask = -1;
// The temp register of the instruction (T in the comments below).
FloatRegister rhsCopy = ToFloatRegister(ins->temp());
// The RHS lane is in the upper half (z or w): the result is built in lhs.
if (x < 4 && y < 4) {
if (w >= 4) {
w %= 4;
// T = (Rw Rw Lz Lz) = shufps(firstMask, lhs, rhs)
firstMask = MacroAssembler::ComputeShuffleMask(w, w, z, z);
// (Lx Ly Lz Rw) = (Lx Ly Tz Tx) = shufps(secondMask, T, lhs)
secondMask = MacroAssembler::ComputeShuffleMask(x, y, LaneZ, LaneX);
} else {
MOZ_ASSERT(z >= 4);
z %= 4;
// T = (Rz Rz Lw Lw) = shufps(firstMask, lhs, rhs)
firstMask = MacroAssembler::ComputeShuffleMask(z, z, w, w);
// (Lx Ly Rz Lw) = (Lx Ly Tx Tz) = shufps(secondMask, T, lhs)
secondMask = MacroAssembler::ComputeShuffleMask(x, y, LaneX, LaneZ);
}
masm.shufps(firstMask, lhs, rhsCopy);
masm.shufps(secondMask, rhsCopy, lhs);
return true;
}
// The RHS lane is in the lower half (x or y): the result is built in the
// temporary and then moved to the output register.
MOZ_ASSERT(z < 4 && w < 4);
if (y >= 4) {
y %= 4;
// T = (Ry Ry Lx Lx) = shufps(firstMask, lhs, rhs)
firstMask = MacroAssembler::ComputeShuffleMask(y, y, x, x);
// (Lx Ry Lz Lw) = (Tz Tx Lz Lw) = shufps(secondMask, lhs, T)
secondMask = MacroAssembler::ComputeShuffleMask(LaneZ, LaneX, z, w);
} else {
MOZ_ASSERT(x >= 4);
x %= 4;
// T = (Rx Rx Ly Ly) = shufps(firstMask, lhs, rhs)
firstMask = MacroAssembler::ComputeShuffleMask(x, x, y, y);
// (Rx Ly Lz Lw) = (Tx Tz Lz Lw) = shufps(secondMask, lhs, T)
secondMask = MacroAssembler::ComputeShuffleMask(LaneX, LaneZ, z, w);
}
masm.shufps(firstMask, lhs, rhsCopy);
masm.shufps(secondMask, lhs, rhsCopy);
masm.movaps(rhsCopy, out);
return true;
}
// Two lanes come from LHS and the two other lanes come from RHS.
MOZ_ASSERT(numLanesFromLHS == 2);
// Both LHS lanes are already in the lower half: a single shufps suffices.
if (x < 4 && y < 4) {
mask = MacroAssembler::ComputeShuffleMask(x, y, z % 4, w % 4);
masm.shufps(mask, rhs, out);
return true;
}
// At creation, we should have explicitly swapped in this case.
// NOTE(review): with exactly two LHS lanes and the (x < 4 && y < 4) case
// already handled above, z and w cannot both be >= 4 here, so this assertion
// looks vacuous; the intended condition may be !(x >= 4 && y >= 4) -- confirm.
MOZ_ASSERT(!(z >= 4 && w >= 4));
// In two shufps, for the most generic case:
// - firstMask gathers the two LHS lanes into positions 0-1 (filled through i)
//   and the two RHS lanes into positions 2-3 (filled through j);
// - secondMask records, for each output lane in x, y, z, w order (index k),
//   the position at which that lane was placed by firstMask.
uint32_t firstMask[4], secondMask[4];
unsigned i = 0, j = 2, k = 0;
#define COMPUTE_MASK(lane) \
if (lane >= 4) { \
firstMask[j] = lane % 4; \
secondMask[k++] = j++; \
} else { \
firstMask[i] = lane; \
secondMask[k++] = i++; \
}
COMPUTE_MASK(x)
COMPUTE_MASK(y)
COMPUTE_MASK(z)
COMPUTE_MASK(w)
#undef COMPUTE_MASK
// Exactly two lanes were taken from each input and all four output lanes
// were recorded.
MOZ_ASSERT(i == 2 && j == 4 && k == 4);
// First shufps: gather the selected LHS and RHS lanes into lhs.
mask = MacroAssembler::ComputeShuffleMask(firstMask[0], firstMask[1],
firstMask[2], firstMask[3]);
masm.shufps(mask, rhs, lhs);
// Second shufps: reorder the gathered lanes into their final positions.
mask = MacroAssembler::ComputeShuffleMask(secondMask[0], secondMask[1],
secondMask[2], secondMask[3]);
masm.shufps(mask, lhs, lhs);
return true;
}