Bug 1110164: Extend MSimdSelect to also handle element-wise select; r=sunfish

--HG--
extra : rebase_source : 4502a8718d467da247d481476013e8b718f2f38a
extra : histedit_source : 9dd605bf892aa843cc892ab9f0fa07925225df85%2C765790385369b77fa4f13a6921d79109666a9017
This commit is contained in:
Benjamin Bouvier 2014-12-29 18:04:47 +01:00
Родитель bea36965c9
Коммит ebbd6d5231
5 изменённых файлов: 53 добавлений и 8 удалений

Просмотреть файл

@ -2656,7 +2656,7 @@ class FunctionCompiler
MOZ_ASSERT(mask->type() == MIRType_Int32x4);
MOZ_ASSERT(IsSimdType(lhs->type()) && rhs->type() == lhs->type());
MOZ_ASSERT(lhs->type() == type);
MSimdSelect *ins = MSimdSelect::NewAsmJS(alloc(), mask, lhs, rhs, type);
MSimdSelect *ins = MSimdSelect::NewAsmJS(alloc(), mask, lhs, rhs, type, /* isElementWise */ true);
curBlock_->add(ins);
return ins;
}

Просмотреть файл

@ -836,13 +836,24 @@ assertAsmTypeFail('glob', USE_ASM + I32 + F32 + I32SEL + "function f() {var m=i4
assertAsmTypeFail('glob', USE_ASM + I32 + F32 + I32SEL + "function f() {var m=i4(1,2,3,4); var x=i4(1,2,3,4); var y=f4(5,6,7,8); return i4(i4sel(m,x,y));} return f");
assertAsmTypeFail('glob', USE_ASM + I32 + F32 + I32SEL + "function f() {var m=i4(1,2,3,4); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return i4(i4sel(m,x,y));} return f");
assertAsmTypeFail('glob', USE_ASM + F32 + F32SEL + "function f() {var m=f4(1,2,3,4); return f4(f4sel(x,x,x));} return f");
assertAsmTypeFail('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=f4(1,2,3,4); var x=i4(1,2,3,4); return f4(f4sel(m,x,x));} return f");
assertAsmTypeFail('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=f4(1,2,3,4); var x=f4(1,2,3,4); return f4(f4sel(m,x,x));} return f");
assertAsmTypeFail('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(1,2,3,4); var x=f4(1,2,3,4); var y=i4(5,6,7,8); return f4(f4sel(m,x,y));} return f");
assertAsmTypeFail('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(1,2,3,4); var x=i4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y));} return f");
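// These pass with select but not bitselect: each result lane is chosen by the
// sign of the matching mask lane (negative takes the lane from the first value
// operand, anything else from the second), so mask lanes need not be all-zeros
// or all-ones.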
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(0,0,0,0); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 6, 7, 8]);
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(-1,-2,-3,-42); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [1, 2, 3, 4]);
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(1,-1,2,-2); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 2, 7, 4]);
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(42,45,-42,-47); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 6, 3, 4]);
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(0,0,0,0); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [5, 6, 7, 8]);
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(-1,-2,-3,-42); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [1, 2, 3, 4]);
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(1,-1,2,-2); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [5, 2, 7, 4]);
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + F32 + F32SEL + "function f() {var m=i4(42,45,-42,-47); var x=f4(1,2,3,4); var y=f4(5,6,7,8); return f4(f4sel(m,x,y)); } return f"), this)(), [5, 6, 3, 4]);
// These pass for both select and bitselect: every mask lane is either 0 or
// 0xffffffff.
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(0,0,0,0); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 6, 7, 8]);
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(0xffffffff,0xffffffff,0xffffffff,0xffffffff); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [1, 2, 3, 4]);
assertEqX4(asmLink(asmCompile('glob', USE_ASM + I32 + I32SEL + "function f() {var m=i4(0,0xffffffff,0,0xffffffff); var x=i4(1,2,3,4); var y=i4(5,6,7,8); return i4(i4sel(m,x,y)); } return f"), this)(), [5, 2, 7, 4]);

Просмотреть файл

@ -498,6 +498,9 @@ class LSimdSelect : public LInstructionHelper<1, 3, 1>
const LDefinition *temp() {
return getTemp(0);
}
// Typed accessor for the MIR node stored in mir_, obtained through
// toSimdSelect() so callers can query MSimdSelect-specific state.
MSimdSelect *mir() const {
return mir_->toSimdSelect();
}
};
// Constant 32-bit integer.

Просмотреть файл

@ -2023,9 +2023,11 @@ class MSimdShift : public MBinaryInstruction
class MSimdSelect : public MTernaryInstruction
{
private:
MSimdSelect(MDefinition *mask, MDefinition *lhs, MDefinition *rhs, MIRType type)
: MTernaryInstruction(mask, lhs, rhs)
bool isElementWise_;
MSimdSelect(MDefinition *mask, MDefinition *lhs, MDefinition *rhs, MIRType type,
bool isElementWise)
: MTernaryInstruction(mask, lhs, rhs), isElementWise_(isElementWise)
{
MOZ_ASSERT(IsSimdType(type));
MOZ_ASSERT(mask->type() == MIRType_Int32x4);
@ -2038,15 +2040,29 @@ class MSimdSelect : public MTernaryInstruction
public:
INSTRUCTION_HEADER(SimdSelect);
static MSimdSelect *NewAsmJS(TempAllocator &alloc, MDefinition *mask, MDefinition *lhs,
MDefinition *rhs, MIRType t)
MDefinition *rhs, MIRType t, bool isElementWise)
{
return new(alloc) MSimdSelect(mask, lhs, rhs, t);
return new(alloc) MSimdSelect(mask, lhs, rhs, t, isElementWise);
}
// Returns the mask operand, which is operand 0 (the first argument the
// constructor forwards to MTernaryInstruction).
MDefinition *mask() const {
return getOperand(0);
}
// Reports an empty alias set for this instruction.
// NOTE(review): presumably this means the select neither reads nor writes
// memory-backed state -- confirm against the AliasSet documentation.
AliasSet getAliasSet() const {
return AliasSet::None();
}
// Returns the isElementWise flag that was passed in at construction time.
bool isElementWise() const {
return isElementWise_;
}
// Two selects are congruent only when congruentIfOperandsEqual() accepts the
// pair AND both carry the same isElementWise flag, so an element-wise select
// is never treated as interchangeable with a non-element-wise one.
bool congruentTo(const MDefinition *ins) const {
if (!congruentIfOperandsEqual(ins))
return false;
// NOTE(review): the toSimdSelect() cast relies on the check above having
// already rejected instructions of another kind -- confirm.
return isElementWise_ == ins->toSimdSelect()->isElementWise();
}
ALLOW_CLONE(MSimdSelect)
};

Просмотреть файл

@ -2964,7 +2964,22 @@ CodeGeneratorX86Shared::visitSimdSelect(LSimdSelect *ins)
if (mask != temp)
masm.vmovaps(mask, temp);
masm.bitwiseAndX4(Operand(mask), output);
MSimdSelect *mir = ins->mir();
if (mir->isElementWise()) {
if (AssemblerX86Shared::HasAVX()) {
masm.vblendvps(mask, onTrue, onFalse, output);
return;
}
// SSE4.1 has plain blendvps which can do this, but it is awkward
// to use because it requires the mask to be in xmm0.
// Propagate sign to all bits of mask vector, if necessary.
if (!mir->mask()->isSimdBinaryComp())
masm.packedRightShiftByScalar(Imm32(31), temp);
}
masm.bitwiseAndX4(Operand(temp), output);
masm.bitwiseAndNotX4(Operand(onFalse), temp);
masm.bitwiseOrX4(Operand(temp), output);
}