Bug 1135042: Inline SIMD loads in Ion; r=bhackett

--HG--
extra : rebase_source : 9a41cda1780b07ba5ed4cb68010da30c8c91c6c8
Benjamin Bouvier 2015-03-02 12:11:19 +01:00
Parent c1a4bb0b11
Commit b8de7831f3
12 changed files with 155 additions and 9 deletions

View file

@@ -242,16 +242,16 @@
_(not) \
_(neg) \
_(swizzle) \
_(load) \
_(store) \
_(check)
#define FOREACH_COMMONX4_SIMD_OP(_) \
ION_COMMONX4_SIMD_OP(_) \
COMP_COMMONX4_TO_INT32X4_SIMD_OP(_) \
_(shuffle) \
_(load) \
_(loadX) \
_(loadXY) \
_(loadXYZ) \
_(store) \
_(storeX) \
_(storeXY) \
_(storeXYZ)

View file

@@ -0,0 +1,48 @@
load(libdir + 'simd.js');
setJitCompilerOption("ion.warmup.trigger", 40);
function f() {
var f32 = new Float32Array(16);
for (var i = 0; i < 16; i++)
f32[i] = i + 1;
var f64 = new Float64Array(f32.buffer);
var i32 = new Int32Array(f32.buffer);
var u32 = new Uint32Array(f32.buffer);
var i16 = new Int16Array(f32.buffer);
var u16 = new Uint16Array(f32.buffer);
var i8 = new Int8Array(f32.buffer);
var u8 = new Uint8Array(f32.buffer);
var r;
for (var i = 0; i < 150; i++) {
assertEqX4(SIMD.float32x4.load(f64, 0), [1,2,3,4]);
assertEqX4(SIMD.float32x4.load(f32, 1), [2,3,4,5]);
assertEqX4(SIMD.float32x4.load(i32, 2), [3,4,5,6]);
assertEqX4(SIMD.float32x4.load(i16, 3 << 1), [4,5,6,7]);
assertEqX4(SIMD.float32x4.load(u16, 4 << 1), [5,6,7,8]);
assertEqX4(SIMD.float32x4.load(i8 , 5 << 2), [6,7,8,9]);
assertEqX4(SIMD.float32x4.load(u8 , 6 << 2), [7,8,9,10]);
assertEqX4(SIMD.float32x4.load(f64, (16 >> 1) - (4 >> 1)), [13,14,15,16]);
assertEqX4(SIMD.float32x4.load(f32, 16 - 4), [13,14,15,16]);
assertEqX4(SIMD.float32x4.load(i32, 16 - 4), [13,14,15,16]);
assertEqX4(SIMD.float32x4.load(i16, (16 << 1) - (4 << 1)), [13,14,15,16]);
assertEqX4(SIMD.float32x4.load(u16, (16 << 1) - (4 << 1)), [13,14,15,16]);
assertEqX4(SIMD.float32x4.load(i8, (16 << 2) - (4 << 2)), [13,14,15,16]);
assertEqX4(SIMD.float32x4.load(u8, (16 << 2) - (4 << 2)), [13,14,15,16]);
var caught = false;
try {
SIMD.float32x4.load(i8, (i < 149) ? 0 : (16 << 2) - (4 << 2) + 1);
} catch (e) {
caught = true;
}
assertEq(i < 149 || caught, true);
}
return r;
}
f();
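
The shifted indices in the test above come from the fact that the index passed to SIMD.float32x4.load is counted in elements of the view being read, while the load itself always covers 16 bytes. A small sketch of that byte-offset arithmetic (plain JavaScript for the JS shell, not part of the patch; loadedByteRange is only an illustrative helper):

// The load starts at index * BYTES_PER_ELEMENT and reads 16 bytes,
// i.e. one float32x4, regardless of the view's element type.
function loadedByteRange(view, index) {
  var start = index * view.BYTES_PER_ELEMENT;
  return [start, start + 16];
}

var f32 = new Float32Array(16);       // 64-byte buffer
var i8 = new Int8Array(f32.buffer);
print(loadedByteRange(i8, 5 << 2));   // bytes [20, 36) -> f32[5..8] -> values 6,7,8,9
print(loadedByteRange(f32, 16 - 4));  // bytes [48, 64) -> the last four floats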

View file

@@ -8608,17 +8608,18 @@ CodeGenerator::visitLoadTypedArrayElement(LLoadTypedArrayElement *lir)
AnyRegister out = ToAnyRegister(lir->output());
Scalar::Type arrayType = lir->mir()->arrayType();
Scalar::Type readType = lir->mir()->readType();
int width = Scalar::byteSize(arrayType);
Label fail;
if (lir->index()->isConstant()) {
Address source(elements, ToInt32(lir->index()) * width + lir->mir()->offsetAdjustment());
- masm.loadFromTypedArray(arrayType, source, out, temp, &fail,
+ masm.loadFromTypedArray(readType, source, out, temp, &fail,
lir->mir()->canonicalizeDoubles());
} else {
BaseIndex source(elements, ToRegister(lir->index()), ScaleFromElemWidth(width),
lir->mir()->offsetAdjustment());
- masm.loadFromTypedArray(arrayType, source, out, temp, &fail,
+ masm.loadFromTypedArray(readType, source, out, temp, &fail,
lir->mir()->canonicalizeDoubles());
}

View file

@@ -834,6 +834,7 @@ class IonBuilder
SimdTypeDescr::Type from, SimdTypeDescr::Type to);
InliningStatus inlineSimdSelect(CallInfo &callInfo, JSNative native, bool isElementWise,
SimdTypeDescr::Type type);
InliningStatus inlineSimdLoad(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type);
// Utility intrinsics.
InliningStatus inlineIsCallable(CallInfo &callInfo);

View file

@@ -2888,7 +2888,8 @@ LIRGenerator::visitLoadTypedArrayElement(MLoadTypedArrayElement *ins)
const LUse elements = useRegister(ins->elements());
const LAllocation index = useRegisterOrConstant(ins->index());
- MOZ_ASSERT(IsNumberType(ins->type()) || ins->type() == MIRType_Boolean);
+ MOZ_ASSERT(IsNumberType(ins->type()) || IsSimdType(ins->type()) ||
+            ins->type() == MIRType_Boolean);
// We need a temp register for Uint32Array with known double result.
LDefinition tempDef = LDefinition::BogusTemp();

View file

@@ -354,6 +354,11 @@ IonBuilder::inlineNativeCall(CallInfo &callInfo, JSFunction *target)
if (native == js::simd_int32x4_swizzle)
return inlineSimdSwizzle(callInfo, native, SimdTypeDescr::TYPE_INT32);
if (native == js::simd_int32x4_load)
return inlineSimdLoad(callInfo, native, SimdTypeDescr::TYPE_INT32);
if (native == js::simd_float32x4_load)
return inlineSimdLoad(callInfo, native, SimdTypeDescr::TYPE_FLOAT32);
return InliningStatus_NotInlined;
}
@@ -3118,5 +3123,65 @@ IonBuilder::inlineSimdSwizzle(CallInfo &callInfo, JSNative native, SimdTypeDescr
return boxSimd(callInfo, ins, templateObj);
}
static Scalar::Type
SimdTypeToScalarType(SimdTypeDescr::Type type)
{
switch (type) {
case SimdTypeDescr::TYPE_FLOAT32: return Scalar::Float32x4;
case SimdTypeDescr::TYPE_INT32: return Scalar::Int32x4;
case SimdTypeDescr::TYPE_FLOAT64: break;
}
MOZ_CRASH("unexpected simd type");
}
IonBuilder::InliningStatus
IonBuilder::inlineSimdLoad(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type)
{
InlineTypedObject *templateObj = nullptr;
if (!checkInlineSimd(callInfo, native, type, 2, &templateObj))
return InliningStatus_NotInlined;
MDefinition *array = callInfo.getArg(0);
MDefinition *index = callInfo.getArg(1);
Scalar::Type arrayType;
if (!ElementAccessIsAnyTypedArray(constraints(), array, index, &arrayType))
return InliningStatus_NotInlined;
MInstruction *indexAsInt32 = MToInt32::New(alloc(), index);
current->add(indexAsInt32);
index = indexAsInt32;
MDefinition *indexForBoundsCheck = index;
// Artificially make sure the index is in bounds by adding the number of
// extra slots needed (e.g. when reading from a Float32Array, we need to be
// in bounds for 4 slots, so add 3, etc.).
MOZ_ASSERT(Simd128DataSize % Scalar::byteSize(arrayType) == 0);
int32_t suppSlotsNeeded = Simd128DataSize / Scalar::byteSize(arrayType) - 1;
if (suppSlotsNeeded) {
MConstant *suppSlots = constant(Int32Value(suppSlotsNeeded));
MAdd *addedIndex = MAdd::New(alloc(), index, suppSlots);
// Even if this addition overflows, we're fine because the code generated
// for the bounds check uses uint32 arithmetic
addedIndex->setInt32();
current->add(addedIndex);
indexForBoundsCheck = addedIndex;
}
MInstruction *length;
MInstruction *elements;
addTypedArrayLengthAndData(array, SkipBoundsCheck, &index, &length, &elements);
MInstruction *check = MBoundsCheck::New(alloc(), indexForBoundsCheck, length);
current->add(check);
MLoadTypedArrayElement *load = MLoadTypedArrayElement::New(alloc(), elements, index, arrayType);
load->setResultType(SimdTypeDescrToMIRType(type));
load->setReadType(SimdTypeToScalarType(type));
return boxSimd(callInfo, load, templateObj);
}
} // namespace jit
} // namespace js
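
The comment in inlineSimdLoad above is the key trick: MBoundsCheck only checks a single element index, so the index it sees is bumped by the number of extra slots a 16-byte SIMD load spans. A worked sketch of that arithmetic (plain JavaScript; the 16-byte vector size and the helper name simply mirror Simd128DataSize and the local variable in the patch, they are not SpiderMonkey API):

// Extra slots to add to the user index before the bounds check, so that a
// 16-byte load starting at that index still ends inside the typed array.
function suppSlotsNeeded(bytesPerElement) {
  return 16 / bytesPerElement - 1;  // Simd128DataSize / byteSize(arrayType) - 1
}

print(suppSlotsNeeded(4));  // Float32Array / Int32Array: the load covers 4 slots, add 3
print(suppSlotsNeeded(1));  // Int8Array / Uint8Array: 16 slots, add 15
print(suppSlotsNeeded(8));  // Float64Array: 2 slots, add 1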

View file

@@ -8832,6 +8832,7 @@ class MLoadTypedArrayElement
public SingleObjectPolicy::Data
{
Scalar::Type arrayType_;
Scalar::Type readType_;
bool requiresBarrier_;
int32_t offsetAdjustment_;
bool canonicalizeDoubles_;
@@ -8841,6 +8842,7 @@ class MLoadTypedArrayElement
int32_t offsetAdjustment, bool canonicalizeDoubles)
: MBinaryInstruction(elements, index),
arrayType_(arrayType),
readType_(arrayType),
requiresBarrier_(requiresBarrier == DoesRequireMemoryBarrier),
offsetAdjustment_(offsetAdjustment),
canonicalizeDoubles_(canonicalizeDoubles)
@@ -8869,6 +8871,13 @@ class MLoadTypedArrayElement
canonicalizeDoubles);
}
void setReadType(Scalar::Type type) {
readType_ = type;
}
Scalar::Type readType() const {
return readType_;
}
Scalar::Type arrayType() const {
return arrayType_;
}
@@ -8907,6 +8916,8 @@ class MLoadTypedArrayElement
const MLoadTypedArrayElement *other = ins->toLoadTypedArrayElement();
if (arrayType_ != other->arrayType_)
return false;
if (readType_ != other->readType_)
return false;
if (offsetAdjustment() != other->offsetAdjustment())
return false;
if (canonicalizeDoubles() != other->canonicalizeDoubles())

View file

@@ -356,6 +356,12 @@ MacroAssembler::loadFromTypedArray(Scalar::Type arrayType, const T &src, AnyRegi
if (canonicalizeDoubles)
canonicalizeDouble(dest.fpu());
break;
case Scalar::Int32x4:
loadUnalignedInt32x4(src, dest.fpu());
break;
case Scalar::Float32x4:
loadUnalignedFloat32x4(src, dest.fpu());
break;
default:
MOZ_CRASH("Invalid typed array type");
}

View file

@@ -1648,7 +1648,8 @@ MLimitedTruncate::computeRange(TempAllocator &alloc)
setRange(output);
}
- static Range *GetTypedArrayRange(TempAllocator &alloc, int type)
+ static Range *
+ GetTypedArrayRange(TempAllocator &alloc, Scalar::Type type)
{
switch (type) {
case Scalar::Uint8Clamped:
@@ -1668,10 +1669,12 @@ static Range *GetTypedArrayRange(TempAllocator &alloc, int type)
case Scalar::Float32:
case Scalar::Float64:
case Scalar::Float32x4:
case Scalar::Int32x4:
case Scalar::MaxTypedArrayViewType:
break;
}
return nullptr;
}
void
@@ -1679,7 +1682,7 @@ static Range *GetTypedArrayRange(TempAllocator &alloc, int type)
{
// We have an Int32 type and if this is a UInt32 load it may produce a value
// outside of our range, but we have a bailout to handle those cases.
- setRange(GetTypedArrayRange(alloc, arrayType()));
+ setRange(GetTypedArrayRange(alloc, readType()));
}
void

View file

@@ -1398,11 +1398,13 @@ class MacroAssemblerARMCompat : public MacroAssemblerARM
void loadAlignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void storeAlignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
void loadUnalignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void loadUnalignedInt32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void storeUnalignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
void loadAlignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void storeAlignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
void loadUnalignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void loadUnalignedFloat32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void storeUnalignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
void loadDouble(const Address &addr, FloatRegister dest);

View file

@@ -1259,11 +1259,13 @@ public:
void loadAlignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void storeAlignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
void loadUnalignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void loadUnalignedInt32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void storeUnalignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
void loadAlignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void storeAlignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
void loadUnalignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void loadUnalignedFloat32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
void storeUnalignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
void loadDouble(const Address &addr, FloatRegister dest);

View file

@@ -930,6 +930,9 @@ class MacroAssemblerX86Shared : public Assembler
void loadUnalignedInt32x4(const Address &src, FloatRegister dest) {
vmovdqu(Operand(src), dest);
}
void loadUnalignedInt32x4(const BaseIndex &src, FloatRegister dest) {
vmovdqu(Operand(src), dest);
}
void loadUnalignedInt32x4(const Operand &src, FloatRegister dest) {
vmovdqu(src, dest);
}
@@ -1011,6 +1014,9 @@ class MacroAssemblerX86Shared : public Assembler
void loadUnalignedFloat32x4(const Address &src, FloatRegister dest) {
vmovups(Operand(src), dest);
}
void loadUnalignedFloat32x4(const BaseIndex &src, FloatRegister dest) {
vmovups(Operand(src), dest);
}
void loadUnalignedFloat32x4(const Operand &src, FloatRegister dest) {
vmovups(src, dest);
}