Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1135042: Inline SIMD loads in Ion; r=bhackett
--HG-- extra : rebase_source : 9a41cda1780b07ba5ed4cb68010da30c8c91c6c8
Parent: c1a4bb0b11
Commit: b8de7831f3

@@ -242,16 +242,16 @@
    _(not) \
    _(neg) \
    _(swizzle) \
    _(load) \
    _(store) \
    _(check)
#define FOREACH_COMMONX4_SIMD_OP(_) \
    ION_COMMONX4_SIMD_OP(_) \
    COMP_COMMONX4_TO_INT32X4_SIMD_OP(_) \
    _(shuffle) \
    _(load) \
    _(loadX) \
    _(loadXY) \
    _(loadXYZ) \
    _(store) \
    _(storeX) \
    _(storeXY) \
    _(storeXYZ)

@@ -0,0 +1,48 @@
load(libdir + 'simd.js');

setJitCompilerOption("ion.warmup.trigger", 40);

function f() {
    var f32 = new Float32Array(16);
    for (var i = 0; i < 16; i++)
        f32[i] = i + 1;

    var f64 = new Float64Array(f32.buffer);
    var i32 = new Int32Array(f32.buffer);
    var u32 = new Uint32Array(f32.buffer);
    var i16 = new Int16Array(f32.buffer);
    var u16 = new Uint16Array(f32.buffer);
    var i8 = new Int8Array(f32.buffer);
    var u8 = new Uint8Array(f32.buffer);

    var r;
    for (var i = 0; i < 150; i++) {
        assertEqX4(SIMD.float32x4.load(f64, 0), [1,2,3,4]);
        assertEqX4(SIMD.float32x4.load(f32, 1), [2,3,4,5]);
        assertEqX4(SIMD.float32x4.load(i32, 2), [3,4,5,6]);
        assertEqX4(SIMD.float32x4.load(i16, 3 << 1), [4,5,6,7]);
        assertEqX4(SIMD.float32x4.load(u16, 4 << 1), [5,6,7,8]);
        assertEqX4(SIMD.float32x4.load(i8 , 5 << 2), [6,7,8,9]);
        assertEqX4(SIMD.float32x4.load(u8 , 6 << 2), [7,8,9,10]);

        assertEqX4(SIMD.float32x4.load(f64, (16 >> 1) - (4 >> 1)), [13,14,15,16]);
        assertEqX4(SIMD.float32x4.load(f32, 16 - 4), [13,14,15,16]);
        assertEqX4(SIMD.float32x4.load(i32, 16 - 4), [13,14,15,16]);
        assertEqX4(SIMD.float32x4.load(i16, (16 << 1) - (4 << 1)), [13,14,15,16]);
        assertEqX4(SIMD.float32x4.load(u16, (16 << 1) - (4 << 1)), [13,14,15,16]);
        assertEqX4(SIMD.float32x4.load(i8, (16 << 2) - (4 << 2)), [13,14,15,16]);
        assertEqX4(SIMD.float32x4.load(u8, (16 << 2) - (4 << 2)), [13,14,15,16]);

        var caught = false;
        try {
            SIMD.float32x4.load(i8, (i < 149) ? 0 : (16 << 2) - (4 << 2) + 1);
        } catch (e) {
            caught = true;
        }
        assertEq(i < 149 || caught, true);
    }
    return r;
}

f();
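
The indices the test passes to load() differ per view because load() scales its index by that view's element size; the expected lanes then follow from the resulting byte offset, since the test fills f32[k] = k + 1. A small standalone check of that arithmetic, written as plain C++ and purely illustrative (not part of the patch):

// Each load starts at byte index * BYTES_PER_ELEMENT and reads 16 bytes,
// so the first expected lane is byteOffset / 4 + 1.
static_assert((3 << 1) * 2 / 4 + 1 == 4,
              "load(i16, 3 << 1) starts at byte 12, hence [4,5,6,7]");
static_assert((5 << 2) * 1 / 4 + 1 == 6,
              "load(i8, 5 << 2) starts at byte 20, hence [6,7,8,9]");
static_assert(((16 >> 1) - (4 >> 1)) * 8 / 4 + 1 == 13,
              "load(f64, 6) starts at byte 48, hence [13,14,15,16]");
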
@@ -8608,17 +8608,18 @@ CodeGenerator::visitLoadTypedArrayElement(LLoadTypedArrayElement *lir)
    AnyRegister out = ToAnyRegister(lir->output());

    Scalar::Type arrayType = lir->mir()->arrayType();
    Scalar::Type readType = lir->mir()->readType();
    int width = Scalar::byteSize(arrayType);

    Label fail;
    if (lir->index()->isConstant()) {
        Address source(elements, ToInt32(lir->index()) * width + lir->mir()->offsetAdjustment());
        masm.loadFromTypedArray(arrayType, source, out, temp, &fail,
        masm.loadFromTypedArray(readType, source, out, temp, &fail,
                                lir->mir()->canonicalizeDoubles());
    } else {
        BaseIndex source(elements, ToRegister(lir->index()), ScaleFromElemWidth(width),
                         lir->mir()->offsetAdjustment());
        masm.loadFromTypedArray(arrayType, source, out, temp, &fail,
        masm.loadFromTypedArray(readType, source, out, temp, &fail,
                                lir->mir()->canonicalizeDoubles());
    }

@@ -834,6 +834,7 @@ class IonBuilder
                                    SimdTypeDescr::Type from, SimdTypeDescr::Type to);
    InliningStatus inlineSimdSelect(CallInfo &callInfo, JSNative native, bool isElementWise,
                                    SimdTypeDescr::Type type);
    InliningStatus inlineSimdLoad(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type);

    // Utility intrinsics.
    InliningStatus inlineIsCallable(CallInfo &callInfo);

@@ -2888,7 +2888,8 @@ LIRGenerator::visitLoadTypedArrayElement(MLoadTypedArrayElement *ins)
    const LUse elements = useRegister(ins->elements());
    const LAllocation index = useRegisterOrConstant(ins->index());

    MOZ_ASSERT(IsNumberType(ins->type()) || ins->type() == MIRType_Boolean);
    MOZ_ASSERT(IsNumberType(ins->type()) || IsSimdType(ins->type()) ||
               ins->type() == MIRType_Boolean);

    // We need a temp register for Uint32Array with known double result.
    LDefinition tempDef = LDefinition::BogusTemp();

@@ -354,6 +354,11 @@ IonBuilder::inlineNativeCall(CallInfo &callInfo, JSFunction *target)
    if (native == js::simd_int32x4_swizzle)
        return inlineSimdSwizzle(callInfo, native, SimdTypeDescr::TYPE_INT32);

    if (native == js::simd_int32x4_load)
        return inlineSimdLoad(callInfo, native, SimdTypeDescr::TYPE_INT32);
    if (native == js::simd_float32x4_load)
        return inlineSimdLoad(callInfo, native, SimdTypeDescr::TYPE_FLOAT32);

    return InliningStatus_NotInlined;
}

@@ -3118,5 +3123,65 @@ IonBuilder::inlineSimdSwizzle(CallInfo &callInfo, JSNative native, SimdTypeDescr
    return boxSimd(callInfo, ins, templateObj);
}

static Scalar::Type
SimdTypeToScalarType(SimdTypeDescr::Type type)
{
    switch (type) {
      case SimdTypeDescr::TYPE_FLOAT32: return Scalar::Float32x4;
      case SimdTypeDescr::TYPE_INT32: return Scalar::Int32x4;
      case SimdTypeDescr::TYPE_FLOAT64: break;
    }
    MOZ_CRASH("unexpected simd type");
}

IonBuilder::InliningStatus
IonBuilder::inlineSimdLoad(CallInfo &callInfo, JSNative native, SimdTypeDescr::Type type)
{
    InlineTypedObject *templateObj = nullptr;
    if (!checkInlineSimd(callInfo, native, type, 2, &templateObj))
        return InliningStatus_NotInlined;

    MDefinition *array = callInfo.getArg(0);
    MDefinition *index = callInfo.getArg(1);

    Scalar::Type arrayType;
    if (!ElementAccessIsAnyTypedArray(constraints(), array, index, &arrayType))
        return InliningStatus_NotInlined;

    MInstruction *indexAsInt32 = MToInt32::New(alloc(), index);
    current->add(indexAsInt32);
    index = indexAsInt32;

    MDefinition *indexForBoundsCheck = index;

    // Artificially make sure the index is in bounds by adding the difference
    // number of slots needed (e.g. reading from Float32Array we need to make
    // sure to be in bounds for 4 slots, so add 3, etc.).
    MOZ_ASSERT(Simd128DataSize % Scalar::byteSize(arrayType) == 0);
    int32_t suppSlotsNeeded = Simd128DataSize / Scalar::byteSize(arrayType) - 1;
    if (suppSlotsNeeded) {
        MConstant *suppSlots = constant(Int32Value(suppSlotsNeeded));
        MAdd *addedIndex = MAdd::New(alloc(), index, suppSlots);
        // Even if this addition overflows, we're fine because the code generated
        // for the bounds check uses uint32 arithmetic
        addedIndex->setInt32();
        current->add(addedIndex);
        indexForBoundsCheck = addedIndex;
    }

    MInstruction *length;
    MInstruction *elements;
    addTypedArrayLengthAndData(array, SkipBoundsCheck, &index, &length, &elements);

    MInstruction *check = MBoundsCheck::New(alloc(), indexForBoundsCheck, length);
    current->add(check);

    MLoadTypedArrayElement *load = MLoadTypedArrayElement::New(alloc(), elements, index, arrayType);
    load->setResultType(SimdTypeDescrToMIRType(type));
    load->setReadType(SimdTypeToScalarType(type));

    return boxSimd(callInfo, load, templateObj);
}

} // namespace jit
} // namespace js
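
The index adjustment in inlineSimdLoad above is what lets a single MBoundsCheck cover the whole 16-byte access. A minimal standalone sketch of the same arithmetic, with illustrative names only (this is not SpiderMonkey API):

#include <cassert>
#include <cstdint>

// A 128-bit load reads 16 bytes, so starting at element |index| of a view with
// elemSize-byte elements it covers elements [index, index + 16/elemSize).
// Rather than checking every element, the patch bounds-checks only the last one:
// index + (16/elemSize - 1).  With a non-negative int32 index and an extra of at
// most 15, the unsigned sum cannot wrap around, so an int32 overflow only makes
// the checked value larger and the check still fails.
constexpr uint32_t Simd128DataSize = 16;  // name borrowed from the patch

bool simdLoadInBounds(uint32_t index, uint32_t elemSize, uint32_t length)
{
    uint32_t suppSlotsNeeded = Simd128DataSize / elemSize - 1;  // 3 for Float32Array, 15 for Int8Array
    return index + suppSlotsNeeded < length;
}

int main()
{
    assert(simdLoadInBounds(16 - 4, 4, 16));                     // float32x4.load(f32, 12): last valid index
    assert(!simdLoadInBounds(16 - 4 + 1, 4, 16));                // one element further reads past the end
    assert(!simdLoadInBounds((16 << 2) - (4 << 2) + 1, 1, 64));  // the Int8Array case the test expects to throw
    return 0;
}
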
@@ -8832,6 +8832,7 @@ class MLoadTypedArrayElement
    public SingleObjectPolicy::Data
{
    Scalar::Type arrayType_;
    Scalar::Type readType_;
    bool requiresBarrier_;
    int32_t offsetAdjustment_;
    bool canonicalizeDoubles_;

@@ -8841,6 +8842,7 @@ class MLoadTypedArrayElement
                            int32_t offsetAdjustment, bool canonicalizeDoubles)
      : MBinaryInstruction(elements, index),
        arrayType_(arrayType),
        readType_(arrayType),
        requiresBarrier_(requiresBarrier == DoesRequireMemoryBarrier),
        offsetAdjustment_(offsetAdjustment),
        canonicalizeDoubles_(canonicalizeDoubles)

@@ -8869,6 +8871,13 @@ class MLoadTypedArrayElement
                                   canonicalizeDoubles);
    }

    void setReadType(Scalar::Type type) {
        readType_ = type;
    }
    Scalar::Type readType() const {
        return readType_;
    }

    Scalar::Type arrayType() const {
        return arrayType_;
    }

@@ -8907,6 +8916,8 @@ class MLoadTypedArrayElement
        const MLoadTypedArrayElement *other = ins->toLoadTypedArrayElement();
        if (arrayType_ != other->arrayType_)
            return false;
        if (readType_ != other->readType_)
            return false;
        if (offsetAdjustment() != other->offsetAdjustment())
            return false;
        if (canonicalizeDoubles() != other->canonicalizeDoubles())
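
The arrayType_/readType_ split is the core of the MIR change: arrayType_ still names the element type of the underlying view (it drives index scaling and the addressing width in CodeGenerator), while readType_ says what is actually fetched, which for the inlined SIMD loads is a full 128-bit vector. Comparing both fields in congruentTo keeps GVN from folding a scalar read together with a vector read of the same element. A toy model of that split, using made-up names rather than SpiderMonkey types:

#include <cstddef>
#include <cstring>

// The element index is scaled by the *array* type's width (float, 4 bytes here),
// while the number of bytes read comes from the *read* type (a 16-byte float32x4),
// mirroring how arrayType_ and readType_ are consumed by the generated load.
struct Float32x4 { float lanes[4]; };

Float32x4 loadFloat32x4(const float *elements, size_t index)
{
    Float32x4 out;
    std::memcpy(out.lanes, elements + index, sizeof out.lanes);  // unaligned-safe 16-byte read
    return out;
}
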
@@ -356,6 +356,12 @@ MacroAssembler::loadFromTypedArray(Scalar::Type arrayType, const T &src, AnyRegi
        if (canonicalizeDoubles)
            canonicalizeDouble(dest.fpu());
        break;
      case Scalar::Int32x4:
        loadUnalignedInt32x4(src, dest.fpu());
        break;
      case Scalar::Float32x4:
        loadUnalignedFloat32x4(src, dest.fpu());
        break;
      default:
        MOZ_CRASH("Invalid typed array type");
    }
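
The new SIMD cases go through the unaligned load helpers because a typed-array access only guarantees element alignment, never 16-byte alignment, so an aligned movaps/movdqa could fault on most of the offsets the test exercises. A trivial illustration (not from the patch):

// float32x4.load(f32, 1) starts at byte offset 1 * sizeof(float) == 4,
// which is 4-byte aligned but not 16-byte aligned.
static_assert(1 * sizeof(float) % 16 != 0, "offset 4 is not 16-byte aligned");
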
@@ -1648,7 +1648,8 @@ MLimitedTruncate::computeRange(TempAllocator &alloc)
    setRange(output);
}

static Range *GetTypedArrayRange(TempAllocator &alloc, int type)
static Range *
GetTypedArrayRange(TempAllocator &alloc, Scalar::Type type)
{
    switch (type) {
      case Scalar::Uint8Clamped:

@@ -1668,10 +1669,12 @@ static Range *GetTypedArrayRange(TempAllocator &alloc, int type)

      case Scalar::Float32:
      case Scalar::Float64:
      case Scalar::Float32x4:
      case Scalar::Int32x4:
      case Scalar::MaxTypedArrayViewType:
        break;
    }

    return nullptr;
}

void

@@ -1679,7 +1682,7 @@ MLoadTypedArrayElement::computeRange(TempAllocator &alloc)
{
    // We have an Int32 type and if this is a UInt32 load it may produce a value
    // outside of our range, but we have a bailout to handle those cases.
    setRange(GetTypedArrayRange(alloc, arrayType()));
    setRange(GetTypedArrayRange(alloc, readType()));
}

void
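
Switching computeRange to readType() matters once a load can produce a vector: GetTypedArrayRange only yields a meaningful integer range for scalar integer reads, and the SIMD read types fall into the group above that returns nullptr, so no scalar range gets attached to a 128-bit result. A toy model of that dispatch, with illustrative names rather than the real Range machinery:

#include <cstdint>
#include <optional>
#include <utility>

enum class ReadType { Uint8, Int32, Float32, Float32x4, Int32x4 };

// Only scalar integer reads produce a known [min, max] range; float and the new
// 128-bit SIMD reads produce none, matching the nullptr returns above.
std::optional<std::pair<int64_t, int64_t>> typedArrayRange(ReadType t)
{
    switch (t) {
      case ReadType::Uint8: return std::pair<int64_t, int64_t>(0, 255);
      case ReadType::Int32: return std::pair<int64_t, int64_t>(INT32_MIN, INT32_MAX);
      default:              return std::nullopt;
    }
}
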
@@ -1398,11 +1398,13 @@ class MacroAssemblerARMCompat : public MacroAssemblerARM
    void loadAlignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void storeAlignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
    void loadUnalignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void loadUnalignedInt32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void storeUnalignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }

    void loadAlignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void storeAlignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
    void loadUnalignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void loadUnalignedFloat32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void storeUnalignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }

    void loadDouble(const Address &addr, FloatRegister dest);

@@ -1259,11 +1259,13 @@ public:
    void loadAlignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void storeAlignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
    void loadUnalignedInt32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void loadUnalignedInt32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void storeUnalignedInt32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }

    void loadAlignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void storeAlignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }
    void loadUnalignedFloat32x4(const Address &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void loadUnalignedFloat32x4(const BaseIndex &addr, FloatRegister dest) { MOZ_CRASH("NYI"); }
    void storeUnalignedFloat32x4(FloatRegister src, Address addr) { MOZ_CRASH("NYI"); }

    void loadDouble(const Address &addr, FloatRegister dest);

@@ -930,6 +930,9 @@ class MacroAssemblerX86Shared : public Assembler
    void loadUnalignedInt32x4(const Address &src, FloatRegister dest) {
        vmovdqu(Operand(src), dest);
    }
    void loadUnalignedInt32x4(const BaseIndex &src, FloatRegister dest) {
        vmovdqu(Operand(src), dest);
    }
    void loadUnalignedInt32x4(const Operand &src, FloatRegister dest) {
        vmovdqu(src, dest);
    }

@@ -1011,6 +1014,9 @@ class MacroAssemblerX86Shared : public Assembler
    void loadUnalignedFloat32x4(const Address &src, FloatRegister dest) {
        vmovups(Operand(src), dest);
    }
    void loadUnalignedFloat32x4(const BaseIndex &src, FloatRegister dest) {
        vmovups(Operand(src), dest);
    }
    void loadUnalignedFloat32x4(const Operand &src, FloatRegister dest) {
        vmovups(src, dest);
    }