Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1240796 - Implement Uint32x4 <==> Float32x4 conversions. r=sunfish
The conversion from Uint32x4 to Float32x4 is not available as a single SSE instruction, so it is expanded into a longer instruction sequence lifted from LLVM. This expansion is made early, while generating MIR, so that it is exposed to LICM and GVN optimizations.

The conversion from Float32x4 to Uint32x4 can throw a RangeError; it is handled similarly to LFloat32x4ToInt32x4. Its expansion depends on details of the cvttps2dq instruction that cannot be expressed in MIR, so it cannot be expanded early.
Parent: d01dd37267
Commit: cccd80f8b7
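The two expansions described in the commit message may be easier to follow in scalar form. The standalone C++ sketch below is illustration only, not part of the patch; the helper names (BitsToFloat, Uint32ToFloat32, Float32ToUint32) are made up. It mirrors the hi/lo mantissa-insertion trick used for Uint32x4 -> Float32x4 and the two-truncation trick the x86 code generator builds around cvttps2dq for Float32x4 -> Uint32x4.

// Scalar sketch of the two expansions in this patch. Illustration only.
#include <cassert>
#include <cstdint>
#include <cstring>

// Scalar analogue of the MSimdReinterpretCast nodes: reinterpret an int32
// bit pattern as a float.
static float BitsToFloat(uint32_t bits) {
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

// Uint32 -> Float32 without an unsigned conversion instruction (the LLVM
// algorithm quoted in MSimdConvert::AddLegalized). The 16-bit halves are
// planted in the mantissas of 2^39- and 2^23-biased floats, the bias is
// subtracted exactly, and the final add is the only rounding step.
static float Uint32ToFloat32(uint32_t x) {
    uint32_t hi = x >> 16;
    uint32_t lo = x & 0xffff;
    float fhi = BitsToFloat(hi | 0x53000000);  // 0x1.0p39f + hi * 2^16
    float flo = BitsToFloat(lo | 0x4b000000);  // 0x1.0p23f + lo
    fhi -= 549755813888.f + 8388608.f;         // remove the bias: 0x1.0p39f + 0x1.0p23f
    return fhi + flo;                          // hi * 2^16 + lo, correctly rounded
}

// Float32 -> Uint32 using only float -> signed-int32 truncation, which is all
// cvttps2dq provides. The SIMD code computes both conversions ("A" and "B")
// for every lane and merges them; scalar C++ must branch instead, because
// converting an out-of-range float to int32_t is undefined behaviour here.
static uint32_t Float32ToUint32(float x) {
    assert(x >= -0.0f && x < 4294967296.f);  // the real code bails out (RangeError) otherwise
    if (x < 2147483648.f)
        return uint32_t(int32_t(x));                           // "A" lanes
    // The subtraction is exact for x in [2^31, 2^32), so this recovers the
    // low 31 bits; adding 2^31 back supplies the missing high bit.
    return uint32_t(int32_t(x - 2147483648.f)) + 0x80000000u;  // "B" lanes
}

int main() {
    assert(Uint32ToFloat32(7u) == 7.f);
    assert(Uint32ToFloat32(0xffffffffu) == 4294967296.f);  // rounds up to 0x1.0p32f
    assert(Float32ToUint32(3000000000.f) == 3000000000u);
    return 0;
}

In the vector code the branch disappears: both truncations are computed for every lane and merged, with out-of-range lanes detected via the 0x80000000 marker that cvttps2dq produces.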
@@ -0,0 +1,81 @@
load(libdir + 'simd.js');

setJitCompilerOption("ion.warmup.trigger", 30);

// Testing Uint32 <-> Float32 conversions.
// These conversions deserve special attention because SSE doesn't provide
// simple conversion instructions.

// Convert an Uint32Array to a Float32Array using scalar conversions.
function cvt_utof_scalar(u32s, f32s) {
    assertEq(u32s.length, f32s.length);
    for (var i = 0; i < u32s.length; i++) {
        f32s[i] = u32s[i];
    }
}

// Convert an Uint32Array to a Float32Array using simd conversions.
function cvt_utof_simd(u32s, f32s) {
    assertEq(u32s.length, f32s.length);
    for (var i = 0; i < u32s.length; i += 4) {
        SIMD.Float32x4.store(f32s, i, SIMD.Float32x4.fromUint32x4(SIMD.Uint32x4.load(u32s, i)));
    }
}

// Convert a Float32Array to an Uint32Array using scalar conversions.
function cvt_ftou_scalar(f32s, u32s) {
    assertEq(f32s.length, u32s.length);
    for (var i = 0; i < f32s.length; i++) {
        u32s[i] = f32s[i];
    }
}

// Convert a Float32Array to an Uint32Array using simd conversions.
function cvt_ftou_simd(f32s, u32s) {
    assertEq(f32s.length, u32s.length);
    for (var i = 0; i < f32s.length; i += 4) {
        SIMD.Uint32x4.store(u32s, i, SIMD.Uint32x4.fromFloat32x4(SIMD.Float32x4.load(f32s, i)));
    }
}

function check(a, b) {
    assertEq(a.length, b.length);
    for (var i = 0; i < a.length; i++) {
        assertEq(a[i], b[i]);
    }
}

// Uint32x4 --> Float32x4 tests.
var src = new Uint32Array(8000);
var dst1 = new Float32Array(8000);
var dst2 = new Float32Array(8000);

for (var i = 0; i < 2000; i++) {
    src[i] = i;
    src[i + 2000] = 0x7fffffff - i;
    src[i + 4000] = 0x80000000 + i;
    src[i + 6000] = 0xffffffff - i;
}

for (var n = 0; n < 10; n++) {
    cvt_utof_scalar(src, dst1);
    cvt_utof_simd(src, dst2);
    check(dst1, dst2);
}

// Float32x4 --> Uint32x4 tests.
var fsrc = dst1;
var fdst1 = new Uint32Array(8000);
var fdst2 = new Uint32Array(8000);

// The 0xffffffff entries in fsrc round to 0x1.0p32f which throws.
// Go as high as 0x0.ffffffp32f.
for (var i = 0; i < 2000; i++) {
    fsrc[i + 6000] = 0xffffff7f - i;
}

for (var n = 0; n < 10; n++) {
    cvt_ftou_scalar(fsrc, fdst1);
    cvt_ftou_simd(fsrc, fdst2);
    check(fdst1, fdst2);
}

@@ -4101,12 +4101,28 @@ LIRGenerator::visitSimdConvert(MSimdConvert* ins)
    LUse use = useRegister(input);
    if (ins->type() == MIRType_Int32x4) {
        MOZ_ASSERT(input->type() == MIRType_Float32x4);
        LFloat32x4ToInt32x4* lir = new(alloc()) LFloat32x4ToInt32x4(use, temp());
        if (!gen->compilingAsmJS())
            assignSnapshot(lir, Bailout_BoundsCheck);
        define(lir, ins);
        switch (ins->signedness()) {
          case SimdSign::Signed: {
            LFloat32x4ToInt32x4* lir = new(alloc()) LFloat32x4ToInt32x4(use, temp());
            if (!gen->compilingAsmJS())
                assignSnapshot(lir, Bailout_BoundsCheck);
            define(lir, ins);
            break;
          }
          case SimdSign::Unsigned: {
            LFloat32x4ToUint32x4* lir =
              new (alloc()) LFloat32x4ToUint32x4(use, temp(), temp(LDefinition::INT32X4));
            if (!gen->compilingAsmJS())
                assignSnapshot(lir, Bailout_BoundsCheck);
            define(lir, ins);
            break;
          }
          default:
            MOZ_CRASH("Unexpected SimdConvert sign");
        }
    } else if (ins->type() == MIRType_Float32x4) {
        MOZ_ASSERT(input->type() == MIRType_Int32x4);
        MOZ_ASSERT(ins->signedness() == SimdSign::Signed, "Unexpected SimdConvert sign");
        define(new(alloc()) LInt32x4ToFloat32x4(use), ins);
    } else {
        MOZ_CRASH("Unknown SIMD kind when generating constant");

@@ -3377,7 +3377,10 @@ IonBuilder::boxSimd(CallInfo& callInfo, MInstruction* ins, InlineTypedObject* te
{
    MSimdBox* obj = MSimdBox::New(alloc(), constraints(), ins, templateObj,
                                  templateObj->group()->initialHeap(constraints()));
    current->add(ins);

    // In some cases, ins has already been added to current.
    if (!ins->block())
        current->add(ins);
    current->add(obj);
    current->push(obj);

@@ -3525,16 +3528,15 @@ IonBuilder::inlineSimdConvert(CallInfo& callInfo, JSNative native, bool isCast,
    if (!canInlineSimd(callInfo, native, 1, &templateObj))
        return InliningStatus_NotInlined;

    // TODO JSO: Implement unsigned integer conversions.
    if (sign == SimdSign::Unsigned)
        return InliningStatus_NotInlined;

    // See comment in inlineSimdBinary
    MInstruction* ins;
    if (isCast)
        // Signed/Unsigned doesn't matter for bitcasts.
        ins = MSimdReinterpretCast::New(alloc(), callInfo.getArg(0), fromType, toType);
    else
        ins = MSimdConvert::New(alloc(), callInfo.getArg(0), fromType, toType);
        // Possibly expand into multiple instructions.
        ins = MSimdConvert::AddLegalized(alloc(), current, callInfo.getArg(0),
                                         fromType, toType, sign);

    return boxSimd(callInfo, ins, templateObj);
}

@@ -1037,6 +1037,105 @@ MSimdGeneralShuffle::foldsTo(TempAllocator& alloc)
    return MSimdShuffle::New(alloc, vector(0), vector(1), type(), lanes[0], lanes[1], lanes[2], lanes[3]);
}

MInstruction*
MSimdConvert::AddLegalized(TempAllocator& alloc, MBasicBlock* addTo, MDefinition* obj,
                           MIRType fromType, MIRType toType, SimdSign sign)
{
    if (SupportsUint32x4FloatConversions || sign != SimdSign::Unsigned) {
        MInstruction* ins = New(alloc, obj, fromType, toType, sign);
        addTo->add(ins);
        return ins;
    }

    // This architecture can't do Uint32x4 <-> Float32x4 conversions (Hi SSE!)
    MOZ_ASSERT(sign == SimdSign::Unsigned);
    if (fromType == MIRType_Int32x4 && toType == MIRType_Float32x4) {
        // Converting Uint32x4 -> Float32x4. This algorithm is from LLVM.
        //
        // Split the input number into high and low parts:
        //
        // uint32_t hi = x >> 16;
        // uint32_t lo = x & 0xffff;
        //
        // Insert these parts as the low mantissa bits in a float32 number with
        // the corresponding exponent:
        //
        // float fhi = (bits-as-float)(hi | 0x53000000); // 0x1.0p39f + hi*2^16
        // float flo = (bits-as-float)(lo | 0x4b000000); // 0x1.0p23f + lo
        //
        // Subtract the bias from the hi part:
        //
        // fhi -= (0x1.0p39 + 0x1.0p23) // hi*2^16 - 0x1.0p23
        //
        // And finally combine:
        //
        // result = flo + fhi // lo + hi*2^16.

        // Compute hi = obj >> 16 (lane-wise unsigned shift).
        MInstruction* c16 = MConstant::New(alloc, Int32Value(16));
        addTo->add(c16);
        MInstruction* hi = MSimdShift::New(alloc, obj, c16, MSimdShift::ursh, MIRType_Int32x4);
        addTo->add(hi);

        // Compute lo = obj & 0xffff (lane-wise).
        MInstruction* m16 =
          MSimdConstant::New(alloc, SimdConstant::SplatX4(0xffff), MIRType_Int32x4);
        addTo->add(m16);
        MInstruction* lo =
          MSimdBinaryBitwise::New(alloc, obj, m16, MSimdBinaryBitwise::and_, MIRType_Int32x4);
        addTo->add(lo);

        // Mix in the exponents.
        MInstruction* exphi =
          MSimdConstant::New(alloc, SimdConstant::SplatX4(0x53000000), MIRType_Int32x4);
        addTo->add(exphi);
        MInstruction* mhi =
          MSimdBinaryBitwise::New(alloc, hi, exphi, MSimdBinaryBitwise::or_, MIRType_Int32x4);
        addTo->add(mhi);
        MInstruction* explo =
          MSimdConstant::New(alloc, SimdConstant::SplatX4(0x4b000000), MIRType_Int32x4);
        addTo->add(explo);
        MInstruction* mlo =
          MSimdBinaryBitwise::New(alloc, lo, explo, MSimdBinaryBitwise::or_, MIRType_Int32x4);
        addTo->add(mlo);

        // Bit-cast both to Float32x4.
        MInstruction* fhi =
          MSimdReinterpretCast::New(alloc, mhi, MIRType_Int32x4, MIRType_Float32x4);
        addTo->add(fhi);
        MInstruction* flo =
          MSimdReinterpretCast::New(alloc, mlo, MIRType_Int32x4, MIRType_Float32x4);
        addTo->add(flo);

        // Subtract out the bias: 0x1.0p39f + 0x1.0p23f.
        // MSVC doesn't support the hexadecimal float syntax.
        const float BiasValue = 549755813888.f + 8388608.f;
        MInstruction* bias =
          MSimdConstant::New(alloc, SimdConstant::SplatX4(BiasValue), MIRType_Float32x4);
        addTo->add(bias);
        MInstruction* fhi_debiased =
          MSimdBinaryArith::New(alloc, fhi, bias, MSimdBinaryArith::Op_sub, MIRType_Float32x4);
        addTo->add(fhi_debiased);

        // Compute the final result.
        MInstruction* result = MSimdBinaryArith::New(alloc, fhi_debiased, flo,
                                                     MSimdBinaryArith::Op_add, MIRType_Float32x4);
        addTo->add(result);

        return result;
    }

    if (fromType == MIRType_Float32x4 && toType == MIRType_Int32x4) {
        // The Float32x4 -> Uint32x4 conversion can throw if the input is out of
        // range. This is handled by the LFloat32x4ToUint32x4 expansion.
        MInstruction* ins = New(alloc, obj, fromType, toType, sign);
        addTo->add(ins);
        return ins;
    }

    MOZ_CRASH("Unhandled SIMD type conversion");
}

template <typename T>
static void
PrintOpcodeOperation(T* mir, GenericPrinter& out)

@@ -1542,10 +1542,18 @@ class MSimdConvert
  : public MUnaryInstruction,
    public SimdPolicy<0>::Data
{
    MSimdConvert(MDefinition* obj, MIRType fromType, MIRType toType)
      : MUnaryInstruction(obj)
    // When either fromType or toType is an integer vector, should it be treated
    // as signed or unsigned. Note that we don't support int-int conversions -
    // use MSimdReinterpretCast for that.
    SimdSign sign_;

    MSimdConvert(MDefinition* obj, MIRType fromType, MIRType toType, SimdSign sign)
      : MUnaryInstruction(obj), sign_(sign)
    {
        MOZ_ASSERT(IsSimdType(toType));
        // All conversions are int <-> float, so signedness is required.
        MOZ_ASSERT(sign != SimdSign::NotApplicable);

        setResultType(toType);
        specialization_ = fromType; // expects fromType as input

@@ -1562,20 +1570,35 @@ class MSimdConvert
                             MIRType toType)
    {
        MOZ_ASSERT(IsSimdType(obj->type()) && fromType == obj->type());
        return new(alloc) MSimdConvert(obj, fromType, toType);
        // AsmJS only has signed integer vectors for now.
        return new(alloc) MSimdConvert(obj, fromType, toType, SimdSign::Signed);
    }

    static MSimdConvert* New(TempAllocator& alloc, MDefinition* obj, MIRType fromType,
                             MIRType toType)
                             MIRType toType, SimdSign sign)
    {
        return new(alloc) MSimdConvert(obj, fromType, toType);
        return new(alloc) MSimdConvert(obj, fromType, toType, sign);
    }

    // Create a MSimdConvert instruction and add it to the basic block.
    // Possibly create and add an equivalent sequence of instructions instead if
    // the current target doesn't support the requested conversion directly.
    // Return the inserted MInstruction that computes the converted value.
    static MInstruction* AddLegalized(TempAllocator& alloc, MBasicBlock* addTo, MDefinition* obj,
                                      MIRType fromType, MIRType toType, SimdSign sign);

    SimdSign signedness() const {
        return sign_;
    }

    AliasSet getAliasSet() const override {
        return AliasSet::None();
    }
    bool congruentTo(const MDefinition* ins) const override {
        return congruentIfOperandsEqual(ins);
        if (!congruentIfOperandsEqual(ins))
            return false;
        const MSimdConvert* other = ins->toSimdConvert();
        return sign_ == other->sign_;
    }
    ALLOW_CLONE(MSimdConvert)
};

@@ -220,6 +220,9 @@ static_assert(JitStackAlignment % SimdMemoryAlignment == 0,

static const uint32_t AsmJSStackAlignment = SimdMemoryAlignment;

// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;

static const Scale ScalePointer = TimesFour;

class Instruction;

@@ -176,6 +176,9 @@ static_assert(CodeAlignment % SimdMemoryAlignment == 0,
static const uint32_t AsmJSStackAlignment = SimdMemoryAlignment;
static const int32_t AsmJSGlobalRegBias = 1024;

// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;

class Assembler : public vixl::Assembler
{
  public:

@@ -98,6 +98,9 @@ static_assert(JitStackAlignment % sizeof(Value) == 0 && JitStackValueAlignment >
static MOZ_CONSTEXPR_VAR uint32_t SimdMemoryAlignment = 8;
static MOZ_CONSTEXPR_VAR uint32_t AsmJSStackAlignment = SimdMemoryAlignment;

// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;

static MOZ_CONSTEXPR_VAR Scale ScalePointer = TimesFour;

class Assembler : public AssemblerMIPSShared

@@ -109,6 +109,9 @@ static MOZ_CONSTEXPR_VAR uint32_t SimdMemoryAlignment = 16;

static MOZ_CONSTEXPR_VAR uint32_t AsmJSStackAlignment = SimdMemoryAlignment;

// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;

static MOZ_CONSTEXPR_VAR Scale ScalePointer = TimesEight;

class Assembler : public AssemblerMIPSShared

@@ -18,6 +18,9 @@ static const bool SupportsSimd = false;
static const uint32_t SimdMemoryAlignment = 4; // Make it 4 to avoid a bunch of div-by-zero warnings
static const uint32_t AsmJSStackAlignment = 8;

// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
static MOZ_CONSTEXPR_VAR bool SupportsUint32x4FloatConversions = false;

class Registers
{
  public:

@@ -3806,6 +3806,29 @@ class LFloat32x4ToInt32x4 : public LInstructionHelper<1, 1, 1>
    }
};

// Float32x4 to Uint32x4 needs one GPR temp and one FloatReg temp.
class LFloat32x4ToUint32x4 : public LInstructionHelper<1, 1, 2>
{
  public:
    LIR_HEADER(Float32x4ToUint32x4);
    explicit LFloat32x4ToUint32x4(const LAllocation& input, const LDefinition& tempR,
                                  const LDefinition& tempF)
    {
        setOperand(0, input);
        setTemp(0, tempR);
        setTemp(1, tempF);
    }
    const LDefinition* tempR() {
        return getTemp(0);
    }
    const LDefinition* tempF() {
        return getTemp(1);
    }
    const MSimdConvert* mir() const {
        return mir_->toSimdConvert();
    }
};

// Double raised to a half power.
class LPowHalfD : public LInstructionHelper<1, 1, 0>
{

@@ -185,6 +185,7 @@
    _(ValueToObjectOrNull) \
    _(Int32x4ToFloat32x4) \
    _(Float32x4ToInt32x4) \
    _(Float32x4ToUint32x4) \
    _(Start) \
    _(OsrEntry) \
    _(OsrValue) \

@@ -20,6 +20,9 @@
namespace js {
namespace jit {

// Does this architecture support SIMD conversions between Uint32x4 and Float32x4?
static const bool SupportsUint32x4FloatConversions = false;

#if defined(JS_CODEGEN_X86)
// In bytes: slots needed for potential memory->memory move spills.
// +8 for cycles

@@ -2306,6 +2306,91 @@ CodeGeneratorX86Shared::visitOutOfLineSimdFloatToIntCheck(OutOfLineSimdFloatToIn
    }
}

// Convert Float32x4 to Uint32x4.
//
// If any input lane value is out of range or NaN, bail out.
void
CodeGeneratorX86Shared::visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins)
{
    FloatRegister in = ToFloatRegister(ins->input());
    FloatRegister out = ToFloatRegister(ins->output());
    Register temp = ToRegister(ins->tempR());
    FloatRegister tempF = ToFloatRegister(ins->tempF());

    // Classify lane values into 4 disjoint classes:
    //
    //   N-lanes: in < -0.0
    //   A-lanes: -0.0 <= in <= 0x0.ffffffp31
    //   B-lanes: 0x1.0p31 <= in <= 0x0.ffffffp32
    //   V-lanes: 0x1.0p32 <= in, or isnan(in)
    //
    // We need to bail out to throw a RangeError if we see any N-lanes or
    // V-lanes.
    //
    // For A-lanes and B-lanes, we make two float -> int32 conversions:
    //
    //   A = cvttps2dq(in)
    //   B = cvttps2dq(in - 0x1.0p31f)
    //
    // Note that the subtraction for the B computation is exact for B-lanes.
    // There is no rounding, so B is the low 31 bits of the correctly converted
    // result.
    //
    // The cvttps2dq instruction produces 0x80000000 when the input is NaN or
    // out of range for a signed int32_t. This conveniently provides the missing
    // high bit for B, so the desired result is A for A-lanes and A|B for
    // B-lanes.

    ScratchSimd128Scope scratch(masm);

    // First we need to filter out N-lanes. We need to use a floating point
    // comparison to do that because cvttps2dq maps the negative range
    // [-0x0.ffffffp0;-0.0] to 0. We can't simply look at the sign bits of in
    // because -0.0 is a valid input.
    // TODO: It may be faster to let ool code deal with -0.0 and skip the
    // vcmpleps here.
    masm.zeroFloat32x4(scratch);
    masm.vcmpleps(Operand(in), scratch, scratch);
    masm.vmovmskps(scratch, temp);
    masm.cmp32(temp, Imm32(15));
    bailoutIf(Assembler::NotEqual, ins->snapshot());

    // TODO: If the majority of lanes are A-lanes, it could be faster to compute
    // A first, use vmovmskps to check for any non-A-lanes and handle them in
    // ool code. OTOH, if we're wrong about the lane distribution, that would be
    // slower.

    // Compute B in |scratch|.
    static const float Adjust = 0x80000000; // 0x1.0p31f for the benefit of MSVC.
    static const SimdConstant Bias = SimdConstant::SplatX4(-Adjust);
    masm.loadConstantFloat32x4(Bias, scratch);
    masm.packedAddFloat32(Operand(in), scratch);
    masm.convertFloat32x4ToInt32x4(scratch, scratch);

    // Compute A in |out|. This is the last time we use |in| and the first time
    // we use |out|, so we can tolerate if they are the same register.
    masm.convertFloat32x4ToInt32x4(in, out);

    // Since we filtered out N-lanes, we can identify A-lanes by the sign bits
    // in A: Any A-lanes will be positive in A, and B-lanes and V-lanes will be
    // 0x80000000 in A. Compute a mask of non-A-lanes into |tempF|.
    masm.zeroFloat32x4(tempF);
    masm.packedGreaterThanInt32x4(Operand(out), tempF);

    // Clear the A-lanes in B.
    masm.bitwiseAndX4(Operand(tempF), scratch);

    // Compute the final result: A for A-lanes, A|B for B-lanes.
    masm.bitwiseOrX4(Operand(scratch), out);

    // We still need to filter out the V-lanes. They would show up as 0x80000000
    // in both A and B. Since we cleared the valid A-lanes in B, the V-lanes are
    // the remaining negative lanes in B.
    masm.vmovmskps(scratch, temp);
    masm.cmp32(temp, Imm32(0));
    bailoutIf(Assembler::NotEqual, ins->snapshot());
}

void
CodeGeneratorX86Shared::visitSimdValueInt32x4(LSimdValueInt32x4* ins)
{

@@ -261,6 +261,7 @@ class CodeGeneratorX86Shared : public CodeGeneratorShared
    void visitFloat32x4(LFloat32x4* ins);
    void visitInt32x4ToFloat32x4(LInt32x4ToFloat32x4* ins);
    void visitFloat32x4ToInt32x4(LFloat32x4ToInt32x4* ins);
    void visitFloat32x4ToUint32x4(LFloat32x4ToUint32x4* ins);
    void visitSimdReinterpretCast(LSimdReinterpretCast* lir);
    void visitSimdExtractElementB(LSimdExtractElementB* lir);
    void visitSimdExtractElementI(LSimdExtractElementI* lir);