Bug 1731853 - Prototype relaxed-SIMD i32x4.trunc_sat_fXXX instructions. r=lth

See https://github.com/WebAssembly/relaxed-simd/issues/21

Differential Revision: https://phabricator.services.mozilla.com/D126513
This commit is contained in:
Yury Delendik 2021-09-28 18:14:10 +00:00
Родитель b5bd20340e
Коммит 548b6d38c2
16 изменённых файлов: 256 добавлений и 4 удалений

Просмотреть файл

@ -141,6 +141,10 @@ const F32x4RelaxedMin = 0xb4;
const F32x4RelaxedMax = 0xe2;
const F64x2RelaxedMin = 0xd4;
const F64x2RelaxedMax = 0xee;
const I32x4RelaxedTruncSSatF32x4 = 0xa5;
const I32x4RelaxedTruncUSatF32x4 = 0xa6;
const I32x4RelaxedTruncSatF64x2SZero = 0xc5;
const I32x4RelaxedTruncSatF64x2UZero = 0xc6;
// SIMD wormhole opcodes.
const WORMHOLE_SELFTEST = 0;

Просмотреть файл

@ -186,3 +186,59 @@ for (let k of [4, 2]) {
SimdPrefix, varU32(op)])]})])])));
}
}
// Relaxed I32x4.TruncFXXX, https://github.com/WebAssembly/relaxed-simd/issues/21
// One module, four exported functions (one per relaxed truncation opcode).
// Every function has the same shape: load a v128 via V128Load(16), apply the
// opcode, and store the result via V128StoreExpr(0, ...). The memory is
// exported so the test can write inputs and read results.
var ins = wasmValidateAndEval(moduleWithSections([
    sigSection([v2vSig]),
    declSection([0, 0, 0, 0]),
    memorySection(1),
    exportSection([
        {funcIndex: 0, name: "from32s"},
        {funcIndex: 1, name: "from32u"},
        {funcIndex: 2, name: "from64s"},
        {funcIndex: 3, name: "from64u"},
        {memIndex: 0, name: "mem"},
    ]),
    bodySection([
        I32x4RelaxedTruncSSatF32x4,
        I32x4RelaxedTruncUSatF32x4,
        I32x4RelaxedTruncSatF64x2SZero,
        I32x4RelaxedTruncSatF64x2UZero,
    ].map(relaxedOp => funcBody({
        locals: [],
        body: [...V128StoreExpr(0, [...V128Load(16),
                                    SimdPrefix, varU32(relaxedOp)])],
    }))),
]));
var mem = ins.exports.mem.buffer;
var result;
{
    // Views over the exported memory. Inputs are written at byte offset 16
    // (element 4 of a 32-bit view, element 2 of a 64-bit view); results are
    // read back from byte offset 0.
    const f32View = new Float32Array(mem);
    const f64View = new Float64Array(mem);
    const i32View = new Int32Array(mem);
    const u32View = new Uint32Array(mem);

    // f32x4 -> i32x4, signed: fractional parts are dropped.
    set(f32View, 4, [0, 2.3, -3.4, 100000]);
    ins.exports.from32s();
    result = get(i32View, 0, 4);
    assertSame(result, [0, 2, -3, 100000]);

    // f32x4 -> i32x4, unsigned: includes a lane that needs bit 31.
    set(f32View, 4, [0, 3.3, 0x80000000, 200000]);
    ins.exports.from32u();
    result = get(u32View, 0, 4);
    assertSame(result, [0, 3, 0x80000000, 200000]);

    // f64x2 -> i32x4, signed: only the two low result lanes are checked.
    set(f64View, 2, [200000.3, -3.4]);
    ins.exports.from64s();
    result = get(i32View, 0, 2);
    assertSame(result, [200000, -3]);

    // f64x2 -> i32x4, unsigned: only the two low result lanes are checked.
    set(f64View, 2, [0x90000000 + 0.1, 0]);
    ins.exports.from64u();
    result = get(u32View, 0, 2);
    assertSame(result, [0x90000000, 0]);
}
// Negative validation: each function body applies the opcode without first
// pushing a v128 operand (no V128Load), so every such module must be rejected.
for (const op of [
    I32x4RelaxedTruncSSatF32x4,
    I32x4RelaxedTruncUSatF32x4,
    I32x4RelaxedTruncSatF64x2SZero,
    I32x4RelaxedTruncSatF64x2UZero,
]) {
    const operandlessBody = funcBody({
        locals: [],
        body: [...V128StoreExpr(0, [SimdPrefix, varU32(op)])],
    });
    const moduleBytes = moduleWithSections([
        sigSection([v2vSig]),
        declSection([0]),
        memorySection(1),
        exportSection([]),
        bodySection([operandlessBody]),
    ]);
    assertEq(false, WebAssembly.validate(moduleBytes));
}

Просмотреть файл

@ -3315,6 +3315,22 @@ class MacroAssembler : public MacroAssemblerSpecific {
FloatRegister temp)
DEFINED_ON(x86_shared, arm64);
// Relaxed-SIMD float -> int32x4 truncations (prototype, see
// https://github.com/WebAssembly/relaxed-simd/issues/21).
// All four take only a source and a destination vector register.
// NOTE(review): the implementations skip NaN / out-of-range fix-ups, so the
// results for such lanes look platform-dependent -- confirm against the
// proposal before relying on them.

// f32x4 -> i32x4, signed.
inline void truncSatFloat32x4ToInt32x4Relaxed(FloatRegister src,
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
// f32x4 -> i32x4, unsigned.
inline void unsignedTruncSatFloat32x4ToInt32x4Relaxed(FloatRegister src,
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
// f64x2 -> i32x4, signed; the two results occupy the low lanes.
inline void truncSatFloat64x2ToInt32x4Relaxed(FloatRegister src,
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
// f64x2 -> i32x4, unsigned; the two results occupy the low lanes.
inline void unsignedTruncSatFloat64x2ToInt32x4Relaxed(FloatRegister src,
FloatRegister dest)
DEFINED_ON(x86_shared, arm64);
// Floating point narrowing
inline void convertFloat64x2ToFloat32x4(FloatRegister src, FloatRegister dest)

Просмотреть файл

@ -3934,6 +3934,18 @@ void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
case wasm::SimdOp::I8x16Popcnt:
masm.popcntInt8x16(src, dest);
break;
case wasm::SimdOp::I32x4RelaxedTruncSSatF32x4:
masm.truncSatFloat32x4ToInt32x4Relaxed(src, dest);
break;
case wasm::SimdOp::I32x4RelaxedTruncUSatF32x4:
masm.unsignedTruncSatFloat32x4ToInt32x4Relaxed(src, dest);
break;
case wasm::SimdOp::I32x4RelaxedTruncSatF64x2SZero:
masm.truncSatFloat64x2ToInt32x4Relaxed(src, dest);
break;
case wasm::SimdOp::I32x4RelaxedTruncSatF64x2UZero:
masm.unsignedTruncSatFloat64x2ToInt32x4Relaxed(src, dest);
break;
default:
MOZ_CRASH("Unary SimdOp not implemented");
}

Просмотреть файл

@ -1283,6 +1283,10 @@ void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
case wasm::SimdOp::I32x4ExtAddPairwiseI16x8S:
case wasm::SimdOp::I32x4ExtAddPairwiseI16x8U:
case wasm::SimdOp::I8x16Popcnt:
case wasm::SimdOp::I32x4RelaxedTruncSSatF32x4:
case wasm::SimdOp::I32x4RelaxedTruncUSatF32x4:
case wasm::SimdOp::I32x4RelaxedTruncSatF64x2SZero:
case wasm::SimdOp::I32x4RelaxedTruncSatF64x2UZero:
break;
case wasm::SimdOp::I32x4TruncSatF64x2SZero:
case wasm::SimdOp::I32x4TruncSatF64x2UZero:

Просмотреть файл

@ -3542,6 +3542,28 @@ void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src,
Uqxtn(Simd2S(dest), Simd2D(dest));
}
// arm64: relaxed signed f32x4 -> i32x4 truncation. A single vector FCVTZS
// (float to signed integer, rounding toward zero) over four 32-bit lanes.
void MacroAssembler::truncSatFloat32x4ToInt32x4Relaxed(FloatRegister src,
FloatRegister dest) {
Fcvtzs(Simd4S(dest), Simd4S(src));
}
// arm64: relaxed unsigned f32x4 -> i32x4 truncation. A single vector FCVTZU
// (float to unsigned integer, rounding toward zero) over four 32-bit lanes.
void MacroAssembler::unsignedTruncSatFloat32x4ToInt32x4Relaxed(
FloatRegister src, FloatRegister dest) {
Fcvtzu(Simd4S(dest), Simd4S(src));
}
// arm64: relaxed signed f64x2 -> i32x4 truncation, in two steps.
void MacroAssembler::truncSatFloat64x2ToInt32x4Relaxed(FloatRegister src,
FloatRegister dest) {
// Convert both doubles to signed 64-bit integers, rounding toward zero.
Fcvtzs(Simd2D(dest), Simd2D(src));
// Narrow the two 64-bit results to 32 bits with signed saturation. The
// destination is addressed as a two-lane 32-bit (2S) vector.
Sqxtn(Simd2S(dest), Simd2D(dest));
}
// arm64: relaxed unsigned f64x2 -> i32x4 truncation, in two steps.
void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4Relaxed(
FloatRegister src, FloatRegister dest) {
// Convert both doubles to unsigned 64-bit integers, rounding toward zero.
Fcvtzu(Simd2D(dest), Simd2D(src));
// Narrow the two 64-bit results to 32 bits with unsigned saturation. The
// destination is addressed as a two-lane 32-bit (2S) vector.
Uqxtn(Simd2S(dest), Simd2D(dest));
}
// Floating point narrowing
void MacroAssembler::convertFloat64x2ToFloat32x4(FloatRegister src,

Просмотреть файл

@ -3618,6 +3618,18 @@ void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
case wasm::SimdOp::I32x4ExtAddPairwiseI16x8U:
masm.unsignedExtAddPairwiseInt16x8(src, dest);
break;
case wasm::SimdOp::I32x4RelaxedTruncSSatF32x4:
masm.truncSatFloat32x4ToInt32x4Relaxed(src, dest);
break;
case wasm::SimdOp::I32x4RelaxedTruncUSatF32x4:
masm.unsignedTruncSatFloat32x4ToInt32x4Relaxed(src, dest);
break;
case wasm::SimdOp::I32x4RelaxedTruncSatF64x2SZero:
masm.truncSatFloat64x2ToInt32x4Relaxed(src, dest);
break;
case wasm::SimdOp::I32x4RelaxedTruncSatF64x2UZero:
masm.unsignedTruncSatFloat64x2ToInt32x4Relaxed(src, dest);
break;
default:
MOZ_CRASH("Unary SimdOp not implemented");
}

Просмотреть файл

@ -1396,6 +1396,10 @@ void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
case wasm::SimdOp::I16x8ExtAddPairwiseI8x16U:
case wasm::SimdOp::I32x4ExtAddPairwiseI16x8S:
case wasm::SimdOp::I32x4ExtAddPairwiseI16x8U:
case wasm::SimdOp::I32x4RelaxedTruncSSatF32x4:
case wasm::SimdOp::I32x4RelaxedTruncUSatF32x4:
case wasm::SimdOp::I32x4RelaxedTruncSatF64x2SZero:
case wasm::SimdOp::I32x4RelaxedTruncSatF64x2UZero:
// Prefer src == dest to avoid an unconditional src->dest move.
useAtStart = true;
reuseInput = true;

Просмотреть файл

@ -1188,6 +1188,28 @@ void MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4(
vpaddd(Operand(temp), dest, dest);
}
// Relaxed unsigned f32x4 -> i32x4 truncation.
//
// The only packed conversion used here is the signed vcvttps2dq, so lanes
// are split against 2^31 (float bit pattern 0x4f000000):
//  - small lanes stay in `dest` and go through vcvttps2dq;
//  - large lanes are moved to `scratch` and converted by manipulating the
//    IEEE-754 bits directly.
// Every lane is zero in one of the two registers, so an integer add merges
// the two partial results. No NaN or out-of-range fix-ups are performed.
void MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4Relaxed(
    FloatRegister src, FloatRegister dest) {
  ScratchSimd128Scope scratch(asMasm());
  asMasm().moveSimd128Float(src, dest);

  // Place lanes below 80000000h into dest, otherwise into scratch.
  // Keep dest or scratch 0 as default.
  asMasm().loadConstantSimd128Float(SimdConstant::SplatX4(0x4f000000), scratch);
  vcmpltps(Operand(src), scratch);
  vpand(Operand(src), scratch, scratch);
  vpxor(Operand(scratch), dest, dest);

  // Convert lanes below 80000000h into unsigned int without issues.
  vcvttps2dq(dest, dest);

  // A float in [2^31, 2^32) has biased exponent 158 (1001'1110b) and encodes
  // the integer 2^31 + (mantissa << 8). Doubling the lane bumps the exponent
  // to 159 (1001'1111b), making its lowest bit 1. A left shift by 8 then
  // discards the sign and the upper seven exponent bits, moves that 1 into
  // bit 31 (the implicit leading one) and the 23 mantissa bits into
  // bits 30..8. Shifting the undoubled value by 7 instead would scale the
  // mantissa by 2^7 rather than 2^8 and give wrong results whenever the
  // mantissa is non-zero (e.g. 3000000000.0f).
  vaddps(Operand(scratch), scratch, scratch);
  vpslld(Imm32(8), scratch, scratch);

  // Combine the results.
  vpaddd(Operand(scratch), dest, dest);
}
void MacroAssemblerX86Shared::unsignedConvertInt32x4ToFloat64x2(
FloatRegister src, FloatRegister dest) {
ScratchSimd128Scope scratch(asMasm());
@ -1232,6 +1254,20 @@ void MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4(
vshufps(0x88, scratch, dest, dest);
}
// Relaxed unsigned f64x2 -> i32x4 truncation. The two results end up in the
// low 32-bit lanes of `dest`; the other two lanes are taken from `scratch`.
void MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4Relaxed(
FloatRegister src, FloatRegister dest) {
ScratchSimd128Scope scratch(asMasm());
asMasm().moveSimd128Float(src, dest);
// The same as unsignedTruncSatFloat64x2ToInt32x4, but without NaN
// and out-of-bounds checks.
vroundpd(SSERoundingMode::Trunc, Operand(dest), dest);
// 4503599627370496.0 is 2^52. Adding it to a truncated value in [0, 2^32)
// leaves that integer in the low 32 bits of the double's bit pattern.
asMasm().loadConstantSimd128Float(SimdConstant::SplatX2(4503599627370496.0),
scratch);
vaddpd(Operand(scratch), dest, dest);
// 0x88 selects 32-bit elements 0 and 2 of each input, i.e. the low word of
// each double. For `dest` those words hold the results. For `scratch` they
// are the low words of 2^52 (0x43300000'00000000), which are zero.
vshufps(0x88, scratch, dest, dest);
}
void MacroAssemblerX86Shared::popcntInt8x16(FloatRegister src,
FloatRegister temp,
FloatRegister output) {

Просмотреть файл

@ -2719,6 +2719,26 @@ void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src,
MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4(src, temp, dest);
}
// x86: relaxed signed f32x4 -> i32x4 truncation. Emits vcvttps2dq alone,
// with no additional fix-up code around it.
void MacroAssembler::truncSatFloat32x4ToInt32x4Relaxed(FloatRegister src,
FloatRegister dest) {
vcvttps2dq(src, dest);
}
// x86: relaxed unsigned f32x4 -> i32x4 truncation. Forwards to the shared
// x86 implementation of the same name in MacroAssemblerX86Shared.
void MacroAssembler::unsignedTruncSatFloat32x4ToInt32x4Relaxed(
FloatRegister src, FloatRegister dest) {
MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4Relaxed(src, dest);
}
// x86: relaxed signed f64x2 -> i32x4 truncation. Emits vcvttpd2dq alone,
// with no additional fix-up code around it.
void MacroAssembler::truncSatFloat64x2ToInt32x4Relaxed(FloatRegister src,
FloatRegister dest) {
vcvttpd2dq(src, dest);
}
// x86: relaxed unsigned f64x2 -> i32x4 truncation. Forwards to the shared
// x86 implementation of the same name in MacroAssemblerX86Shared.
void MacroAssembler::unsignedTruncSatFloat64x2ToInt32x4Relaxed(
FloatRegister src, FloatRegister dest) {
MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4Relaxed(src, dest);
}
// Floating point narrowing
void MacroAssembler::convertFloat64x2ToFloat32x4(FloatRegister src,

Просмотреть файл

@ -410,10 +410,14 @@ class MacroAssemblerX86Shared : public Assembler {
void truncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister dest);
void unsignedTruncSatFloat32x4ToInt32x4(FloatRegister src, FloatRegister temp,
FloatRegister dest);
void unsignedTruncSatFloat32x4ToInt32x4Relaxed(FloatRegister src,
FloatRegister dest);
void truncSatFloat64x2ToInt32x4(FloatRegister src, FloatRegister temp,
FloatRegister dest);
void unsignedTruncSatFloat64x2ToInt32x4(FloatRegister src, FloatRegister temp,
FloatRegister dest);
void unsignedTruncSatFloat64x2ToInt32x4Relaxed(FloatRegister src,
FloatRegister dest);
void splatX16(Register input, FloatRegister output);
void splatX8(Register input, FloatRegister output);

Просмотреть файл

@ -7382,6 +7382,26 @@ static void ConvertF64x2ToUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd,
masm.unsignedTruncSatFloat64x2ToInt32x4(rs, rd, temp);
}
// Baseline emitter for the relaxed signed f32x4 -> i32x4 truncation:
// forwards the source (rs) and destination (rd) vector registers to the
// MacroAssembler. Passed to dispatchVectorUnary by the opcode dispatch.
static void RelaxedConvertF32x4ToI32x4(MacroAssembler& masm, RegV128 rs,
RegV128 rd) {
masm.truncSatFloat32x4ToInt32x4Relaxed(rs, rd);
}
// Baseline emitter for the relaxed unsigned f32x4 -> i32x4 truncation:
// forwards the source (rs) and destination (rd) vector registers to the
// MacroAssembler. Passed to dispatchVectorUnary by the opcode dispatch.
static void RelaxedConvertF32x4ToUI32x4(MacroAssembler& masm, RegV128 rs,
RegV128 rd) {
masm.unsignedTruncSatFloat32x4ToInt32x4Relaxed(rs, rd);
}
// Baseline emitter for the relaxed signed f64x2 -> i32x4 truncation:
// forwards the source (rs) and destination (rd) vector registers to the
// MacroAssembler. Passed to dispatchVectorUnary by the opcode dispatch.
static void RelaxedConvertF64x2ToI32x4(MacroAssembler& masm, RegV128 rs,
RegV128 rd) {
masm.truncSatFloat64x2ToInt32x4Relaxed(rs, rd);
}
// Baseline emitter for the relaxed unsigned f64x2 -> i32x4 truncation:
// forwards the source (rs) and destination (rd) vector registers to the
// MacroAssembler. Passed to dispatchVectorUnary by the opcode dispatch.
static void RelaxedConvertF64x2ToUI32x4(MacroAssembler& masm, RegV128 rs,
RegV128 rd) {
masm.unsignedTruncSatFloat64x2ToInt32x4Relaxed(rs, rd);
}
// Baseline emitter for the f64x2 -> f32x4 conversion: forwards the source
// (rs) and destination (rd) vector registers to
// MacroAssembler::convertFloat64x2ToFloat32x4.
static void DemoteF64x2ToF32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
masm.convertFloat64x2ToFloat32x4(rs, rd);
}
@ -9084,6 +9104,26 @@ bool BaseCompiler::emitBody() {
return iter_.unrecognizedOpcode(&op);
}
CHECK_NEXT(dispatchVectorBinary(RelaxedMaxF64x2));
case uint32_t(SimdOp::I32x4RelaxedTruncSSatF32x4):
if (!moduleEnv_.v128RelaxedEnabled()) {
return iter_.unrecognizedOpcode(&op);
}
CHECK_NEXT(dispatchVectorUnary(RelaxedConvertF32x4ToI32x4));
case uint32_t(SimdOp::I32x4RelaxedTruncUSatF32x4):
if (!moduleEnv_.v128RelaxedEnabled()) {
return iter_.unrecognizedOpcode(&op);
}
CHECK_NEXT(dispatchVectorUnary(RelaxedConvertF32x4ToUI32x4));
case uint32_t(SimdOp::I32x4RelaxedTruncSatF64x2SZero):
if (!moduleEnv_.v128RelaxedEnabled()) {
return iter_.unrecognizedOpcode(&op);
}
CHECK_NEXT(dispatchVectorUnary(RelaxedConvertF64x2ToI32x4));
case uint32_t(SimdOp::I32x4RelaxedTruncSatF64x2UZero):
if (!moduleEnv_.v128RelaxedEnabled()) {
return iter_.unrecognizedOpcode(&op);
}
CHECK_NEXT(dispatchVectorUnary(RelaxedConvertF64x2ToUI32x4));
# endif
default:
break;

Просмотреть файл

@ -668,8 +668,8 @@ enum class SimdOp {
// Narrow = 0xa2
I32x4AllTrue = 0xa3,
I32x4Bitmask = 0xa4,
// Narrow = 0xa5
// Narrow = 0xa6
I32x4RelaxedTruncSSatF32x4 = 0xa5,
I32x4RelaxedTruncUSatF32x4 = 0xa6,
I32x4WidenLowSI16x8 = 0xa7,
I32x4WidenHighSI16x8 = 0xa8,
I32x4WidenLowUI16x8 = 0xa9,
@ -700,8 +700,8 @@ enum class SimdOp {
// AnyTrue = 0xc2
I64x2AllTrue = 0xc3,
I64x2Bitmask = 0xc4,
// Narrow = 0xc5
// Narrow = 0xc6
I32x4RelaxedTruncSatF64x2SZero = 0xc5,
I32x4RelaxedTruncSatF64x2UZero = 0xc6,
I64x2WidenLowSI32x4 = 0xc7,
I64x2WidenHighSI32x4 = 0xc8,
I64x2WidenLowUI32x4 = 0xc9,

Просмотреть файл

@ -5472,6 +5472,15 @@ static bool EmitBodyExprs(FunctionCompiler& f) {
}
CHECK(EmitBinarySimd128(f, /* commutative= */ true, SimdOp(op.b1)));
}
case uint32_t(SimdOp::I32x4RelaxedTruncSSatF32x4):
case uint32_t(SimdOp::I32x4RelaxedTruncUSatF32x4):
case uint32_t(SimdOp::I32x4RelaxedTruncSatF64x2SZero):
case uint32_t(SimdOp::I32x4RelaxedTruncSatF64x2UZero): {
if (!f.moduleEnv().v128RelaxedEnabled()) {
return f.iter().unrecognizedOpcode(&op);
}
CHECK(EmitUnarySimd128(f, SimdOp(op.b1)));
}
# endif
default:

Просмотреть файл

@ -551,6 +551,10 @@ OpKind wasm::Classify(OpBytes op) {
case SimdOp::I16x8ExtAddPairwiseI8x16U:
case SimdOp::I32x4ExtAddPairwiseI16x8S:
case SimdOp::I32x4ExtAddPairwiseI16x8U:
case SimdOp::I32x4RelaxedTruncSSatF32x4:
case SimdOp::I32x4RelaxedTruncUSatF32x4:
case SimdOp::I32x4RelaxedTruncSatF64x2SZero:
case SimdOp::I32x4RelaxedTruncSatF64x2UZero:
WASM_SIMD_OP(OpKind::Unary);
case SimdOp::I8x16Shl:
case SimdOp::I8x16ShrS:

Просмотреть файл

@ -1026,6 +1026,15 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env,
}
CHECK(iter.readBinary(ValType::V128, &nothing, &nothing));
}
case uint32_t(SimdOp::I32x4RelaxedTruncSSatF32x4):
case uint32_t(SimdOp::I32x4RelaxedTruncUSatF32x4):
case uint32_t(SimdOp::I32x4RelaxedTruncSatF64x2SZero):
case uint32_t(SimdOp::I32x4RelaxedTruncSatF64x2UZero): {
if (!env.v128RelaxedEnabled()) {
return iter.unrecognizedOpcode(&op);
}
CHECK(iter.readUnary(ValType::V128, &nothing));
}
# endif
default: