зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1686001 - Implement i16x8.q15mulr_sat_s. r=jseward
Implement the new i16x8.q15mulr_sat_s instruction in baseline (x86, x64, arm64) and ion (x86, x64). Add basic test cases. Differential Revision: https://phabricator.services.mozilla.com/D102692
This commit is contained in:
Родитель
1dd6be9de1
Коммит
979ff1ff5d
|
@ -305,7 +305,7 @@ if (!wasmSimdEnabled()) {
|
||||||
let reservedSimd = [
|
let reservedSimd = [
|
||||||
0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e,
|
0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e,
|
||||||
0x5f, 0x67, 0x68, 0x69, 0x6a, 0x74, 0x75, 0x7a, 0x7c, 0x7d, 0x7e,
|
0x5f, 0x67, 0x68, 0x69, 0x6a, 0x74, 0x75, 0x7a, 0x7c, 0x7d, 0x7e,
|
||||||
0x7f, 0x94, 0x9c, 0xa5, 0xa6, 0xaf,
|
0x7f, 0x94, 0xa5, 0xa6, 0xaf,
|
||||||
0xb0, 0xb2, 0xb3, 0xb4, 0xbc, 0xc0, 0xc2,
|
0xb0, 0xb2, 0xb3, 0xb4, 0xbc, 0xc0, 0xc2,
|
||||||
0xc3, 0xc5, 0xc6, 0xcf, 0xd0,
|
0xc3, 0xc5, 0xc6, 0xcf, 0xd0,
|
||||||
0xd4,
|
0xd4,
|
||||||
|
|
|
@ -160,4 +160,20 @@ assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n] >>> 0)));
|
||||||
ins.exports.widen_high_i32x4_u();
|
ins.exports.widen_high_i32x4_u();
|
||||||
assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n+2] >>> 0)));
|
assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n+2] >>> 0)));
|
||||||
|
|
||||||
|
// Saturating rounding q-format multiplication.
|
||||||
|
// This is to be moved into ad-hack.js
|
||||||
|
|
||||||
|
// Test module: exports a one-page memory "mem" and a function "q15mulr_sat_s"
// that loads two v128 operands from byte offsets 16 and 32, applies
// i16x8.q15mulr_sat_s, and stores the result at byte offset 0.
var ins = wasmEvalText(`
|
||||||
|
(module
|
||||||
|
(memory (export "mem") 1 1)
|
||||||
|
(func (export "q15mulr_sat_s")
|
||||||
|
(v128.store (i32.const 0) (i16x8.q15mulr_sat_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`);
|
||||||
|
|
||||||||
|
// View the wasm memory as 16-bit signed lanes.
var mem16 = new Int16Array(ins.exports.mem.buffer);
|
||||||
|
// Run the instruction on every pair of input vectors produced by cross().
for ( let [as, bs] of cross(Int16Array.inputs) ) {
|
||||||
|
// Int16 element 8 is byte offset 16 and element 16 is byte offset 32, i.e. the
// two load addresses used by the wasm function.
// NOTE(review): assumes set(array, index, values) writes starting at an
// element index -- confirm against the test harness.
set(mem16, 8, as);
|
||||||
|
set(mem16, 16, bs);
|
||||||
|
ins.exports.q15mulr_sat_s();
|
||||||
|
// Reference result per lane: (a * b + 0x4000) >> 15, passed through
// signed_saturate(..., 16).
assertSame(get(mem16, 0, 8),
|
||||||
|
iota(8).map((i) => signed_saturate((as[i] * bs[i] + 0x4000) >> 15, 16)));
|
||||||
|
}
|
||||||
|
|
|
@ -2282,6 +2282,9 @@ class MacroAssembler : public MacroAssemblerSpecific {
|
||||||
FloatRegister lhsDest)
|
FloatRegister lhsDest)
|
||||||
DEFINED_ON(x86_shared, arm64);
|
DEFINED_ON(x86_shared, arm64);
|
||||||
|
|
||||||
|
// Q15 saturating rounding multiply on int16x8 operands (added for wasm
// i16x8.q15mulr_sat_s). `lhsDest` is both the left operand and the
// destination; `rhs` is the right operand. Implemented for x86_shared and
// arm64.
inline void q15MulrSatInt16x8(FloatRegister rhs, FloatRegister lhsDest)
|
||||||
|
DEFINED_ON(x86_shared, arm64);
|
||||||
|
|
||||||
// Integer Negate
|
// Integer Negate
|
||||||
|
|
||||||
inline void negInt8x16(FloatRegister src, FloatRegister dest)
|
inline void negInt8x16(FloatRegister src, FloatRegister dest)
|
||||||
|
|
|
@ -2274,6 +2274,11 @@ void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
|
||||||
Umull2(Simd2D(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
|
Umull2(Simd2D(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// arm64 implementation of q15MulrSatInt16x8: emits a single Sqrdmulh over the
// 8H views of the registers, with lhsDest as both first source and
// destination and rhs as the second source.
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
|
||||||
|
FloatRegister lhsDest) {
|
||||||
|
Sqrdmulh(Simd8H(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
|
||||||
|
}
|
||||||
|
|
||||||
// Integer Negate
|
// Integer Negate
|
||||||
|
|
||||||
void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
|
void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
|
||||||
|
|
|
@ -2995,6 +2995,20 @@ class AssemblerX86Shared : public AssemblerShared {
|
||||||
MOZ_CRASH("unexpected operand kind");
|
MOZ_CRASH("unexpected operand kind");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void vpmulhrsw(const Operand& src1, FloatRegister src0, FloatRegister dest) {
|
||||||
|
MOZ_ASSERT(HasSSE2());
|
||||||
|
switch (src1.kind()) {
|
||||||
|
case Operand::FPREG:
|
||||||
|
masm.vpmulhrsw_rr(src1.fpu(), src0.encoding(), dest.encoding());
|
||||||
|
break;
|
||||||
|
case Operand::MEM_REG_DISP:
|
||||||
|
masm.vpmulhrsw_mr(src1.disp(), src1.base(), src0.encoding(),
|
||||||
|
dest.encoding());
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
MOZ_CRASH("unexpected operand kind");
|
||||||
|
}
|
||||||
|
}
|
||||||
void vpmulld(const Operand& src1, FloatRegister src0, FloatRegister dest) {
|
void vpmulld(const Operand& src1, FloatRegister src0, FloatRegister dest) {
|
||||||
MOZ_ASSERT(HasSSE41());
|
MOZ_ASSERT(HasSSE41());
|
||||||
switch (src1.kind()) {
|
switch (src1.kind()) {
|
||||||
|
|
|
@ -813,6 +813,15 @@ class BaseAssembler : public GenericAssembler {
|
||||||
threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, address,
|
threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, address,
|
||||||
src0, dst);
|
src0, dst);
|
||||||
}
|
}
|
||||||
|
// Encodes vpmulhrsw with a register second source: forwards to
// threeByteOpSimd with the VEX_PD prefix selector, opcode
// OP3_PMULHRSW_VdqWdq and the ESCAPE_38 opcode map.
void vpmulhrsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
|
||||||
|
threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, src1,
|
||||||
|
src0, dst);
|
||||||
|
}
|
||||||
|
// Encodes vpmulhrsw with a memory second source addressed as offset(base):
// forwards to threeByteOpSimd with the VEX_PD prefix selector, opcode
// OP3_PMULHRSW_VdqWdq and the ESCAPE_38 opcode map.
void vpmulhrsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
|
||||||
|
XMMRegisterID dst) {
|
||||||
|
threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, offset,
|
||||||
|
base, src0, dst);
|
||||||
|
}
|
||||||
|
|
||||||
void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
|
void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
|
||||||
twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst);
|
twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst);
|
||||||
|
|
|
@ -2612,6 +2612,9 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
|
||||||
case wasm::SimdOp::I64x2ExtMulHighUI32x4:
|
case wasm::SimdOp::I64x2ExtMulHighUI32x4:
|
||||||
masm.unsignedExtMulHighInt32x4(rhs, lhsDest);
|
masm.unsignedExtMulHighInt32x4(rhs, lhsDest);
|
||||||
break;
|
break;
|
||||||
|
case wasm::SimdOp::I16x8Q15MulrSatS:
|
||||||
|
masm.q15MulrSatInt16x8(rhs, lhsDest);
|
||||||
|
break;
|
||||||
# ifdef ENABLE_WASM_SIMD_WORMHOLE
|
# ifdef ENABLE_WASM_SIMD_WORMHOLE
|
||||||
case wasm::SimdOp::MozWHSELFTEST: {
|
case wasm::SimdOp::MozWHSELFTEST: {
|
||||||
static const int8_t mask[16] = {0xD, 0xE, 0xA, 0xD, 0xD, 0, 0, 0xD,
|
static const int8_t mask[16] = {0xD, 0xE, 0xA, 0xD, 0xD, 0, 0, 0xD,
|
||||||
|
|
|
@ -340,6 +340,7 @@ enum ThreeByteOpcodeID {
|
||||||
OP3_ROUNDPD_VpdWpd = 0x09,
|
OP3_ROUNDPD_VpdWpd = 0x09,
|
||||||
OP3_ROUNDSS_VsdWsd = 0x0A,
|
OP3_ROUNDSS_VsdWsd = 0x0A,
|
||||||
OP3_ROUNDSD_VsdWsd = 0x0B,
|
OP3_ROUNDSD_VsdWsd = 0x0B,
|
||||||
|
OP3_PMULHRSW_VdqWdq = 0x0B,
|
||||||
OP3_BLENDPS_VpsWpsIb = 0x0C,
|
OP3_BLENDPS_VpsWpsIb = 0x0C,
|
||||||
OP3_PBLENDW_VdqWdqIb = 0x0E,
|
OP3_PBLENDW_VdqWdqIb = 0x0E,
|
||||||
OP3_PALIGNR_VdqWdqIb = 0x0F,
|
OP3_PALIGNR_VdqWdqIb = 0x0F,
|
||||||
|
|
|
@ -1694,6 +1694,15 @@ void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
|
||||||
vpmuludq(Operand(scratch), lhsDest, lhsDest);
|
vpmuludq(Operand(scratch), lhsDest, lhsDest);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// x86-shared implementation of q15MulrSatInt16x8: lhsDest is multiplied by
// rhs with vpmulhrsw, then a compare/xor fix-up is applied to the result.
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
|
||||||
|
FloatRegister lhsDest) {
|
||||||
|
ScratchSimd128Scope scratch(*this);
|
||||||
|
// Rounded, scaled high-half multiply; the result replaces lhsDest.
vpmulhrsw(Operand(rhs), lhsDest, lhsDest);
|
||||||
|
// Copy the product so the comparison below does not clobber it.
vmovdqa(lhsDest, scratch);
|
||||||
|
// NOTE(review): presumably leaves an all-ones mask in each scratch lane that
// equals 0x8000 (the unsaturated result of -32768 * -32768) and zero
// elsewhere -- confirm the operand/result convention of vpcmpeqwSimd128.
vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), scratch);
|
||||||
|
// XOR with the mask: under the assumption above, 0x8000 lanes become 0x7FFF
// and all other lanes are unchanged.
vpxor(scratch, lhsDest, lhsDest);
|
||||||
|
}
|
||||||
|
|
||||||
// Integer negate
|
// Integer negate
|
||||||
|
|
||||||
void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
|
void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
|
||||||
|
|
|
@ -13298,6 +13298,10 @@ static void ExtMulHighUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||||
masm.unsignedExtMulHighInt32x4(rs, rsd);
|
masm.unsignedExtMulHighInt32x4(rs, rsd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emitter callback for i16x8.q15mulr_sat_s (passed to dispatchVectorBinary):
// forwards to masm.q15MulrSatInt16x8 with rs as the right operand and rsd as
// the combined left operand / destination.
static void Q15MulrSatS(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||||
|
masm.q15MulrSatInt16x8(rs, rsd);
|
||||||
|
}
|
||||||
|
|
||||||
static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond,
|
static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond,
|
||||||
RegV128 rs, RegV128 rsd) {
|
RegV128 rs, RegV128 rsd) {
|
||||||
masm.compareInt8x16(cond, rs, rsd);
|
masm.compareInt8x16(cond, rs, rsd);
|
||||||
|
@ -15321,6 +15325,8 @@ bool BaseCompiler::emitBody() {
|
||||||
CHECK_NEXT(dispatchVectorBinary(ExtMulLowUI32x4));
|
CHECK_NEXT(dispatchVectorBinary(ExtMulLowUI32x4));
|
||||||
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
|
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
|
||||||
CHECK_NEXT(dispatchVectorBinary(ExtMulHighUI32x4));
|
CHECK_NEXT(dispatchVectorBinary(ExtMulHighUI32x4));
|
||||||
|
case uint32_t(SimdOp::I16x8Q15MulrSatS):
|
||||||
|
CHECK_NEXT(dispatchVectorBinary(Q15MulrSatS));
|
||||||
case uint32_t(SimdOp::I8x16Neg):
|
case uint32_t(SimdOp::I8x16Neg):
|
||||||
CHECK_NEXT(dispatchVectorUnary(NegI8x16));
|
CHECK_NEXT(dispatchVectorUnary(NegI8x16));
|
||||||
case uint32_t(SimdOp::I16x8Neg):
|
case uint32_t(SimdOp::I16x8Neg):
|
||||||
|
|
|
@ -602,7 +602,7 @@ enum class SimdOp {
|
||||||
I16x8MaxU = 0x99,
|
I16x8MaxU = 0x99,
|
||||||
I16x8ExtMulLowSI8x16 = 0x9a,
|
I16x8ExtMulLowSI8x16 = 0x9a,
|
||||||
I16x8AvgrU = 0x9b,
|
I16x8AvgrU = 0x9b,
|
||||||
// Unused = 0x9c
|
I16x8Q15MulrSatS = 0x9c,
|
||||||
I16x8ExtMulHighSI8x16 = 0x9d,
|
I16x8ExtMulHighSI8x16 = 0x9d,
|
||||||
I16x8ExtMulLowUI8x16 = 0x9e,
|
I16x8ExtMulLowUI8x16 = 0x9e,
|
||||||
I16x8ExtMulHighUI8x16 = 0x9f,
|
I16x8ExtMulHighUI8x16 = 0x9f,
|
||||||
|
|
|
@ -4964,6 +4964,7 @@ static bool EmitBodyExprs(FunctionCompiler& f) {
|
||||||
case uint32_t(SimdOp::I64x2ExtMulHighSI32x4):
|
case uint32_t(SimdOp::I64x2ExtMulHighSI32x4):
|
||||||
case uint32_t(SimdOp::I64x2ExtMulLowUI32x4):
|
case uint32_t(SimdOp::I64x2ExtMulLowUI32x4):
|
||||||
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
|
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
|
||||||
|
case uint32_t(SimdOp::I16x8Q15MulrSatS):
|
||||||
CHECK(EmitBinarySimd128(f, /* commutative= */ true, SimdOp(op.b1)));
|
CHECK(EmitBinarySimd128(f, /* commutative= */ true, SimdOp(op.b1)));
|
||||||
case uint32_t(SimdOp::V128AndNot):
|
case uint32_t(SimdOp::V128AndNot):
|
||||||
case uint32_t(SimdOp::I8x16Sub):
|
case uint32_t(SimdOp::I8x16Sub):
|
||||||
|
|
|
@ -461,6 +461,7 @@ OpKind wasm::Classify(OpBytes op) {
|
||||||
case SimdOp::I64x2ExtMulHighSI32x4:
|
case SimdOp::I64x2ExtMulHighSI32x4:
|
||||||
case SimdOp::I64x2ExtMulLowUI32x4:
|
case SimdOp::I64x2ExtMulLowUI32x4:
|
||||||
case SimdOp::I64x2ExtMulHighUI32x4:
|
case SimdOp::I64x2ExtMulHighUI32x4:
|
||||||
|
case SimdOp::I16x8Q15MulrSatS:
|
||||||
WASM_SIMD_OP(OpKind::Binary);
|
WASM_SIMD_OP(OpKind::Binary);
|
||||||
case SimdOp::I8x16Neg:
|
case SimdOp::I8x16Neg:
|
||||||
case SimdOp::I16x8Neg:
|
case SimdOp::I16x8Neg:
|
||||||
|
|
|
@ -1070,6 +1070,7 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env,
|
||||||
case uint32_t(SimdOp::I64x2ExtMulHighSI32x4):
|
case uint32_t(SimdOp::I64x2ExtMulHighSI32x4):
|
||||||
case uint32_t(SimdOp::I64x2ExtMulLowUI32x4):
|
case uint32_t(SimdOp::I64x2ExtMulLowUI32x4):
|
||||||
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
|
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
|
||||||
|
case uint32_t(SimdOp::I16x8Q15MulrSatS):
|
||||||
CHECK(iter.readBinary(ValType::V128, &nothing, &nothing));
|
CHECK(iter.readBinary(ValType::V128, &nothing, &nothing));
|
||||||
|
|
||||||
case uint32_t(SimdOp::I8x16Neg):
|
case uint32_t(SimdOp::I8x16Neg):
|
||||||
|
|
Загрузка…
Ссылка в новой задаче