зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1686001 - Implement i16x8.q15mulr_sat_s. r=jseward
Implement the new i16x8.q15mulr_sat_s instruction in baseline (x86, x64, arm64) and ion (x86, x64). Add basic test cases. Differential Revision: https://phabricator.services.mozilla.com/D102692
This commit is contained in:
Родитель
1dd6be9de1
Коммит
979ff1ff5d
|
@ -305,7 +305,7 @@ if (!wasmSimdEnabled()) {
|
|||
let reservedSimd = [
|
||||
0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e,
|
||||
0x5f, 0x67, 0x68, 0x69, 0x6a, 0x74, 0x75, 0x7a, 0x7c, 0x7d, 0x7e,
|
||||
0x7f, 0x94, 0x9c, 0xa5, 0xa6, 0xaf,
|
||||
0x7f, 0x94, 0xa5, 0xa6, 0xaf,
|
||||
0xb0, 0xb2, 0xb3, 0xb4, 0xbc, 0xc0, 0xc2,
|
||||
0xc3, 0xc5, 0xc6, 0xcf, 0xd0,
|
||||
0xd4,
|
||||
|
|
|
@ -160,4 +160,20 @@ assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n] >>> 0)));
|
|||
ins.exports.widen_high_i32x4_u();
|
||||
assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n+2] >>> 0)));
|
||||
|
||||
// Saturating rounding q-format multiplication.
|
||||
// This is to be moved into ad-hack.js
|
||||
|
||||
// Module under test: loads the two operands from byte offsets 16 and 32 and
// stores the i16x8.q15mulr_sat_s result at byte offset 0.
var ins = wasmEvalText(`
|
||||
(module
|
||||
(memory (export "mem") 1 1)
|
||||
(func (export "q15mulr_sat_s")
|
||||
(v128.store (i32.const 0) (i16x8.q15mulr_sat_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`);
|
||||
|
||||
// 16-bit view of the exported memory. Indices used below are in Int16
// elements, so element 8 is byte 16 and element 16 is byte 32.
var mem16 = new Int16Array(ins.exports.mem.buffer);
|
||||
for ( let [as, bs] of cross(Int16Array.inputs) ) {
|
||||
// Presumably set(mem, i, xs) writes xs starting at element i, which places
// the operands where the module's v128.load instructions read them.
set(mem16, 8, as);
|
||||
set(mem16, 16, bs);
|
||||
ins.exports.q15mulr_sat_s();
|
||||
// Reference result per lane: the product is rounded by adding 0x4000 and
// shifting right by 15, then passed through signed_saturate(..., 16).
assertSame(get(mem16, 0, 8),
|
||||
iota(8).map((i) => signed_saturate((as[i] * bs[i] + 0x4000) >> 15, 16)));
|
||||
}
|
||||
|
|
|
@ -2282,6 +2282,9 @@ class MacroAssembler : public MacroAssemblerSpecific {
|
|||
FloatRegister lhsDest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
// Saturating rounding Q15 multiply of 16-bit lanes (i16x8.q15mulr_sat_s).
// lhsDest is both the left operand and the destination.
inline void q15MulrSatInt16x8(FloatRegister rhs, FloatRegister lhsDest)
|
||||
DEFINED_ON(x86_shared, arm64);
|
||||
|
||||
// Integer Negate
|
||||
|
||||
inline void negInt8x16(FloatRegister src, FloatRegister dest)
|
||||
|
|
|
@ -2274,6 +2274,11 @@ void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
|
|||
Umull2(Simd2D(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
|
||||
}
|
||||
|
||||
// ARM64 implementation: emits a single Sqrdmulh on the Simd8H views of the
// registers, with lhsDest as both the first source and the destination.
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
Sqrdmulh(Simd8H(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
|
||||
}
|
||||
|
||||
// Integer Negate
|
||||
|
||||
void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
|
||||
|
|
|
@ -2995,6 +2995,20 @@ class AssemblerX86Shared : public AssemblerShared {
|
|||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
void vpmulhrsw(const Operand& src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE2());
|
||||
switch (src1.kind()) {
|
||||
case Operand::FPREG:
|
||||
masm.vpmulhrsw_rr(src1.fpu(), src0.encoding(), dest.encoding());
|
||||
break;
|
||||
case Operand::MEM_REG_DISP:
|
||||
masm.vpmulhrsw_mr(src1.disp(), src1.base(), src0.encoding(),
|
||||
dest.encoding());
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("unexpected operand kind");
|
||||
}
|
||||
}
|
||||
void vpmulld(const Operand& src1, FloatRegister src0, FloatRegister dest) {
|
||||
MOZ_ASSERT(HasSSE41());
|
||||
switch (src1.kind()) {
|
||||
|
|
|
@ -813,6 +813,15 @@ class BaseAssembler : public GenericAssembler {
|
|||
threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, address,
|
||||
src0, dst);
|
||||
}
|
||||
// Register-register form of vpmulhrsw: a three-byte opcode in the ESCAPE_38
// map with the VEX_PD prefix.
void vpmulhrsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
|
||||
threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, src1,
|
||||
src0, dst);
|
||||
}
|
||||
// Memory-register form of vpmulhrsw: the first source is read from
// [base + offset]; same opcode, escape map and prefix as the _rr form.
void vpmulhrsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
|
||||
XMMRegisterID dst) {
|
||||
threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, offset,
|
||||
base, src0, dst);
|
||||
}
|
||||
|
||||
void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
|
||||
twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst);
|
||||
|
|
|
@ -2612,6 +2612,9 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
|
|||
case wasm::SimdOp::I64x2ExtMulHighUI32x4:
|
||||
masm.unsignedExtMulHighInt32x4(rhs, lhsDest);
|
||||
break;
|
||||
case wasm::SimdOp::I16x8Q15MulrSatS:
|
||||
masm.q15MulrSatInt16x8(rhs, lhsDest);
|
||||
break;
|
||||
# ifdef ENABLE_WASM_SIMD_WORMHOLE
|
||||
case wasm::SimdOp::MozWHSELFTEST: {
|
||||
static const int8_t mask[16] = {0xD, 0xE, 0xA, 0xD, 0xD, 0, 0, 0xD,
|
||||
|
|
|
@ -340,6 +340,7 @@ enum ThreeByteOpcodeID {
|
|||
OP3_ROUNDPD_VpdWpd = 0x09,
|
||||
OP3_ROUNDSS_VsdWsd = 0x0A,
|
||||
OP3_ROUNDSD_VsdWsd = 0x0B,
|
||||
OP3_PMULHRSW_VdqWdq = 0x0B,
|
||||
OP3_BLENDPS_VpsWpsIb = 0x0C,
|
||||
OP3_PBLENDW_VdqWdqIb = 0x0E,
|
||||
OP3_PALIGNR_VdqWdqIb = 0x0F,
|
||||
|
|
|
@ -1694,6 +1694,15 @@ void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
|
|||
vpmuludq(Operand(scratch), lhsDest, lhsDest);
|
||||
}
|
||||
|
||||
// x86 implementation of i16x8.q15mulr_sat_s. lhsDest is both the left operand
// and the result; the scratch SIMD register is clobbered.
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
|
||||
FloatRegister lhsDest) {
|
||||
ScratchSimd128Scope scratch(*this);
|
||||
// Rounding high multiply of each 16-bit lane.
vpmulhrsw(Operand(rhs), lhsDest, lhsDest);
|
||||
// Build a per-lane mask in scratch: all ones where the product is 0x8000.
// NOTE(review): presumably this targets the -32768 * -32768 case, which
// PMULHRSW does not saturate -- confirm against the Intel SDM.
vmovdqa(lhsDest, scratch);
|
||||
vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), scratch);
|
||||
// XOR with the mask turns 0x8000 lanes into 0x7FFF and leaves all other
// lanes unchanged.
vpxor(scratch, lhsDest, lhsDest);
|
||||
}
|
||||
|
||||
// Integer negate
|
||||
|
||||
void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {
|
||||
|
|
|
@ -13298,6 +13298,10 @@ static void ExtMulHighUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
|||
masm.unsignedExtMulHighInt32x4(rs, rsd);
|
||||
}
|
||||
|
||||
// Baseline-compiler emitter for I16x8Q15MulrSatS: forwards to the
// MacroAssembler, with rsd as both left operand and destination.
static void Q15MulrSatS(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
|
||||
masm.q15MulrSatInt16x8(rs, rsd);
|
||||
}
|
||||
|
||||
static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond,
|
||||
RegV128 rs, RegV128 rsd) {
|
||||
masm.compareInt8x16(cond, rs, rsd);
|
||||
|
@ -15321,6 +15325,8 @@ bool BaseCompiler::emitBody() {
|
|||
CHECK_NEXT(dispatchVectorBinary(ExtMulLowUI32x4));
|
||||
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
|
||||
CHECK_NEXT(dispatchVectorBinary(ExtMulHighUI32x4));
|
||||
case uint32_t(SimdOp::I16x8Q15MulrSatS):
|
||||
CHECK_NEXT(dispatchVectorBinary(Q15MulrSatS));
|
||||
case uint32_t(SimdOp::I8x16Neg):
|
||||
CHECK_NEXT(dispatchVectorUnary(NegI8x16));
|
||||
case uint32_t(SimdOp::I16x8Neg):
|
||||
|
|
|
@ -602,7 +602,7 @@ enum class SimdOp {
|
|||
I16x8MaxU = 0x99,
|
||||
I16x8ExtMulLowSI8x16 = 0x9a,
|
||||
I16x8AvgrU = 0x9b,
|
||||
// Unused = 0x9c
|
||||
I16x8Q15MulrSatS = 0x9c,
|
||||
I16x8ExtMulHighSI8x16 = 0x9d,
|
||||
I16x8ExtMulLowUI8x16 = 0x9e,
|
||||
I16x8ExtMulHighUI8x16 = 0x9f,
|
||||
|
|
|
@ -4964,6 +4964,7 @@ static bool EmitBodyExprs(FunctionCompiler& f) {
|
|||
case uint32_t(SimdOp::I64x2ExtMulHighSI32x4):
|
||||
case uint32_t(SimdOp::I64x2ExtMulLowUI32x4):
|
||||
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
|
||||
case uint32_t(SimdOp::I16x8Q15MulrSatS):
|
||||
CHECK(EmitBinarySimd128(f, /* commutative= */ true, SimdOp(op.b1)));
|
||||
case uint32_t(SimdOp::V128AndNot):
|
||||
case uint32_t(SimdOp::I8x16Sub):
|
||||
|
|
|
@ -461,6 +461,7 @@ OpKind wasm::Classify(OpBytes op) {
|
|||
case SimdOp::I64x2ExtMulHighSI32x4:
|
||||
case SimdOp::I64x2ExtMulLowUI32x4:
|
||||
case SimdOp::I64x2ExtMulHighUI32x4:
|
||||
case SimdOp::I16x8Q15MulrSatS:
|
||||
WASM_SIMD_OP(OpKind::Binary);
|
||||
case SimdOp::I8x16Neg:
|
||||
case SimdOp::I16x8Neg:
|
||||
|
|
|
@ -1070,6 +1070,7 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env,
|
|||
case uint32_t(SimdOp::I64x2ExtMulHighSI32x4):
|
||||
case uint32_t(SimdOp::I64x2ExtMulLowUI32x4):
|
||||
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
|
||||
case uint32_t(SimdOp::I16x8Q15MulrSatS):
|
||||
CHECK(iter.readBinary(ValType::V128, &nothing, &nothing));
|
||||
|
||||
case uint32_t(SimdOp::I8x16Neg):
|
||||
|
|
Загрузка…
Ссылка в новой задаче