Bug 1686001 - Implement i16x8.q15mulr_sat_s. r=jseward

Implement the new i16x8.q15mulr_sat_s instruction in baseline (x86,
x64, arm64) and ion (x86, x64).

Add basic test cases.

Differential Revision: https://phabricator.services.mozilla.com/D102692
This commit is contained in:
Lars T Hansen 2021-01-25 13:35:43 +00:00
Родитель 1dd6be9de1
Коммит 979ff1ff5d
14 изменённых файлов: 71 добавлений и 2 удалений

Просмотреть файл

@ -305,7 +305,7 @@ if (!wasmSimdEnabled()) {
let reservedSimd = [ let reservedSimd = [
0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e,
0x5f, 0x67, 0x68, 0x69, 0x6a, 0x74, 0x75, 0x7a, 0x7c, 0x7d, 0x7e, 0x5f, 0x67, 0x68, 0x69, 0x6a, 0x74, 0x75, 0x7a, 0x7c, 0x7d, 0x7e,
0x7f, 0x94, 0x9c, 0xa5, 0xa6, 0xaf, 0x7f, 0x94, 0xa5, 0xa6, 0xaf,
0xb0, 0xb2, 0xb3, 0xb4, 0xbc, 0xc0, 0xc2, 0xb0, 0xb2, 0xb3, 0xb4, 0xbc, 0xc0, 0xc2,
0xc3, 0xc5, 0xc6, 0xcf, 0xd0, 0xc3, 0xc5, 0xc6, 0xcf, 0xd0,
0xd4, 0xd4,

Просмотреть файл

@ -160,4 +160,20 @@ assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n] >>> 0)));
ins.exports.widen_high_i32x4_u(); ins.exports.widen_high_i32x4_u();
assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n+2] >>> 0))); assertSame(get(mem64u, 0, 2), iota(2).map((n) => BigInt(as[n+2] >>> 0)));
// Saturating, rounding Q-format multiplication (i16x8.q15mulr_sat_s).
// This is to be moved into ad-hack.js
//
// The exported function loads its two operands from byte offsets 16 and 32
// and stores the result at byte offset 0.  Through the Int16Array view below
// (2 bytes per element) those are element indices 8, 16 and 0 respectively.
var ins = wasmEvalText(`
(module
(memory (export "mem") 1 1)
(func (export "q15mulr_sat_s")
(v128.store (i32.const 0) (i16x8.q15mulr_sat_s (v128.load (i32.const 16)) (v128.load (i32.const 32))))))`);
var mem16 = new Int16Array(ins.exports.mem.buffer);
for (const [lhs, rhs] of cross(Int16Array.inputs)) {
  set(mem16, 8, lhs);
  set(mem16, 16, rhs);
  ins.exports.q15mulr_sat_s();
  const actual = get(mem16, 0, 8);
  // Reference value per lane: add the rounding bias 0x4000, shift out the 15
  // fraction bits, then hand the value to signed_saturate with a width of 16.
  const expected = iota(8).map(
      (lane) => signed_saturate((lhs[lane] * rhs[lane] + 0x4000) >> 15, 16));
  assertSame(actual, expected);
}

Просмотреть файл

@ -2282,6 +2282,9 @@ class MacroAssembler : public MacroAssemblerSpecific {
FloatRegister lhsDest) FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64); DEFINED_ON(x86_shared, arm64);
// Saturating rounding Q15 multiplication on 16-bit lanes
// (wasm i16x8.q15mulr_sat_s).  `lhsDest` is both the left-hand operand and
// the destination; `rhs` is the right-hand operand.
inline void q15MulrSatInt16x8(FloatRegister rhs, FloatRegister lhsDest)
DEFINED_ON(x86_shared, arm64);
// Integer Negate // Integer Negate
inline void negInt8x16(FloatRegister src, FloatRegister dest) inline void negInt8x16(FloatRegister src, FloatRegister dest)

Просмотреть файл

@ -2274,6 +2274,11 @@ void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
Umull2(Simd2D(lhsDest), Simd4S(lhsDest), Simd4S(rhs)); Umull2(Simd2D(lhsDest), Simd4S(lhsDest), Simd4S(rhs));
} }
// arm64 lowering of the saturating rounding Q15 multiply: a single SQRDMULH
// over the 8H (eight halfword lanes) view of the registers, with `lhsDest`
// used as both first source and destination.
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
FloatRegister lhsDest) {
Sqrdmulh(Simd8H(lhsDest), Simd8H(lhsDest), Simd8H(rhs));
}
// Integer Negate // Integer Negate
void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) { void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {

Просмотреть файл

@ -2995,6 +2995,20 @@ class AssemblerX86Shared : public AssemblerShared {
MOZ_CRASH("unexpected operand kind"); MOZ_CRASH("unexpected operand kind");
} }
} }
void vpmulhrsw(const Operand& src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src1.kind()) {
case Operand::FPREG:
masm.vpmulhrsw_rr(src1.fpu(), src0.encoding(), dest.encoding());
break;
case Operand::MEM_REG_DISP:
masm.vpmulhrsw_mr(src1.disp(), src1.base(), src0.encoding(),
dest.encoding());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void vpmulld(const Operand& src1, FloatRegister src0, FloatRegister dest) { void vpmulld(const Operand& src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE41()); MOZ_ASSERT(HasSSE41());
switch (src1.kind()) { switch (src1.kind()) {

Просмотреть файл

@ -813,6 +813,15 @@ class BaseAssembler : public GenericAssembler {
threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, address, threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, address,
src0, dst); src0, dst);
} }
// Register-register form of vpmulhrsw.  Encoded as a three-byte SIMD op with
// the VEX_PD prefix in the ESCAPE_38 opcode map, opcode OP3_PMULHRSW_VdqWdq.
void vpmulhrsw_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, src1,
src0, dst);
}
// Memory-register form of vpmulhrsw: the second source is read from
// `offset(base)`.  Uses the same prefix, opcode map and opcode as the
// register-register form.
void vpmulhrsw_mr(int32_t offset, RegisterID base, XMMRegisterID src0,
XMMRegisterID dst) {
threeByteOpSimd("vpmulhrsw", VEX_PD, OP3_PMULHRSW_VdqWdq, ESCAPE_38, offset,
base, src0, dst);
}
void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) { void vaddps_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst); twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, src1, src0, dst);

Просмотреть файл

@ -2612,6 +2612,9 @@ void CodeGenerator::visitWasmBinarySimd128(LWasmBinarySimd128* ins) {
case wasm::SimdOp::I64x2ExtMulHighUI32x4: case wasm::SimdOp::I64x2ExtMulHighUI32x4:
masm.unsignedExtMulHighInt32x4(rhs, lhsDest); masm.unsignedExtMulHighInt32x4(rhs, lhsDest);
break; break;
case wasm::SimdOp::I16x8Q15MulrSatS:
masm.q15MulrSatInt16x8(rhs, lhsDest);
break;
# ifdef ENABLE_WASM_SIMD_WORMHOLE # ifdef ENABLE_WASM_SIMD_WORMHOLE
case wasm::SimdOp::MozWHSELFTEST: { case wasm::SimdOp::MozWHSELFTEST: {
static const int8_t mask[16] = {0xD, 0xE, 0xA, 0xD, 0xD, 0, 0, 0xD, static const int8_t mask[16] = {0xD, 0xE, 0xA, 0xD, 0xD, 0, 0, 0xD,

Просмотреть файл

@ -340,6 +340,7 @@ enum ThreeByteOpcodeID {
OP3_ROUNDPD_VpdWpd = 0x09, OP3_ROUNDPD_VpdWpd = 0x09,
OP3_ROUNDSS_VsdWsd = 0x0A, OP3_ROUNDSS_VsdWsd = 0x0A,
OP3_ROUNDSD_VsdWsd = 0x0B, OP3_ROUNDSD_VsdWsd = 0x0B,
OP3_PMULHRSW_VdqWdq = 0x0B,
OP3_BLENDPS_VpsWpsIb = 0x0C, OP3_BLENDPS_VpsWpsIb = 0x0C,
OP3_PBLENDW_VdqWdqIb = 0x0E, OP3_PBLENDW_VdqWdqIb = 0x0E,
OP3_PALIGNR_VdqWdqIb = 0x0F, OP3_PALIGNR_VdqWdqIb = 0x0F,

Просмотреть файл

@ -1694,6 +1694,15 @@ void MacroAssembler::unsignedExtMulHighInt32x4(FloatRegister rhs,
vpmuludq(Operand(scratch), lhsDest, lhsDest); vpmuludq(Operand(scratch), lhsDest, lhsDest);
} }
// x86-shared lowering of the saturating rounding Q15 multiply.
//
// PMULHRSW performs the rounding multiply but does not saturate: for the
// overflowing input pair 0x8000 * 0x8000 it leaves 0x8000 in the lane, where
// the saturated answer is 0x7FFF.  The remaining instructions patch up
// exactly those lanes.
void MacroAssembler::q15MulrSatInt16x8(FloatRegister rhs,
                                       FloatRegister lhsDest) {
  ScratchSimd128Scope overflowMask(*this);

  // lhsDest = rounding Q15 product of lhsDest and rhs (unsaturated).
  vpmulhrsw(Operand(rhs), lhsDest, lhsDest);

  // Copy the product and compare each lane against 0x8000, leaving the
  // comparison result in the scratch register.
  vmovdqa(lhsDest, overflowMask);
  vpcmpeqwSimd128(SimdConstant::SplatX8(0x8000), overflowMask);

  // XOR the comparison mask into the product; with an all-ones mask in the
  // matching lanes this turns 0x8000 into 0x7FFF and leaves other lanes as is.
  vpxor(overflowMask, lhsDest, lhsDest);
}
// Integer negate // Integer negate
void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) { void MacroAssembler::negInt8x16(FloatRegister src, FloatRegister dest) {

Просмотреть файл

@ -13298,6 +13298,10 @@ static void ExtMulHighUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.unsignedExtMulHighInt32x4(rs, rsd); masm.unsignedExtMulHighInt32x4(rs, rsd);
} }
// Baseline emitter for SimdOp::I16x8Q15MulrSatS: forwards to the
// MacroAssembler, with `rs` as the right-hand operand and `rsd` as the
// combined left-hand operand and destination.
static void Q15MulrSatS(MacroAssembler& masm, RegV128 rs, RegV128 rsd) {
masm.q15MulrSatInt16x8(rs, rsd);
}
static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond, static void CmpI8x16(MacroAssembler& masm, Assembler::Condition cond,
RegV128 rs, RegV128 rsd) { RegV128 rs, RegV128 rsd) {
masm.compareInt8x16(cond, rs, rsd); masm.compareInt8x16(cond, rs, rsd);
@ -15321,6 +15325,8 @@ bool BaseCompiler::emitBody() {
CHECK_NEXT(dispatchVectorBinary(ExtMulLowUI32x4)); CHECK_NEXT(dispatchVectorBinary(ExtMulLowUI32x4));
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4): case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
CHECK_NEXT(dispatchVectorBinary(ExtMulHighUI32x4)); CHECK_NEXT(dispatchVectorBinary(ExtMulHighUI32x4));
case uint32_t(SimdOp::I16x8Q15MulrSatS):
CHECK_NEXT(dispatchVectorBinary(Q15MulrSatS));
case uint32_t(SimdOp::I8x16Neg): case uint32_t(SimdOp::I8x16Neg):
CHECK_NEXT(dispatchVectorUnary(NegI8x16)); CHECK_NEXT(dispatchVectorUnary(NegI8x16));
case uint32_t(SimdOp::I16x8Neg): case uint32_t(SimdOp::I16x8Neg):

Просмотреть файл

@ -602,7 +602,7 @@ enum class SimdOp {
I16x8MaxU = 0x99, I16x8MaxU = 0x99,
I16x8ExtMulLowSI8x16 = 0x9a, I16x8ExtMulLowSI8x16 = 0x9a,
I16x8AvgrU = 0x9b, I16x8AvgrU = 0x9b,
// Unused = 0x9c I16x8Q15MulrSatS = 0x9c,
I16x8ExtMulHighSI8x16 = 0x9d, I16x8ExtMulHighSI8x16 = 0x9d,
I16x8ExtMulLowUI8x16 = 0x9e, I16x8ExtMulLowUI8x16 = 0x9e,
I16x8ExtMulHighUI8x16 = 0x9f, I16x8ExtMulHighUI8x16 = 0x9f,

Просмотреть файл

@ -4964,6 +4964,7 @@ static bool EmitBodyExprs(FunctionCompiler& f) {
case uint32_t(SimdOp::I64x2ExtMulHighSI32x4): case uint32_t(SimdOp::I64x2ExtMulHighSI32x4):
case uint32_t(SimdOp::I64x2ExtMulLowUI32x4): case uint32_t(SimdOp::I64x2ExtMulLowUI32x4):
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4): case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
case uint32_t(SimdOp::I16x8Q15MulrSatS):
CHECK(EmitBinarySimd128(f, /* commutative= */ true, SimdOp(op.b1))); CHECK(EmitBinarySimd128(f, /* commutative= */ true, SimdOp(op.b1)));
case uint32_t(SimdOp::V128AndNot): case uint32_t(SimdOp::V128AndNot):
case uint32_t(SimdOp::I8x16Sub): case uint32_t(SimdOp::I8x16Sub):

Просмотреть файл

@ -461,6 +461,7 @@ OpKind wasm::Classify(OpBytes op) {
case SimdOp::I64x2ExtMulHighSI32x4: case SimdOp::I64x2ExtMulHighSI32x4:
case SimdOp::I64x2ExtMulLowUI32x4: case SimdOp::I64x2ExtMulLowUI32x4:
case SimdOp::I64x2ExtMulHighUI32x4: case SimdOp::I64x2ExtMulHighUI32x4:
case SimdOp::I16x8Q15MulrSatS:
WASM_SIMD_OP(OpKind::Binary); WASM_SIMD_OP(OpKind::Binary);
case SimdOp::I8x16Neg: case SimdOp::I8x16Neg:
case SimdOp::I16x8Neg: case SimdOp::I16x8Neg:

Просмотреть файл

@ -1070,6 +1070,7 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env,
case uint32_t(SimdOp::I64x2ExtMulHighSI32x4): case uint32_t(SimdOp::I64x2ExtMulHighSI32x4):
case uint32_t(SimdOp::I64x2ExtMulLowUI32x4): case uint32_t(SimdOp::I64x2ExtMulLowUI32x4):
case uint32_t(SimdOp::I64x2ExtMulHighUI32x4): case uint32_t(SimdOp::I64x2ExtMulHighUI32x4):
case uint32_t(SimdOp::I16x8Q15MulrSatS):
CHECK(iter.readBinary(ValType::V128, &nothing, &nothing)); CHECK(iter.readBinary(ValType::V128, &nothing, &nothing));
case uint32_t(SimdOp::I8x16Neg): case uint32_t(SimdOp::I8x16Neg):