Bug 1691489 - Implement SIMD i8x16.popcnt op. r=lth

Differential Revision: https://phabricator.services.mozilla.com/D104770
This commit is contained in:
Yury Delendik 2021-03-16 20:24:22 +00:00
Родитель c5bad10fbb
Коммит 82dc6f238e
14 изменённых файлов: 78 добавлений и 3 удалений

Просмотреть файл

@ -303,8 +303,7 @@ if (!wasmSimdEnabled()) {
}
} else {
let reservedSimd = [
0x62, 0x9a,
0xa2, 0xa5, 0xa6, 0xaf,
0x9a, 0xa2, 0xa5, 0xa6, 0xaf,
0xb0, 0xb2, 0xb3, 0xb4, 0xbb,
0xc0, 0xc2, 0xc5, 0xc6, 0xcf,
0xd0, 0xd2, 0xd3, 0xd4, 0xd8, 0xd9, 0xda, 0xdb,

Просмотреть файл

@ -452,6 +452,22 @@ assertSame(get(mem32, 8, 8), [
]);
// i8x16.popcnt
//
// The exported function loads a v128 from byte offset 16 of the memory,
// applies i8x16.popcnt to it, and stores the result at byte offset 0.
var ins = wasmEvalText(`
(module
(memory (export "mem") 1 1)
(func (export "i8x16_popcnt")
(v128.store (i32.const 0) (i8x16.popcnt (v128.load (i32.const 16)) )))
)`);
var mem8 = new Int8Array(ins.exports.mem.buffer);
// Inputs: zero, every single-bit byte value, and several multi-bit patterns.
// Literals above 0x7F wrap to negative numbers when stored through the
// Int8Array view, but the underlying bit pattern is unchanged; -1 is 0xFF.
// NOTE(review): set/get are helpers defined elsewhere in this test file;
// they are used here as (array, start index, values / count).
set(mem8, 16, [0, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 3, -1, 0xF0, 0x11, 0xFE, 0x0F, 0xE]);
ins.exports.i8x16_popcnt();
// Expected: the number of set bits in each input byte, lane for lane.
assertSame(get(mem8, 0, 16), [0,1,1,1,1,1,1,1,1,2,8,4,2,7,4,3]);
/// Double-precision conversion instructions.
/// f64x2.convert_low_i32x4_{u,s} / i32x4.trunc_sat_f64x2_{u,s}_zero
/// f32x4.demote_f64x2_zero / f64x2.promote_low_f32x4

Просмотреть файл

@ -2539,6 +2539,14 @@ class MacroAssembler : public MacroAssemblerSpecific {
inline void bitwiseSelectSimd128(FloatRegister onTrue, FloatRegister onFalse,
FloatRegister maskDest) DEFINED_ON(arm64);
// Population count
//
// Two platform-specific signatures: the x86_shared form takes an extra SIMD
// register `temp` in addition to `src` and `dest`; the arm64 form takes only
// `src` and `dest`.
inline void popcntInt8x16(FloatRegister src, FloatRegister dest,
FloatRegister temp) DEFINED_ON(x86_shared);
inline void popcntInt8x16(FloatRegister src, FloatRegister dest)
DEFINED_ON(arm64);
// Any lane true, ie, any bit set
inline void anyTrueSimd128(FloatRegister src, Register dest)

Просмотреть файл

@ -2544,6 +2544,12 @@ void MacroAssembler::bitwiseSelectSimd128(FloatRegister onTrue,
Bsl(Simd16B(maskDest), Simd16B(onTrue), Simd16B(onFalse));
}
// Population count: a single Cnt over the 16-byte-lane views of the source
// and destination registers.
void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest) {
  const auto input = Simd16B(src);
  const auto result = Simd16B(dest);
  Cnt(result, input);
}
// Any lane true, ie, any bit set
void MacroAssembler::anyTrueSimd128(FloatRegister src, Register dest_) {

Просмотреть файл

@ -3341,6 +3341,9 @@ void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
case wasm::SimdOp::V128Not:
masm.bitwiseNotSimd128(src, dest);
break;
case wasm::SimdOp::I8x16Popcnt:
masm.popcntInt8x16(src, dest, ToFloatRegister(ins->temp()));
break;
case wasm::SimdOp::I8x16Abs:
masm.absInt8x16(src, dest);
break;

Просмотреть файл

@ -1295,6 +1295,7 @@ void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
case wasm::SimdOp::I32x4TruncUSatF32x4:
case wasm::SimdOp::I32x4TruncSatF64x2SZero:
case wasm::SimdOp::I32x4TruncSatF64x2UZero:
case wasm::SimdOp::I8x16Popcnt:
tempReg = tempSimd128();
// Prefer src == dest to avoid an unconditional src->dest move.
useAtStart = true;

Просмотреть файл

@ -1359,3 +1359,20 @@ void MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4(
vaddpd(Operand(temp), dest, dest);
vshufps(0x88, scratch, dest, dest);
}
// Per-lane population count for sixteen 8-bit lanes.
//
// Each byte is split into its low and high nibble; each nibble indexes a
// 16-entry table of bit counts through a byte shuffle, and the two lookups
// are added together.
//
//   src    - input vector. Its last read is the vpandn below, which happens
//            before `output` is first written, so `src` may be the same
//            register as `output`.
//   temp   - caller-supplied SIMD temporary, clobbered. It is written while
//            `src` is still needed and read after `output` has been
//            written, so it must be distinct from both.
//   output - receives the per-byte bit counts.
//
// The SIMD scratch register is also clobbered.
void MacroAssemblerX86Shared::popcntInt8x16(FloatRegister src,
                                            FloatRegister temp,
                                            FloatRegister output) {
  ScratchSimd128Scope scratch(asMasm());

  // 0x0f in every byte selects the low nibble of each lane. This is an
  // integer constant, so it is loaded through the same generic loader as the
  // lookup tables below rather than through the float-specific loader.
  asMasm().loadConstantSimd128(SimdConstant::SplatX16(0x0f), scratch);

  // temp = low nibble of every byte of src.
  asMasm().moveSimd128Int(src, temp);
  vpand(scratch, temp, temp);

  // scratch = src with the low nibbles cleared, i.e. the high nibbles still
  // in their original position.
  vpandn(src, scratch, scratch);

  // counts[i] is the number of set bits in the 4-bit value i.
  int8_t counts[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
  asMasm().loadConstantSimd128(SimdConstant::CreateX16(counts), output);

  // Move the high nibbles down into the low-nibble position. The shift works
  // on 16-bit words, but the bits that cross a byte boundary were already
  // zeroed by the mask above.
  vpsrlw(Imm32(4), scratch, scratch);

  // output = bit count of each low nibble.
  vpshufb(temp, output, output);

  // The first shuffle consumed the table in `output`, so reload it into
  // `temp` (whose nibble indices are no longer needed) for the high nibbles.
  asMasm().loadConstantSimd128(SimdConstant::CreateX16(counts), temp);
  vpshufb(scratch, temp, temp);

  // Per-byte total = low-nibble count + high-nibble count.
  vpaddb(Operand(temp), output, output);
}

Просмотреть файл

@ -2206,6 +2206,13 @@ void MacroAssembler::bitwiseSelectSimd128(FloatRegister mask,
MacroAssemblerX86Shared::selectSimd128(mask, onTrue, onFalse, temp, dest);
}
// Population count
//
// x86-shared entry point for the 8-bit-lane population count; `temp` is a
// caller-supplied SIMD temporary handed through to the shared helper.
void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest,
FloatRegister temp) {
// The shared helper is called with (src, temp, dest), so `temp` and `dest`
// are passed in the opposite order from this function's own parameter list.
MacroAssemblerX86Shared::popcntInt8x16(src, temp, dest);
}
// Comparisons (integer and floating-point)
void MacroAssembler::compareInt8x16(Assembler::Condition cond,

Просмотреть файл

@ -547,6 +547,8 @@ class MacroAssemblerX86Shared : public Assembler {
void selectSimd128(FloatRegister mask, FloatRegister onTrue,
FloatRegister onFalse, FloatRegister temp,
FloatRegister output);
void popcntInt8x16(FloatRegister src, FloatRegister temp,
FloatRegister output);
// SIMD inline methods private to the implementation, that appear to be used.

Просмотреть файл

@ -14879,6 +14879,17 @@ static void WidenHighUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
masm.unsignedWidenHighInt32x4(rs, rd);
}
// Emitter for SimdOp::I8x16Popcnt, handed to dispatchVectorUnary. Its
// signature depends on the target: a temp register is passed through on
// every target except ARM64.
# if defined(JS_CODEGEN_ARM64)
// ARM64: forwards source and destination only.
static void PopcntI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
masm.popcntInt8x16(rs, rd);
}
# else
// Other targets: additionally forwards a temp vector register.
static void PopcntI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd,
RegV128 temp) {
masm.popcntInt8x16(rs, rd, temp);
}
# endif // JS_CODEGEN_ARM64
static void AbsI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
masm.absInt8x16(rs, rd);
}
@ -16814,6 +16825,8 @@ bool BaseCompiler::emitBody() {
CHECK_NEXT(dispatchVectorUnary(SqrtF64x2));
case uint32_t(SimdOp::V128Not):
CHECK_NEXT(dispatchVectorUnary(NotV128));
case uint32_t(SimdOp::I8x16Popcnt):
CHECK_NEXT(dispatchVectorUnary(PopcntI8x16));
case uint32_t(SimdOp::I8x16Abs):
CHECK_NEXT(dispatchVectorUnary(AbsI8x16));
case uint32_t(SimdOp::I16x8Abs):

Просмотреть файл

@ -575,7 +575,7 @@ enum class SimdOp {
F64x2PromoteLowF32x4 = 0x5f,
I8x16Abs = 0x60,
I8x16Neg = 0x61,
// Unused = 0x62
I8x16Popcnt = 0x62,
I8x16AllTrue = 0x63,
I8x16Bitmask = 0x64,
I8x16NarrowSI16x8 = 0x65,

Просмотреть файл

@ -5112,6 +5112,7 @@ static bool EmitBodyExprs(FunctionCompiler& f) {
case uint32_t(SimdOp::F64x2Neg):
case uint32_t(SimdOp::F64x2Sqrt):
case uint32_t(SimdOp::V128Not):
case uint32_t(SimdOp::I8x16Popcnt):
case uint32_t(SimdOp::I8x16Abs):
case uint32_t(SimdOp::I16x8Abs):
case uint32_t(SimdOp::I32x4Abs):

Просмотреть файл

@ -515,6 +515,7 @@ OpKind wasm::Classify(OpBytes op) {
case SimdOp::F64x2Neg:
case SimdOp::F64x2Sqrt:
case SimdOp::V128Not:
case SimdOp::I8x16Popcnt:
case SimdOp::I8x16Abs:
case SimdOp::I16x8Abs:
case SimdOp::I32x4Abs:

Просмотреть файл

@ -1191,6 +1191,7 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env,
case uint32_t(SimdOp::F64x2Neg):
case uint32_t(SimdOp::F64x2Sqrt):
case uint32_t(SimdOp::V128Not):
case uint32_t(SimdOp::I8x16Popcnt):
case uint32_t(SimdOp::I8x16Abs):
case uint32_t(SimdOp::I16x8Abs):
case uint32_t(SimdOp::I32x4Abs):