зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1691489 - Implement SIMD i8x16.popcnt op. r=lth
Differential Revision: https://phabricator.services.mozilla.com/D104770
This commit is contained in:
Родитель
c5bad10fbb
Коммит
82dc6f238e
|
@ -303,8 +303,7 @@ if (!wasmSimdEnabled()) {
|
|||
}
|
||||
} else {
|
||||
let reservedSimd = [
|
||||
0x62, 0x9a,
|
||||
0xa2, 0xa5, 0xa6, 0xaf,
|
||||
0x9a, 0xa2, 0xa5, 0xa6, 0xaf,
|
||||
0xb0, 0xb2, 0xb3, 0xb4, 0xbb,
|
||||
0xc0, 0xc2, 0xc5, 0xc6, 0xcf,
|
||||
0xd0, 0xd2, 0xd3, 0xd4, 0xd8, 0xd9, 0xda, 0xdb,
|
||||
|
|
|
@ -452,6 +452,22 @@ assertSame(get(mem32, 8, 8), [
|
|||
]);
|
||||
|
||||
|
||||
// i8x16.popcnt
//
// Builds a module exporting a memory "mem" and a function "i8x16_popcnt".
// The function loads a v128 from byte offset 16, applies i8x16.popcnt, and
// stores the result at byte offset 0.
// NOTE: this rebinds the file-level `ins` and `mem8` variables.
|
||||
|
||||
var ins = wasmEvalText(`
|
||||
(module
|
||||
(memory (export "mem") 1 1)
|
||||
(func (export "i8x16_popcnt")
|
||||
(v128.store (i32.const 0) (i8x16.popcnt (v128.load (i32.const 16)) )))
|
||||
)`);
|
||||
|
||||
// Signed 8-bit view over the module memory; literals such as 0x80 or 0xF0
// wrap to negative int8 values when stored, which does not change their bits.
var mem8 = new Int8Array(ins.exports.mem.buffer);
|
||||
|
||||
// Sixteen input lanes, presumably written starting at index 16 (`set` is a
// helper defined outside this view). Covers zero, every single-bit value,
// all-ones (-1), and several mixed patterns.
set(mem8, 16, [0, 1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80, 3, -1, 0xF0, 0x11, 0xFE, 0x0F, 0xE]);
|
||||
ins.exports.i8x16_popcnt();
|
||||
// Expected: the number of set bits in each input byte, in the same lane order.
assertSame(get(mem8, 0, 16), [0,1,1,1,1,1,1,1,1,2,8,4,2,7,4,3]);
|
||||
|
||||
|
||||
/// Double-precision conversion instructions.
|
||||
/// f64x2.convert_low_i32x4_{u,s} / i32x4.trunc_sat_f64x2_{u,s}_zero
|
||||
/// f32x4.demote_f64x2_zero / f64x2.promote_low_f32x4
|
||||
|
|
|
@ -2539,6 +2539,14 @@ class MacroAssembler : public MacroAssemblerSpecific {
|
|||
inline void bitwiseSelectSimd128(FloatRegister onTrue, FloatRegister onFalse,
|
||||
FloatRegister maskDest) DEFINED_ON(arm64);
|
||||
|
||||
// Population count
//
// Per-lane bit count over sixteen 8-bit lanes; this is what the
// wasm::SimdOp::I8x16Popcnt code generation calls.
//
// The two platform variants have different signatures: x86_shared needs an
// additional vector temp register (which it overwrites), arm64 does not.
|
||||
|
||||
inline void popcntInt8x16(FloatRegister src, FloatRegister dest,
|
||||
FloatRegister temp) DEFINED_ON(x86_shared);
|
||||
|
||||
inline void popcntInt8x16(FloatRegister src, FloatRegister dest)
|
||||
DEFINED_ON(arm64);
|
||||
|
||||
// Any lane true, ie, any bit set
|
||||
|
||||
inline void anyTrueSimd128(FloatRegister src, Register dest)
|
||||
|
|
|
@ -2544,6 +2544,12 @@ void MacroAssembler::bitwiseSelectSimd128(FloatRegister onTrue,
|
|||
Bsl(Simd16B(maskDest), Simd16B(onTrue), Simd16B(onFalse));
|
||||
}
|
||||
|
||||
// Population count
//
// arm64 implementation of the 8-bit-lane population count: emits a single
// Cnt with both registers viewed through Simd16B, writing `dest` from `src`.
// No temp register is needed on this platform.
|
||||
|
||||
void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest) {
|
||||
Cnt(Simd16B(dest), Simd16B(src));
|
||||
}
|
||||
|
||||
// Any lane true, ie, any bit set
|
||||
|
||||
void MacroAssembler::anyTrueSimd128(FloatRegister src, Register dest_) {
|
||||
|
|
|
@ -3341,6 +3341,9 @@ void CodeGenerator::visitWasmUnarySimd128(LWasmUnarySimd128* ins) {
|
|||
case wasm::SimdOp::V128Not:
|
||||
masm.bitwiseNotSimd128(src, dest);
|
||||
break;
|
||||
case wasm::SimdOp::I8x16Popcnt:
|
||||
masm.popcntInt8x16(src, dest, ToFloatRegister(ins->temp()));
|
||||
break;
|
||||
case wasm::SimdOp::I8x16Abs:
|
||||
masm.absInt8x16(src, dest);
|
||||
break;
|
||||
|
|
|
@ -1295,6 +1295,7 @@ void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
|
|||
case wasm::SimdOp::I32x4TruncUSatF32x4:
|
||||
case wasm::SimdOp::I32x4TruncSatF64x2SZero:
|
||||
case wasm::SimdOp::I32x4TruncSatF64x2UZero:
|
||||
case wasm::SimdOp::I8x16Popcnt:
|
||||
tempReg = tempSimd128();
|
||||
// Prefer src == dest to avoid an unconditional src->dest move.
|
||||
useAtStart = true;
|
||||
|
|
|
@ -1359,3 +1359,20 @@ void MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4(
|
|||
vaddpd(Operand(temp), dest, dest);
|
||||
vshufps(0x88, scratch, dest, dest);
|
||||
}
|
||||
|
||||
// Per-lane population count for sixteen 8-bit lanes (wasm i8x16.popcnt).
//
// Strategy: split every byte into its low and high nibble, use each nibble as
// an index into a 16-entry table of bit counts via vpshufb, and add the two
// lookups together.
//
//   src    - input vector; only read.
//   temp   - clobbered. Must not be the same register as src: src is read
//            again (by vpandn) after temp has already been overwritten.
//   output - receives the sixteen per-byte counts. It is first written only
//            after the last read of src.
//
// The scratch SIMD register is claimed for the duration of the function.
void MacroAssemblerX86Shared::popcntInt8x16(FloatRegister src,
                                            FloatRegister temp,
                                            FloatRegister output) {
  ScratchSimd128Scope scratch(asMasm());

  // 0x0f in every byte selects the low nibble of each lane. This is an
  // integer constant consumed only by integer operations, so load it through
  // the same loader used for the nibble table below rather than the
  // float-typed loader.
  asMasm().loadConstantSimd128(SimdConstant::SplatX16(0x0f), scratch);

  // temp = low nibble of every byte of src.
  asMasm().moveSimd128Int(src, temp);
  vpand(scratch, temp, temp);

  // scratch = src with the low-nibble bits masked off (and-not against the
  // 0x0f mask), i.e. the high nibbles still in their original position.
  vpandn(src, scratch, scratch);

  // counts[i] is the number of set bits in the 4-bit value i.
  int8_t counts[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
  asMasm().loadConstantSimd128(SimdConstant::CreateX16(counts), output);

  // Move each high nibble down into the low-nibble position. The shift works
  // on 16-bit lanes, but because the low nibble of every byte was cleared
  // above, nothing leaks from one byte into its neighbour.
  vpsrlw(Imm32(4), scratch, scratch);

  // output = counts[low nibble] for every lane. The table held in output is
  // consumed by this lookup, so it has to be loaded again for the second one.
  vpshufb(temp, output, output);

  // temp = counts[high nibble] for every lane.
  asMasm().loadConstantSimd128(SimdConstant::CreateX16(counts), temp);
  vpshufb(scratch, temp, temp);

  // output = counts[low nibble] + counts[high nibble].
  vpaddb(Operand(temp), output, output);
}
|
||||
|
|
|
@ -2206,6 +2206,13 @@ void MacroAssembler::bitwiseSelectSimd128(FloatRegister mask,
|
|||
MacroAssemblerX86Shared::selectSimd128(mask, onTrue, onFalse, temp, dest);
|
||||
}
|
||||
|
||||
// Population count
//
// x86-shared entry point for the 8-bit-lane population count. Forwards to the
// MacroAssemblerX86Shared implementation. Note the argument reordering: this
// method takes (src, dest, temp) while the shared implementation is declared
// as (src, temp, output).
|
||||
|
||||
void MacroAssembler::popcntInt8x16(FloatRegister src, FloatRegister dest,
|
||||
FloatRegister temp) {
|
||||
MacroAssemblerX86Shared::popcntInt8x16(src, temp, dest);
|
||||
}
|
||||
|
||||
// Comparisons (integer and floating-point)
|
||||
|
||||
void MacroAssembler::compareInt8x16(Assembler::Condition cond,
|
||||
|
|
|
@ -547,6 +547,8 @@ class MacroAssemblerX86Shared : public Assembler {
|
|||
void selectSimd128(FloatRegister mask, FloatRegister onTrue,
|
||||
FloatRegister onFalse, FloatRegister temp,
|
||||
FloatRegister output);
|
||||
// Population count over sixteen 8-bit lanes. `temp` is overwritten by the
// implementation and `output` receives the result. The parameter order here
// is (src, temp, output), unlike MacroAssembler::popcntInt8x16, which takes
// (src, dest, temp).
void popcntInt8x16(FloatRegister src, FloatRegister temp,
|
||||
FloatRegister output);
|
||||
|
||||
// SIMD inline methods private to the implementation, that appear to be used.
|
||||
|
||||
|
|
|
@ -14879,6 +14879,17 @@ static void WidenHighUI32x4(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
|
|||
masm.unsignedWidenHighInt32x4(rs, rd);
|
||||
}
|
||||
|
||||
// Baseline-compiler emitter for SimdOp::I8x16Popcnt; emitBody passes it to
// dispatchVectorUnary. Two shapes exist because the masm entry point differs
// by platform: on ARM64 it takes only source and destination, on every other
// target compiled here it also takes a vector temp.
# if defined(JS_CODEGEN_ARM64)
|
||||
static void PopcntI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
|
||||
masm.popcntInt8x16(rs, rd);
|
||||
}
|
||||
# else
|
||||
// Non-ARM64 variant: forwards the extra temp register to the masm.
static void PopcntI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd,
|
||||
RegV128 temp) {
|
||||
masm.popcntInt8x16(rs, rd, temp);
|
||||
}
|
||||
# endif // JS_CODEGEN_ARM64
|
||||
|
||||
// Baseline-compiler emitter for SimdOp::I8x16Abs; emitBody passes it to
// dispatchVectorUnary. Forwards source `rs` and destination `rd` to
// masm.absInt8x16.
static void AbsI8x16(MacroAssembler& masm, RegV128 rs, RegV128 rd) {
|
||||
masm.absInt8x16(rs, rd);
|
||||
}
|
||||
|
@ -16814,6 +16825,8 @@ bool BaseCompiler::emitBody() {
|
|||
CHECK_NEXT(dispatchVectorUnary(SqrtF64x2));
|
||||
case uint32_t(SimdOp::V128Not):
|
||||
CHECK_NEXT(dispatchVectorUnary(NotV128));
|
||||
case uint32_t(SimdOp::I8x16Popcnt):
|
||||
CHECK_NEXT(dispatchVectorUnary(PopcntI8x16));
|
||||
case uint32_t(SimdOp::I8x16Abs):
|
||||
CHECK_NEXT(dispatchVectorUnary(AbsI8x16));
|
||||
case uint32_t(SimdOp::I16x8Abs):
|
||||
|
|
|
@ -575,7 +575,7 @@ enum class SimdOp {
|
|||
F64x2PromoteLowF32x4 = 0x5f,
|
||||
I8x16Abs = 0x60,
|
||||
I8x16Neg = 0x61,
|
||||
// Unused = 0x62
|
||||
I8x16Popcnt = 0x62,
|
||||
I8x16AllTrue = 0x63,
|
||||
I8x16Bitmask = 0x64,
|
||||
I8x16NarrowSI16x8 = 0x65,
|
||||
|
|
|
@ -5112,6 +5112,7 @@ static bool EmitBodyExprs(FunctionCompiler& f) {
|
|||
case uint32_t(SimdOp::F64x2Neg):
|
||||
case uint32_t(SimdOp::F64x2Sqrt):
|
||||
case uint32_t(SimdOp::V128Not):
|
||||
case uint32_t(SimdOp::I8x16Popcnt):
|
||||
case uint32_t(SimdOp::I8x16Abs):
|
||||
case uint32_t(SimdOp::I16x8Abs):
|
||||
case uint32_t(SimdOp::I32x4Abs):
|
||||
|
|
|
@ -515,6 +515,7 @@ OpKind wasm::Classify(OpBytes op) {
|
|||
case SimdOp::F64x2Neg:
|
||||
case SimdOp::F64x2Sqrt:
|
||||
case SimdOp::V128Not:
|
||||
case SimdOp::I8x16Popcnt:
|
||||
case SimdOp::I8x16Abs:
|
||||
case SimdOp::I16x8Abs:
|
||||
case SimdOp::I32x4Abs:
|
||||
|
|
|
@ -1191,6 +1191,7 @@ static bool DecodeFunctionBodyExprs(const ModuleEnvironment& env,
|
|||
case uint32_t(SimdOp::F64x2Neg):
|
||||
case uint32_t(SimdOp::F64x2Sqrt):
|
||||
case uint32_t(SimdOp::V128Not):
|
||||
case uint32_t(SimdOp::I8x16Popcnt):
|
||||
case uint32_t(SimdOp::I8x16Abs):
|
||||
case uint32_t(SimdOp::I16x8Abs):
|
||||
case uint32_t(SimdOp::I32x4Abs):
|
||||
|
|
Загрузка…
Ссылка в новой задаче