Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1753115 - Refactor usage of moveSimd128. r=jseward
* Replace with moveSimd128XXXIfNotAVX, when possible
* Fix I16x8ExtaddPairwiseI8x16S lowering; relax lowering for AVX
* Add postMoveSimd128IntIfNotAVX utility
* Add extadd_pairwise tests
* Fix neg-abs-not codegen tests
* Fix shift by imm8 VEX encoding

Differential Revision: https://phabricator.services.mozilla.com/D137581
This commit is contained in:
Parent e661e480a4
Commit bb2a780b8a
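The core pattern of the patch: each unconditional moveSimd128(src, dest) followed by a destructive two-operand SIMD instruction is replaced by a moveSimd128XXXIfNotAVX(src, dest) call whose return value feeds the next instruction. Under AVX the helper returns src untouched, so the three-operand VEX form reads src and writes dest directly with no move; without AVX it emits the move and returns dest, preserving the old in-place behavior. A minimal standalone sketch of that contract (hypothetical stand-in types, not SpiderMonkey source; the real helpers take FloatRegister and come in Int/Float flavors on MacroAssemblerX86Shared):

    #include <cassert>

    // Hypothetical stand-ins for the real register type and predicates.
    struct FloatReg { int id; };
    static bool hasAVX = true;                   // stand-in for HasAVX()
    static void emitMove(FloatReg, FloatReg) {}  // stand-in for vmovdqa/vmovaps

    // Returns the register the following SIMD instruction should read.
    FloatReg moveSimd128IfNotAVX(FloatReg src, FloatReg dest) {
      if (hasAVX) {
        return src;         // AVX: three-operand VEX form reads src, writes dest
      }
      emitMove(src, dest);  // SSE: copy src into dest first...
      return dest;          // ...and the two-operand form updates dest in place
    }

    int main() {
      FloatReg src{1}, dest{0};
      FloatReg in = moveSimd128IfNotAVX(src, dest);
      // e.g. vpsllw(count, in, dest): "dest = in << count" with AVX,
      // "dest <<= count" without.
      assert(in.id == (hasAVX ? src.id : dest.id));
      return 0;
    }

The selectDestIfAVX helper added by the patch is the same idea applied to choosing an output register (HasAVX() ? dest : src).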
@@ -433,3 +433,30 @@ codegenTestX64_v128xLITERAL_v128_avxhack(
`c5 f1 eb 05 ${RIPRADDR} vporx ${RIPR}, %xmm1, %xmm0`],
['v128.xor', '(v128.const i8x16 1 2 1 2 1 2 1 2 1 2 1 2 1 2 1 2)',
`c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0`]]);

// Shift by constant encodings
codegenTestX64_v128xLITERAL_v128_avxhack(
[['i8x16.shl', '(i32.const 2)', `
c5 f1 fc c1 vpaddb %xmm1, %xmm1, %xmm0
66 0f fc c0 paddb %xmm0, %xmm0`],
['i8x16.shl', '(i32.const 4)', `
c5 f1 db 05 ${RIPRADDR} vpandx ${RIPR}, %xmm1, %xmm0
66 0f 71 f0 04 psllw \\$0x04, %xmm0`],
['i16x8.shl', '(i32.const 1)',
'c5 f9 71 f1 01 vpsllw \\$0x01, %xmm1, %xmm0'],
['i16x8.shr_s', '(i32.const 3)',
'c5 f9 71 e1 03 vpsraw \\$0x03, %xmm1, %xmm0'],
['i16x8.shr_u', '(i32.const 2)',
'c5 f9 71 d1 02 vpsrlw \\$0x02, %xmm1, %xmm0'],
['i32x4.shl', '(i32.const 5)',
'c5 f9 72 f1 05 vpslld \\$0x05, %xmm1, %xmm0'],
['i32x4.shr_s', '(i32.const 2)',
'c5 f9 72 e1 02 vpsrad \\$0x02, %xmm1, %xmm0'],
['i32x4.shr_u', '(i32.const 5)',
'c5 f9 72 d1 05 vpsrld \\$0x05, %xmm1, %xmm0'],
['i64x2.shr_s', '(i32.const 7)', `
c5 79 70 f9 f5 vpshufd \\$0xF5, %xmm1, %xmm15
66 41 0f 72 e7 1f psrad \\$0x1F, %xmm15
c4 c1 71 ef c7 vpxor %xmm15, %xmm1, %xmm0
66 0f 73 d0 07 psrlq \\$0x07, %xmm0
66 41 0f ef c7 pxor %xmm15, %xmm0`]]);

@@ -1,4 +1,4 @@
// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() || getBuildConfiguration().simulator; include:codegen-x64-test.js
// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js

// Test that there are no extraneous moves for variable SIMD negate, abs, and
// not instructions. See README-codegen.md for general information about this

@@ -0,0 +1,38 @@
// |jit-test| skip-if: !wasmSimdEnabled() || !hasDisassembler() || wasmCompileMode() != "ion" || !getBuildConfiguration().x64 || getBuildConfiguration().simulator; include:codegen-x64-test.js

// Tests for SIMD add pairwise instructions.

if (!isAvxPresent()) {

codegenTestX64_IGNOREDxv128_v128(
[['i16x8.extadd_pairwise_i8x16_s', `
66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0
66 0f 38 04 c1 pmaddubsw %xmm1, %xmm0`],
['i16x8.extadd_pairwise_i8x16_u', `
66 0f 6f c1 movdqa %xmm1, %xmm0
66 0f 38 04 05 ${RIPRADDR} pmaddubswx ${RIPR}, %xmm0`],
['i32x4.extadd_pairwise_i16x8_s', `
66 0f 6f c1 movdqa %xmm1, %xmm0
66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0`],
['i32x4.extadd_pairwise_i16x8_u', `
66 0f 6f c1 movdqa %xmm1, %xmm0
66 0f ef 05 ${RIPRADDR} pxorx ${RIPR}, %xmm0
66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0
66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`]]);

} else {

codegenTestX64_IGNOREDxv128_v128(
[['i16x8.extadd_pairwise_i8x16_s', `
66 0f 6f 05 ${RIPRADDR} movdqax ${RIPR}, %xmm0
66 0f 38 04 c1 pmaddubsw %xmm1, %xmm0`],
['i16x8.extadd_pairwise_i8x16_u', `
c4 e2 71 04 05 ${RIPRADDR} vpmaddubswx ${RIPR}, %xmm1, %xmm0`],
['i32x4.extadd_pairwise_i16x8_s', `
c5 f1 f5 05 ${RIPRADDR} vpmaddwdx ${RIPR}, %xmm1, %xmm0`],
['i32x4.extadd_pairwise_i16x8_u', `
c5 f1 ef 05 ${RIPRADDR} vpxorx ${RIPR}, %xmm1, %xmm0
66 0f f5 05 ${RIPRADDR} pmaddwdx ${RIPR}, %xmm0
66 0f fe 05 ${RIPRADDR} padddx ${RIPR}, %xmm0`]]);

}
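The expected sequences above lean on pmaddubsw/pmaddwd with an all-ones constant: pmaddubsw multiplies unsigned bytes of one operand by signed bytes of the other and sums adjacent pairs into 16-bit lanes, and pmaddwd does the signed 16-bit equivalent into 32-bit lanes, so a multiplier of 1 turns both into a pairwise widening add. The unsigned 16x8 case has no single instruction and is built from a 0x8000 bias, pmaddwd, and a 0x00010000 correction. A scalar model of one output lane (illustrative only, not SpiderMonkey code):

    #include <cstdint>

    // One lane of i16x8.extadd_pairwise_i8x16_u, i.e. pmaddubsw(src, splat(1)):
    // unsigned bytes times signed 1, adjacent pair summed into a 16-bit lane.
    int16_t extadd_pairwise_u8(uint8_t lo, uint8_t hi) {
      return int16_t(int16_t(lo) * 1 + int16_t(hi) * 1);
    }

    // One lane of i32x4.extadd_pairwise_i16x8_s, i.e. pmaddwd(src, splat(1)).
    int32_t extadd_pairwise_s16(int16_t lo, int16_t hi) {
      return int32_t(lo) * 1 + int32_t(hi) * 1;
    }

    // The unsigned 16x8 variant: xor with 0x8000 reinterprets each lane as
    // (value - 0x8000) in signed terms, the signed pmaddwd is applied, and the
    // two biases are undone with a single add of 0x00010000 per 32-bit lane.
    uint32_t extadd_pairwise_u16(uint16_t lo, uint16_t hi) {
      int32_t bl = int32_t(lo) - 0x8000;
      int32_t bh = int32_t(hi) - 0x8000;
      return uint32_t(bl + bh + 0x00010000);
    }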
@@ -1118,6 +1118,10 @@ class BaseAssemblerX64 : public BaseAssembler {
return twoByteRipOpImmSimd("vcmppd", VEX_PD, OP2_CMPPD_VpdWpd,
X86Encoding::ConditionCmp_LE, src, dst);
}
[[nodiscard]] JmpSrc vpmaddubsw_ripr(XMMRegisterID src, XMMRegisterID dst) {
return threeByteRipOpSimd("vpmaddubsw", VEX_PD, OP3_PMADDUBSW_VdqWdq,
ESCAPE_38, src, dst);
}

// BMI instructions:

@@ -408,6 +408,13 @@ void MacroAssemblerX64::vcmplepdSimd128(const SimdConstant& v,
vpRiprOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX64::vcmplepd_ripr);
}

void MacroAssemblerX64::vpmaddubswSimd128(const SimdConstant& v,
FloatRegister lhs,
FloatRegister dest) {
vpRiprOpSimd128(v, lhs, dest,
&X86Encoding::BaseAssemblerX64::vpmaddubsw_ripr);
}

void MacroAssemblerX64::bindOffsets(
const MacroAssemblerX86Shared::UsesVector& uses) {
for (JmpSrc src : uses) {

@@ -1100,6 +1100,8 @@ class MacroAssemblerX64 : public MacroAssemblerX86Shared {
FloatRegister dest);
void vcmplepdSimd128(const SimdConstant& v, FloatRegister lhs,
FloatRegister dest);
void vpmaddubswSimd128(const SimdConstant& v, FloatRegister lhs,
FloatRegister dest);

void loadWasmPinnedRegsFromTls() {
loadPtr(Address(WasmTlsReg, offsetof(wasm::TlsData, memoryBase)), HeapReg);
@@ -578,6 +578,11 @@ class BaseAssembler : public GenericAssembler {
threeByteOpSimd("vpmaddubsw", VEX_PD, OP3_PMADDUBSW_VdqWdq, ESCAPE_38, src1,
src0, dst);
}
void vpmaddubsw_mr(const void* address, XMMRegisterID src0,
XMMRegisterID dst) {
threeByteOpSimd("vpmaddubsw", VEX_PD, OP3_PMADDUBSW_VdqWdq, ESCAPE_38,
address, src0, dst);
}

void vpaddb_rr(XMMRegisterID src1, XMMRegisterID src0, XMMRegisterID dst) {
twoByteOpSimd("vpaddb", VEX_PD, OP2_PADDB_VdqWdq, src1, src0, dst);

@@ -5328,7 +5333,8 @@ class BaseAssembler : public GenericAssembler {

spew("%-11s$%d, %s, %s", name, int32_t(imm), XMMRegName(src),
XMMRegName(dst));
m_formatter.twoByteOpVex(VEX_PD, opcode, (RegisterID)dst, src,
// For shift instructions, destination is stored in vvvv field.
m_formatter.twoByteOpVex(VEX_PD, opcode, (RegisterID)src, dst,
(int)shiftKind);
m_formatter.immediate8u(imm);
}

@@ -2984,9 +2984,7 @@ void CodeGenerator::visitWasmConstantShiftSimd128(
int32_t shift = ins->shift();

if (shift == 0) {
if (src != dest) {
masm.moveSimd128(src, dest);
}
masm.moveSimd128(src, dest);
return;
}
@@ -1231,7 +1231,12 @@ void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
case wasm::SimdOp::I64x2ShrS: {
auto* lir = new (alloc())
LWasmSignReplicationSimd128(useRegisterAtStart(lhs));
defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
if (isThreeOpAllowed()) {
define(lir, ins);
} else {
// For non-AVX, it is always beneficial to reuse the input.
defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
}
return;
}
default:

@@ -1242,11 +1247,14 @@ void LIRGenerator::visitWasmShiftSimd128(MWasmShiftSimd128* ins) {
# ifdef DEBUG
js::wasm::ReportSimdAnalysis("shift -> constant shift");
# endif
// Almost always beneficial, and never detrimental, to reuse the input if
// possible.
auto* lir = new (alloc())
LWasmConstantShiftSimd128(useRegisterAtStart(lhs), shiftCount);
defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
if (isThreeOpAllowed()) {
define(lir, ins);
} else {
// For non-AVX, it is always beneficial to reuse the input.
defineReuseInput(lir, ins, LWasmConstantShiftSimd128::Src);
}
return;
}

@@ -1449,8 +1457,11 @@ void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
case wasm::SimdOp::I16x8Neg:
case wasm::SimdOp::I32x4Neg:
case wasm::SimdOp::I64x2Neg:
case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
// Prefer src != dest to avoid an unconditional src->temp move.
MOZ_ASSERT(!useAtStart && !reuseInput);
MOZ_ASSERT(!reuseInput);
// If AVX is enabled, we prefer useRegisterAtStart.
useAtStart = isThreeOpAllowed();
break;
case wasm::SimdOp::F32x4Neg:
case wasm::SimdOp::F64x2Neg:

@@ -1465,7 +1476,6 @@ void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
case wasm::SimdOp::I64x2Abs:
case wasm::SimdOp::I32x4TruncSatF32x4S:
case wasm::SimdOp::F32x4ConvertI32x4U:
case wasm::SimdOp::I16x8ExtaddPairwiseI8x16S:
case wasm::SimdOp::I16x8ExtaddPairwiseI8x16U:
case wasm::SimdOp::I32x4ExtaddPairwiseI16x8S:
case wasm::SimdOp::I32x4ExtaddPairwiseI16x8U:

@@ -1476,18 +1486,19 @@ void LIRGenerator::visitWasmUnarySimd128(MWasmUnarySimd128* ins) {
case wasm::SimdOp::I64x2ExtendHighI32x4S:
case wasm::SimdOp::I64x2ExtendHighI32x4U:
// Prefer src == dest to avoid an unconditional src->dest move
// for better performance (e.g. non-PSHUFD use).
// for better performance in non-AVX mode (e.g. non-PSHUFD use).
useAtStart = true;
reuseInput = true;
reuseInput = !isThreeOpAllowed();
break;
case wasm::SimdOp::I32x4TruncSatF32x4U:
case wasm::SimdOp::I32x4TruncSatF64x2SZero:
case wasm::SimdOp::I32x4TruncSatF64x2UZero:
case wasm::SimdOp::I8x16Popcnt:
tempReg = tempSimd128();
// Prefer src == dest to avoid an unconditional src->dest move.
// Prefer src == dest to avoid an unconditional src->dest move
// in non-AVX mode.
useAtStart = true;
reuseInput = true;
reuseInput = !isThreeOpAllowed();
break;
case wasm::SimdOp::I16x8ExtendLowI8x16S:
case wasm::SimdOp::I16x8ExtendHighI8x16S:
@@ -55,8 +55,8 @@ void MacroAssemblerX86Shared::splatX4(FloatRegister input,
vbroadcastss(Operand(input), output);
return;
}
asMasm().moveSimd128Float(input.asSimd128(), output);
vshufps(0, output, output, output);
input = asMasm().moveSimd128FloatIfNotAVX(input.asSimd128(), output);
vshufps(0, input, input, output);
}

void MacroAssemblerX86Shared::splatX2(FloatRegister input,

@@ -251,8 +251,9 @@ void MacroAssemblerX86Shared::compareInt8x16(FloatRegister lhs, Operand rhs,
loadAlignedSimd128Int(rhs, scratch);
}
// src := src > lhs (i.e. lhs < rhs)
vpcmpgtb(Operand(lhs), scratch, scratch);
moveSimd128Int(scratch, output);
FloatRegister outputTemp = selectDestIfAVX(scratch, output);
vpcmpgtb(Operand(lhs), scratch, outputTemp);
moveSimd128Int(outputTemp, output);
break;
}
case Assembler::Condition::NotEqual:

@@ -351,8 +352,9 @@ void MacroAssemblerX86Shared::compareInt16x8(FloatRegister lhs, Operand rhs,
loadAlignedSimd128Int(rhs, scratch);
}
// src := src > lhs (i.e. lhs < rhs)
vpcmpgtw(Operand(lhs), scratch, scratch);
moveSimd128Int(scratch, output);
FloatRegister outputTemp = selectDestIfAVX(scratch, output);
vpcmpgtw(Operand(lhs), scratch, outputTemp);
moveSimd128Int(outputTemp, output);
break;
}
case Assembler::Condition::NotEqual:

@@ -450,8 +452,9 @@ void MacroAssemblerX86Shared::compareInt32x4(FloatRegister lhs, Operand rhs,
loadAlignedSimd128Int(rhs, scratch);
}
// src := src > lhs (i.e. lhs < rhs)
vpcmpgtd(Operand(lhs), scratch, scratch);
moveSimd128Int(scratch, output);
FloatRegister outputTemp = selectDestIfAVX(scratch, output);
vpcmpgtd(Operand(lhs), scratch, outputTemp);
moveSimd128Int(outputTemp, output);
break;
}
case Assembler::Condition::NotEqual:

@@ -583,8 +586,8 @@ void MacroAssemblerX86Shared::compareForOrderingInt64x2(
vpsubq(Operand(lhs), temp1, temp1);
vpcmpeqd(rhs, temp2, temp2);
vandpd(temp2, temp1, temp1);
asMasm().moveSimd128(lhs, output);
vpcmpgtd(rhs, output, output);
lhs = asMasm().moveSimd128IntIfNotAVX(lhs, output);
vpcmpgtd(rhs, lhs, output);
vpor(Operand(temp1), output, output);
vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output);
break;

@@ -593,8 +596,8 @@ void MacroAssemblerX86Shared::compareForOrderingInt64x2(
vmovdqa(Operand(lhs), temp2);
vpcmpgtd(Operand(lhs), temp1, temp1);
vpcmpeqd(Operand(rhs), temp2, temp2);
asMasm().moveSimd128(lhs, output);
vpsubq(rhs, output, output);
lhs = asMasm().moveSimd128IntIfNotAVX(lhs, output);
vpsubq(rhs, lhs, output);
vandpd(temp2, output, output);
vpor(Operand(temp1), output, output);
vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output);

@@ -604,8 +607,8 @@ void MacroAssemblerX86Shared::compareForOrderingInt64x2(
vmovdqa(Operand(lhs), temp2);
vpcmpgtd(Operand(lhs), temp1, temp1);
vpcmpeqd(Operand(rhs), temp2, temp2);
asMasm().moveSimd128(lhs, output);
vpsubq(rhs, output, output);
lhs = asMasm().moveSimd128IntIfNotAVX(lhs, output);
vpsubq(rhs, lhs, output);
vandpd(temp2, output, output);
vpor(Operand(temp1), output, output);
vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output);

@@ -617,8 +620,8 @@ void MacroAssemblerX86Shared::compareForOrderingInt64x2(
vpsubq(Operand(lhs), temp1, temp1);
vpcmpeqd(rhs, temp2, temp2);
vandpd(temp2, temp1, temp1);
asMasm().moveSimd128(lhs, output);
vpcmpgtd(rhs, output, output);
lhs = asMasm().moveSimd128IntIfNotAVX(lhs, output);
vpcmpgtd(rhs, lhs, output);
vpor(Operand(temp1), output, output);
vpshufd(MacroAssembler::ComputeShuffleMask(1, 1, 3, 3), output, output);
asMasm().bitwiseXorSimd128(output, allOnes, output);

@@ -967,17 +970,22 @@ void MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(
void MacroAssemblerX86Shared::packedLeftShiftByScalarInt8x16(
Imm32 count, FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(count.value <= 7);
asMasm().moveSimd128(src, dest);
if (MOZ_UNLIKELY(count.value == 0)) {
moveSimd128Int(src, dest);
return;
}
src = asMasm().moveSimd128IntIfNotAVX(src, dest);
// Use the doubling trick for low shift counts, otherwise mask off the bits
// that are shifted out of the low byte of each word and use word shifts. The
// optimal cutoff remains to be explored.
if (count.value <= 3) {
for (int32_t shift = count.value; shift > 0; --shift) {
asMasm().addInt8x16(dest, dest);
vpaddb(Operand(src), src, dest);
for (int32_t shift = count.value - 1; shift > 0; --shift) {
vpaddb(Operand(dest), dest, dest);
}
} else {
asMasm().bitwiseAndSimd128(
dest, SimdConstant::SplatX16(0xFF >> count.value), dest);
asMasm().bitwiseAndSimd128(src, SimdConstant::SplatX16(0xFF >> count.value),
dest);
vpsllw(count, dest, dest);
}
}
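A scalar model of the byte-lane left shift just above: x86 has no 8-bit SIMD shift, so small counts are handled by repeated vpaddb (each one doubles every byte) and larger counts by masking off the bits that would spill into the neighboring byte before using the 16-bit psllw. Sketch of one lane (illustrative only, not SpiderMonkey code):

    #include <cstdint>

    // Left-shift one byte lane by a constant count in [1, 7], mirroring
    // packedLeftShiftByScalarInt8x16 above.
    uint8_t shl_i8_lane(uint8_t x, int count) {
      if (count <= 3) {
        // Doubling trick: one vpaddb per shift step.
        for (int i = 0; i < count; i++) {
          x = uint8_t(x + x);
        }
        return x;
      }
      // Mask off the bits a 16-bit shift would push into the next byte, then
      // the psllw over 16-bit lanes behaves like an 8-bit shift.
      x = uint8_t(x & (0xFF >> count));
      return uint8_t(x << count);
    }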
@@ -1070,10 +1078,10 @@ void MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2(
FloatRegister in, Register count, FloatRegister temp, FloatRegister dest) {
ScratchSimd128Scope scratch(asMasm());
vmovd(count, temp);
asMasm().moveSimd128(in, dest);
asMasm().signReplicationInt64x2(in, scratch);
in = asMasm().moveSimd128FloatIfNotAVX(in, dest);
// Invert if negative, shift all, invert back if negative.
vpxor(Operand(scratch), dest, dest);
vpxor(Operand(scratch), in, dest);
vpsrlq(temp, dest, dest);
vpxor(Operand(scratch), dest, dest);
}

@@ -1088,10 +1096,10 @@ void MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt64x2(
void MacroAssemblerX86Shared::packedRightShiftByScalarInt64x2(
Imm32 count, FloatRegister src, FloatRegister dest) {
ScratchSimd128Scope scratch(asMasm());
asMasm().moveSimd128(src, dest);
asMasm().signReplicationInt64x2(src, scratch);
// Invert if negative, shift all, invert back if negative.
vpxor(Operand(scratch), dest, dest);
src = asMasm().moveSimd128FloatIfNotAVX(src, dest);
vpxor(Operand(scratch), src, dest);
vpsrlq(Imm32(count.value & 63), dest, dest);
vpxor(Operand(scratch), dest, dest);
}
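Both 64-bit arithmetic right-shift variants above synthesize the missing instruction from the sign mask produced by signReplicationInt64x2: xor the value with the mask, do a logical shift, then xor with the mask again. A scalar model of the identity being used (illustrative only, not SpiderMonkey code):

    #include <cstdint>

    // Arithmetic right shift of one 64-bit lane built from a logical shift,
    // mirroring packedRightShiftByScalarInt64x2 above.
    int64_t sar_i64_lane(int64_t x, unsigned count) {
      count &= 63;
      uint64_t mask = x < 0 ? ~0ULL : 0ULL;       // signReplicationInt64x2
      uint64_t t = (uint64_t(x) ^ mask) >> count;  // invert if negative, shift
      return int64_t(t ^ mask);                    // invert back if negative
    }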
@@ -1104,11 +1112,16 @@ void MacroAssemblerX86Shared::selectSimd128(FloatRegister mask,
// Normally the codegen will attempt to enforce these register assignments so
// that the moves are avoided.

asMasm().moveSimd128Int(onTrue, output);
asMasm().moveSimd128Int(mask, temp);
onTrue = asMasm().moveSimd128IntIfNotAVX(onTrue, output);
if (MOZ_UNLIKELY(mask == onTrue)) {
vpor(Operand(onFalse), onTrue, output);
return;
}

vpand(Operand(temp), output, output);
vpandn(Operand(onFalse), temp, temp);
mask = asMasm().moveSimd128IntIfNotAVX(mask, temp);

vpand(Operand(mask), onTrue, output);
vpandn(Operand(onFalse), mask, temp);
vpor(Operand(temp), output, output);
}
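selectSimd128 above is a plain bitwise select, result = (mask & onTrue) | (~mask & onFalse), computed by the vpand/vpandn/vpor triple once the operands are in (or, under AVX, simply read from) the right registers; the early vpor covers the mask == onTrue aliasing case, where the select collapses to onTrue | onFalse. A per-chunk scalar model (illustrative only, not SpiderMonkey code):

    #include <cstdint>

    // Bitwise select, one 64-bit chunk at a time, mirroring selectSimd128.
    uint64_t bitselect(uint64_t mask, uint64_t onTrue, uint64_t onFalse) {
      return (mask & onTrue) | (~mask & onFalse);  // vpand / vpandn / vpor
    }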
@@ -1131,7 +1144,6 @@ void MacroAssemblerX86Shared::unsignedConvertInt32x4ToFloat32x4(
void MacroAssemblerX86Shared::truncSatFloat32x4ToInt32x4(FloatRegister src,
FloatRegister dest) {
ScratchSimd128Scope scratch(asMasm());
asMasm().moveSimd128Float(src, dest);

// The cvttps2dq instruction is the workhorse but does not handle NaN or out
// of range values as we need it to. We want to saturate too-large positive

@@ -1139,9 +1151,10 @@ void MacroAssemblerX86Shared::truncSatFloat32x4ToInt32x4(FloatRegister src,
// become 0.

// Convert NaN to 0 by masking away values that compare unordered to itself.
vmovaps(dest, scratch);
vmovaps(src, scratch);
vcmpeqps(Operand(scratch), scratch, scratch);
vpand(Operand(scratch), dest, dest);
src = asMasm().moveSimd128FloatIfNotAVX(src, dest);
vpand(Operand(scratch), src, dest);

// Compute the complement of each non-NaN lane's sign bit, we'll need this to
// correct the result of cvttps2dq. All other output bits are garbage.

@@ -1165,7 +1178,7 @@ void MacroAssemblerX86Shared::truncSatFloat32x4ToInt32x4(FloatRegister src,
void MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4(
FloatRegister src, FloatRegister temp, FloatRegister dest) {
ScratchSimd128Scope scratch(asMasm());
asMasm().moveSimd128Float(src, dest);
src = asMasm().moveSimd128FloatIfNotAVX(src, dest);

// The cvttps2dq instruction is the workhorse but does not handle NaN or out
// of range values as we need it to. We want to saturate too-large positive

@@ -1173,7 +1186,7 @@ void MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4(

// Convert NaN and negative values to zeroes in dest.
vpxor(Operand(scratch), scratch, scratch);
vmaxps(Operand(scratch), dest, dest);
vmaxps(Operand(scratch), src, dest);

// Place the largest positive signed integer in all lanes in scratch.
// We use it to bias the conversion to handle edge cases.

@@ -1217,14 +1230,14 @@ void MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4(
void MacroAssemblerX86Shared::unsignedTruncSatFloat32x4ToInt32x4Relaxed(
FloatRegister src, FloatRegister dest) {
ScratchSimd128Scope scratch(asMasm());
asMasm().moveSimd128Float(src, dest);
src = asMasm().moveSimd128FloatIfNotAVX(src, dest);

// Place lanes below 80000000h into dest, otherwise into scratch.
// Keep dest or scratch 0 as default.
asMasm().loadConstantSimd128Float(SimdConstant::SplatX4(0x4f000000), scratch);
vcmpltps(Operand(src), scratch, scratch);
vpand(Operand(src), scratch, scratch);
vpxor(Operand(scratch), dest, dest);
vpxor(Operand(scratch), src, dest);

// Convert lanes below 80000000h into unsigned int without issues.
vcvttps2dq(dest, dest);

@@ -1267,10 +1280,10 @@ void MacroAssemblerX86Shared::truncSatFloat64x2ToInt32x4(FloatRegister src,
void MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4(
FloatRegister src, FloatRegister temp, FloatRegister dest) {
ScratchSimd128Scope scratch(asMasm());
asMasm().moveSimd128Float(src, dest);
src = asMasm().moveSimd128FloatIfNotAVX(src, dest);

vxorpd(scratch, scratch, scratch);
vmaxpd(Operand(scratch), dest, dest);
vmaxpd(Operand(scratch), src, dest);

asMasm().loadConstantSimd128Float(SimdConstant::SplatX2(4294967295.0), temp);
vminpd(Operand(temp), dest, dest);

@@ -1284,11 +1297,10 @@ void MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4(
void MacroAssemblerX86Shared::unsignedTruncSatFloat64x2ToInt32x4Relaxed(
FloatRegister src, FloatRegister dest) {
ScratchSimd128Scope scratch(asMasm());
asMasm().moveSimd128Float(src, dest);

// The same as unsignedConvertInt32x4ToFloat64x2, but without NaN
// and out-of-bounds checks.
vroundpd(SSERoundingMode::Trunc, Operand(dest), dest);
vroundpd(SSERoundingMode::Trunc, Operand(src), dest);
asMasm().loadConstantSimd128Float(SimdConstant::SplatX2(4503599627370496.0),
scratch);
vaddpd(Operand(scratch), dest, dest);

@@ -1299,9 +1311,9 @@ void MacroAssemblerX86Shared::popcntInt8x16(FloatRegister src,
FloatRegister temp,
FloatRegister output) {
ScratchSimd128Scope scratch(asMasm());
asMasm().loadConstantSimd128Float(SimdConstant::SplatX16(0x0f), scratch);
asMasm().moveSimd128Int(src, temp);
vpand(scratch, temp, temp);
asMasm().loadConstantSimd128Int(SimdConstant::SplatX16(0x0f), scratch);
FloatRegister srcForTemp = asMasm().moveSimd128IntIfNotAVX(src, temp);
vpand(scratch, srcForTemp, temp);
vpandn(src, scratch, scratch);
int8_t counts[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
asMasm().loadConstantSimd128(SimdConstant::CreateX16(counts), output);
@@ -1452,14 +1452,14 @@ void MacroAssembler::concatAndRightShiftSimd128(FloatRegister lhs,

void MacroAssembler::leftShiftSimd128(Imm32 count, FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
vpslldq(count, dest, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpslldq(count, src, dest);
}

void MacroAssembler::rightShiftSimd128(Imm32 count, FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
vpsrldq(count, dest, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpsrldq(count, src, dest);
}

// Reverse bytes in lanes.

@@ -1467,10 +1467,10 @@ void MacroAssembler::rightShiftSimd128(Imm32 count, FloatRegister src,
void MacroAssembler::reverseInt16x8(FloatRegister src, FloatRegister dest) {
// Byteswap is MOV + PSLLW + PSRLW + POR, a small win over PSHUFB.
ScratchSimd128Scope scratch(*this);
moveSimd128(src, dest);
moveSimd128(src, scratch);
vpsllw(Imm32(8), dest, dest);
vpsrlw(Imm32(8), scratch, scratch);
FloatRegister srcForScratch = moveSimd128IntIfNotAVX(src, scratch);
vpsrlw(Imm32(8), srcForScratch, scratch);
src = moveSimd128IntIfNotAVX(src, dest);
vpsllw(Imm32(8), src, dest);
vpor(scratch, dest, dest);
}
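reverseInt16x8 above swaps the two bytes of each 16-bit lane with a left shift, a right shift, and an or, which avoids the shuffle-control constant a pshufb would need. Scalar model of one lane (illustrative only, not SpiderMonkey code):

    #include <cstdint>

    // Byte swap of one 16-bit lane: vpsllw(8) | vpsrlw(8), as in reverseInt16x8.
    uint16_t bswap16_lane(uint16_t x) {
      return uint16_t((x << 8) | (x >> 8));
    }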
@@ -1556,8 +1556,8 @@ void MacroAssembler::bitmaskInt16x8(FloatRegister src, Register dest) {
// input and shifting rather than masking at the end, but creates a false
// dependency on the old value of scratch. The better fix is to allow src to
// be clobbered.
moveSimd128(src, scratch);
vpacksswb(Operand(scratch), scratch, scratch);
src = moveSimd128IntIfNotAVX(src, scratch);
vpacksswb(Operand(src), src, scratch);
vpmovmskb(scratch, dest);
andl(Imm32(0xFF), dest);
}

@@ -1713,20 +1713,22 @@ void MacroAssembler::mulInt64x2(FloatRegister lhs, FloatRegister rhs,
// lhs = <D C> <B A>
// rhs = <H G> <F E>
// result = <(DG+CH)_low+CG_high CG_low> <(BE+AF)_low+AE_high AE_low>
moveSimd128(lhs, temp); // temp = <D C> <B A>
vpsrlq(Imm32(32), temp, temp); // temp = <0 D> <0 B>
vpmuludq(rhs, temp, temp); // temp = <DG> <BE>
moveSimd128(rhs, temp2); // temp2 = <H G> <F E>
vpsrlq(Imm32(32), temp2, temp2); // temp2 = <0 H> <0 F>
vpmuludq(lhs, temp2, temp2); // temp2 = <CH> <AF>
vpaddq(Operand(temp), temp2, temp2); // temp2 = <DG+CH> <BE+AF>
vpsllq(Imm32(32), temp2, temp2); // temp2 = <(DG+CH)_low 0>
// <(BE+AF)_low 0>
vpmuludq(rhs, dest, dest); // dest = <CG_high CG_low>
// <AE_high AE_low>
vpaddq(Operand(temp2), dest, dest); // dest =
// <(DG+CH)_low+CG_high CG_low>
// <(BE+AF)_low+AE_high AE_low>
FloatRegister lhsForTemp =
moveSimd128IntIfNotAVX(lhs, temp); // temp = <D C> <B A>
vpsrlq(Imm32(32), lhsForTemp, temp); // temp = <0 D> <0 B>
vpmuludq(rhs, temp, temp); // temp = <DG> <BE>
FloatRegister rhsForTemp =
moveSimd128IntIfNotAVX(rhs, temp2); // temp2 = <H G> <F E>
vpsrlq(Imm32(32), rhsForTemp, temp2); // temp2 = <0 H> <0 F>
vpmuludq(lhs, temp2, temp2); // temp2 = <CH> <AF>
vpaddq(Operand(temp), temp2, temp2); // temp2 = <DG+CH> <BE+AF>
vpsllq(Imm32(32), temp2, temp2); // temp2 = <(DG+CH)_low 0>
// <(BE+AF)_low 0>
vpmuludq(rhs, dest, dest); // dest = <CG_high CG_low>
// <AE_high AE_low>
vpaddq(Operand(temp2), dest, dest); // dest =
// <(DG+CH)_low+CG_high CG_low>
// <(BE+AF)_low+AE_high AE_low>
}

// Code generation from the PR: https://github.com/WebAssembly/simd/pull/376.
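The mulInt64x2 sequence above is the schoolbook 64-bit multiply built from vpmuludq (unsigned 32x32->64 per lane); writing one lane in scalar form makes the register comments easier to follow (illustrative only, not SpiderMonkey code):

    #include <cstdint>

    // One 64-bit lane of mulInt64x2: lhs lane = B:A, rhs lane = F:E (hi:lo
    // 32-bit halves). Modulo 2^64,
    //   (B*2^32 + A) * (F*2^32 + E) = ((B*E + A*F) << 32) + A*E.
    uint64_t mul_i64_lane(uint64_t lhs, uint64_t rhs) {
      uint64_t A = uint32_t(lhs), B = lhs >> 32;  // vpsrlq by 32 extracts B
      uint64_t E = uint32_t(rhs), F = rhs >> 32;  // and F
      uint64_t cross = (B * E + A * F) << 32;     // vpmuludq x2, vpaddq, vpsllq
      return cross + A * E;                       // final vpmuludq + vpaddq
    }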
@@ -2141,9 +2143,9 @@ void MacroAssembler::absInt32x4(FloatRegister src, FloatRegister dest) {

void MacroAssembler::absInt64x2(FloatRegister src, FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
moveSimd128(src, dest);
signReplicationInt64x2(src, scratch);
vpxor(Operand(scratch), dest, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpxor(Operand(scratch), src, dest);
vpsubq(Operand(scratch), dest, dest);
}

@@ -2167,7 +2169,7 @@ void MacroAssembler::leftShiftInt16x8(Register rhs, FloatRegister lhsDest) {

void MacroAssembler::leftShiftInt16x8(Imm32 count, FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpsllw(count, src, dest);
}

@@ -2178,7 +2180,7 @@ void MacroAssembler::leftShiftInt32x4(Register rhs, FloatRegister lhsDest) {

void MacroAssembler::leftShiftInt32x4(Imm32 count, FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpslld(count, src, dest);
}

@@ -2189,7 +2191,7 @@ void MacroAssembler::leftShiftInt64x2(Register rhs, FloatRegister lhsDest) {

void MacroAssembler::leftShiftInt64x2(Imm32 count, FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpsllq(count, src, dest);
}

@@ -2226,7 +2228,7 @@ void MacroAssembler::rightShiftInt16x8(Register rhs, FloatRegister lhsDest) {

void MacroAssembler::rightShiftInt16x8(Imm32 count, FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpsraw(count, src, dest);
}

@@ -2238,7 +2240,7 @@ void MacroAssembler::unsignedRightShiftInt16x8(Register rhs,

void MacroAssembler::unsignedRightShiftInt16x8(Imm32 count, FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpsrlw(count, src, dest);
}

@@ -2249,7 +2251,7 @@ void MacroAssembler::rightShiftInt32x4(Register rhs, FloatRegister lhsDest) {

void MacroAssembler::rightShiftInt32x4(Imm32 count, FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpsrad(count, src, dest);
}

@@ -2261,7 +2263,7 @@ void MacroAssembler::unsignedRightShiftInt32x4(Register rhs,

void MacroAssembler::unsignedRightShiftInt32x4(Imm32 count, FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpsrld(count, src, dest);
}

@@ -2284,7 +2286,7 @@ void MacroAssembler::unsignedRightShiftInt64x2(Register rhs,

void MacroAssembler::unsignedRightShiftInt64x2(Imm32 count, FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpsrlq(count, src, dest);
}

@@ -2299,14 +2301,14 @@ void MacroAssembler::signReplicationInt8x16(FloatRegister src,

void MacroAssembler::signReplicationInt16x8(FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
vpsraw(Imm32(15), dest, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpsraw(Imm32(15), src, dest);
}

void MacroAssembler::signReplicationInt32x4(FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);
vpsrad(Imm32(31), dest, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpsrad(Imm32(31), src, dest);
}

void MacroAssembler::signReplicationInt64x2(FloatRegister src,

@@ -2810,30 +2812,22 @@ void MacroAssembler::extAddPairwiseInt8x16(FloatRegister src,

void MacroAssembler::unsignedExtAddPairwiseInt8x16(FloatRegister src,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
moveSimd128(src, dest);
loadConstantSimd128Int(SimdConstant::SplatX16(1), scratch);
vpmaddubsw(scratch, dest, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpmaddubswSimd128(SimdConstant::SplatX16(1), src, dest);
}

void MacroAssembler::extAddPairwiseInt16x8(FloatRegister src,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
moveSimd128(src, dest);
loadConstantSimd128Int(SimdConstant::SplatX8(1), scratch);
vpmaddwd(Operand(scratch), dest, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpmaddwdSimd128(SimdConstant::SplatX8(1), src, dest);
}

void MacroAssembler::unsignedExtAddPairwiseInt16x8(FloatRegister src,
FloatRegister dest) {
ScratchSimd128Scope scratch(*this);
moveSimd128(src, dest);
loadConstantSimd128Int(SimdConstant::SplatX8(0x8000), scratch);
vpxor(scratch, dest, dest);
loadConstantSimd128Int(SimdConstant::SplatX8(1), scratch);
vpmaddwd(Operand(scratch), dest, dest);
loadConstantSimd128Int(SimdConstant::SplatX4(0x00010000), scratch);
vpaddd(Operand(scratch), dest, dest);
src = moveSimd128IntIfNotAVX(src, dest);
vpxorSimd128(SimdConstant::SplatX8(-0x8000), src, dest);
vpmaddwdSimd128(SimdConstant::SplatX8(1), dest, dest);
vpadddSimd128(SimdConstant::SplatX4(0x00010000), dest, dest);
}

// Floating square root

@@ -3023,8 +3017,8 @@ void MacroAssembler::unsignedWidenLowInt32x4(FloatRegister src,
}

void MacroAssembler::widenHighInt32x4(FloatRegister src, FloatRegister dest) {
if (src == dest) {
vmovhlps(dest, dest, dest);
if (src == dest || HasAVX()) {
vmovhlps(src, src, dest);
} else {
vpshufd(ComputeShuffleMask(2, 3, 2, 3), src, dest);
}

@@ -3033,11 +3027,10 @@ void MacroAssembler::widenHighInt32x4(FloatRegister src, FloatRegister dest) {

void MacroAssembler::unsignedWidenHighInt32x4(FloatRegister src,
FloatRegister dest) {
moveSimd128(src, dest);

ScratchSimd128Scope scratch(*this);
src = moveSimd128IntIfNotAVX(src, dest);
vpxor(scratch, scratch, scratch);
vpunpckhdq(scratch, dest, dest);
vpunpckhdq(scratch, src, dest);
}

// Floating multiply-accumulate: srcDest [+-]= src1 * src2
@@ -587,6 +587,10 @@ class MacroAssemblerX86Shared : public Assembler {
moveSimd128Int(src, dest);
return dest;
}
FloatRegister selectDestIfAVX(FloatRegister src, FloatRegister dest) {
MOZ_ASSERT(src.isSimd128() && dest.isSimd128());
return HasAVX() ? dest : src;
}
void loadUnalignedSimd128Int(const Address& src, FloatRegister dest) {
vmovdqu(Operand(src), dest);
}

@@ -421,6 +421,12 @@ void MacroAssemblerX86::vcmplepdSimd128(const SimdConstant& v,
vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vcmplepd_mr);
}

void MacroAssemblerX86::vpmaddubswSimd128(const SimdConstant& v,
FloatRegister lhs,
FloatRegister dest) {
vpPatchOpSimd128(v, lhs, dest, &X86Encoding::BaseAssemblerX86::vpmaddubsw_mr);
}

void MacroAssemblerX86::finish() {
// Last instruction may be an indirect jump so eagerly insert an undefined
// instruction byte to prevent processors from decoding data values into

@@ -1063,6 +1063,8 @@ class MacroAssemblerX86 : public MacroAssemblerX86Shared {
FloatRegister dest);
void vcmplepdSimd128(const SimdConstant& v, FloatRegister lhs,
FloatRegister dest);
void vpmaddubswSimd128(const SimdConstant& v, FloatRegister lhs,
FloatRegister dest);

Condition testInt32Truthy(bool truthy, const ValueOperand& operand) {
test32(operand.payloadReg(), operand.payloadReg());