Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1478632 - wasm simd, part 5a: extract porting interfaces and x86/x64 impls. r=bbouvier
Here we're *only* extracting porting APIs from the -SIMD.cpp file and exposing them in MacroAssembler.h, along with a little bit of renaming. Almost all the interesting action happens in the next patch.

Differential Revision: https://phabricator.services.mozilla.com/D60856
Parent: 3c9e1d4b5f
Commit: 168837c2ec
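
The shape of the porting interface can be illustrated with a small hypothetical caller: code generators call the architecture-neutral MacroAssembler entry points declared in the first hunk below, and each DEFINED_ON(...) declaration is backed by a per-architecture inline implementation (only x86/x64 in this patch). The helper name, the registers, and the aliasing handling in this sketch are assumptions for illustration, not part of the patch; it also assumes `rhs` does not alias `dest`.

#include "jit/MacroAssembler.h"

// Hypothetical lowering helper (not in the patch): emits a wasm i32x4.add
// through the new platform-independent entry point instead of reaching into
// MacroAssemblerX86Shared directly.
static void EmitI32x4Add(js::jit::MacroAssembler& masm,
                         js::jit::FloatRegister lhs,
                         js::jit::FloatRegister rhs,
                         js::jit::FloatRegister dest) {
  // Per the header comment added below, on x86/x64 without AVX `lhs` and
  // `dest` must be the same register, so arrange that before the add.
  if (lhs != dest) {
    masm.moveSimd128(lhs, dest);
    lhs = dest;
  }
  masm.addInt32x4(lhs, rhs, dest);  // DEFINED_ON(x86_shared): vpaddd
}
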
@@ -1772,6 +1772,297 @@ class MacroAssembler : public MacroAssemblerSpecific {
  inline void memoryBarrier(MemoryBarrierBits barrier) PER_SHARED_ARCH;

 public:
  // ========================================================================
  // SIMD
  //
  // Naming is "operationSimd128" when operating on the whole vector, otherwise
  // it's "operation<Type><Size>x<Lanes>".
  //
  // For microarchitectural reasons we can in principle get a performance win by
  // using int or float specific instructions in the operationSimd128 case when
  // we know that subsequent operations on the result are int or float oriented.
  // In practice, we don't care about that yet.
  //
  // The order of operations here follows those in the SIMD overview document,
  // https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md.
  //
  // In many cases on x86, unless AVX is present on the chip then `lhs` and
  // `dest` must be the same register.

  // Moves

  inline void moveSimd128(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Constants

  inline void zeroSimd128(FloatRegister dest) DEFINED_ON(x86_shared);

  // Splat

  inline void splatX16(Register src, FloatRegister dest) DEFINED_ON(x86_shared);

  inline void splatX8(Register src, FloatRegister dest) DEFINED_ON(x86_shared);

  inline void splatX4(Register src, FloatRegister dest) DEFINED_ON(x86_shared);

  inline void splatX4(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Extract lane as scalar

  inline void extractLaneInt8x16(FloatRegister src, Register dest,
                                 unsigned lane, SimdSign sign)
      DEFINED_ON(x86_shared);

  inline void extractLaneInt16x8(FloatRegister src, Register dest,
                                 unsigned lane, SimdSign sign)
      DEFINED_ON(x86_shared);

  inline void extractLaneInt32x4(FloatRegister src, Register dest,
                                 unsigned lane) DEFINED_ON(x86_shared);

  inline void extractLaneFloat32x4(FloatRegister src, FloatRegister dest,
                                   unsigned lane, bool canonicalize)
      DEFINED_ON(x86_shared);

  // Replace lane value

  inline void replaceLaneInt8x16(FloatRegister src, Register value,
                                 FloatRegister dest, unsigned lane)
      DEFINED_ON(x86_shared);

  inline void replaceLaneInt16x8(FloatRegister src, Register value,
                                 FloatRegister dest, unsigned lane)
      DEFINED_ON(x86_shared);

  inline void replaceLaneInt32x4(FloatRegister src, Register value,
                                 FloatRegister dest, unsigned lane)
      DEFINED_ON(x86_shared);

  inline void replaceLaneFloat32x4(FloatRegister src, FloatRegister value,
                                   FloatRegister dest, unsigned lane)
      DEFINED_ON(x86_shared);

  // Shuffle - permute with immediate indices

  // Swizzle - permute with variable indices

  // Integer Add

  inline void addInt8x16(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  inline void addInt16x8(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  inline void addInt32x4(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  // Integer Subtract

  inline void subInt8x16(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  inline void subInt16x8(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  inline void subInt32x4(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  // Integer Multiply

  inline void mulInt16x8(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister dest) DEFINED_ON(x86_shared);

  inline bool mulInt32x4RequiresTemp() DEFINED_ON(x86_shared);

  inline void mulInt32x4(FloatRegister lhs, FloatRegister rhs,
                         FloatRegister temp, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Integer Negate

  inline void negInt8x16(FloatRegister in, FloatRegister out)
      DEFINED_ON(x86_shared);

  inline void negInt16x8(FloatRegister in, FloatRegister out)
      DEFINED_ON(x86_shared);

  inline void negInt32x4(FloatRegister in, FloatRegister out)
      DEFINED_ON(x86_shared);

  // Saturating integer add

  inline void addSatInt8x16(FloatRegister lhs, FloatRegister rhs, SimdSign sign,
                            FloatRegister dest) DEFINED_ON(x86_shared);

  inline void addSatInt16x8(FloatRegister lhs, FloatRegister rhs, SimdSign sign,
                            FloatRegister dest) DEFINED_ON(x86_shared);

  // Saturating integer subtract

  inline void subSatInt8x16(FloatRegister lhs, FloatRegister rhs, SimdSign sign,
                            FloatRegister dest) DEFINED_ON(x86_shared);

  inline void subSatInt16x8(FloatRegister lhs, FloatRegister rhs, SimdSign sign,
                            FloatRegister dest) DEFINED_ON(x86_shared);

  // Lane-wise integer minimum

  // Lane-wise integer maximum

  // Lane-wise integer rounding average

  // Left shift by scalar

  inline void leftShiftInt16x8(FloatRegister src, Register count, Register temp,
                               FloatRegister out) DEFINED_ON(x86_shared);

  inline void leftShiftInt32x4(FloatRegister src, Register count, Register temp,
                               FloatRegister out) DEFINED_ON(x86_shared);

  // Right shift by scalar

  inline void rightShiftInt16x8(FloatRegister src, Register count,
                                Register temp, FloatRegister out)
      DEFINED_ON(x86_shared);

  inline void unsignedRightShiftInt16x8(FloatRegister src, Register count,
                                        Register temp, FloatRegister out)
      DEFINED_ON(x86_shared);

  inline void rightShiftInt32x4(FloatRegister src, Register count,
                                Register temp, FloatRegister out)
      DEFINED_ON(x86_shared);

  inline void unsignedRightShiftInt32x4(FloatRegister src, Register count,
                                        Register temp, FloatRegister out)
      DEFINED_ON(x86_shared);

  // Bitwise and, or, xor, not

  inline void bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) DEFINED_ON(x86_shared);

  inline void bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs,
                               FloatRegister dest) DEFINED_ON(x86_shared);

  inline void bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) DEFINED_ON(x86_shared);

  inline void bitwiseNotSimd128(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Bitwise and-not. Note, this is ~lhs & rhs, which is not what wasm wants
  // but conforms to what the x86 does.

  inline void bitwiseAndNotSimd128(FloatRegister lhs, FloatRegister rhs,
                                   FloatRegister dest) DEFINED_ON(x86_shared);

  // Bitwise select

  inline void bitwiseSelectSimd128(FloatRegister mask, FloatRegister onTrue,
                                   FloatRegister onFalse, FloatRegister temp,
                                   FloatRegister dest) DEFINED_ON(x86_shared);

  // Any lane true

  // All lanes true

  // Comparisons (integer and floating-point)

  inline void compareInt8x16(FloatRegister lhs, FloatRegister rhs,
                             Assembler::Condition cond, FloatRegister dest)
      DEFINED_ON(x86_shared);

  inline void compareInt16x8(FloatRegister lhs, FloatRegister rhs,
                             Assembler::Condition cond, FloatRegister dest)
      DEFINED_ON(x86_shared);

  inline void compareInt32x4(FloatRegister lhs, FloatRegister rhs,
                             Assembler::Condition cond, FloatRegister dest)
      DEFINED_ON(x86_shared);

  inline void compareFloat32x4(FloatRegister lhs, FloatRegister rhs,
                               Assembler::Condition cond, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Load

  inline void loadUnalignedSimd128(const Address& src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  inline void loadUnalignedSimd128(const BaseIndex& src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Load and splat

  // Load and extend

  // Store

  inline void storeUnalignedSimd128(FloatRegister src, const Address& dest)
      DEFINED_ON(x86_shared);

  inline void storeUnalignedSimd128(FloatRegister src, const BaseIndex& dest)
      DEFINED_ON(x86_shared);

  // Floating point negation

  inline void negFloat32x4(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Floating point absolute value

  inline void absFloat32x4(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // NaN-propagating minimum

  inline void minFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister dest) DEFINED_ON(x86_shared);

  // NaN-propagating maximum

  inline void maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister temp, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Floating add

  inline void addFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister dest) DEFINED_ON(x86_shared);

  // Floating subtract

  inline void subFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister dest) DEFINED_ON(x86_shared);

  // Floating division

  inline void divFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister dest) DEFINED_ON(x86_shared);

  // Floating Multiply

  inline void mulFloat32x4(FloatRegister lhs, FloatRegister rhs,
                           FloatRegister dest) DEFINED_ON(x86_shared);

  // Floating square root

  inline void sqrtFloat32x4(FloatRegister src, FloatRegister dest)
      DEFINED_ON(x86_shared);

  // Integer to floating point with rounding

  // Floating point to integer with saturation

  // Integer to integer narrowing

  // Integer to integer widening

 public:
  // ========================================================================
  // Truncate floating point.

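One subtlety flagged in the and-not comment above: bitwiseAndNotSimd128 follows x86 PANDN semantics (~lhs & rhs), while wasm's v128.andnot wants a & ~b, so a later lowering would swap the operands. A minimal hypothetical sketch of that swap (the wrapper name and registers are assumptions, not part of this patch):

#include "jit/MacroAssembler.h"

// Hypothetical sketch: produce wasm v128.andnot (a & ~b) from the x86-shaped
// primitive (~lhs & rhs) by swapping the operands, since ~b & a == a & ~b.
static void EmitWasmAndNot(js::jit::MacroAssembler& masm,
                           js::jit::FloatRegister a,
                           js::jit::FloatRegister b,
                           js::jit::FloatRegister dest) {
  // Operand order is deliberate: `b` is passed as `lhs`, so it is the input
  // that gets negated.  On non-AVX x86 the lhs/dest aliasing rule from the
  // header comment then applies to `b`.
  masm.bitwiseAndNotSimd128(b, a, dest);
}
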
@@ -1068,6 +1068,414 @@ void MacroAssembler::memoryBarrier(MemoryBarrierBits barrier) {
  }
}

// ========================================================================
// SIMD
//
// For vector operations of the form "operationSimd128" we currently bias in
// favor of an integer representation on x86, but this is subject to later
// adjustment based on an analysis of use cases and actual programs.
//
// The order of operations here follows the header file.

// Moves

void MacroAssembler::moveSimd128(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::moveSimd128Int(src, dest);
}

// Constants

void MacroAssembler::zeroSimd128(FloatRegister dest) {
  MacroAssemblerX86Shared::zeroSimd128Int(dest);
}

// Splat

void MacroAssembler::splatX16(Register src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX16(src, dest);
}

void MacroAssembler::splatX8(Register src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX8(src, dest);
}

void MacroAssembler::splatX4(Register src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX4(src, dest);
}

void MacroAssembler::splatX4(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::splatX4(src, dest);
}

// Extract lane as scalar

void MacroAssembler::extractLaneInt8x16(FloatRegister src, Register dest,
                                        unsigned lane, SimdSign sign) {
  MacroAssemblerX86Shared::extractLaneInt8x16(src, dest, lane, sign);
}

void MacroAssembler::extractLaneInt16x8(FloatRegister src, Register dest,
                                        unsigned lane, SimdSign sign) {
  MacroAssemblerX86Shared::extractLaneInt16x8(src, dest, lane, sign);
}

void MacroAssembler::extractLaneInt32x4(FloatRegister src, Register dest,
                                        unsigned lane) {
  MacroAssemblerX86Shared::extractLaneInt32x4(src, dest, lane);
}

void MacroAssembler::extractLaneFloat32x4(FloatRegister src, FloatRegister dest,
                                          unsigned lane, bool canonicalize) {
  MacroAssemblerX86Shared::extractLaneFloat32x4(src, dest, lane, canonicalize);
}

// Replace lane value

void MacroAssembler::replaceLaneInt8x16(FloatRegister src, Register value,
                                        FloatRegister dest, unsigned lane) {
  MacroAssemblerX86Shared::insertLaneSimdInt(src, value, dest, lane, 16);
}

void MacroAssembler::replaceLaneInt16x8(FloatRegister src, Register value,
                                        FloatRegister dest, unsigned lane) {
  MacroAssemblerX86Shared::insertLaneSimdInt(src, value, dest, lane, 8);
}

void MacroAssembler::replaceLaneInt32x4(FloatRegister src, Register value,
                                        FloatRegister dest, unsigned lane) {
  MacroAssemblerX86Shared::insertLaneSimdInt(src, value, dest, lane, 4);
}

void MacroAssembler::replaceLaneFloat32x4(FloatRegister src,
                                          FloatRegister value,
                                          FloatRegister dest, unsigned lane) {
  MacroAssemblerX86Shared::insertLaneFloat32x4(src, value, dest, lane);
}

// Shuffle - permute with immediate indices

// Swizzle - permute with variable indices

// Integer Add

void MacroAssembler::addInt8x16(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpaddb(Operand(rhs), lhs, dest);
}

void MacroAssembler::addInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpaddw(Operand(rhs), lhs, dest);
}

void MacroAssembler::addInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpaddd(Operand(rhs), lhs, dest);
}

// Integer subtract

void MacroAssembler::subInt8x16(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpsubb(Operand(rhs), lhs, dest);
}

void MacroAssembler::subInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpsubw(Operand(rhs), lhs, dest);
}

void MacroAssembler::subInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpsubd(Operand(rhs), lhs, dest);
}

// Integer multiply

void MacroAssembler::mulInt16x8(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister dest) {
  vpmullw(Operand(rhs), lhs, dest);
}

bool MacroAssembler::mulInt32x4RequiresTemp() {
  return !AssemblerX86Shared::HasSSE41();
}

void MacroAssembler::mulInt32x4(FloatRegister lhs, FloatRegister rhs,
                                FloatRegister temp, FloatRegister dest) {
  MOZ_ASSERT(temp.isInvalid() == !mulInt32x4RequiresTemp());
  mozilla::Maybe<FloatRegister> theTemp =
      mulInt32x4RequiresTemp() ? mozilla::Some(temp) : mozilla::Nothing();
  MacroAssemblerX86Shared::mulInt32x4(lhs, Operand(rhs), theTemp, dest);
}

// Integer negate

void MacroAssembler::negInt8x16(FloatRegister in, FloatRegister out) {
  zeroSimd128Int(out);
  packedSubInt8(Operand(in), out);
}

void MacroAssembler::negInt16x8(FloatRegister in, FloatRegister out) {
  zeroSimd128Int(out);
  packedSubInt16(Operand(in), out);
}

void MacroAssembler::negInt32x4(FloatRegister in, FloatRegister out) {
  zeroSimd128Int(out);
  packedSubInt32(Operand(in), out);
}

// Saturating integer add

void MacroAssembler::addSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                   SimdSign sign, FloatRegister dest) {
  if (sign == SimdSign::Signed) {
    vpaddsb(Operand(rhs), lhs, dest);
  } else {
    vpaddusb(Operand(rhs), lhs, dest);
  }
}

void MacroAssembler::addSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                   SimdSign sign, FloatRegister dest) {
  if (sign == SimdSign::Signed) {
    vpaddsw(Operand(rhs), lhs, dest);
  } else {
    vpaddusw(Operand(rhs), lhs, dest);
  }
}

// Saturating integer subtract

void MacroAssembler::subSatInt8x16(FloatRegister lhs, FloatRegister rhs,
                                   SimdSign sign, FloatRegister dest) {
  if (sign == SimdSign::Signed) {
    vpsubsb(Operand(rhs), lhs, dest);
  } else {
    vpsubusb(Operand(rhs), lhs, dest);
  }
}

void MacroAssembler::subSatInt16x8(FloatRegister lhs, FloatRegister rhs,
                                   SimdSign sign, FloatRegister dest) {
  if (sign == SimdSign::Signed) {
    vpsubsw(Operand(rhs), lhs, dest);
  } else {
    vpsubusw(Operand(rhs), lhs, dest);
  }
}

// Lane-wise integer minimum

// Lane-wise integer maximum

// Lane-wise integer rounding average

// Left shift by scalar

void MacroAssembler::leftShiftInt16x8(FloatRegister src, Register count,
                                      Register temp, FloatRegister out) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt16x8(src, count, temp,
                                                          out);
}

void MacroAssembler::leftShiftInt32x4(FloatRegister src, Register count,
                                      Register temp, FloatRegister out) {
  MacroAssemblerX86Shared::packedLeftShiftByScalarInt32x4(src, count, temp,
                                                          out);
}

// Right shift by scalar

void MacroAssembler::rightShiftInt16x8(FloatRegister src, Register count,
                                       Register temp, FloatRegister out) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt16x8(src, count, temp,
                                                           out);
}

void MacroAssembler::unsignedRightShiftInt16x8(FloatRegister src,
                                               Register count, Register temp,
                                               FloatRegister out) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt16x8(src, count,
                                                                   temp, out);
}

void MacroAssembler::rightShiftInt32x4(FloatRegister src, Register count,
                                       Register temp, FloatRegister out) {
  MacroAssemblerX86Shared::packedRightShiftByScalarInt32x4(src, count, temp,
                                                           out);
}

void MacroAssembler::unsignedRightShiftInt32x4(FloatRegister src,
                                               Register count, Register temp,
                                               FloatRegister out) {
  MacroAssemblerX86Shared::packedUnsignedRightShiftByScalarInt32x4(src, count,
                                                                   temp, out);
}

// Bitwise and, or, xor, not

void MacroAssembler::bitwiseAndSimd128(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  MacroAssemblerX86Shared::bitwiseAndSimdInt(lhs, Operand(rhs), dest);
}

void MacroAssembler::bitwiseOrSimd128(FloatRegister lhs, FloatRegister rhs,
                                      FloatRegister dest) {
  MacroAssemblerX86Shared::bitwiseOrSimdInt(lhs, Operand(rhs), dest);
}

void MacroAssembler::bitwiseXorSimd128(FloatRegister lhs, FloatRegister rhs,
                                       FloatRegister dest) {
  MacroAssemblerX86Shared::bitwiseXorSimdInt(lhs, Operand(rhs), dest);
}

void MacroAssembler::bitwiseNotSimd128(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::notInt8x16(Operand(src), dest);
}

// Bitwise and-not

void MacroAssembler::bitwiseAndNotSimd128(FloatRegister lhs, FloatRegister rhs,
                                          FloatRegister dest) {
  MacroAssemblerX86Shared::bitwiseAndNotSimdInt(lhs, Operand(rhs), dest);
}

// Bitwise select

void MacroAssembler::bitwiseSelectSimd128(FloatRegister mask,
                                          FloatRegister onTrue,
                                          FloatRegister onFalse,
                                          FloatRegister temp,
                                          FloatRegister dest) {
  MacroAssemblerX86Shared::selectSimd128(mask, onTrue, onFalse, temp, dest);
}

// All lanes true

// Comparisons (integer and floating-point)

void MacroAssembler::compareInt8x16(FloatRegister lhs, FloatRegister rhs,
                                    Assembler::Condition cond,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt8x16(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareInt16x8(FloatRegister lhs, FloatRegister rhs,
                                    Assembler::Condition cond,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt16x8(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareInt32x4(FloatRegister lhs, FloatRegister rhs,
                                    Assembler::Condition cond,
                                    FloatRegister dest) {
  MacroAssemblerX86Shared::compareInt32x4(lhs, Operand(rhs), cond, dest);
}

void MacroAssembler::compareFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                      Assembler::Condition cond,
                                      FloatRegister dest) {
  MacroAssemblerX86Shared::compareFloat32x4(lhs, Operand(rhs), cond, dest);
}

// Load

void MacroAssembler::loadUnalignedSimd128(const Address& src,
                                          FloatRegister dest) {
  vmovups(Operand(src), dest);
}

void MacroAssembler::loadUnalignedSimd128(const BaseIndex& src,
                                          FloatRegister dest) {
  vmovdqu(Operand(src), dest);
}

// Load and splat

// Load and extend

// Store

void MacroAssembler::storeUnalignedSimd128(FloatRegister src,
                                           const Address& dest) {
  vmovups(src, Operand(dest));
}

void MacroAssembler::storeUnalignedSimd128(FloatRegister src,
                                           const BaseIndex& dest) {
  vmovups(src, Operand(dest));
}

// Floating point negation

void MacroAssembler::negFloat32x4(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::negFloat32x4(Operand(src), dest);
}

// Floating point absolute value

void MacroAssembler::absFloat32x4(FloatRegister src, FloatRegister dest) {
  MacroAssemblerX86Shared::absFloat32x4(Operand(src), dest);
}

// NaN-propagating minimum

void MacroAssembler::minFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  MacroAssemblerX86Shared::minFloat32x4(lhs, Operand(rhs), dest);
}

// NaN-propagating maximum

void MacroAssembler::maxFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister temp, FloatRegister dest) {
  MacroAssemblerX86Shared::maxFloat32x4(lhs, Operand(rhs), temp, dest);
}

// Floating add

void MacroAssembler::addFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vaddps(Operand(rhs), lhs, dest);
}

// Floating subtract

void MacroAssembler::subFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vsubps(Operand(rhs), lhs, dest);
}

// Floating division

void MacroAssembler::divFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vdivps(Operand(rhs), lhs, dest);
}

// Floating Multiply

void MacroAssembler::mulFloat32x4(FloatRegister lhs, FloatRegister rhs,
                                  FloatRegister dest) {
  vmulps(Operand(rhs), lhs, dest);
}

// Floating square root

void MacroAssembler::sqrtFloat32x4(FloatRegister src, FloatRegister dest) {
  vsqrtps(Operand(src), dest);
}

// Integer to floating point with rounding

// Floating point to integer with saturation

// Integer to integer narrowing

// Integer to integer widening

// ========================================================================
// Truncate floating point.

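The mulInt32x4 implementation above encodes a small protocol: on chips without SSE4.1 the 32x4 multiply needs a scratch vector register, and the assertion requires callers to pass an invalid register otherwise. A hypothetical caller might look like the sketch below; the helper name and the way the scratch register is obtained are assumptions, not part of the patch.

#include "jit/MacroAssembler.h"

// Hypothetical caller of the temp-register protocol for mulInt32x4.
static void EmitI32x4Mul(js::jit::MacroAssembler& masm,
                         js::jit::FloatRegister lhs,
                         js::jit::FloatRegister rhs,
                         js::jit::FloatRegister scratch,
                         js::jit::FloatRegister dest) {
  // Only pre-SSE4.1 hardware needs the scratch; passing InvalidFloatReg
  // otherwise satisfies the MOZ_ASSERT in mulInt32x4.
  js::jit::FloatRegister temp = masm.mulInt32x4RequiresTemp()
                                    ? scratch
                                    : js::jit::InvalidFloatReg;
  masm.mulInt32x4(lhs, rhs, temp, dest);
}
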
@@ -379,7 +379,7 @@ void MacroAssembler::PushRegsInMask(LiveRegisterSet set) {
     } else if (reg.isSingle()) {
       storeFloat32(reg, spillAddress);
     } else if (reg.isSimd128()) {
-      storeUnalignedSimd128Float(reg, spillAddress);
+      storeUnalignedSimd128(reg, spillAddress);
     } else {
       MOZ_CRASH("Unknown register type.");
     }

@@ -417,7 +417,7 @@ void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest,
     } else if (reg.isSingle()) {
       storeFloat32(reg, dest);
     } else if (reg.isSimd128()) {
-      storeUnalignedSimd128Float(reg, dest);
+      storeUnalignedSimd128(reg, dest);
     } else {
       MOZ_CRASH("Unknown register type.");
     }

@@ -452,7 +452,7 @@ void MacroAssembler::PopRegsInMaskIgnore(LiveRegisterSet set,
     } else if (reg.isSingle()) {
       loadFloat32(spillAddress, reg);
     } else if (reg.isSimd128()) {
-      loadUnalignedSimd128Float(spillAddress, reg);
+      loadUnalignedSimd128(spillAddress, reg);
     } else {
       MOZ_CRASH("Unknown register type.");
     }

@@ -432,88 +432,10 @@ class MacroAssemblerX86Shared : public Assembler {
   void compareFloat32x4(FloatRegister lhs, Operand rhs,
                         Assembler::Condition cond, FloatRegister output);
 
-  void addInt8x16(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpaddb(rhs, lhs, output);
-  }
-  void addInt16x8(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpaddw(rhs, lhs, output);
-  }
-  void addInt32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpaddd(rhs, lhs, output);
-  }
-  void addFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vaddps(rhs, lhs, output);
-  }
-
-  void addSatInt8x16(FloatRegister lhs, Operand rhs, SimdSign sign,
-                     FloatRegister output) {
-    if (sign == SimdSign::Signed) {
-      vpaddsb(rhs, lhs, output);
-    } else {
-      vpaddusb(rhs, lhs, output);
-    }
-  }
-  void addSatInt16x8(FloatRegister lhs, Operand rhs, SimdSign sign,
-                     FloatRegister output) {
-    if (sign == SimdSign::Signed) {
-      vpaddsw(rhs, lhs, output);
-    } else {
-      vpaddusw(rhs, lhs, output);
-    }
-  }
-
-  void subInt8x16(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpsubb(rhs, lhs, output);
-  }
-  void subInt16x8(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpsubw(rhs, lhs, output);
-  }
-  void subInt32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpsubd(rhs, lhs, output);
-  }
-  void subFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vsubps(rhs, lhs, output);
-  }
-
-  void subSatInt8x16(FloatRegister lhs, Operand rhs, SimdSign sign,
-                     FloatRegister output) {
-    if (sign == SimdSign::Signed) {
-      vpsubsb(rhs, lhs, output);
-    } else {
-      vpsubusb(rhs, lhs, output);
-    }
-  }
-  void subSatInt16x8(FloatRegister lhs, Operand rhs, SimdSign sign,
-                     FloatRegister output) {
-    if (sign == SimdSign::Signed) {
-      vpsubsw(rhs, lhs, output);
-    } else {
-      vpsubusw(rhs, lhs, output);
-    }
-  }
-
-  void mulInt16x8(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vpmullw(rhs, lhs, output);
-  }
   void mulInt32x4(FloatRegister lhs, Operand rhs,
                   const mozilla::Maybe<FloatRegister>& temp,
                   FloatRegister output);
-  void mulFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vmulps(rhs, lhs, output);
-  }
 
-  void negInt8x16(Operand in, FloatRegister out) {
-    zeroSimd128Int(out);
-    packedSubInt8(in, out);
-  }
-  void negInt16x8(Operand in, FloatRegister out) {
-    zeroSimd128Int(out);
-    packedSubInt16(in, out);
-  }
-  void negInt32x4(Operand in, FloatRegister out) {
-    zeroSimd128Int(out);
-    packedSubInt32(in, out);
-  }
   void negFloat32x4(Operand in, FloatRegister out);
 
   void notInt8x16(Operand in, FloatRegister out);

@@ -521,9 +443,6 @@ class MacroAssemblerX86Shared : public Assembler {
   void notInt32x4(Operand in, FloatRegister out);
   void notFloat32x4(Operand in, FloatRegister out);
 
-  void divFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output) {
-    vdivps(rhs, lhs, output);
-  }
   void minFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister output);
   void maxFloat32x4(FloatRegister lhs, Operand rhs, FloatRegister temp,
                     FloatRegister output);

@@ -630,7 +549,6 @@ class MacroAssemblerX86Shared : public Assembler {
  void loadUnalignedSimd128Int(const Operand& src, FloatRegister dest) {
    vmovdqu(src, dest);
  }

  void storeUnalignedSimd128Int(FloatRegister src, const Address& dest) {
    vmovdqu(src, Operand(dest));
  }

@@ -675,9 +593,6 @@ class MacroAssemblerX86Shared : public Assembler {
     // TODO See comment above. See also bug 1068028.
     vrsqrtps(src, dest);
   }
-  void packedSqrtFloat32x4(const Operand& src, FloatRegister dest) {
-    vsqrtps(src, dest);
-  }
 
  public:
   void packedLeftShiftByScalarInt16x8(FloatRegister in, Register count,

@@ -752,22 +667,10 @@ class MacroAssemblerX86Shared : public Assembler {
     loadAlignedSimd128Float(src, dest);
     return dest;
   }
-  void loadUnalignedSimd128Float(const Address& src, FloatRegister dest) {
-    vmovups(Operand(src), dest);
-  }
-  void loadUnalignedSimd128Float(const BaseIndex& src, FloatRegister dest) {
-    vmovdqu(Operand(src), dest);
-  }
-  void loadUnalignedSimd128Float(const Operand& src, FloatRegister dest) {
+  void loadUnalignedSimd128(const Operand& src, FloatRegister dest) {
     vmovups(src, dest);
   }
-  void storeUnalignedSimd128Float(FloatRegister src, const Address& dest) {
-    vmovups(src, Operand(dest));
-  }
-  void storeUnalignedSimd128Float(FloatRegister src, const BaseIndex& dest) {
-    vmovups(src, Operand(dest));
-  }
-  void storeUnalignedSimd128Float(FloatRegister src, const Operand& dest) {
+  void storeUnalignedSimd128(FloatRegister src, const Operand& dest) {
     vmovups(src, dest);
   }
   void packedAddFloat32(const Operand& src, FloatRegister dest) {

@@ -355,7 +355,7 @@ static void SetupABIArguments(MacroAssembler& masm, const FuncExport& fe,
       break;
     case MIRType::Int8x16:
 #ifdef ENABLE_WASM_SIMD
-      masm.loadUnalignedSimd128Float(src, iter->fpu());
+      masm.loadUnalignedSimd128(src, iter->fpu());
       break;
 #else
       MOZ_CRASH("V128 not supported in SetupABIArguments");

@@ -436,7 +436,7 @@ static void StoreRegisterResult(MacroAssembler& masm, const FuncExport& fe,
       break;
     case ValType::V128:
 #ifdef ENABLE_WASM_SIMD
-      masm.storeUnalignedSimd128Float(result.fpr(), Address(loc, 0));
+      masm.storeUnalignedSimd128(result.fpr(), Address(loc, 0));
       break;
 #else
       MOZ_CRASH("V128 not supported in StoreABIReturn");