Bug 1113338: Add SIMD partial loads/stores in asm.js; r=sunfish,luke

--HG--
extra : rebase_source : 64c55de279c05492afa5f8b1599aeae44a054068
Benjamin Bouvier 2015-02-05 16:35:32 +01:00
Parent 26252ed8ba
Commit 841e931d9a
18 changed files with 1086 additions and 138 deletions

View file

@ -774,6 +774,16 @@ AsmJSModule::staticallyLink(ExclusiveContext *cx)
MOZ_ASSERT(isStaticallyLinked());
}
#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
static size_t
ByteSizeOfHeapAccess(const jit::AsmJSHeapAccess access)
{
Scalar::Type type = access.type();
if (Scalar::isSimdType(type))
return Scalar::scalarByteSize(type) * access.numSimdElems();
return TypedArrayElemSize(type);
}
#endif
void
AsmJSModule::initHeap(Handle<ArrayBufferObjectMaybeShared *> heap, JSContext *cx)
{
@ -794,7 +804,7 @@ AsmJSModule::initHeap(Handle<ArrayBufferObjectMaybeShared *> heap, JSContext *cx
// ptr + data-type-byte-size > heapLength
// i.e. ptr >= heapLength + 1 - data-type-byte-size
// (Note that we need >= as this is what codegen uses.)
size_t scalarByteSize = TypedArrayElemSize(access.type());
size_t scalarByteSize = ByteSizeOfHeapAccess(access);
X86Encoding::SetPointer(access.patchLengthAt(code_),
(void*)(heap->byteLength() + 1 - scalarByteSize));
}
@ -816,7 +826,7 @@ AsmJSModule::initHeap(Handle<ArrayBufferObjectMaybeShared *> heap, JSContext *cx
const jit::AsmJSHeapAccess &access = heapAccesses_[i];
if (access.hasLengthCheck()) {
// See comment above for x86 codegen.
size_t scalarByteSize = TypedArrayElemSize(access.type());
size_t scalarByteSize = ByteSizeOfHeapAccess(access);
X86Encoding::SetInt32(access.patchLengthAt(code_), heapLength + 1 - scalarByteSize);
}
}
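For a partial SIMD access, the byte size used in that limit now depends on how many lanes are touched. A standalone sketch of the arithmetic (illustration only, not part of the patch; the helper name and the 0x10000 heap length are made up):

#include <cstddef>
#include <cstdio>

// Mirrors ByteSizeOfHeapAccess above: a SIMD access covers lanes * lane-size bytes.
static size_t ByteSize(size_t laneSize, unsigned numSimdElems)
{
    return laneSize * numSimdElems;
}

int main()
{
    size_t heapLength = 0x10000;
    // Codegen flags an access as out of bounds when ptr >= heapLength + 1 - byteSize.
    std::printf("loadXYZ limit:   %zu\n", heapLength + 1 - ByteSize(4, 3)); // 65525
    std::printf("full load limit: %zu\n", heapLength + 1 - ByteSize(4, 4)); // 65521
    return 0;
}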

View file

@ -484,10 +484,6 @@ HandleFault(PEXCEPTION_POINTERS exception)
if (!heapAccess)
return false;
// Also not necessary, but, since we can, do.
if (heapAccess->isLoad() != !record->ExceptionInformation[0])
return false;
// We now know that this is an out-of-bounds access made by an asm.js
// load/store that we should handle.
@ -497,6 +493,10 @@ HandleFault(PEXCEPTION_POINTERS exception)
return true;
}
// Also not necessary, but, since we can, do.
if (heapAccess->isLoad() != !record->ExceptionInformation[0])
return false;
// If this is a load, assign the JS-defined result value to the destination
// register (ToInt32(undefined) or ToNumber(undefined), determined by the
// type of the destination register) and set the PC to the next op. Upon

View file

@ -5823,8 +5823,20 @@ CheckSimdOperationCall(FunctionCompiler &f, ParseNode *call, const ModuleCompile
case AsmJSSimdOperation_load:
return CheckSimdLoad(f, call, opType, 4, def, type);
case AsmJSSimdOperation_loadX:
return CheckSimdLoad(f, call, opType, 1, def, type);
case AsmJSSimdOperation_loadXY:
return CheckSimdLoad(f, call, opType, 2, def, type);
case AsmJSSimdOperation_loadXYZ:
return CheckSimdLoad(f, call, opType, 3, def, type);
case AsmJSSimdOperation_store:
return CheckSimdStore(f, call, opType, 4, def, type);
case AsmJSSimdOperation_storeX:
return CheckSimdStore(f, call, opType, 1, def, type);
case AsmJSSimdOperation_storeXY:
return CheckSimdStore(f, call, opType, 2, def, type);
case AsmJSSimdOperation_storeXYZ:
return CheckSimdStore(f, call, opType, 3, def, type);
case AsmJSSimdOperation_bitselect:
return CheckSimdSelect(f, call, opType, /*isElementWise */ false, def, type);

View file

@ -227,7 +227,13 @@
_(not) \
_(neg) \
_(load) \
_(store)
_(loadX) \
_(loadXY) \
_(loadXYZ) \
_(store) \
_(storeX) \
_(storeXY) \
_(storeXYZ)
#define FORALL_SIMD_OP(_) \
FOREACH_INT32X4_SIMD_OP(_) \
FOREACH_FLOAT32X4_SIMD_OP(_) \

View file

@ -17,10 +17,15 @@ function assertEqX4(real, expected, assertFunc) {
if (typeof assertFunc === 'undefined')
assertFunc = assertEq;
assertFunc(real.x, expected[0]);
assertFunc(real.y, expected[1]);
assertFunc(real.z, expected[2]);
assertFunc(real.w, expected[3]);
try {
assertFunc(real.x, expected[0]);
assertFunc(real.y, expected[1]);
assertFunc(real.z, expected[2]);
assertFunc(real.w, expected[3]);
} catch (e) {
print("Stack: " + e.stack);
throw e;
}
}
// Load / Store
@ -46,12 +51,14 @@ assertAsmTypeFail('glob', 'ffi', 'heap', IMPORTS + "function f(){var i=0;return
// Literal index constants
var buf = new ArrayBuffer(BUF_MIN);
var SIZE_TA = BUF_MIN >> 2
var asI32 = new Int32Array(buf);
asI32[(BUF_MIN >> 2) - 4] = 4;
asI32[(BUF_MIN >> 2) - 3] = 3;
asI32[(BUF_MIN >> 2) - 2] = 2;
asI32[(BUF_MIN >> 2) - 1] = 1;
asI32[SIZE_TA - 4] = 4;
asI32[SIZE_TA - 3] = 3;
asI32[SIZE_TA - 2] = 2;
asI32[SIZE_TA - 1] = 1;
assertAsmTypeFail('glob', 'ffi', 'heap', IMPORTS + "function f(){load(H, -1);} return f");
assertAsmTypeFail('glob', 'ffi', 'heap', IMPORTS + "function f(){load(H, " + (INT32_MAX + 1) + ");} return f");
assertAsmTypeFail('glob', 'ffi', 'heap', IMPORTS + "function f(){load(H, " + (INT32_MAX + 1 - 15) + ");} return f");
asmCompile('glob', 'ffi', 'heap', IMPORTS + "function f(){load(H, " + (INT32_MAX + 1 - 16) + ");} return f");
@ -259,3 +266,242 @@ assertThrowsInstanceOf(() => i32s(SIZE - 0, vec), RangeError);
for (var i = 0; i < SIZE; i++)
assertEq(I32[i], i + 1);
// Partial loads and stores
(function() {
// Variable indexes
function MakeCodeFor(typeName) {
return `
"use asm";
var type = glob.SIMD.${typeName};
var lx = type.loadX;
var lxy = type.loadXY;
var lxyz = type.loadXYZ;
var sx = type.storeX;
var sxy = type.storeXY;
var sxyz = type.storeXYZ;
var u8 = new glob.Uint8Array(heap);
function loadX(i) { i=i|0; return lx(u8, i); }
function loadXY(i) { i=i|0; return lxy(u8, i); }
function loadXYZ(i) { i=i|0; return lxyz(u8, i); }
function loadCstX() { return lx(u8, 41 << 2); }
function loadCstXY() { return lxy(u8, 41 << 2); }
function loadCstXYZ() { return lxyz(u8, 41 << 2); }
function storeX(i, x) { i=i|0; x=type(x); return sx(u8, i, x); }
function storeXY(i, x) { i=i|0; x=type(x); return sxy(u8, i, x); }
function storeXYZ(i, x) { i=i|0; x=type(x); return sxyz(u8, i, x); }
function storeCstX(x) { x=type(x); return sx(u8, 41 << 2, x); }
function storeCstXY(x) { x=type(x); return sxy(u8, 41 << 2, x); }
function storeCstXYZ(x) { x=type(x); return sxyz(u8, 41 << 2, x); }
return {
loadX: loadX,
loadXY: loadXY,
loadXYZ: loadXYZ,
loadCstX: loadCstX,
loadCstXY: loadCstXY,
loadCstXYZ: loadCstXYZ,
storeX: storeX,
storeXY: storeXY,
storeXYZ: storeXYZ,
storeCstX: storeCstX,
storeCstXY: storeCstXY,
storeCstXYZ: storeCstXYZ,
}
`;
}
var SIZE = 0x10000;
function TestPartialLoads(m, typedArray, x, y, z, w) {
// Fill array with predictable values
for (var i = 0; i < SIZE; i += 4) {
typedArray[i] = x(i);
typedArray[i + 1] = y(i);
typedArray[i + 2] = z(i);
typedArray[i + 3] = w(i);
}
// Test correct loads
var i = 0, j = 0; // i in elems, j in bytes
assertEqX4(m.loadX(j), [x(i), 0, 0, 0]);
assertEqX4(m.loadXY(j), [x(i), y(i), 0, 0]);
assertEqX4(m.loadXYZ(j), [x(i), y(i), z(i), 0]);
j += 4;
assertEqX4(m.loadX(j), [y(i), 0, 0, 0]);
assertEqX4(m.loadXY(j), [y(i), z(i), 0, 0]);
assertEqX4(m.loadXYZ(j), [y(i), z(i), w(i), 0]);
j += 4;
assertEqX4(m.loadX(j), [z(i), 0, 0, 0]);
assertEqX4(m.loadXY(j), [z(i), w(i), 0, 0]);
assertEqX4(m.loadXYZ(j), [z(i), w(i), x(i+4), 0]);
j += 4;
assertEqX4(m.loadX(j), [w(i), 0, 0, 0]);
assertEqX4(m.loadXY(j), [w(i), x(i+4), 0, 0]);
assertEqX4(m.loadXYZ(j), [w(i), x(i+4), y(i+4), 0]);
j += 4;
i += 4;
assertEqX4(m.loadX(j), [x(i), 0, 0, 0]);
assertEqX4(m.loadXY(j), [x(i), y(i), 0, 0]);
assertEqX4(m.loadXYZ(j), [x(i), y(i), z(i), 0]);
// Test loads with constant indexes (41)
assertEqX4(m.loadCstX(), [y(40), 0, 0, 0]);
assertEqX4(m.loadCstXY(), [y(40), z(40), 0, 0]);
assertEqX4(m.loadCstXYZ(), [y(40), z(40), w(40), 0]);
// Test limit and OOB accesses
assertEqX4(m.loadX((SIZE - 1) << 2), [w(SIZE - 4), 0, 0, 0]);
assertThrowsInstanceOf(() => m.loadX(((SIZE - 1) << 2) + 1), RangeError);
assertEqX4(m.loadXY((SIZE - 2) << 2), [z(SIZE - 4), w(SIZE - 4), 0, 0]);
assertThrowsInstanceOf(() => m.loadXY(((SIZE - 2) << 2) + 1), RangeError);
assertEqX4(m.loadXYZ((SIZE - 3) << 2), [y(SIZE - 4), z(SIZE - 4), w(SIZE - 4), 0]);
assertThrowsInstanceOf(() => m.loadXYZ(((SIZE - 3) << 2) + 1), RangeError);
}
// Partial stores
function TestPartialStores(m, typedArray, typeName, x, y, z, w) {
var val = SIMD[typeName](x, y, z, w);
function Reset() {
for (var i = 0; i < SIZE; i++)
typedArray[i] = i + 1;
}
function CheckNotModified(low, high) {
for (var i = low; i < high; i++)
assertEq(typedArray[i], i + 1);
}
function TestStoreX(i) {
m.storeX(i, val);
CheckNotModified(0, i >> 2);
assertEq(typedArray[i >> 2], x);
CheckNotModified((i >> 2) + 1, SIZE);
typedArray[i >> 2] = (i >> 2) + 1;
}
function TestStoreXY(i) {
m.storeXY(i, val);
CheckNotModified(0, i >> 2);
assertEq(typedArray[i >> 2], x);
assertEq(typedArray[(i >> 2) + 1], y);
CheckNotModified((i >> 2) + 2, SIZE);
typedArray[i >> 2] = (i >> 2) + 1;
typedArray[(i >> 2) + 1] = (i >> 2) + 2;
}
function TestStoreXYZ(i) {
m.storeXYZ(i, val);
CheckNotModified(0, i >> 2);
assertEq(typedArray[i >> 2], x);
assertEq(typedArray[(i >> 2) + 1], y);
assertEq(typedArray[(i >> 2) + 2], z);
CheckNotModified((i >> 2) + 3, SIZE);
typedArray[i >> 2] = (i >> 2) + 1;
typedArray[(i >> 2) + 1] = (i >> 2) + 2;
typedArray[(i >> 2) + 2] = (i >> 2) + 3;
}
function TestOOBStore(f) {
assertThrowsInstanceOf(f, RangeError);
CheckNotModified(0, SIZE);
}
Reset();
TestStoreX(0);
TestStoreX(1 << 2);
TestStoreX(2 << 2);
TestStoreX(3 << 2);
TestStoreX(1337 << 2);
var i = (SIZE - 1) << 2;
TestStoreX(i);
TestOOBStore(() => m.storeX(i + 1, val));
TestOOBStore(() => m.storeX(-1, val));
TestStoreXY(0);
TestStoreXY(1 << 2);
TestStoreXY(2 << 2);
TestStoreXY(3 << 2);
TestStoreXY(1337 << 2);
var i = (SIZE - 2) << 2;
TestStoreXY(i);
TestOOBStore(() => m.storeXY(i + 1, val));
TestOOBStore(() => m.storeXY(-1, val));
TestStoreXYZ(0);
TestStoreXYZ(1 << 2);
TestStoreXYZ(2 << 2);
TestStoreXYZ(3 << 2);
TestStoreXYZ(1337 << 2);
var i = (SIZE - 3) << 2;
TestStoreXYZ(i);
TestOOBStore(() => m.storeXYZ(i + 1, val));
TestOOBStore(() => m.storeXYZ(-1, val));
TestOOBStore(() => m.storeXYZ(-9, val));
// Constant indexes (41)
m.storeCstX(val);
CheckNotModified(0, 41);
assertEq(typedArray[41], x);
CheckNotModified(42, SIZE);
typedArray[41] = 42;
m.storeCstXY(val);
CheckNotModified(0, 41);
assertEq(typedArray[41], x);
assertEq(typedArray[42], y);
CheckNotModified(43, SIZE);
typedArray[41] = 42;
typedArray[42] = 43;
m.storeCstXYZ(val);
CheckNotModified(0, 41);
assertEq(typedArray[41], x);
assertEq(typedArray[42], y);
assertEq(typedArray[43], z);
CheckNotModified(44, SIZE);
typedArray[41] = 42;
typedArray[42] = 43;
typedArray[43] = 44;
}
var f32 = new Float32Array(SIZE);
var mfloat32x4 = asmLink(asmCompile('glob', 'ffi', 'heap', MakeCodeFor('float32x4')), this, null, f32.buffer);
TestPartialLoads(mfloat32x4, f32,
(i) => i + 1,
(i) => Math.fround(13.37),
(i) => Math.fround(1/i),
(i) => Math.fround(Math.sqrt(0x2000 - i)));
TestPartialStores(mfloat32x4, f32, 'float32x4', 42, -0, NaN, 0.1337);
var i32 = new Int32Array(f32.buffer);
var mint32x4 = asmLink(asmCompile('glob', 'ffi', 'heap', MakeCodeFor('int32x4')), this, null, i32.buffer);
TestPartialLoads(mint32x4, i32,
(i) => i + 1 | 0,
(i) => -i | 0,
(i) => i * 2 | 0,
(i) => 42);
TestPartialStores(mint32x4, i32, 'int32x4', 42, -3, 13, 37);
})();
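The boundary cases exercised above follow from the access width: with SIZE 4-byte elements, an n-lane access at byte index i is in bounds exactly when i + 4*n <= 4*SIZE. A small C++ sketch of that predicate (illustration only; the names are made up):

#include <cassert>
#include <cstddef>

// In-bounds predicate for an n-lane access at byte index i into a heap of
// `size` 4-byte elements, matching the RangeError boundaries tested above.
static bool InBounds(size_t i, unsigned lanes, size_t size)
{
    return i + 4 * lanes <= 4 * size;
}

int main()
{
    const size_t SIZE = 0x10000;
    assert(InBounds((SIZE - 1) * 4, 1, SIZE));       // last valid loadX/storeX
    assert(!InBounds((SIZE - 1) * 4 + 1, 1, SIZE));  // one byte past: throws
    assert(InBounds((SIZE - 3) * 4, 3, SIZE));       // last valid loadXYZ/storeXYZ
    assert(!InBounds((SIZE - 3) * 4 + 1, 3, SIZE));
    return 0;
}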

View file

@ -12165,7 +12165,7 @@ class MAsmJSHeapAccess
bool needsBoundsCheck() const { return needsBoundsCheck_; }
void removeBoundsCheck() { needsBoundsCheck_ = false; }
Label *outOfBoundsLabel() const { return outOfBoundsLabel_; }
unsigned numSimdElems() const { return numSimdElems_; }
unsigned numSimdElems() const { MOZ_ASSERT(Scalar::isSimdType(accessType_)); return numSimdElems_; }
};
class MAsmJSLoadHeap

View file

@ -769,6 +769,7 @@ class AsmJSHeapAccess
#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
uint8_t cmpDelta_; // the number of bytes from the cmp to the load/store instruction
uint8_t opLength_; // the length of the load/store instruction
uint8_t numSimdElems_; // the number of SIMD lanes to load/store at once
Scalar::Type type_;
AnyRegister::Code loadedReg_ : 8;
#endif
@ -787,16 +788,34 @@ class AsmJSHeapAccess
: offset_(offset),
cmpDelta_(cmp == NoLengthCheck ? 0 : offset - cmp),
opLength_(after - offset),
numSimdElems_(UINT8_MAX),
type_(type),
loadedReg_(loadedReg.code())
{}
{
MOZ_ASSERT(!Scalar::isSimdType(type));
}
AsmJSHeapAccess(uint32_t offset, uint8_t after, Scalar::Type type, uint32_t cmp = NoLengthCheck)
: offset_(offset),
cmpDelta_(cmp == NoLengthCheck ? 0 : offset - cmp),
opLength_(after - offset),
numSimdElems_(UINT8_MAX),
type_(type),
loadedReg_(UINT8_MAX)
{}
{
MOZ_ASSERT(!Scalar::isSimdType(type));
}
// SIMD loads / stores
AsmJSHeapAccess(uint32_t offset, uint32_t after, unsigned numSimdElems, Scalar::Type type,
uint32_t cmp = NoLengthCheck)
: offset_(offset),
cmpDelta_(cmp == NoLengthCheck ? 0 : offset - cmp),
opLength_(after - offset),
numSimdElems_(numSimdElems),
type_(type),
loadedReg_(UINT8_MAX)
{
MOZ_ASSERT(Scalar::isSimdType(type));
}
#elif defined(JS_CODEGEN_ARM) || defined(JS_CODEGEN_MIPS)
explicit AsmJSHeapAccess(uint32_t offset)
: offset_(offset)
@ -808,11 +827,14 @@ class AsmJSHeapAccess
#if defined(JS_CODEGEN_X86)
void *patchOffsetAt(uint8_t *code) const { return code + (offset_ + opLength_); }
#endif
#if defined(JS_CODEGEN_X64)
unsigned opLength() const { MOZ_ASSERT(!Scalar::isSimdType(type_)); return opLength_; }
bool isLoad() const { MOZ_ASSERT(!Scalar::isSimdType(type_)); return loadedReg_ != UINT8_MAX; }
#endif
#if defined(JS_CODEGEN_X86) || defined(JS_CODEGEN_X64)
bool hasLengthCheck() const { return cmpDelta_ > 0; }
void *patchLengthAt(uint8_t *code) const { return code + (offset_ - cmpDelta_); }
unsigned opLength() const { return opLength_; }
bool isLoad() const { return loadedReg_ != UINT8_MAX; }
unsigned numSimdElems() const { MOZ_ASSERT(Scalar::isSimdType(type_)); return numSimdElems_; }
Scalar::Type type() const { return type_; }
AnyRegister loadedReg() const { return AnyRegister::FromCode(loadedReg_); }
#endif
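The three constructor flavors are told apart by which fields stay at their UINT8_MAX sentinel, which is what the accessors' assertions rely on. A toy model of that state (illustration only; the struct and values are made up, the real invariants are the MOZ_ASSERTs above):

#include <cassert>
#include <cstdint>

struct AccessRecord {
    uint8_t numSimdElems; // UINT8_MAX unless the access is a SIMD access
    uint8_t loadedReg;    // UINT8_MAX for stores and for SIMD accesses
    bool isSimd() const { return numSimdElems != UINT8_MAX; }
};

int main()
{
    AccessRecord scalarLoad  = { UINT8_MAX, 0 };          // first constructor (x64 load)
    AccessRecord scalarStore = { UINT8_MAX, UINT8_MAX };  // second constructor
    AccessRecord simdLoadXYZ = { 3, UINT8_MAX };          // SIMD constructor, 3 lanes
    assert(!scalarLoad.isSimd() && !scalarStore.isSimd());
    assert(simdLoadXYZ.isSimd() && simdLoadXYZ.numSimdElems == 3);
    return 0;
}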

View file

@ -1767,6 +1767,32 @@ class AssemblerX86Shared : public AssemblerShared
MOZ_ASSERT(HasSSE2());
masm.vmovd_rr(src.code(), dest.code());
}
void vmovd(const Operand &src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src.kind()) {
case Operand::MEM_REG_DISP:
masm.vmovd_mr(src.disp(), src.base(), dest.code());
break;
case Operand::MEM_SCALE:
masm.vmovd_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void vmovd(FloatRegister src, const Operand &dest) {
MOZ_ASSERT(HasSSE2());
switch (dest.kind()) {
case Operand::MEM_REG_DISP:
masm.vmovd_rm(src.code(), dest.disp(), dest.base());
break;
case Operand::MEM_SCALE:
masm.vmovd_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void vpaddd(const Operand &src1, FloatRegister src0, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
switch (src1.kind()) {

View file

@ -127,7 +127,7 @@ public:
// Arithmetic operations:
#ifdef JS_CODEGEN_X86
void adcl_im(int32_t imm, const void* addr)
void adcl_im(int32_t imm, const void *addr)
{
spew("adcl %d, %p", imm, addr);
if (CAN_SIGN_EXTEND_8_32(imm)) {
@ -208,7 +208,7 @@ public:
m_formatter.oneByteOp64(OP_ADD_GvEv, offset, base, dst);
}
void addq_mr(const void* addr, RegisterID dst)
void addq_mr(const void *addr, RegisterID dst)
{
spew("addq %p, %s", addr, GPReg64Name(dst));
m_formatter.oneByteOp64(OP_ADD_GvEv, addr, dst);
@ -241,7 +241,7 @@ public:
}
}
void addq_im(int32_t imm, const void* addr)
void addq_im(int32_t imm, const void *addr)
{
spew("addq $%d, %p", imm, addr);
if (CAN_SIGN_EXTEND_8_32(imm)) {
@ -253,7 +253,7 @@ public:
}
}
#endif
void addl_im(int32_t imm, const void* addr)
void addl_im(int32_t imm, const void *addr)
{
spew("addl $%d, %p", imm, addr);
if (CAN_SIGN_EXTEND_8_32(imm)) {
@ -301,7 +301,7 @@ public:
{
twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, offset, base, src0, dst);
}
void vpaddd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vpaddd_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpaddd", VEX_PD, OP2_PADDD_VdqWdq, address, src0, dst);
}
@ -314,7 +314,7 @@ public:
{
twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, offset, base, src0, dst);
}
void vpsubd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vpsubd_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpsubd", VEX_PD, OP2_PSUBD_VdqWdq, address, src0, dst);
}
@ -336,7 +336,7 @@ public:
{
threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, offset, base, src0, dst);
}
void vpmulld_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vpmulld_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
threeByteOpSimd("vpmulld", VEX_PD, OP3_PMULLD_VdqWdq, ESCAPE_38, address, src0, dst);
}
@ -349,7 +349,7 @@ public:
{
twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, offset, base, src0, dst);
}
void vaddps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vaddps_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vaddps", VEX_PS, OP2_ADDPS_VpsWps, address, src0, dst);
}
@ -362,7 +362,7 @@ public:
{
twoByteOpSimd("vsubps", VEX_PS, OP2_SUBPS_VpsWps, offset, base, src0, dst);
}
void vsubps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vsubps_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vsubps", VEX_PS, OP2_SUBPS_VpsWps, address, src0, dst);
}
@ -375,7 +375,7 @@ public:
{
twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, offset, base, src0, dst);
}
void vmulps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vmulps_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vmulps", VEX_PS, OP2_MULPS_VpsWps, address, src0, dst);
}
@ -388,7 +388,7 @@ public:
{
twoByteOpSimd("vdivps", VEX_PS, OP2_DIVPS_VpsWps, offset, base, src0, dst);
}
void vdivps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vdivps_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vdivps", VEX_PS, OP2_DIVPS_VpsWps, address, src0, dst);
}
@ -401,7 +401,7 @@ public:
{
twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, offset, base, src0, dst);
}
void vmaxps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vmaxps_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vmaxps", VEX_PS, OP2_MAXPS_VpsWps, address, src0, dst);
}
@ -414,7 +414,7 @@ public:
{
twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, offset, base, src0, dst);
}
void vminps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vminps_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vminps", VEX_PS, OP2_MINPS_VpsWps, address, src0, dst);
}
@ -495,7 +495,7 @@ public:
m_formatter.oneByteOp64(OP_OR_GvEv, offset, base, dst);
}
void orq_mr(const void* addr, RegisterID dst)
void orq_mr(const void *addr, RegisterID dst)
{
spew("orq %p, %s", addr, GPReg64Name(dst));
m_formatter.oneByteOp64(OP_OR_GvEv, addr, dst);
@ -516,7 +516,7 @@ public:
}
}
#else
void andl_im(int32_t imm, const void* addr)
void andl_im(int32_t imm, const void *addr)
{
spew("andl $0x%x, %p", imm, addr);
if (CAN_SIGN_EXTEND_8_32(imm)) {
@ -658,7 +658,7 @@ public:
m_formatter.oneByteOp64(OP_GROUP3_Ev, dst, GROUP3_OP_NOT);
}
#else
void orl_im(int32_t imm, const void* addr)
void orl_im(int32_t imm, const void *addr)
{
spew("orl $0x%x, %p", imm, addr);
if (CAN_SIGN_EXTEND_8_32(imm)) {
@ -735,7 +735,7 @@ public:
m_formatter.oneByteOp64(OP_SUB_GvEv, offset, base, dst);
}
void subq_mr(const void* addr, RegisterID dst)
void subq_mr(const void *addr, RegisterID dst)
{
spew("subq %p, %s", addr, GPReg64Name(dst));
m_formatter.oneByteOp64(OP_SUB_GvEv, addr, dst);
@ -756,7 +756,7 @@ public:
}
}
#else
void subl_im(int32_t imm, const void* addr)
void subl_im(int32_t imm, const void *addr)
{
spew("subl $%d, %p", imm, addr);
if (CAN_SIGN_EXTEND_8_32(imm)) {
@ -1249,7 +1249,7 @@ public:
m_formatter.immediate32(rhs);
}
}
void cmpq_im(int32_t rhs, const void* addr)
void cmpq_im(int32_t rhs, const void *addr)
{
spew("cmpq $0x%" PRIx64 ", %p", int64_t(rhs), addr);
if (CAN_SIGN_EXTEND_8_32(rhs)) {
@ -1260,25 +1260,25 @@ public:
m_formatter.immediate32(rhs);
}
}
void cmpq_rm(RegisterID rhs, const void* addr)
void cmpq_rm(RegisterID rhs, const void *addr)
{
spew("cmpq %s, %p", GPReg64Name(rhs), addr);
m_formatter.oneByteOp64(OP_CMP_EvGv, addr, rhs);
}
#endif
void cmpl_rm(RegisterID rhs, const void* addr)
void cmpl_rm(RegisterID rhs, const void *addr)
{
spew("cmpl %s, %p", GPReg32Name(rhs), addr);
m_formatter.oneByteOp(OP_CMP_EvGv, addr, rhs);
}
void cmpl_rm_disp32(RegisterID rhs, const void* addr)
void cmpl_rm_disp32(RegisterID rhs, const void *addr)
{
spew("cmpl %s, %p", GPReg32Name(rhs), addr);
m_formatter.oneByteOp_disp32(OP_CMP_EvGv, addr, rhs);
}
void cmpl_im(int32_t rhs, const void* addr)
void cmpl_im(int32_t rhs, const void *addr)
{
spew("cmpl $0x%x, %p", rhs, addr);
if (CAN_SIGN_EXTEND_8_32(rhs)) {
@ -1526,7 +1526,7 @@ public:
m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, index, scale, src);
}
void movw_rm(RegisterID src, const void* addr)
void movw_rm(RegisterID src, const void *addr)
{
spew("movw %s, %p", GPReg16Name(src), addr);
m_formatter.prefix(PRE_OPERAND_SIZE);
@ -1551,7 +1551,7 @@ public:
m_formatter.oneByteOp(OP_MOV_EvGv, offset, base, index, scale, src);
}
void movl_mEAX(const void* addr)
void movl_mEAX(const void *addr)
{
#ifdef JS_CODEGEN_X64
if (IsAddressImmediate(addr)) {
@ -1585,7 +1585,7 @@ public:
m_formatter.oneByteOp_disp32(OP_MOV_GvEv, offset, base, dst);
}
void movl_mr(const void* base, RegisterID index, int scale, RegisterID dst)
void movl_mr(const void *base, RegisterID index, int scale, RegisterID dst)
{
int32_t disp = AddressImmediate(base);
@ -1599,7 +1599,7 @@ public:
m_formatter.oneByteOp(OP_MOV_GvEv, offset, base, index, scale, dst);
}
void movl_mr(const void* addr, RegisterID dst)
void movl_mr(const void *addr, RegisterID dst)
{
if (dst == rax
#ifdef JS_CODEGEN_X64
@ -1643,7 +1643,7 @@ public:
m_formatter.immediate8(imm);
}
void movb_im(int32_t imm, const void* addr)
void movb_im(int32_t imm, const void *addr)
{
spew("movb $%d, %p", imm, addr);
m_formatter.oneByteOp_disp32(OP_GROUP11_EvIb, addr, GROUP11_MOV);
@ -1658,7 +1658,7 @@ public:
m_formatter.immediate16(imm);
}
void movw_im(int32_t imm, const void* addr)
void movw_im(int32_t imm, const void *addr)
{
spew("movw $%d, %p", imm, addr);
m_formatter.prefix(PRE_OPERAND_SIZE);
@ -1688,7 +1688,7 @@ public:
m_formatter.immediate32(imm);
}
void movl_EAXm(const void* addr)
void movl_EAXm(const void *addr)
{
#ifdef JS_CODEGEN_X64
if (IsAddressImmediate(addr)) {
@ -1731,7 +1731,7 @@ public:
m_formatter.oneByteOp64(OP_MOV_EvGv, offset, base, index, scale, src);
}
void movq_rm(RegisterID src, const void* addr)
void movq_rm(RegisterID src, const void *addr)
{
if (src == rax && !IsAddressImmediate(addr)) {
movq_EAXm(addr);
@ -1742,7 +1742,28 @@ public:
m_formatter.oneByteOp64(OP_MOV_EvGv, addr, src);
}
void movq_mEAX(const void* addr)
void movq_rm(XMMRegisterID src, int32_t offset, RegisterID base)
{
spew("movq %s, " MEM_ob, XMMRegName(src), ADDR_ob(offset, base));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp64(OP2_MOVQ_EdVd, offset, base, src);
}
void movq_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
{
spew("movq %s, " MEM_obs, XMMRegName(src), ADDR_obs(offset, base, index, scale));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp64(OP2_MOVQ_EdVd, offset, base, index, scale, src);
}
void movq_rm(XMMRegisterID src, const void *addr)
{
spew("movq %s, %p", XMMRegName(src), addr);
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp64(OP2_MOVQ_EdVd, addr, src);
}
void movq_mEAX(const void *addr)
{
if (IsAddressImmediate(addr)) {
movq_mr(addr, rax);
@ -1754,7 +1775,7 @@ public:
m_formatter.immediate64(reinterpret_cast<int64_t>(addr));
}
void movq_EAXm(const void* addr)
void movq_EAXm(const void *addr)
{
if (IsAddressImmediate(addr)) {
movq_rm(rax, addr);
@ -1784,7 +1805,7 @@ public:
m_formatter.oneByteOp64(OP_MOV_GvEv, offset, base, index, scale, dst);
}
void movq_mr(const void* addr, RegisterID dst)
void movq_mr(const void *addr, RegisterID dst)
{
if (dst == rax && !IsAddressImmediate(addr)) {
movq_mEAX(addr);
@ -1795,6 +1816,27 @@ public:
m_formatter.oneByteOp64(OP_MOV_GvEv, addr, dst);
}
void movq_mr(int32_t offset, RegisterID base, XMMRegisterID dst)
{
spew("movq " MEM_ob ", %s", ADDR_ob(offset, base), XMMRegName(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp64(OP2_MOVQ_VdEd, offset, base, (RegisterID) dst);
}
void movq_mr(int32_t offset, RegisterID base, RegisterID index, int32_t scale, XMMRegisterID dst)
{
spew("movq " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), XMMRegName(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp64(OP2_MOVQ_VdEd, offset, base, index, scale, (RegisterID) dst);
}
void movq_mr(const void *addr, XMMRegisterID dst)
{
spew("movq %p, %s", addr, XMMRegName(dst));
m_formatter.prefix(PRE_SSE_66);
m_formatter.twoByteOp64(OP2_MOVQ_VdEd, addr, (RegisterID) dst);
}
void leaq_mr(int32_t offset, RegisterID base, RegisterID index, int scale, RegisterID dst)
{
spew("leaq " MEM_obs ", %s", ADDR_obs(offset, base, index, scale), GPReg64Name(dst)),
@ -1814,7 +1856,7 @@ public:
m_formatter.oneByteOp64(OP_GROUP11_EvIz, offset, base, index, scale, GROUP11_MOV);
m_formatter.immediate32(imm);
}
void movq_i32m(int32_t imm, const void* addr)
void movq_i32m(int32_t imm, const void *addr)
{
spew("movq $%d, %p", imm, addr);
m_formatter.oneByteOp64(OP_GROUP11_EvIz, addr, GROUP11_MOV);
@ -1873,7 +1915,7 @@ public:
return label;
}
#endif
void movl_rm(RegisterID src, const void* addr)
void movl_rm(RegisterID src, const void *addr)
{
if (src == rax
#ifdef JS_CODEGEN_X64
@ -1888,7 +1930,7 @@ public:
m_formatter.oneByteOp(OP_MOV_EvGv, addr, src);
}
void movl_i32m(int32_t imm, const void* addr)
void movl_i32m(int32_t imm, const void *addr)
{
spew("movl $%d, %p", imm, addr);
m_formatter.oneByteOp(OP_GROUP11_EvIz, addr, GROUP11_MOV);
@ -1913,7 +1955,7 @@ public:
m_formatter.oneByteOp8(OP_MOV_EbGv, offset, base, index, scale, src);
}
void movb_rm(RegisterID src, const void* addr)
void movb_rm(RegisterID src, const void *addr)
{
spew("movb %s, %p", GPReg8Name(src), addr);
m_formatter.oneByteOp8(OP_MOV_EbGv, addr, src);
@ -1949,7 +1991,7 @@ public:
m_formatter.twoByteOp(OP2_MOVZX_GvEb, offset, base, index, scale, dst);
}
void movzbl_mr(const void* addr, RegisterID dst)
void movzbl_mr(const void *addr, RegisterID dst)
{
spew("movzbl %p, %s", addr, GPReg32Name(dst));
m_formatter.twoByteOp(OP2_MOVZX_GvEb, addr, dst);
@ -1979,7 +2021,7 @@ public:
m_formatter.twoByteOp(OP2_MOVSX_GvEb, offset, base, index, scale, dst);
}
void movsbl_mr(const void* addr, RegisterID dst)
void movsbl_mr(const void *addr, RegisterID dst)
{
spew("movsbl %p, %s", addr, GPReg32Name(dst));
m_formatter.twoByteOp(OP2_MOVSX_GvEb, addr, dst);
@ -2009,7 +2051,7 @@ public:
m_formatter.twoByteOp(OP2_MOVZX_GvEw, offset, base, index, scale, dst);
}
void movzwl_mr(const void* addr, RegisterID dst)
void movzwl_mr(const void *addr, RegisterID dst)
{
spew("movzwl %p, %s", addr, GPReg32Name(dst));
m_formatter.twoByteOp(OP2_MOVZX_GvEw, addr, dst);
@ -2039,7 +2081,7 @@ public:
m_formatter.twoByteOp(OP2_MOVSX_GvEw, offset, base, index, scale, dst);
}
void movswl_mr(const void* addr, RegisterID dst)
void movswl_mr(const void *addr, RegisterID dst)
{
spew("movswl %p, %s", addr, GPReg32Name(dst));
m_formatter.twoByteOp(OP2_MOVSX_GvEw, addr, dst);
@ -2211,7 +2253,7 @@ public:
{
twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, offset, base, src0, dst);
}
void vpcmpeqd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vpcmpeqd_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpcmpeqd", VEX_PD, OP2_PCMPEQD_VdqWdq, address, src0, dst);
}
@ -2224,7 +2266,7 @@ public:
{
twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, offset, base, src0, dst);
}
void vpcmpgtd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vpcmpgtd_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vpcmpgtd", VEX_PD, OP2_PCMPGTD_VdqWdq, address, src0, dst);
}
@ -2237,7 +2279,7 @@ public:
{
twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, offset, base, src0, dst);
}
void vcmpps_mr(uint8_t order, const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vcmpps_mr(uint8_t order, const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpImmSimd("vcmpps", VEX_PS, OP2_CMPPS_VpsWps, order, address, src0, dst);
}
@ -2248,7 +2290,7 @@ public:
void vrcpps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, offset, base, invalid_xmm, dst);
}
void vrcpps_mr(const void* address, XMMRegisterID dst) {
void vrcpps_mr(const void *address, XMMRegisterID dst) {
twoByteOpSimd("vrcpps", VEX_PS, OP2_RCPPS_VpsWps, address, invalid_xmm, dst);
}
@ -2258,7 +2300,7 @@ public:
void vrsqrtps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, offset, base, invalid_xmm, dst);
}
void vrsqrtps_mr(const void* address, XMMRegisterID dst) {
void vrsqrtps_mr(const void *address, XMMRegisterID dst) {
twoByteOpSimd("vrsqrtps", VEX_PS, OP2_RSQRTPS_VpsWps, address, invalid_xmm, dst);
}
@ -2268,7 +2310,7 @@ public:
void vsqrtps_mr(int32_t offset, RegisterID base, XMMRegisterID dst) {
twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, offset, base, invalid_xmm, dst);
}
void vsqrtps_mr(const void* address, XMMRegisterID dst) {
void vsqrtps_mr(const void *address, XMMRegisterID dst) {
twoByteOpSimd("vsqrtps", VEX_PS, OP2_SQRTPS_VpsWps, address, invalid_xmm, dst);
}
@ -2292,11 +2334,11 @@ public:
twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, offset, base, src0, dst);
}
void vaddsd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vaddsd_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vaddsd", VEX_SD, OP2_ADDSD_VsdWsd, address, src0, dst);
}
void vaddss_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vaddss_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vaddss", VEX_SS, OP2_ADDSD_VsdWsd, address, src0, dst);
}
@ -2363,7 +2405,7 @@ public:
}
#ifdef JS_CODEGEN_X86
void vcvtsi2sd_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vcvtsi2sd_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vcvtsi2sd", VEX_SD, OP2_CVTSI2SD_VsdEd, address, src0, dst);
}
@ -2474,7 +2516,7 @@ public:
{
twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, offset, base, invalid_xmm, dst);
}
void vpshufd_imr(uint32_t mask, const void* address, XMMRegisterID dst)
void vpshufd_imr(uint32_t mask, const void *address, XMMRegisterID dst)
{
twoByteOpImmSimd("vpshufd", VEX_PD, OP2_PSHUFD_VdqWdqIb, mask, address, invalid_xmm, dst);
}
@ -2487,7 +2529,7 @@ public:
{
twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, offset, base, src0, dst);
}
void vshufps_imr(uint32_t mask, const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vshufps_imr(uint32_t mask, const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpImmSimd("vshufps", VEX_PS, OP2_SHUFPS_VpsWpsIb, mask, address, src0, dst);
}
@ -2577,6 +2619,46 @@ public:
twoByteOpInt32Simd("vmovd", VEX_PD, OP2_MOVD_VdEd, src, invalid_xmm, dst);
}
void vmovd_mr(int32_t offset, RegisterID base, XMMRegisterID dst)
{
twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, invalid_xmm, dst);
}
void vmovd_mr(int32_t offset, RegisterID base, RegisterID index, int32_t scale, XMMRegisterID dst)
{
twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, index, scale, invalid_xmm, dst);
}
void vmovd_mr_disp32(int32_t offset, RegisterID base, XMMRegisterID dst)
{
twoByteOpSimd_disp32("vmovd", VEX_PD, OP2_MOVD_VdEd, offset, base, invalid_xmm, dst);
}
void vmovd_mr(const void *address, XMMRegisterID dst)
{
twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_VdEd, address, invalid_xmm, dst);
}
void vmovd_rm(XMMRegisterID src, int32_t offset, RegisterID base)
{
twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, invalid_xmm, src);
}
void vmovd_rm(XMMRegisterID src, int32_t offset, RegisterID base, RegisterID index, int scale)
{
twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, index, scale, invalid_xmm, src);
}
void vmovd_rm_disp32(XMMRegisterID src, int32_t offset, RegisterID base)
{
twoByteOpSimd_disp32("vmovd", VEX_PD, OP2_MOVD_EdVd, offset, base, invalid_xmm, src);
}
void vmovd_rm(XMMRegisterID src, const void *address)
{
twoByteOpSimd("vmovd", VEX_PD, OP2_MOVD_EdVd, address, invalid_xmm, src);
}
#ifdef JS_CODEGEN_X64
void vmovq_rr(XMMRegisterID src, RegisterID dst)
{
@ -2664,52 +2746,52 @@ public:
twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, src1, src0, dst);
}
void vmovsd_mr(const void* address, XMMRegisterID dst)
void vmovsd_mr(const void *address, XMMRegisterID dst)
{
twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_VsdWsd, address, invalid_xmm, dst);
}
void vmovss_mr(const void* address, XMMRegisterID dst)
void vmovss_mr(const void *address, XMMRegisterID dst)
{
twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_VsdWsd, address, invalid_xmm, dst);
}
void vmovups_mr(const void* address, XMMRegisterID dst)
void vmovups_mr(const void *address, XMMRegisterID dst)
{
twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_VpsWps, address, invalid_xmm, dst);
}
void vmovdqu_mr(const void* address, XMMRegisterID dst)
void vmovdqu_mr(const void *address, XMMRegisterID dst)
{
twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_VdqWdq, address, invalid_xmm, dst);
}
void vmovsd_rm(XMMRegisterID src, const void* address)
void vmovsd_rm(XMMRegisterID src, const void *address)
{
twoByteOpSimd("vmovsd", VEX_SD, OP2_MOVSD_WsdVsd, address, invalid_xmm, src);
}
void vmovss_rm(XMMRegisterID src, const void* address)
void vmovss_rm(XMMRegisterID src, const void *address)
{
twoByteOpSimd("vmovss", VEX_SS, OP2_MOVSD_WsdVsd, address, invalid_xmm, src);
}
void vmovdqa_rm(XMMRegisterID src, const void* address)
void vmovdqa_rm(XMMRegisterID src, const void *address)
{
twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_WdqVdq, address, invalid_xmm, src);
}
void vmovaps_rm(XMMRegisterID src, const void* address)
void vmovaps_rm(XMMRegisterID src, const void *address)
{
twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_WsdVsd, address, invalid_xmm, src);
}
void vmovdqu_rm(XMMRegisterID src, const void* address)
void vmovdqu_rm(XMMRegisterID src, const void *address)
{
twoByteOpSimd("vmovdqu", VEX_SS, OP2_MOVDQ_WdqVdq, address, invalid_xmm, src);
}
void vmovups_rm(XMMRegisterID src, const void* address)
void vmovups_rm(XMMRegisterID src, const void *address)
{
twoByteOpSimd("vmovups", VEX_PS, OP2_MOVPS_WpsVps, address, invalid_xmm, src);
}
@ -2830,12 +2912,12 @@ public:
return twoByteRipOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, invalid_xmm, dst);
}
#else
void vmovaps_mr(const void* address, XMMRegisterID dst)
void vmovaps_mr(const void *address, XMMRegisterID dst)
{
twoByteOpSimd("vmovaps", VEX_PS, OP2_MOVAPS_VsdWsd, address, invalid_xmm, dst);
}
void vmovdqa_mr(const void* address, XMMRegisterID dst)
void vmovdqa_mr(const void *address, XMMRegisterID dst)
{
twoByteOpSimd("vmovdqa", VEX_PD, OP2_MOVDQ_VdqWdq, address, invalid_xmm, dst);
}
@ -3013,7 +3095,7 @@ public:
twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, offset, base, src0, dst);
}
void vandps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vandps_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vandps", VEX_PS, OP2_ANDPS_VpsWps, address, src0, dst);
}
@ -3028,7 +3110,7 @@ public:
twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, offset, base, src0, dst);
}
void vandnps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vandnps_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vandnps", VEX_PS, OP2_ANDNPS_VpsWps, address, src0, dst);
}
@ -3043,7 +3125,7 @@ public:
twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, offset, base, src0, dst);
}
void vorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vorps_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vorps", VEX_PS, OP2_ORPS_VpsWps, address, src0, dst);
}
@ -3058,7 +3140,7 @@ public:
twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, offset, base, src0, dst);
}
void vxorps_mr(const void* address, XMMRegisterID src0, XMMRegisterID dst)
void vxorps_mr(const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
twoByteOpSimd("vxorps", VEX_PS, OP2_XORPS_VpsWps, address, src0, dst);
}
@ -3339,7 +3421,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
SetRel32(code + from.offset(), code + to.offset());
}
void executableCopy(void* buffer)
void executableCopy(void *buffer)
{
memcpy(buffer, m_formatter.buffer(), size());
}
@ -3577,7 +3659,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
}
void twoByteOpSimd(const char *name, VexOperandType ty, TwoByteOpcodeID opcode,
const void* address, XMMRegisterID src0, XMMRegisterID dst)
const void *address, XMMRegisterID src0, XMMRegisterID dst)
{
if (useLegacySSEEncoding(src0, dst)) {
if (IsXMMReversedOperands(opcode))
@ -4059,7 +4141,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
memoryModRM_disp32(offset, index, scale, reg);
}
void oneByteOp(OneByteOpcodeID opcode, const void* address, int reg)
void oneByteOp(OneByteOpcodeID opcode, const void *address, int reg)
{
m_buffer.ensureSpace(MaxInstructionSize);
emitRexIfNeeded(reg, 0, 0);
@ -4067,7 +4149,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
memoryModRM_disp32(address, reg);
}
void oneByteOp_disp32(OneByteOpcodeID opcode, const void* address, int reg)
void oneByteOp_disp32(OneByteOpcodeID opcode, const void *address, int reg)
{
m_buffer.ensureSpace(MaxInstructionSize);
emitRexIfNeeded(reg, 0, 0);
@ -4199,7 +4281,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
memoryModRM(offset, base, index, scale, reg);
}
void twoByteOp(TwoByteOpcodeID opcode, const void* address, int reg)
void twoByteOp(TwoByteOpcodeID opcode, const void *address, int reg)
{
m_buffer.ensureSpace(MaxInstructionSize);
emitRexIfNeeded(reg, 0, 0);
@ -4209,7 +4291,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
}
void twoByteOpVex(VexOperandType ty, TwoByteOpcodeID opcode,
const void* address, XMMRegisterID src0, int reg)
const void *address, XMMRegisterID src0, int reg)
{
int r = (reg >> 3), x = 0, b = 0;
int m = 1; // 0x0F
@ -4266,7 +4348,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
memoryModRM(offset, base, reg);
}
void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, const void* address, int reg)
void threeByteOp(ThreeByteOpcodeID opcode, ThreeByteEscape escape, const void *address, int reg)
{
m_buffer.ensureSpace(MaxInstructionSize);
emitRexIfNeeded(reg, 0, 0);
@ -4373,7 +4455,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
memoryModRM(offset, base, index, scale, reg);
}
void oneByteOp64(OneByteOpcodeID opcode, const void* address, int reg)
void oneByteOp64(OneByteOpcodeID opcode, const void *address, int reg)
{
m_buffer.ensureSpace(MaxInstructionSize);
emitRexW(reg, 0, 0);
@ -4390,6 +4472,33 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
registerModRM(rm, reg);
}
void twoByteOp64(TwoByteOpcodeID opcode, int offset, RegisterID base, int reg)
{
m_buffer.ensureSpace(MaxInstructionSize);
emitRexW(reg, 0, base);
m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
m_buffer.putByteUnchecked(opcode);
memoryModRM(offset, base, reg);
}
void twoByteOp64(TwoByteOpcodeID opcode, int offset, RegisterID base, RegisterID index, int scale, int reg)
{
m_buffer.ensureSpace(MaxInstructionSize);
emitRexW(reg, index, base);
m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
m_buffer.putByteUnchecked(opcode);
memoryModRM(offset, base, index, scale, reg);
}
void twoByteOp64(TwoByteOpcodeID opcode, const void *address, int reg)
{
m_buffer.ensureSpace(MaxInstructionSize);
emitRexW(reg, 0, 0);
m_buffer.putByteUnchecked(OP_2BYTE_ESCAPE);
m_buffer.putByteUnchecked(opcode);
memoryModRM(address, reg);
}
void twoByteOpVex64(VexOperandType ty, TwoByteOpcodeID opcode,
RegisterID rm, XMMRegisterID src0, XMMRegisterID reg)
{
@ -4466,7 +4575,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
memoryModRM(offset, base, index, scale, reg);
}
void oneByteOp8(OneByteOpcodeID opcode, const void* address, RegisterID reg)
void oneByteOp8(OneByteOpcodeID opcode, const void *address, RegisterID reg)
{
m_buffer.ensureSpace(MaxInstructionSize);
emitRexIf(byteRegRequiresRex(reg), reg, 0, 0);
@ -4810,7 +4919,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
m_buffer.putIntUnchecked(offset);
}
void memoryModRM_disp32(const void* address, int reg)
void memoryModRM_disp32(const void *address, int reg)
{
int32_t disp = AddressImmediate(address);
@ -4824,7 +4933,7 @@ threeByteOpImmSimd("vblendps", VEX_PD, OP3_BLENDPS_VpsWpsIb, ESCAPE_3A, imm, off
m_buffer.putIntUnchecked(disp);
}
void memoryModRM(const void* address, int reg)
void memoryModRM(const void *address, int reg)
{
memoryModRM_disp32(address, reg);
}

View file

@ -167,6 +167,7 @@ enum TwoByteOpcodeID {
OP2_XORPD_VpdWpd = 0x57,
OP2_PCMPGTD_VdqWdq = 0x66,
OP2_MOVD_VdEd = 0x6E,
OP2_MOVQ_VdEd = 0x6E,
OP2_MOVDQ_VsdWsd = 0x6F,
OP2_MOVDQ_VdqWdq = 0x6F,
OP2_PSHUFD_VdqWdqIb = 0x70,
@ -177,6 +178,7 @@ enum TwoByteOpcodeID {
OP2_PCMPEQW = 0x75,
OP2_PCMPEQD_VdqWdq = 0x76,
OP2_MOVD_EdVd = 0x7E,
OP2_MOVQ_EdVd = 0x7E,
OP2_MOVDQ_WdqVdq = 0x7F,
OP2_JCC_rel32 = 0x80,
OP_SETCC = 0x90,
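OP2_MOVQ_VdEd and OP2_MOVQ_EdVd deliberately reuse the MOVD opcodes 0x6E and 0x7E: with the 0x66 prefix alone these move 32 bits, and the REX.W emitted by twoByteOp64 widens them to 64-bit XMM/memory moves. A rough byte sketch for a hypothetical operand pair (xmm1 and [rax+0x10]; the bytes are for illustration only):

#include <cstdint>

// prefix 0x66, REX.W, 0x0F escape, opcode, ModRM, disp8
const uint8_t kMovqLoad[]  = { 0x66, 0x48, 0x0F, 0x6E, 0x48, 0x10 }; // movq xmm1, [rax+0x10]
const uint8_t kMovqStore[] = { 0x66, 0x48, 0x0F, 0x7E, 0x48, 0x10 }; // movq [rax+0x10], xmm1

int main()
{
    (void) kMovqLoad;
    (void) kMovqStore;
    return 0;
}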

View file

@ -361,6 +361,21 @@ class Assembler : public AssemblerX86Shared
MOZ_CRASH("unexpected operand kind");
}
}
void movq(const Operand &src, FloatRegister dest) {
switch (src.kind()) {
case Operand::MEM_REG_DISP:
masm.movq_mr(src.disp(), src.base(), dest.code());
break;
case Operand::MEM_SCALE:
masm.movq_mr(src.disp(), src.base(), src.index(), src.scale(), dest.code());
break;
case Operand::MEM_ADDRESS32:
masm.movq_mr(src.address(), dest.code());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void movq(Register src, const Operand &dest) {
switch (dest.kind()) {
case Operand::REG:
@ -379,6 +394,21 @@ class Assembler : public AssemblerX86Shared
MOZ_CRASH("unexpected operand kind");
}
}
void movq(FloatRegister src, const Operand &dest) {
switch (dest.kind()) {
case Operand::MEM_REG_DISP:
masm.movq_rm(src.code(), dest.disp(), dest.base());
break;
case Operand::MEM_SCALE:
masm.movq_rm(src.code(), dest.disp(), dest.base(), dest.index(), dest.scale());
break;
case Operand::MEM_ADDRESS32:
masm.movq_rm(src.code(), dest.address());
break;
default:
MOZ_CRASH("unexpected operand kind");
}
}
void movq(Imm32 imm32, const Operand &dest) {
switch (dest.kind()) {
case Operand::REG:

View file

@ -254,6 +254,105 @@ CodeGeneratorX64::memoryBarrier(MemoryBarrierBits barrier)
masm.storeLoadFence();
}
void
CodeGeneratorX64::loadSimd(Scalar::Type type, unsigned numElems, const Operand &srcAddr,
FloatRegister out)
{
switch (type) {
case Scalar::Float32x4: {
switch (numElems) {
// In memory-to-register mode, movss zeroes out the high lanes.
case 1: masm.loadFloat32(srcAddr, out); break;
// See comment above, which also applies to movsd.
case 2: masm.loadDouble(srcAddr, out); break;
case 4: masm.loadUnalignedFloat32x4(srcAddr, out); break;
default: MOZ_CRASH("unexpected size for partial load");
}
break;
}
case Scalar::Int32x4: {
switch (numElems) {
// In memory-to-register mode, movd zeroes out the high lanes.
case 1: masm.vmovd(srcAddr, out); break;
// See comment above, which also applies to movq.
case 2: masm.movq(srcAddr, out); break;
case 4: masm.loadUnalignedInt32x4(srcAddr, out); break;
default: MOZ_CRASH("unexpected size for partial load");
}
break;
}
case Scalar::Int8:
case Scalar::Uint8:
case Scalar::Int16:
case Scalar::Uint16:
case Scalar::Int32:
case Scalar::Uint32:
case Scalar::Float32:
case Scalar::Float64:
case Scalar::Uint8Clamped:
case Scalar::MaxTypedArrayViewType:
MOZ_CRASH("should only handle SIMD types");
}
}
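Which lanes a partial load defines follows from the scalar move semantics noted in the comments: movss/movd fill lane x and zero y, z and w, while movsd/movq fill x and y and zero z and w. A plain C++ model of the observable result (illustration only, not jit code; the function name is made up):

#include <array>
#include <cstdio>

// Lanes 0..numElems-1 come from memory; the remaining lanes read back as zero,
// which is what the partial-load tests earlier in this patch expect for z/w.
static std::array<float, 4> PartialLoad(const float *mem, unsigned numElems)
{
    std::array<float, 4> out = { 0.f, 0.f, 0.f, 0.f };
    for (unsigned i = 0; i < numElems; i++)
        out[i] = mem[i];
    return out;
}

int main()
{
    float heap[4] = { 1.f, 2.f, 3.f, 4.f };
    std::array<float, 4> xyz = PartialLoad(heap, 3); // like loadXYZ
    std::printf("%g %g %g %g\n", xyz[0], xyz[1], xyz[2], xyz[3]); // prints 1 2 3 0
    return 0;
}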
void
CodeGeneratorX64::emitSimdLoad(LAsmJSLoadHeap *ins)
{
MAsmJSLoadHeap *mir = ins->mir();
Scalar::Type type = mir->accessType();
const LAllocation *ptr = ins->ptr();
FloatRegister out = ToFloatRegister(ins->output());
Operand srcAddr(HeapReg);
if (ptr->isConstant()) {
int32_t ptrImm = ptr->toConstant()->toInt32();
MOZ_ASSERT(ptrImm >= 0);
srcAddr = Operand(HeapReg, ptrImm);
} else {
srcAddr = Operand(HeapReg, ToRegister(ptr), TimesOne);
}
uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
if (mir->needsBoundsCheck()) {
maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0)).offset();
masm.j(Assembler::AboveOrEqual, mir->outOfBoundsLabel()); // Throws RangeError
}
unsigned numElems = mir->numSimdElems();
if (numElems == 3) {
MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
Operand shiftedOffset(HeapReg);
if (ptr->isConstant())
shiftedOffset = Operand(HeapReg, ptr->toConstant()->toInt32() + 2 * sizeof(float));
else
shiftedOffset = Operand(HeapReg, ToRegister(ptr), TimesOne, 2 * sizeof(float));
// Load XY
uint32_t before = masm.size();
loadSimd(type, 2, srcAddr, out);
uint32_t after = masm.size();
// We're noting a load of 3 elements, so that the bounds check checks
// for 3 elements.
masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
// Load Z (W is zeroed)
before = after;
loadSimd(type, 1, shiftedOffset, ScratchSimdReg);
after = masm.size();
masm.append(AsmJSHeapAccess(before, after, 1, type));
// Move ZW atop XY
masm.vmovlhps(ScratchSimdReg, out, out);
return;
}
uint32_t before = masm.size();
loadSimd(type, numElems, srcAddr, out);
uint32_t after = masm.size();
masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
}
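The numElems == 3 path assembles its result from two loads: XY goes straight into the output register, Z lands in lane x of ScratchSimdReg (its other lanes zeroed), and vmovlhps copies the scratch register's low 64 bits into the output's high 64 bits. A small model of that recombination (illustration only; the helper name is made up):

#include <array>
#include <cassert>

typedef std::array<float, 4> Lanes;

// Models vmovlhps(src1, src0, dest): dest keeps src0's low two lanes and
// receives src1's low two lanes in its high half.
static Lanes MovLHPS(const Lanes &src1, const Lanes &src0)
{
    Lanes dest = { src0[0], src0[1], src1[0], src1[1] };
    return dest;
}

int main()
{
    Lanes xy     = { 1.f, 2.f, 0.f, 0.f }; // loadSimd(type, 2, srcAddr, out)
    Lanes zLow   = { 3.f, 0.f, 0.f, 0.f }; // loadSimd(type, 1, shiftedOffset, ScratchSimdReg)
    Lanes result = MovLHPS(zLow, xy);
    assert(result[0] == 1.f && result[1] == 2.f && result[2] == 3.f && result[3] == 0.f);
    return 0;
}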
void
CodeGeneratorX64::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
{
@ -263,6 +362,9 @@ CodeGeneratorX64::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
const LDefinition *out = ins->output();
Operand srcAddr(HeapReg);
if (Scalar::isSimdType(vt))
return emitSimdLoad(ins);
if (ptr->isConstant()) {
int32_t ptrImm = ptr->toConstant()->toInt32();
MOZ_ASSERT(ptrImm >= 0);
@ -276,13 +378,9 @@ CodeGeneratorX64::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
if (mir->needsBoundsCheck()) {
CodeOffsetLabel cmp = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0));
if (mir->outOfBoundsLabel()) {
masm.j(Assembler::AboveOrEqual, mir->outOfBoundsLabel()); // Throws RangeError
} else {
ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), vt);
addOutOfLineCode(ool, ins->mir());
masm.j(Assembler::AboveOrEqual, ool->entry());
}
ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), vt);
addOutOfLineCode(ool, ins->mir());
masm.j(Assembler::AboveOrEqual, ool->entry());
maybeCmpOffset = cmp.offset();
}
@ -296,8 +394,8 @@ CodeGeneratorX64::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
case Scalar::Uint32: masm.movl(srcAddr, ToRegister(out)); break;
case Scalar::Float32: masm.loadFloat32(srcAddr, ToFloatRegister(out)); break;
case Scalar::Float64: masm.loadDouble(srcAddr, ToFloatRegister(out)); break;
case Scalar::Float32x4: masm.loadUnalignedFloat32x4(srcAddr, ToFloatRegister(out)); break;
case Scalar::Int32x4: masm.loadUnalignedInt32x4(srcAddr, ToFloatRegister(out)); break;
case Scalar::Float32x4:
case Scalar::Int32x4: MOZ_CRASH("SIMD loads should be handled in emitSimdLoad");
case Scalar::Uint8Clamped:
case Scalar::MaxTypedArrayViewType:
MOZ_CRASH("unexpected array type");
@ -310,6 +408,105 @@ CodeGeneratorX64::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
masm.append(AsmJSHeapAccess(before, after, vt, ToAnyRegister(out), maybeCmpOffset));
}
void
CodeGeneratorX64::storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in,
const Operand &dstAddr)
{
switch (type) {
case Scalar::Float32x4: {
switch (numElems) {
// In memory-to-register mode, movss zeroes out the high lanes.
case 1: masm.storeFloat32(in, dstAddr); break;
// See comment above, which also applies to movsd.
case 2: masm.storeDouble(in, dstAddr); break;
case 4: masm.storeUnalignedFloat32x4(in, dstAddr); break;
default: MOZ_CRASH("unexpected size for partial store");
}
break;
}
case Scalar::Int32x4: {
switch (numElems) {
// In memory-to-register mode, movd zeroes out the high lanes.
case 1: masm.vmovd(in, dstAddr); break;
// See comment above, which also applies to movq.
case 2: masm.movq(in, dstAddr); break;
case 4: masm.storeUnalignedInt32x4(in, dstAddr); break;
default: MOZ_CRASH("unexpected size for partial store");
}
break;
}
case Scalar::Int8:
case Scalar::Uint8:
case Scalar::Int16:
case Scalar::Uint16:
case Scalar::Int32:
case Scalar::Uint32:
case Scalar::Float32:
case Scalar::Float64:
case Scalar::Uint8Clamped:
case Scalar::MaxTypedArrayViewType:
MOZ_CRASH("should only handle SIMD types");
}
}
void
CodeGeneratorX64::emitSimdStore(LAsmJSStoreHeap *ins)
{
MAsmJSStoreHeap *mir = ins->mir();
Scalar::Type type = mir->accessType();
const LAllocation *ptr = ins->ptr();
FloatRegister in = ToFloatRegister(ins->value());
Operand dstAddr(HeapReg);
if (ptr->isConstant()) {
int32_t ptrImm = ptr->toConstant()->toInt32();
MOZ_ASSERT(ptrImm >= 0);
dstAddr = Operand(HeapReg, ptrImm);
} else {
dstAddr = Operand(HeapReg, ToRegister(ptr), TimesOne);
}
uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
if (mir->needsBoundsCheck()) {
maybeCmpOffset = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0)).offset();
masm.j(Assembler::AboveOrEqual, mir->outOfBoundsLabel()); // Throws RangeError
}
unsigned numElems = mir->numSimdElems();
if (numElems == 3) {
MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
Operand shiftedOffset(HeapReg);
if (ptr->isConstant())
shiftedOffset = Operand(HeapReg, ptr->toConstant()->toInt32() + 2 * sizeof(float));
else
shiftedOffset = Operand(HeapReg, ToRegister(ptr), TimesOne, 2 * sizeof(float));
// Store Z first: it would be observable to store XY first, in the
// case XY can be stored in bounds but Z can't (in this case, we'd throw
// without restoring the values previously stored before XY).
masm.vmovhlps(in, ScratchSimdReg, ScratchSimdReg);
uint32_t before = masm.size();
storeSimd(type, 1, ScratchSimdReg, shiftedOffset);
uint32_t after = masm.size();
// We're noting a store of 3 elements, so that the bounds check checks
// for 3 elements.
masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
// Store XY
before = after;
storeSimd(type, 2, in, dstAddr);
after = masm.size();
masm.append(AsmJSHeapAccess(before, after, 2, type));
return;
}
uint32_t before = masm.size();
storeSimd(type, numElems, in, dstAddr);
uint32_t after = masm.size();
masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
}
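The Z-before-XY ordering above matters when only part of the 3-lane store is in bounds: if the write of Z (the highest address) traps, X and Y have not been written yet, so a throwing storeXYZ leaves the heap untouched. A toy model of that argument, with the out-of-bounds fault modeled as an exception on the individual lane write (assumption for illustration; all names are made up):

#include <cstddef>
#include <stdexcept>

static void StoreLane(float *heap, size_t heapLen, size_t i, float v)
{
    if (i >= heapLen)
        throw std::range_error("out of bounds"); // stands in for the heap-access fault
    heap[i] = v;
}

// Z first: if lane Z faults, lanes X and Y have not been written, so an
// out-of-bounds storeXYZ is unobservable, as the asm.js semantics require.
static void StoreXYZ(float *heap, size_t heapLen, size_t idx, const float v[3])
{
    StoreLane(heap, heapLen, idx + 2, v[2]);
    StoreLane(heap, heapLen, idx,     v[0]);
    StoreLane(heap, heapLen, idx + 1, v[1]);
}

int main()
{
    float heap[4] = { 1.f, 2.f, 3.f, 4.f };
    const float v[3] = { -1.f, -2.f, -3.f };
    try {
        StoreXYZ(heap, 4, 2, v); // touches indexes 2, 3 and 4; index 4 is OOB
    } catch (const std::range_error &) {}
    return (heap[2] == 3.f && heap[3] == 4.f) ? 0 : 1;
}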
void
CodeGeneratorX64::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
{
@ -318,6 +515,9 @@ CodeGeneratorX64::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
const LAllocation *ptr = ins->ptr();
Operand dstAddr(HeapReg);
if (Scalar::isSimdType(vt))
return emitSimdStore(ins);
if (ptr->isConstant()) {
int32_t ptrImm = ptr->toConstant()->toInt32();
MOZ_ASSERT(ptrImm >= 0);
@ -331,10 +531,7 @@ CodeGeneratorX64::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
if (mir->needsBoundsCheck()) {
CodeOffsetLabel cmp = masm.cmp32WithPatch(ToRegister(ptr), Imm32(0));
if (mir->outOfBoundsLabel())
masm.j(Assembler::AboveOrEqual, mir->outOfBoundsLabel()); // Throws RangeError
else
masm.j(Assembler::AboveOrEqual, &rejoin);
masm.j(Assembler::AboveOrEqual, &rejoin);
maybeCmpOffset = cmp.offset();
}
@ -365,8 +562,8 @@ CodeGeneratorX64::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
case Scalar::Uint32: masm.movl(ToRegister(ins->value()), dstAddr); break;
case Scalar::Float32: masm.storeFloat32(ToFloatRegister(ins->value()), dstAddr); break;
case Scalar::Float64: masm.storeDouble(ToFloatRegister(ins->value()), dstAddr); break;
case Scalar::Float32x4: masm.storeUnalignedFloat32x4(ToFloatRegister(ins->value()), dstAddr); break;
case Scalar::Int32x4: masm.storeUnalignedInt32x4(ToFloatRegister(ins->value()), dstAddr); break;
case Scalar::Float32x4:
case Scalar::Int32x4: MOZ_CRASH("SIMD stores must be handled in emitSimdStore");
case Scalar::Uint8Clamped:
case Scalar::MaxTypedArrayViewType:
MOZ_CRASH("unexpected array type");

View file

@ -27,6 +27,10 @@ class CodeGeneratorX64 : public CodeGeneratorX86Shared
Operand dest, MIRType slotType);
void memoryBarrier(MemoryBarrierBits barrier);
void loadSimd(Scalar::Type type, unsigned numElems, const Operand &srcAddr, FloatRegister out);
void emitSimdLoad(LAsmJSLoadHeap *ins);
void storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, const Operand &dstAddr);
void emitSimdStore(LAsmJSStoreHeap *ins);
public:
CodeGeneratorX64(MIRGenerator *gen, LIRGraph *graph, MacroAssembler *masm);

View file

@ -148,7 +148,7 @@ LIRGeneratorX64::visitAsmJSLoadHeap(MAsmJSLoadHeap *ins)
// offset in the addressing mode would not wrap back into the protected area
// reserved for the heap. For simplicity (and since we don't care about
// getting maximum performance in these cases) only allow constant
// opererands when skipping bounds checks.
// operands when skipping bounds checks.
LAllocation ptrAlloc = ins->needsBoundsCheck()
? useRegisterAtStart(ptr)
: useRegisterOrNonNegativeConstantAtStart(ptr);

View file

@ -467,6 +467,11 @@ class Assembler : public AssemblerX86Shared
masm.vmovss_mr_disp32(src.offset, src.base.code(), dest.code());
return CodeOffsetLabel(masm.currentOffset());
}
CodeOffsetLabel vmovdWithPatch(Address src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vmovd_mr_disp32(src.offset, src.base.code(), dest.code());
return CodeOffsetLabel(masm.currentOffset());
}
CodeOffsetLabel vmovsdWithPatch(Address src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vmovsd_mr_disp32(src.offset, src.base.code(), dest.code());
@ -496,6 +501,11 @@ class Assembler : public AssemblerX86Shared
masm.movl_rm_disp32(src.code(), dest.offset, dest.base.code());
return CodeOffsetLabel(masm.currentOffset());
}
CodeOffsetLabel vmovdWithPatch(FloatRegister src, Address dest) {
MOZ_ASSERT(HasSSE2());
masm.vmovd_rm_disp32(src.code(), dest.offset, dest.base.code());
return CodeOffsetLabel(masm.currentOffset());
}
CodeOffsetLabel vmovssWithPatch(FloatRegister src, Address dest) {
MOZ_ASSERT(HasSSE2());
masm.vmovss_rm_disp32(src.code(), dest.offset, dest.base.code());
@ -551,6 +561,11 @@ class Assembler : public AssemblerX86Shared
masm.vmovss_mr(src.addr, dest.code());
return CodeOffsetLabel(masm.currentOffset());
}
CodeOffsetLabel vmovdWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vmovd_mr(src.addr, dest.code());
return CodeOffsetLabel(masm.currentOffset());
}
CodeOffsetLabel vmovsdWithPatch(PatchedAbsoluteAddress src, FloatRegister dest) {
MOZ_ASSERT(HasSSE2());
masm.vmovsd_mr(src.addr, dest.code());
@ -595,6 +610,11 @@ class Assembler : public AssemblerX86Shared
masm.vmovss_rm(src.code(), dest.addr);
return CodeOffsetLabel(masm.currentOffset());
}
CodeOffsetLabel vmovdWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) {
MOZ_ASSERT(HasSSE2());
masm.vmovd_rm(src.code(), dest.addr);
return CodeOffsetLabel(masm.currentOffset());
}
CodeOffsetLabel vmovsdWithPatch(FloatRegister src, PatchedAbsoluteAddress dest) {
MOZ_ASSERT(HasSSE2());
masm.vmovsd_rm(src.code(), dest.addr);

View file

@ -269,8 +269,8 @@ CodeGeneratorX86::load(Scalar::Type vt, const T &srcAddr, const LDefinition *out
case Scalar::Uint32: masm.movlWithPatch(srcAddr, ToRegister(out)); break;
case Scalar::Float32: masm.vmovssWithPatch(srcAddr, ToFloatRegister(out)); break;
case Scalar::Float64: masm.vmovsdWithPatch(srcAddr, ToFloatRegister(out)); break;
case Scalar::Float32x4: masm.vmovupsWithPatch(srcAddr, ToFloatRegister(out)); break;
case Scalar::Int32x4: masm.vmovdquWithPatch(srcAddr, ToFloatRegister(out)); break;
case Scalar::Float32x4:
case Scalar::Int32x4: MOZ_CRASH("SIMD loads should be handled in their own function");
case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected type");
}
}
@ -354,14 +354,135 @@ CodeGeneratorX86::memoryBarrier(MemoryBarrierBits barrier)
masm.storeLoadFence();
}
template<typename T>
void
CodeGeneratorX86::loadSimd(Scalar::Type type, unsigned numElems, T srcAddr, FloatRegister out)
{
switch (type) {
case Scalar::Float32x4: {
switch (numElems) {
// In memory-to-register mode, movss zeroes out the high lanes.
case 1: masm.vmovssWithPatch(srcAddr, out); break;
// See comment above, which also applies to movsd.
case 2: masm.vmovsdWithPatch(srcAddr, out); break;
case 4: masm.vmovupsWithPatch(srcAddr, out); break;
default: MOZ_CRASH("unexpected size for partial load");
}
break;
}
case Scalar::Int32x4: {
switch (numElems) {
// In memory-to-register mode, movd zeroes out the high lanes.
case 1: masm.vmovdWithPatch(srcAddr, out); break;
// See comment above, which also applies to movsd.
// TODO memory-to-xmm movq is encodable on x86 as well
case 2: masm.vmovsdWithPatch(srcAddr, out); break;
case 4: masm.vmovdquWithPatch(srcAddr, out); break;
default: MOZ_CRASH("unexpected size for partial load");
}
break;
}
case Scalar::Int8:
case Scalar::Uint8:
case Scalar::Int16:
case Scalar::Uint16:
case Scalar::Int32:
case Scalar::Uint32:
case Scalar::Float32:
case Scalar::Float64:
case Scalar::Uint8Clamped:
case Scalar::MaxTypedArrayViewType:
MOZ_CRASH("should only handle SIMD types");
}
}
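// To summarize the selection above: full loads use movups/movdqu, two-lane
// loads use movsd, and single-lane loads use movss (Float32x4) or movd
// (Int32x4). All the partial forms zero the lanes they do not load, which is
// what lets the XYZ case below build [X, Y, Z, 0] without a separate clear.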
void
CodeGeneratorX86::emitSimdLoad(Scalar::Type type, unsigned numElems, const LAllocation *ptr,
FloatRegister out, bool needsBoundsCheck /* = false */,
Label *oobLabel /* = nullptr */)
{
if (ptr->isConstant()) {
MOZ_ASSERT(!needsBoundsCheck);
if (numElems == 3) {
MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
// Load XY
emitSimdLoad(type, 2, ptr, out);
// Load Z (W is zeroed)
// This add won't overflow, as we've checked that we have at least
// room for loading 4 elements during asm.js validation.
PatchedAbsoluteAddress srcAddr((void *) (ptr->toConstant()->toInt32() + 2 * sizeof(float)));
uint32_t before = masm.size();
loadSimd(type, 1, srcAddr, ScratchSimdReg);
uint32_t after = masm.size();
masm.append(AsmJSHeapAccess(before, after, 1, type));
// Move ZW atop XY
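// (The scratch register's low quadword, holding [Z, 0], becomes the high
// quadword of out, so out ends up as [X, Y, Z, 0].)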
masm.vmovlhps(ScratchSimdReg, out, out);
return;
}
PatchedAbsoluteAddress srcAddr((void *) ptr->toConstant()->toInt32());
uint32_t before = masm.size();
loadSimd(type, numElems, srcAddr, out);
uint32_t after = masm.size();
masm.append(AsmJSHeapAccess(before, after, numElems, type));
return;
}
Register ptrReg = ToRegister(ptr);
uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
if (needsBoundsCheck) {
maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(0)).offset();
masm.j(Assembler::AboveOrEqual, oobLabel); // Throws RangeError
}
uint32_t before = masm.size();
if (numElems == 3) {
MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
// Load XY
Address addr(ptrReg, 0);
before = masm.size();
loadSimd(type, 2, addr, out);
uint32_t after = masm.size();
masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
// Load Z (W is zeroed)
// This access is still in bounds: either the bounds check above covers it,
// or the check was elided because validation guaranteed enough room.
Address shiftedAddr(ptrReg, 2 * sizeof(float));
before = after;
loadSimd(type, 1, shiftedAddr, ScratchSimdReg);
after = masm.size();
masm.append(AsmJSHeapAccess(before, after, 1, type));
// Move ZW atop XY
masm.vmovlhps(ScratchSimdReg, out, out);
return;
}
Address addr(ptrReg, 0);
loadSimd(type, numElems, addr, out);
uint32_t after = masm.size();
masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
}
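// For reference, the XYZ case above composes its result roughly as follows
// (illustrative only, register and address names are placeholders):
//   movsd   out, [ptr]        // out     = [X, Y, 0, 0]
//   movss   scratch, [ptr+8]  // scratch = [Z, 0, 0, 0]  (movd for Int32x4)
//   movlhps out, scratch      // out     = [X, Y, Z, 0]
// The bounds check, when required, is emitted once before the first access.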
void
CodeGeneratorX86::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
{
const MAsmJSLoadHeap *mir = ins->mir();
Scalar::Type vt = mir->accessType();
Scalar::Type accessType = mir->accessType();
const LAllocation *ptr = ins->ptr();
const LDefinition *out = ins->output();
if (Scalar::isSimdType(accessType)) {
return emitSimdLoad(accessType, mir->numSimdElems(), ptr, ToFloatRegister(out),
mir->needsBoundsCheck(), mir->outOfBoundsLabel());
}
memoryBarrier(ins->mir()->barrierBefore());
if (ptr->isConstant()) {
@ -370,7 +491,7 @@ CodeGeneratorX86::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
// immediate in the instruction. This displacement will fixed up when the
// base address is known during dynamic linking (AsmJSModule::initHeap).
PatchedAbsoluteAddress srcAddr((void *) ptr->toConstant()->toInt32());
loadAndNoteViewTypeElement(vt, srcAddr, out);
loadAndNoteViewTypeElement(accessType, srcAddr, out);
memoryBarrier(ins->mir()->barrierAfter());
return;
}
@ -379,28 +500,24 @@ CodeGeneratorX86::visitAsmJSLoadHeap(LAsmJSLoadHeap *ins)
Address srcAddr(ptrReg, 0);
if (!mir->needsBoundsCheck()) {
loadAndNoteViewTypeElement(vt, srcAddr, out);
loadAndNoteViewTypeElement(accessType, srcAddr, out);
memoryBarrier(ins->mir()->barrierAfter());
return;
}
OutOfLineLoadTypedArrayOutOfBounds *ool = nullptr;
OutOfLineLoadTypedArrayOutOfBounds *ool =
new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), accessType);
CodeOffsetLabel cmp = masm.cmp32WithPatch(ptrReg, Imm32(0));
if (mir->outOfBoundsLabel()) {
masm.j(Assembler::AboveOrEqual, mir->outOfBoundsLabel()); // Throws RangeError
} else {
ool = new(alloc()) OutOfLineLoadTypedArrayOutOfBounds(ToAnyRegister(out), vt);
addOutOfLineCode(ool, mir);
masm.j(Assembler::AboveOrEqual, ool->entry());
}
addOutOfLineCode(ool, mir);
masm.j(Assembler::AboveOrEqual, ool->entry());
uint32_t before = masm.size();
load(vt, srcAddr, out);
load(accessType, srcAddr, out);
uint32_t after = masm.size();
if (ool)
masm.bind(ool->rejoin());
memoryBarrier(ins->mir()->barrierAfter());
masm.append(AsmJSHeapAccess(before, after, vt, ToAnyRegister(out), cmp.offset()));
masm.append(AsmJSHeapAccess(before, after, accessType, ToAnyRegister(out), cmp.offset()));
}
template<typename T>
@ -417,8 +534,8 @@ CodeGeneratorX86::store(Scalar::Type vt, const LAllocation *value, const T &dstA
case Scalar::Uint32: masm.movlWithPatch(ToRegister(value), dstAddr); break;
case Scalar::Float32: masm.vmovssWithPatch(ToFloatRegister(value), dstAddr); break;
case Scalar::Float64: masm.vmovsdWithPatch(ToFloatRegister(value), dstAddr); break;
case Scalar::Float32x4: masm.vmovupsWithPatch(ToFloatRegister(value), dstAddr); break;
case Scalar::Int32x4: masm.vmovdquWithPatch(ToFloatRegister(value), dstAddr); break;
case Scalar::Float32x4:
case Scalar::Int32x4: MOZ_CRASH("SIMD stores should be handled in emitSimdStore");
case Scalar::MaxTypedArrayViewType: MOZ_CRASH("unexpected type");
}
}
@ -459,6 +576,120 @@ CodeGeneratorX86::visitStoreTypedArrayElementStatic(LStoreTypedArrayElementStati
masm.bind(&rejoin);
}
template<typename T>
void
CodeGeneratorX86::storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, T destAddr)
{
switch (type) {
case Scalar::Float32x4: {
switch (numElems) {
// movss stores only the low lane; the rest of the destination is untouched.
case 1: masm.vmovssWithPatch(in, destAddr); break;
// movsd stores only the low two lanes.
case 2: masm.vmovsdWithPatch(in, destAddr); break;
case 4: masm.vmovupsWithPatch(in, destAddr); break;
default: MOZ_CRASH("unexpected size for partial load");
}
break;
}
case Scalar::Int32x4: {
switch (numElems) {
// movd stores only the low 32-bit lane.
case 1: masm.vmovdWithPatch(in, destAddr); break;
// movsd stores only the low two lanes.
// Cross-domain penalty here, as movq isn't encodable on x86.
case 2: masm.vmovsdWithPatch(in, destAddr); break;
case 4: masm.vmovdquWithPatch(in, destAddr); break;
default: MOZ_CRASH("unexpected size for partial load");
}
break;
}
case Scalar::Int8:
case Scalar::Uint8:
case Scalar::Int16:
case Scalar::Uint16:
case Scalar::Int32:
case Scalar::Uint32:
case Scalar::Float32:
case Scalar::Float64:
case Scalar::Uint8Clamped:
case Scalar::MaxTypedArrayViewType:
MOZ_CRASH("should only handle SIMD types");
}
}
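// Unlike the load forms, these partial stores write only the lanes named:
// movss/movd write 4 bytes and movsd writes 8, leaving the rest of the
// destination memory untouched.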
void
CodeGeneratorX86::emitSimdStore(Scalar::Type type, unsigned numElems, FloatRegister in,
const LAllocation *ptr, bool needsBoundsCheck /* = false */,
Label *oobLabel /* = nullptr */)
{
if (ptr->isConstant()) {
MOZ_ASSERT(!needsBoundsCheck);
if (numElems == 3) {
MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
// Store XY
emitSimdStore(type, 2, in, ptr);
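// vmovhlps copies the high quadword of `in` (Z and W) into the low quadword
// of the scratch register, so Z can then be written with a scalar store.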
masm.vmovhlps(in, ScratchSimdReg, ScratchSimdReg);
// Store Z
// This add won't overflow, as we've checked that we have at least
// room for storing 4 elements during asm.js validation.
PatchedAbsoluteAddress dstAddr((void *) (ptr->toConstant()->toInt32() + 2 * sizeof(float)));
uint32_t before = masm.size();
storeSimd(type, 1, ScratchSimdReg, dstAddr);
uint32_t after = masm.size();
masm.append(AsmJSHeapAccess(before, after, 1, type));
return;
}
PatchedAbsoluteAddress dstAddr((void *) ptr->toConstant()->toInt32());
uint32_t before = masm.size();
storeSimd(type, numElems, in, dstAddr);
uint32_t after = masm.size();
masm.append(AsmJSHeapAccess(before, after, numElems, type));
return;
}
Register ptrReg = ToRegister(ptr);
uint32_t maybeCmpOffset = AsmJSHeapAccess::NoLengthCheck;
if (needsBoundsCheck) {
maybeCmpOffset = masm.cmp32WithPatch(ptrReg, Imm32(0)).offset();
masm.j(Assembler::AboveOrEqual, oobLabel); // Throws RangeError
}
uint32_t before = masm.size();
if (numElems == 3) {
MOZ_ASSERT(type == Scalar::Int32x4 || type == Scalar::Float32x4);
// Store XY
Address addr(ptrReg, 0);
before = masm.size();
storeSimd(type, 2, in, addr);
uint32_t after = masm.size();
masm.append(AsmJSHeapAccess(before, after, 3, type, maybeCmpOffset));
masm.vmovhlps(in, ScratchSimdReg, ScratchSimdReg);
// Store Z
// This access is still in bounds: either the bounds check above covers it,
// or the check was elided because validation guaranteed enough room.
Address shiftedAddr(ptrReg, 2 * sizeof(float));
before = masm.size();
storeSimd(type, 1, ScratchSimdReg, shiftedAddr);
after = masm.size();
masm.append(AsmJSHeapAccess(before, after, 1, type));
return;
}
Address addr(ptrReg, 0);
storeSimd(type, numElems, in, addr);
uint32_t after = masm.size();
masm.append(AsmJSHeapAccess(before, after, numElems, type, maybeCmpOffset));
}
void
CodeGeneratorX86::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
{
@ -467,6 +698,11 @@ CodeGeneratorX86::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
const LAllocation *value = ins->value();
const LAllocation *ptr = ins->ptr();
if (Scalar::isSimdType(vt)) {
return emitSimdStore(vt, mir->numSimdElems(), ToFloatRegister(value), ptr,
mir->needsBoundsCheck(), mir->outOfBoundsLabel());
}
memoryBarrier(ins->mir()->barrierBefore());
if (ptr->isConstant()) {
@ -491,11 +727,7 @@ CodeGeneratorX86::visitAsmJSStoreHeap(LAsmJSStoreHeap *ins)
CodeOffsetLabel cmp = masm.cmp32WithPatch(ptrReg, Imm32(0));
Label rejoin;
if (mir->outOfBoundsLabel())
masm.j(Assembler::AboveOrEqual, mir->outOfBoundsLabel()); // Throws RangeError
else
masm.j(Assembler::AboveOrEqual, &rejoin);
masm.j(Assembler::AboveOrEqual, &rejoin);
uint32_t before = masm.size();
store(vt, value, dstAddr);


@ -37,6 +37,17 @@ class CodeGeneratorX86 : public CodeGeneratorX86Shared
template<typename T>
void store(Scalar::Type vt, const LAllocation *value, const T &dstAddr);
template<typename T>
void loadSimd(Scalar::Type type, unsigned numElems, T srcAddr, FloatRegister out);
void emitSimdLoad(Scalar::Type type, unsigned numElems, const LAllocation *ptr,
FloatRegister out, bool needsBoundsCheck = false, Label *oobLabel = nullptr);
template<typename T>
void storeSimd(Scalar::Type type, unsigned numElems, FloatRegister in, T destAddr);
void emitSimdStore(Scalar::Type type, unsigned numElems, FloatRegister in,
const LAllocation *ptr, bool needsBoundsCheck = false,
Label *oobLabel = nullptr);
void memoryBarrier(MemoryBarrierBits barrier);
public:


@ -1467,6 +1467,27 @@ isSimdType(Type atype) {
MOZ_CRASH("invalid scalar type");
}
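// Per-lane byte size of a SIMD view type; passing a non-SIMD type here is a
// bug and crashes below.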
static inline size_t
scalarByteSize(Type atype) {
switch (atype) {
case Int32x4:
case Float32x4:
return 4;
case Int8:
case Uint8:
case Uint8Clamped:
case Int16:
case Uint16:
case Int32:
case Uint32:
case Float32:
case Float64:
case MaxTypedArrayViewType:
break;
}
MOZ_CRASH("invalid simd type");
}
} /* namespace Scalar */
} /* namespace js */