зеркало из https://github.com/mozilla/gecko-dev.git
Bug 538060 - nanojit: improve 64-bit loads and stores in the X64 back-end. r=gal,rreitmai,edwsmith.
--HG-- extra : convert_revision : 1fd416d0954dc667073f3a92087c01fc23e5c3aa
This commit is contained in:
Родитель
1dcde024a1
Коммит
ebdd2dbd3c
|
@ -659,8 +659,8 @@ FragmentAssembler::assemble_call(const string &op)
|
|||
size_t argc = mTokens.size();
|
||||
for (size_t i = 0; i < argc; ++i) {
|
||||
args[i] = ref(mTokens[mTokens.size() - (i+1)]);
|
||||
if (args[i]->isFloat()) ty = ARGSIZE_F;
|
||||
else if (args[i]->isQuad()) ty = ARGSIZE_Q;
|
||||
if (args[i]->isF64()) ty = ARGSIZE_F;
|
||||
else if (args[i]->isI64()) ty = ARGSIZE_Q;
|
||||
else ty = ARGSIZE_I;
|
||||
// Nb: i+1 because argMask() uses 1-based arg counting.
|
||||
ci->_argtypes |= argMask(ty, i+1, argc);
|
||||
|
|
|
@ -423,22 +423,6 @@ namespace nanojit
|
|||
(offsetof(LInsSti, ins) - offsetof(LInsSti, oprnd_2)) );
|
||||
}
|
||||
|
||||
// Returns true when this instruction's opcode is one of the opcodes listed
// in the switch below (LIR_fadd, LIR_fsub, LIR_fmul, LIR_fdiv, LIR_fneg,
// LIR_fcall, LIR_i2f, LIR_u2f); every other opcode takes the `default`
// branch and yields false.
// Note: LIR_float and LIR_fmod are not in the list, so this returns false
// for them.
bool LIns::isFloat() const {
|
||||
switch (opcode()) {
|
||||
default:
|
||||
return false;
|
||||
case LIR_fadd:
|
||||
case LIR_fsub:
|
||||
case LIR_fmul:
|
||||
case LIR_fdiv:
|
||||
case LIR_fneg:
|
||||
case LIR_fcall:
|
||||
case LIR_i2f:
|
||||
case LIR_u2f:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
LIns* LirWriter::ins2i(LOpcode v, LIns* oprnd1, int32_t imm)
|
||||
{
|
||||
return ins2(v, oprnd1, insImm(imm));
|
||||
|
|
|
@ -492,14 +492,9 @@ namespace nanojit
|
|||
// Returns true when this instruction's opcode equals `o`.
bool isop(LOpcode o) const {
|
||||
return opcode() == o;
|
||||
}
|
||||
// Returns true when the retTypes[] entry for this instruction's opcode is
// LTy_I64 or LTy_F64.
bool isQuad() const {
|
||||
LTy ty = retTypes[opcode()];
|
||||
return ty == LTy_I64 || ty == LTy_F64;
|
||||
}
|
||||
// Returns true when this instruction is LIR_ov or satisfies isCmp().
bool isCond() const {
|
||||
return (isop(LIR_ov)) || isCmp();
|
||||
}
|
||||
bool isFloat() const; // not inlined because it contains a switch
|
||||
bool isCmp() const {
|
||||
LOpcode op = opcode();
|
||||
return (op >= LIR_eq && op <= LIR_uge) ||
|
||||
|
@ -550,11 +545,26 @@ namespace nanojit
|
|||
return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl);
|
||||
}
|
||||
|
||||
bool isPtr() {
|
||||
#ifdef NANOJIT_64BIT
|
||||
return retTypes[opcode()] == LTy_I64;
|
||||
#else
|
||||
// Returns true when the retTypes[] entry for this instruction's opcode is
// LTy_Void.
bool isVoid() const {
|
||||
return retTypes[opcode()] == LTy_Void;
|
||||
}
|
||||
// Returns true when the retTypes[] entry for this instruction's opcode is
// LTy_I32.
bool isI32() const {
|
||||
return retTypes[opcode()] == LTy_I32;
|
||||
}
|
||||
// Returns true when the retTypes[] entry for this instruction's opcode is
// LTy_I64.
bool isI64() const {
|
||||
return retTypes[opcode()] == LTy_I64;
|
||||
}
|
||||
// Returns true when the retTypes[] entry for this instruction's opcode is
// LTy_F64.
bool isF64() const {
|
||||
return retTypes[opcode()] == LTy_F64;
|
||||
}
|
||||
// Returns true when this instruction satisfies either isI64() or isF64().
bool isQuad() const {
|
||||
return isI64() || isF64();
|
||||
}
|
||||
// Returns true when this instruction's result has the build's pointer-width
// integer type: isI64() when NANOJIT_64BIT is defined, isI32() otherwise.
bool isPtr() const {
|
||||
#ifdef NANOJIT_64BIT
|
||||
return isI64();
|
||||
|
||||
#else
|
||||
return isI32();
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -571,7 +581,7 @@ namespace nanojit
|
|||
if (isCall())
|
||||
return !isCse();
|
||||
else
|
||||
return retTypes[opcode()] == LTy_Void;
|
||||
return isVoid();
|
||||
}
|
||||
|
||||
inline void* constvalp() const
|
||||
|
|
|
@ -774,7 +774,7 @@ namespace nanojit
|
|||
|
||||
prepResultReg(div, rmask(RAX));
|
||||
|
||||
Register rhsReg = findRegFor(rhs, (GpRegs ^ (rmask(RAX)|rmask(RDX))));
|
||||
Register rhsReg = findRegFor(rhs, GpRegs & ~(rmask(RAX)|rmask(RDX)));
|
||||
Register lhsReg = lhs->isUnusedOrHasUnknownReg()
|
||||
? findSpecificRegForUnallocated(lhs, RAX)
|
||||
: lhs->getReg();
|
||||
|
@ -1385,23 +1385,20 @@ namespace nanojit
|
|||
}
|
||||
|
||||
void Assembler::asm_load64(LIns *ins) {
|
||||
|
||||
Register rr, rb;
|
||||
int32_t dr;
|
||||
switch (ins->opcode()) {
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
regalloc_load(ins, GpRegs, rr, dr, rb);
|
||||
NanoAssert(IsGpReg(rr));
|
||||
MOVQRM(rr, dr, rb); // general 64bit load, 32bit const displacement
|
||||
break;
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
regalloc_load(ins, GpRegs, rr, dr, rb);
|
||||
if (IsGpReg(rr)) {
|
||||
// general 64bit load, 32bit const displacement
|
||||
MOVQRM(rr, dr, rb);
|
||||
} else {
|
||||
regalloc_load(ins, FpRegs, rr, dr, rb);
|
||||
NanoAssert(IsFpReg(rr));
|
||||
// load 64bits into XMM. don't know if double or int64, assume double.
|
||||
MOVSDRM(rr, dr, rb);
|
||||
}
|
||||
MOVSDRM(rr, dr, rb); // load 64bits into XMM
|
||||
break;
|
||||
case LIR_ld32f:
|
||||
case LIR_ldc32f:
|
||||
|
@ -1454,57 +1451,24 @@ namespace nanojit
|
|||
NanoAssert(value->isQuad());
|
||||
|
||||
Register b = getBaseReg(base, d, BaseRegs);
|
||||
Register r;
|
||||
|
||||
// if we have to choose a register, use a GPR, but not the base reg
|
||||
if (value->isUnusedOrHasUnknownReg()) {
|
||||
RegisterMask allow;
|
||||
// If op is LIR_st32f and we have no reg, prefer FPR over GPR: saves an instruction later,
|
||||
// and the value is almost certainly going to operated on as FP later anyway.
|
||||
// XXX: isFloat doesn't cover float/fmod! see bug 520208.
|
||||
if (op == LIR_st32f || value->isFloat() || value->isop(LIR_float) || value->isop(LIR_fmod)) {
|
||||
allow = FpRegs;
|
||||
} else {
|
||||
allow = GpRegs;
|
||||
}
|
||||
r = findRegFor(value, allow & ~rmask(b));
|
||||
} else {
|
||||
r = value->getReg();
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
case LIR_stqi:
|
||||
case LIR_stfi:
|
||||
{
|
||||
if (IsGpReg(r)) {
|
||||
// gpr store
|
||||
MOVQMR(r, d, b);
|
||||
}
|
||||
else {
|
||||
// xmm store
|
||||
MOVSDMR(r, d, b);
|
||||
}
|
||||
case LIR_stqi: {
|
||||
Register r = findRegFor(value, GpRegs & ~rmask(b));
|
||||
MOVQMR(r, d, b); // gpr store
|
||||
break;
|
||||
}
|
||||
case LIR_st32f:
|
||||
{
|
||||
// need a scratch FPR reg
|
||||
case LIR_stfi: {
|
||||
Register r = findRegFor(value, FpRegs);
|
||||
MOVSDMR(r, d, b); // xmm store
|
||||
break;
|
||||
}
|
||||
case LIR_st32f: {
|
||||
Register r = findRegFor(value, FpRegs);
|
||||
Register t = registerAllocTmp(FpRegs & ~rmask(r));
|
||||
|
||||
// store
|
||||
MOVSSMR(t, d, b);
|
||||
|
||||
// cvt to single-precision
|
||||
if (IsGpReg(r))
|
||||
{
|
||||
CVTSD2SS(t, t);
|
||||
MOVQXR(t, r); // xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
|
||||
}
|
||||
else
|
||||
{
|
||||
NanoAssert(IsFpReg(r));
|
||||
CVTSD2SS(t, r);
|
||||
}
|
||||
MOVSSMR(t, d, b); // store
|
||||
CVTSD2SS(t, r); // cvt to single-precision
|
||||
XORPS(t); // break dependency chains
|
||||
break;
|
||||
}
|
||||
|
@ -1516,12 +1480,9 @@ namespace nanojit
|
|||
|
||||
void Assembler::asm_store32(LOpcode op, LIns *value, int d, LIns *base) {
|
||||
|
||||
// quirk of x86-64: reg cannot appear to be ah/bh/ch/dh
|
||||
// for single-byte stores with REX prefix
|
||||
const RegisterMask SrcRegs =
|
||||
(op == LIR_stb) ?
|
||||
(GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI)) :
|
||||
GpRegs;
|
||||
// Quirk of x86-64: reg cannot appear to be ah/bh/ch/dh for
|
||||
// single-byte stores with REX prefix.
|
||||
const RegisterMask SrcRegs = (op == LIR_stb) ? SingleByteStoreRegs : GpRegs;
|
||||
|
||||
NanoAssert(!value->isQuad());
|
||||
Register b = getBaseReg(base, d, BaseRegs);
|
||||
|
|
|
@ -329,6 +329,10 @@ namespace nanojit
|
|||
static const int NumSavedRegs = 5; // rbx, r12-15
|
||||
static const int NumArgRegs = 6;
|
||||
#endif
|
||||
// Warning: when talking about single byte registers, RSP/RBP/RSI/RDI are
|
||||
// actually synonyms for AH/CH/DH/BH. So this value means "any
|
||||
// single-byte GpReg except AH/CH/DH/BH".
// Computed as GpRegs with the RSP, RBP, RSI and RDI bits cleared.
|
||||
static const int SingleByteStoreRegs = GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI);
|
||||
|
||||
static inline bool IsFpReg(Register r) {
|
||||
return ((1<<r) & FpRegs) != 0;
|
||||
|
|
|
@ -589,7 +589,6 @@ namespace nanojit
|
|||
}
|
||||
else
|
||||
{
|
||||
|
||||
int dr = disp(ins);
|
||||
Register rb;
|
||||
if (base->isop(LIR_alloc)) {
|
||||
|
|
Загрузка…
Ссылка в новой задаче