Bug 538060 - nanojit: improve 64-bit loads and stores in the X64 back-end. r=gal,rreitmai,edwsmith.

--HG--
extra : convert_revision : 1fd416d0954dc667073f3a92087c01fc23e5c3aa
This commit is contained in:
Nicholas Nethercote 2010-01-11 15:51:49 +11:00
Родитель 1dcde024a1
Коммит ebdd2dbd3c
6 изменённых файлов: 51 добавлений и 93 удалений

Просмотреть файл

@ -659,9 +659,9 @@ FragmentAssembler::assemble_call(const string &op)
size_t argc = mTokens.size();
for (size_t i = 0; i < argc; ++i) {
args[i] = ref(mTokens[mTokens.size() - (i+1)]);
if (args[i]->isFloat()) ty = ARGSIZE_F;
else if (args[i]->isQuad()) ty = ARGSIZE_Q;
else ty = ARGSIZE_I;
if (args[i]->isF64()) ty = ARGSIZE_F;
else if (args[i]->isI64()) ty = ARGSIZE_Q;
else ty = ARGSIZE_I;
// Nb: i+1 because argMask() uses 1-based arg counting.
ci->_argtypes |= argMask(ty, i+1, argc);
}

Просмотреть файл

@ -423,22 +423,6 @@ namespace nanojit
(offsetof(LInsSti, ins) - offsetof(LInsSti, oprnd_2)) );
}
// Reports whether this instruction's opcode is on the hand-maintained list
// below. Any opcode that is not named here is reported as non-float, so the
// answer is only as complete as this list.
bool LIns::isFloat() const {
    bool onFloatList;
    switch (opcode()) {
        case LIR_fadd:
        case LIR_fsub:
        case LIR_fmul:
        case LIR_fdiv:
        case LIR_fneg:
        case LIR_fcall:
        case LIR_i2f:
        case LIR_u2f:
            onFloatList = true;
            break;
        default:
            onFloatList = false;
            break;
    }
    return onFloatList;
}
LIns* LirWriter::ins2i(LOpcode v, LIns* oprnd1, int32_t imm)
{
return ins2(v, oprnd1, insImm(imm));

Просмотреть файл

@ -492,14 +492,9 @@ namespace nanojit
// True when this instruction's opcode is exactly `o`.
bool isop(LOpcode o) const {
    const LOpcode mine = opcode();
    return mine == o;
}
bool isQuad() const {
LTy ty = retTypes[opcode()];
return ty == LTy_I64 || ty == LTy_F64;
}
// True for LIR_ov and for anything isCmp() accepts.
bool isCond() const {
    if (isop(LIR_ov))
        return true;
    return isCmp();
}
bool isFloat() const; // not inlined because it contains a switch
bool isCmp() const {
LOpcode op = opcode();
return (op >= LIR_eq && op <= LIR_uge) ||
@ -550,11 +545,26 @@ namespace nanojit
return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl);
}
bool isPtr() {
#ifdef NANOJIT_64BIT
return retTypes[opcode()] == LTy_I64;
#else
bool isVoid() const {
return retTypes[opcode()] == LTy_Void;
}
bool isI32() const {
return retTypes[opcode()] == LTy_I32;
}
bool isI64() const {
return retTypes[opcode()] == LTy_I64;
}
bool isF64() const {
return retTypes[opcode()] == LTy_F64;
}
// True when the result type is one of the two 64-bit kinds tested by
// isI64() and isF64().
bool isQuad() const {
    if (isI64())
        return true;
    return isF64();
}
// True when the result type matches the build's pointer-sized integer kind:
// isI64() when NANOJIT_64BIT is defined, isI32() otherwise.
bool isPtr() const {
#ifndef NANOJIT_64BIT
    const bool pointerSized = isI32();
#else
    const bool pointerSized = isI64();
#endif
    return pointerSized;
}
@ -571,7 +581,7 @@ namespace nanojit
if (isCall())
return !isCse();
else
return retTypes[opcode()] == LTy_Void;
return isVoid();
}
inline void* constvalp() const

Просмотреть файл

@ -774,7 +774,7 @@ namespace nanojit
prepResultReg(div, rmask(RAX));
Register rhsReg = findRegFor(rhs, (GpRegs ^ (rmask(RAX)|rmask(RDX))));
Register rhsReg = findRegFor(rhs, GpRegs & ~(rmask(RAX)|rmask(RDX)));
Register lhsReg = lhs->isUnusedOrHasUnknownReg()
? findSpecificRegForUnallocated(lhs, RAX)
: lhs->getReg();
@ -1385,23 +1385,20 @@ namespace nanojit
}
void Assembler::asm_load64(LIns *ins) {
Register rr, rb;
int32_t dr;
switch (ins->opcode()) {
case LIR_ldq:
case LIR_ldqc:
regalloc_load(ins, GpRegs, rr, dr, rb);
NanoAssert(IsGpReg(rr));
MOVQRM(rr, dr, rb); // general 64bit load, 32bit const displacement
break;
case LIR_ldf:
case LIR_ldfc:
regalloc_load(ins, GpRegs, rr, dr, rb);
if (IsGpReg(rr)) {
// general 64bit load, 32bit const displacement
MOVQRM(rr, dr, rb);
} else {
NanoAssert(IsFpReg(rr));
// load 64bits into XMM. don't know if double or int64, assume double.
MOVSDRM(rr, dr, rb);
}
regalloc_load(ins, FpRegs, rr, dr, rb);
NanoAssert(IsFpReg(rr));
MOVSDRM(rr, dr, rb); // load 64bits into XMM
break;
case LIR_ld32f:
case LIR_ldc32f:
@ -1454,58 +1451,25 @@ namespace nanojit
NanoAssert(value->isQuad());
Register b = getBaseReg(base, d, BaseRegs);
Register r;
// if we have to choose a register, use a GPR, but not the base reg
if (value->isUnusedOrHasUnknownReg()) {
RegisterMask allow;
// If op is LIR_st32f and we have no reg, prefer FPR over GPR: saves an instruction later,
// and the value is almost certainly going to be operated on as FP later anyway.
// XXX: isFloat doesn't cover float/fmod! see bug 520208.
if (op == LIR_st32f || value->isFloat() || value->isop(LIR_float) || value->isop(LIR_fmod)) {
allow = FpRegs;
} else {
allow = GpRegs;
}
r = findRegFor(value, allow & ~rmask(b));
} else {
r = value->getReg();
}
switch (op) {
case LIR_stqi:
case LIR_stfi:
{
if (IsGpReg(r)) {
// gpr store
MOVQMR(r, d, b);
}
else {
// xmm store
MOVSDMR(r, d, b);
}
case LIR_stqi: {
Register r = findRegFor(value, GpRegs & ~rmask(b));
MOVQMR(r, d, b); // gpr store
break;
}
case LIR_st32f:
{
// need a scratch FPR reg
case LIR_stfi: {
Register r = findRegFor(value, FpRegs);
MOVSDMR(r, d, b); // xmm store
break;
}
case LIR_st32f: {
Register r = findRegFor(value, FpRegs);
Register t = registerAllocTmp(FpRegs & ~rmask(r));
// store
MOVSSMR(t, d, b);
// cvt to single-precision
if (IsGpReg(r))
{
CVTSD2SS(t, t);
MOVQXR(t, r); // xmm <- gpr: use movq xmm, r/m64 (66 REX.W 0F 6E /r)
}
else
{
NanoAssert(IsFpReg(r));
CVTSD2SS(t, r);
}
XORPS(t); // break dependency chains
MOVSSMR(t, d, b); // store
CVTSD2SS(t, r); // cvt to single-precision
XORPS(t); // break dependency chains
break;
}
default:
@ -1516,12 +1480,9 @@ namespace nanojit
void Assembler::asm_store32(LOpcode op, LIns *value, int d, LIns *base) {
// quirk of x86-64: reg cannot appear to be ah/bh/ch/dh
// for single-byte stores with REX prefix
const RegisterMask SrcRegs =
(op == LIR_stb) ?
(GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI)) :
GpRegs;
// Quirk of x86-64: reg cannot appear to be ah/bh/ch/dh for
// single-byte stores with REX prefix.
const RegisterMask SrcRegs = (op == LIR_stb) ? SingleByteStoreRegs : GpRegs;
NanoAssert(!value->isQuad());
Register b = getBaseReg(base, d, BaseRegs);

Просмотреть файл

@ -329,6 +329,10 @@ namespace nanojit
static const int NumSavedRegs = 5; // rbx, r12-15
static const int NumArgRegs = 6;
#endif
// Register mask: GpRegs with the bits for RSP, RBP, RSI and RDI cleared.
//
// Warning: when talking about single byte registers, RSP/RBP/RSI/RDI are
// actually synonyms for AH/CH/DH/BH. So this value means "any
// single-byte GpReg except AH/CH/DH/BH".
static const int SingleByteStoreRegs = GpRegs & ~(1<<RSP | 1<<RBP | 1<<RSI | 1<<RDI);
static inline bool IsFpReg(Register r) {
return ((1<<r) & FpRegs) != 0;

Просмотреть файл

@ -589,7 +589,6 @@ namespace nanojit
}
else
{
int dr = disp(ins);
Register rb;
if (base->isop(LIR_alloc)) {