Redo X64 asm_fneg to only allocate XMM regs, fix indirect calls, and revert asm_restore to old logic (bug 535706 r=nnethercote+)

The code for indirect calls needed shuffling; we must freeResourcesOf() before
assigning the call address to a register.  The old code was just getting lucky,
and the regstate fixes tickled the latent bug.
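Concretely, in the rewritten indirect-call path the CALL is emitted first (nanojit
assembles backwards), freeResourcesOf() then releases RAX (which usually holds the
call's return value), and only then is the address argument moved into RAX.  A
condensed sketch, using the same helpers that appear in the hunk below:

    // Sketch of the fixed indirect-call ordering; CALLRAX, freeResourcesOf and
    // asm_regarg are the assembler's own helpers, exactly as in the diff below.
    CALLRAX();                                      // call rax (emitted first; codegen runs backwards)

    // Call this now so that the arg setup can involve 'rr'.
    freeResourcesOf(ins);                           // releases RAX, normally allocated as the call's result

    // Only after RAX is released may the call address be assigned to it.
    asm_regarg(ARGSIZE_P, ins->arg(--argc), RAX);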

asm_restore() can be stricter once we eliminate all cases where an F64 instruction
can be assigned to a GPR.  The only known remaining case is asm_quad, which is used
for both LIR_float and LIR_quad; that should be fixed by bug 534310.
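Given that caveat, the reverted asm_restore() keys the reload off the register class
rather than the instruction type.  A condensed sketch of that logic, matching the
hunk below (d is the value's stack slot, r the register being restored):

    // Dispatch on the register class: an XMM register can only hold an F64,
    // while a GPR may hold a 64-bit or 32-bit value -- and, until bug 534310,
    // occasionally an F64 that asm_quad put there, so ins->isF64() alone
    // cannot drive the choice.
    int d = findMemFor(ins);
    if (IsFpReg(r)) {
        NanoAssert(ins->isF64());
        MOVSDRM(r, d, FP);      // reload double from stack slot d into an XMM reg
    } else if (ins->isN64()) {
        MOVQRM(r, d, FP);       // 64-bit reload into a GPR
    } else {
        NanoAssert(ins->isI32());
        NanoAssert(IsGpReg(r));
        MOVLRM(r, d, FP);       // 32-bit reload into a GPR
    }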

--HG--
extra : convert_revision : e46657bf61cade04961da7e8abbb004385aaad7b
Edwin Smith 2010-03-15 21:52:41 -04:00
Parent 148417c45f
Commit a64e9de660
2 changed files with 44 additions and 32 deletions

View file

@@ -451,7 +451,8 @@ namespace nanojit
// XORPD because it's one byte shorter. This is ok because it's only used for
// zeroing an XMM register; hence the single argument.
// Also note that (unlike most SSE2 instructions) XORPS does not have a prefix, thus emitrr() should be used.
void Assembler::XORPS( R r) { emitrr(X64_xorps, r,r); asm_output("xorps %s, %s", RQ(r),RQ(r)); }
void Assembler::XORPS( R r) { emitrr(X64_xorps, r,r); asm_output("xorps %s, %s", RQ(r),RQ(r)); }
void Assembler::XORPS( R l, R r) { emitrr(X64_xorps, l,r); asm_output("xorps %s, %s", RQ(l),RQ(r)); }
void Assembler::DIVSD( R l, R r) { emitprr(X64_divsd, l,r); asm_output("divsd %s, %s", RQ(l),RQ(r)); }
void Assembler::MULSD( R l, R r) { emitprr(X64_mulsd, l,r); asm_output("mulsd %s, %s", RQ(l),RQ(r)); }
void Assembler::ADDSD( R l, R r) { emitprr(X64_addsd, l,r); asm_output("addsd %s, %s", RQ(l),RQ(r)); }
@@ -804,7 +805,7 @@ namespace nanojit
// binary op with integer registers
void Assembler::asm_arith(LIns *ins) {
Register rr, ra, rb;
Register rr, ra, rb = UnspecifiedReg; // init to shut GCC up
switch (ins->opcode()) {
case LIR_lsh: case LIR_qilsh:
@@ -895,8 +896,7 @@ namespace nanojit
ArgSize sizes[MAXARGS];
int argc = call->get_sizes(sizes);
bool indirect = call->isIndirect();
if (!indirect) {
if (!call->isIndirect()) {
verbose_only(if (_logc->lcbits & LC_Assembly)
outputf(" %p:", _nIns);
)
@@ -908,16 +908,21 @@ namespace nanojit
CALLRAX();
asm_quad(RAX, (uint64_t)target, /*canClobberCCs*/true);
}
// Call this now so that the arg setup can involve 'rr'.
freeResourcesOf(ins);
} else {
// Indirect call: we assign the address arg to RAX since it's not
// used for regular arguments, and is otherwise scratch since it's
// clobbered by the call.
asm_regarg(ARGSIZE_P, ins->arg(--argc), RAX);
CALLRAX();
}
// Call this now so that the arg setup can involve 'rr'.
freeResourcesOf(ins);
// Call this now so that the arg setup can involve 'rr'.
freeResourcesOf(ins);
// Assign the call address to RAX. Must happen after freeResourcesOf()
// since RAX is usually the return value and will be allocated until that point.
asm_regarg(ARGSIZE_P, ins->arg(--argc), RAX);
}
#ifdef _WIN64
int stk_used = 32; // always reserve 32byte shadow area
@@ -1380,15 +1385,13 @@ namespace nanojit
}
else {
int d = findMemFor(ins);
if (ins->isF64()) {
NanoAssert(IsFpReg(r));
if (IsFpReg(r)) {
NanoAssert(ins->isF64());
MOVSDRM(r, d, FP);
} else if (ins->isI64()) {
NanoAssert(IsGpReg(r));
} else if (ins->isN64()) {
MOVQRM(r, d, FP);
} else {
NanoAssert(ins->isI32());
NanoAssert(IsGpReg(r));
MOVLRM(r, d, FP);
}
}
@@ -1689,6 +1692,8 @@ namespace nanojit
// Register clean-up for 2-address style unary ops of the form R = (op) R.
// Pairs with beginOp1Regs() and beginOp2Regs().
void Assembler::endOpRegs(LIns* ins, Register rr, Register ra) {
(void) rr; // quell warnings when NanoAssert is compiled out.
LIns* a = ins->oprnd1();
// We're finished with 'ins'.
@@ -1706,29 +1711,35 @@ namespace nanojit
void Assembler::asm_fneg(LIns *ins) {
Register rr, ra;
if (isS32((uintptr_t)negateMask) || isTargetWithinS32((NIns*)negateMask)) {
beginOp1Regs(ins, FpRegs, rr, ra);
if (isS32((uintptr_t)negateMask)) {
// builtin code is in bottom or top 2GB addr space, use absolute addressing
XORPSA(rr, (int32_t)(uintptr_t)negateMask);
} else {
// jit code is within +/-2GB of builtin code, use rip-relative
XORPSM(rr, (NIns*)negateMask);
}
if (ra != rr)
asm_nongp_copy(rr,ra);
endOpRegs(ins, rr, ra);
beginOp1Regs(ins, FpRegs, rr, ra);
if (isS32((uintptr_t)negateMask)) {
// builtin code is in bottom or top 2GB addr space, use absolute addressing
XORPSA(rr, (int32_t)(uintptr_t)negateMask);
} else if (isTargetWithinS32((NIns*)negateMask)) {
// jit code is within +/-2GB of builtin code, use rip-relative
XORPSM(rr, (NIns*)negateMask);
} else {
// This is just hideous - can't use RIP-relative load, can't use
// absolute-address load, and can't move imm64 const to XMM.
// so do it all in a GPR. hrmph.
rr = prepareResultReg(ins, GpRegs);
ra = findRegFor(ins->oprnd1(), GpRegs & ~rmask(rr));
XORQRR(rr, ra); // xor rr, ra
asm_quad(rr, negateMask[0], /*canClobberCCs*/true); // mov rr, 0x8000000000000000
freeResourcesOf(ins);
// Solution: move negateMask into a temp GP register, then copy to
// a temp XMM register.
// Nb: we don't want any F64 values to end up in a GpReg, nor any
// I64 values to end up in an FpReg.
//
// # 'gt' and 'ga' are temporary GpRegs.
// # ins->oprnd1() is in 'rr' (FpRegs)
// mov gt, 0x8000000000000000
// mov rt, gt
// xorps rr, rt
Register rt = registerAllocTmp(FpRegs & ~(rmask(ra)|rmask(rr)));
Register gt = registerAllocTmp(GpRegs);
XORPS(rr, rt);
MOVQXR(rt, gt);
asm_quad(gt, negateMask[0], /*canClobberCCs*/true);
}
if (ra != rr)
asm_nongp_copy(rr,ra);
endOpRegs(ins, rr, ra);
}
void Assembler::asm_spill(Register rr, int d, bool /*pop*/, bool quad) {

View file

@@ -478,6 +478,7 @@ namespace nanojit
void MOVSXDR(Register l, Register r);\
void MOVZX8(Register l, Register r);\
void XORPS(Register r);\
void XORPS(Register l, Register r);\
void DIVSD(Register l, Register r);\
void MULSD(Register l, Register r);\
void ADDSD(Register l, Register r);\