зеркало из https://github.com/mozilla/gecko-dev.git
Redo X64 asm_fneg to only allocate XMM regs, fix indirect calls, and revert asm_restore to old logic (bug 535706 r=nnethercote+)
The code for indirect calls needed shuffling; we must freeResourcesOf() before assigning the call address to a register. The old code was just getting lucky, and the regstate fixes tickled the latent bug. asm_restore() can be stricter once we eliminate all cases where an F64 instruction can be assigned to a GPR. The only known remaining case is asm_quad which is used for both LIR_float and LIR_quad, which should be fixed by bug 534310. --HG-- extra : convert_revision : e46657bf61cade04961da7e8abbb004385aaad7b
This commit is contained in:
Родитель
148417c45f
Коммит
a64e9de660
|
@ -451,7 +451,8 @@ namespace nanojit
|
|||
// XORPD because it's one byte shorter. This is ok because it's only used for
|
||||
// zeroing an XMM register; hence the single argument.
|
||||
// Also note that (unlike most SSE2 instructions) XORPS does not have a prefix, thus emitrr() should be used.
|
||||
void Assembler::XORPS( R r) { emitrr(X64_xorps, r,r); asm_output("xorps %s, %s", RQ(r),RQ(r)); }
|
||||
void Assembler::XORPS( R r) { emitrr(X64_xorps, r,r); asm_output("xorps %s, %s", RQ(r),RQ(r)); }
|
||||
void Assembler::XORPS( R l, R r) { emitrr(X64_xorps, l,r); asm_output("xorps %s, %s", RQ(l),RQ(r)); }
|
||||
void Assembler::DIVSD( R l, R r) { emitprr(X64_divsd, l,r); asm_output("divsd %s, %s", RQ(l),RQ(r)); }
|
||||
void Assembler::MULSD( R l, R r) { emitprr(X64_mulsd, l,r); asm_output("mulsd %s, %s", RQ(l),RQ(r)); }
|
||||
void Assembler::ADDSD( R l, R r) { emitprr(X64_addsd, l,r); asm_output("addsd %s, %s", RQ(l),RQ(r)); }
|
||||
|
@ -804,7 +805,7 @@ namespace nanojit
|
|||
|
||||
// binary op with integer registers
|
||||
void Assembler::asm_arith(LIns *ins) {
|
||||
Register rr, ra, rb;
|
||||
Register rr, ra, rb = UnspecifiedReg; // init to shut GCC up
|
||||
|
||||
switch (ins->opcode()) {
|
||||
case LIR_lsh: case LIR_qilsh:
|
||||
|
@ -895,8 +896,7 @@ namespace nanojit
|
|||
ArgSize sizes[MAXARGS];
|
||||
int argc = call->get_sizes(sizes);
|
||||
|
||||
bool indirect = call->isIndirect();
|
||||
if (!indirect) {
|
||||
if (!call->isIndirect()) {
|
||||
verbose_only(if (_logc->lcbits & LC_Assembly)
|
||||
outputf(" %p:", _nIns);
|
||||
)
|
||||
|
@ -908,16 +908,21 @@ namespace nanojit
|
|||
CALLRAX();
|
||||
asm_quad(RAX, (uint64_t)target, /*canClobberCCs*/true);
|
||||
}
|
||||
// Call this now so that the arg setup can involve 'rr'.
|
||||
freeResourcesOf(ins);
|
||||
} else {
|
||||
// Indirect call: we assign the address arg to RAX since it's not
|
||||
// used for regular arguments, and is otherwise scratch since it's
|
||||
// clobbered by the call.
|
||||
asm_regarg(ARGSIZE_P, ins->arg(--argc), RAX);
|
||||
CALLRAX();
|
||||
}
|
||||
|
||||
// Call this now so that the arg setup can involve 'rr'.
|
||||
freeResourcesOf(ins);
|
||||
// Call this now so that the arg setup can involve 'rr'.
|
||||
freeResourcesOf(ins);
|
||||
|
||||
// Assign the call address to RAX. Must happen after freeResourcesOf()
|
||||
// since RAX is usually the return value and will be allocated until that point.
|
||||
asm_regarg(ARGSIZE_P, ins->arg(--argc), RAX);
|
||||
}
|
||||
|
||||
#ifdef _WIN64
|
||||
int stk_used = 32; // always reserve 32byte shadow area
|
||||
|
@ -1380,15 +1385,13 @@ namespace nanojit
|
|||
}
|
||||
else {
|
||||
int d = findMemFor(ins);
|
||||
if (ins->isF64()) {
|
||||
NanoAssert(IsFpReg(r));
|
||||
if (IsFpReg(r)) {
|
||||
NanoAssert(ins->isF64());
|
||||
MOVSDRM(r, d, FP);
|
||||
} else if (ins->isI64()) {
|
||||
NanoAssert(IsGpReg(r));
|
||||
} else if (ins->isN64()) {
|
||||
MOVQRM(r, d, FP);
|
||||
} else {
|
||||
NanoAssert(ins->isI32());
|
||||
NanoAssert(IsGpReg(r));
|
||||
MOVLRM(r, d, FP);
|
||||
}
|
||||
}
|
||||
|
@ -1689,6 +1692,8 @@ namespace nanojit
|
|||
// Register clean-up for 2-address style unary ops of the form R = (op) R.
|
||||
// Pairs with beginOp1Regs() and beginOp2Regs().
|
||||
void Assembler::endOpRegs(LIns* ins, Register rr, Register ra) {
|
||||
(void) rr; // quell warnings when NanoAssert is compiled out.
|
||||
|
||||
LIns* a = ins->oprnd1();
|
||||
|
||||
// We're finished with 'ins'.
|
||||
|
@ -1706,29 +1711,35 @@ namespace nanojit
|
|||
|
||||
void Assembler::asm_fneg(LIns *ins) {
|
||||
Register rr, ra;
|
||||
if (isS32((uintptr_t)negateMask) || isTargetWithinS32((NIns*)negateMask)) {
|
||||
beginOp1Regs(ins, FpRegs, rr, ra);
|
||||
if (isS32((uintptr_t)negateMask)) {
|
||||
// builtin code is in bottom or top 2GB addr space, use absolute addressing
|
||||
XORPSA(rr, (int32_t)(uintptr_t)negateMask);
|
||||
} else {
|
||||
// jit code is within +/-2GB of builtin code, use rip-relative
|
||||
XORPSM(rr, (NIns*)negateMask);
|
||||
}
|
||||
if (ra != rr)
|
||||
asm_nongp_copy(rr,ra);
|
||||
endOpRegs(ins, rr, ra);
|
||||
|
||||
beginOp1Regs(ins, FpRegs, rr, ra);
|
||||
if (isS32((uintptr_t)negateMask)) {
|
||||
// builtin code is in bottom or top 2GB addr space, use absolute addressing
|
||||
XORPSA(rr, (int32_t)(uintptr_t)negateMask);
|
||||
} else if (isTargetWithinS32((NIns*)negateMask)) {
|
||||
// jit code is within +/-2GB of builtin code, use rip-relative
|
||||
XORPSM(rr, (NIns*)negateMask);
|
||||
} else {
|
||||
// This is just hideous - can't use RIP-relative load, can't use
|
||||
// absolute-address load, and can't move imm64 const to XMM.
|
||||
// so do it all in a GPR. hrmph.
|
||||
rr = prepareResultReg(ins, GpRegs);
|
||||
ra = findRegFor(ins->oprnd1(), GpRegs & ~rmask(rr));
|
||||
XORQRR(rr, ra); // xor rr, ra
|
||||
asm_quad(rr, negateMask[0], /*canClobberCCs*/true); // mov rr, 0x8000000000000000
|
||||
freeResourcesOf(ins);
|
||||
// Solution: move negateMask into a temp GP register, then copy to
|
||||
// a temp XMM register.
|
||||
// Nb: we don't want any F64 values to end up in a GpReg, nor any
|
||||
// I64 values to end up in an FpReg.
|
||||
//
|
||||
// # 'gt' and 'ga' are temporary GpRegs.
|
||||
// # ins->oprnd1() is in 'rr' (FpRegs)
|
||||
// mov gt, 0x8000000000000000
|
||||
// mov rt, gt
|
||||
// xorps rr, rt
|
||||
Register rt = registerAllocTmp(FpRegs & ~(rmask(ra)|rmask(rr)));
|
||||
Register gt = registerAllocTmp(GpRegs);
|
||||
XORPS(rr, rt);
|
||||
MOVQXR(rt, gt);
|
||||
asm_quad(gt, negateMask[0], /*canClobberCCs*/true);
|
||||
}
|
||||
if (ra != rr)
|
||||
asm_nongp_copy(rr,ra);
|
||||
endOpRegs(ins, rr, ra);
|
||||
}
|
||||
|
||||
void Assembler::asm_spill(Register rr, int d, bool /*pop*/, bool quad) {
|
||||
|
|
|
@ -478,6 +478,7 @@ namespace nanojit
|
|||
void MOVSXDR(Register l, Register r);\
|
||||
void MOVZX8(Register l, Register r);\
|
||||
void XORPS(Register r);\
|
||||
void XORPS(Register l, Register r);\
|
||||
void DIVSD(Register l, Register r);\
|
||||
void MULSD(Register l, Register r);\
|
||||
void ADDSD(Register l, Register r);\
|
||||
|
|
Загрузка…
Ссылка в новой задаче