Improve exit code efficiency on ARM. (491678, r=vladimir)

This commit is contained in:
Jacob Bramley 2009-06-24 08:17:44 +01:00
Родитель 53153ae1dd
Коммит 1ac546f87f
1 изменённых файлов: 68 добавлений и 34 удалений

Просмотреть файл

@ -169,27 +169,37 @@ Assembler::genPrologue()
void
Assembler::nFragExit(LInsp guard)
{
SideExit* exit = guard->record()->exit;
Fragment *frag = exit->target;
GuardRecord *lr;
SideExit * exit = guard->record()->exit;
Fragment * frag = exit->target;
if (frag && frag->fragEntry) {
bool target_is_known = frag && frag->fragEntry;
if (target_is_known) {
// The target exists so we can simply emit a branch to its location.
JMP_far(frag->fragEntry);
lr = 0;
} else {
// target doesn't exist yet. emit jump to epilog, and set up to patch later.
lr = guard->record();
// The target doesn't exist yet, so emit a jump to the epilogue. If the
// target is created later on, the jump will be patched.
// jump to the epilogue; JMP_far will insert an extra dummy insn for later
// patching.
GuardRecord * gr = guard->record();
// Jump to the epilogue. This may get patched later, but JMP_far always
// emits two instructions even when only one is required, so patching
// will work correctly.
JMP_far(_epilogue);
// stick the jmp pointer to the start of the sequence
lr->jmp = _nIns;
}
// Load the guard record pointer into R2. We want it in R0 but we can't
// do this at this stage because R0 is used for something else.
// I don't understand why I can't load directly into R0. It works for
// the JavaScript JIT but not for the Regular Expression compiler.
// However, I haven't pushed this further as it only saves a single MOV
// instruction in genEpilogue.
LDi(R2, int(gr));
// pop the stack frame first
MOV(SP, FP);
// Set the jmp pointer to the start of the sequence so that patched
// branches can skip the LDi sequence.
gr->jmp = _nIns;
}
#ifdef NJ_VERBOSE
if (_frago->core()->config.show_stats) {
@ -200,11 +210,8 @@ Assembler::nFragExit(LInsp guard)
}
#endif
// return value is GuardRecord*; note that this goes into
// R2, not R0 -- genEpilogue will move it into R0. Otherwise
// we want R0 to have the original value that it had at the
// start of trace.
LDi(R2, int(lr));
// Pop the stack frame.
MOV(SP, FP);
}
NIns*
@ -221,11 +228,17 @@ Assembler::genEpilogue()
POP_mask(savingMask); // regs
// Pop the stack frame.
// As far as I can tell, the generated code doesn't use the stack between
// popping the stack frame in nFragExit and getting here and so this MOV
// should be redundant. However, removing this seems to break some regular
// expression stuff.
MOV(SP,FP);
// this is needed if we jump here from nFragExit
MOV(R0,R2); // return LinkRecord*
// nFragExit loads the guard record pointer into R2, but we need it in R0
// so it must be moved here.
MOV(R0,R2); // return GuardRecord*
return _nIns;
}
@ -522,15 +535,32 @@ Assembler::nPatchBranch(NIns* at, NIns* target)
NIns* was = 0;
if (at[0] == (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | (4) )) {
// this needed to be emitted with a 32-bit immediate.
was = (NIns*) at[1];
} else {
// nope, just a regular PC-relative B; calculate the destination address
// based on at and the offset.
NanoAssert((at[0] & 0xff000000) == (COND_AL | (0xA<<24)));
was = (NIns*) (((intptr_t)at + 8) + (intptr_t)((at[0] & 0xffffff) << 2));
}
// Determine how the existing branch was emitted so we can report the
// original destination. Note that this is only useful for debug purposes;
// no real code uses this result.
debug_only(
    // Work out what the instruction(s) at 'at' currently branch to, purely
    // so that the previous destination can be reported in debug builds.
    if (at[0] == (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | (4) )) {
        // The existing branch looks like this:
        //      at[0]           LDR pc, [addr]
        //      at[1]   addr:   target
        was = (NIns*) at[1];
    } else if ((at[0] & 0xff000000) == (NIns)( COND_AL | (0xA<<24))) {
        // The existing branch looks like this:
        //      at[0]           B target
        //      at[1]           BKPT (dummy instruction).
        //
        // The top byte must be isolated with a bitwise '&'. A logical '&&'
        // collapses the left-hand side to 0 or 1, which can never equal the
        // B opcode pattern, so this case would never be recognised.
        //
        // NOTE(review): the 24-bit offset is masked and shifted without
        // sign extension, so a backward branch would presumably yield a
        // wrong address here -- confirm before relying on 'was'.
        was = (NIns*) (((intptr_t)at + 8) + (intptr_t)((at[0] & 0xffffff) << 2));
    } else {
        // The existing code is not a branch. This can occur, for example,
        // when patching exit code generated by nFragExit. Exit branches to
        // an epilogue load a value into R2 (using LDi), but this is not
        // required for other exit branches so the new branch can be
        // emitted over the top of the LDi sequence. It would be nice to
        // assert that we're looking at an LDi sequence, but this is not
        // trivial because the output of LDi is both platform- and
        // context-dependent.
        was = (NIns*)-1;    // Return an obviously incorrect target address.
    }
);
// let's see how we have to emit it
intptr_t offs = PC_OFFSET_FROM(target, at);
@ -1017,19 +1047,23 @@ Assembler::underrunProtect(int bytes)
void
Assembler::JMP_far(NIns* addr)
{
// we may have to stick an immediate into the stream, so always
// reserve space
// Even if a simple branch is all that is required, this function must emit
// two words so that the branch can be arbitrarily patched later on.
underrunProtect(8);
intptr_t offs = PC_OFFSET_FROM(addr,_nIns-2);
if (isS24(offs>>2)) {
// Emit a BKPT to ensure that we reserve enough space for a full 32-bit
// branch patch later on. The BKPT should never be executed.
BKPT_nochk();
// B [PC+offs]
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((offs>>2) & 0xFFFFFF) );
asm_output("b %p", addr);
} else {
// the address
// Insert the target address as a constant in the instruction stream.
*(--_nIns) = (NIns)((addr));
// ldr pc, [pc - #4] // load the address into pc, reading it from [pc-4] (i.e.,
// the next instruction)