зеркало из https://github.com/mozilla/gecko-dev.git
Improve exit code efficiency on ARM. (491678, r=vladimir)
This commit is contained in:
Родитель
53153ae1dd
Коммит
1ac546f87f
|
@ -169,27 +169,37 @@ Assembler::genPrologue()
|
|||
void
|
||||
Assembler::nFragExit(LInsp guard)
|
||||
{
|
||||
SideExit* exit = guard->record()->exit;
|
||||
Fragment *frag = exit->target;
|
||||
GuardRecord *lr;
|
||||
SideExit * exit = guard->record()->exit;
|
||||
Fragment * frag = exit->target;
|
||||
|
||||
if (frag && frag->fragEntry) {
|
||||
bool target_is_known = frag && frag->fragEntry;
|
||||
|
||||
if (target_is_known) {
|
||||
// The target exists so we can simply emit a branch to its location.
|
||||
JMP_far(frag->fragEntry);
|
||||
lr = 0;
|
||||
} else {
|
||||
// target doesn't exist yet. emit jump to epilog, and set up to patch later.
|
||||
lr = guard->record();
|
||||
// The target doesn't exist yet, so emit a jump to the epilogue. If the
|
||||
// target is created later on, the jump will be patched.
|
||||
|
||||
// jump to the epilogue; JMP_far will insert an extra dummy insn for later
|
||||
// patching.
|
||||
GuardRecord * gr = guard->record();
|
||||
|
||||
// Jump to the epilogue. This may get patched later, but JMP_far always
|
||||
// emits two instructions even when only one is required, so patching
|
||||
// will work correctly.
|
||||
JMP_far(_epilogue);
|
||||
|
||||
// stick the jmp pointer to the start of the sequence
|
||||
lr->jmp = _nIns;
|
||||
}
|
||||
// Load the guard record pointer into R2. We want it in R0 but we can't
|
||||
// do this at this stage because R0 is used for something else.
|
||||
// I don't understand why I can't load directly into R0. It works for
|
||||
// the JavaScript JIT but not for the Regular Expression compiler.
|
||||
// However, I haven't pushed this further as it only saves a single MOV
|
||||
// instruction in genEpilogue.
|
||||
LDi(R2, int(gr));
|
||||
|
||||
// pop the stack frame first
|
||||
MOV(SP, FP);
|
||||
// Set the jmp pointer to the start of the sequence so that patched
|
||||
// branches can skip the LDi sequence.
|
||||
gr->jmp = _nIns;
|
||||
}
|
||||
|
||||
#ifdef NJ_VERBOSE
|
||||
if (_frago->core()->config.show_stats) {
|
||||
|
@ -200,11 +210,8 @@ Assembler::nFragExit(LInsp guard)
|
|||
}
|
||||
#endif
|
||||
|
||||
// return value is GuardRecord*; note that this goes into
|
||||
// R2, not R0 -- genEpilogue will move it into R0. Otherwise
|
||||
// we want R0 to have the original value that it had at the
|
||||
// start of trace.
|
||||
LDi(R2, int(lr));
|
||||
// Pop the stack frame.
|
||||
MOV(SP, FP);
|
||||
}
|
||||
|
||||
NIns*
|
||||
|
@ -221,10 +228,16 @@ Assembler::genEpilogue()
|
|||
|
||||
POP_mask(savingMask); // regs
|
||||
|
||||
// Pop the stack frame.
|
||||
// As far as I can tell, the generated code doesn't use the stack between
|
||||
// popping the stack frame in nFragExit and getting here and so this MOV
|
||||
// should be redundant. However, removing this seems to break some regular
|
||||
// expression stuff.
|
||||
MOV(SP,FP);
|
||||
|
||||
// this is needed if we jump here from nFragExit
|
||||
MOV(R0,R2); // return LinkRecord*
|
||||
// nFragExit loads the guard record pointer into R2, but we need it in R0
|
||||
// so it must be moved here.
|
||||
MOV(R0,R2); // return GuardRecord*
|
||||
|
||||
return _nIns;
|
||||
}
|
||||
|
@ -522,15 +535,32 @@ Assembler::nPatchBranch(NIns* at, NIns* target)
|
|||
|
||||
NIns* was = 0;
|
||||
|
||||
if (at[0] == (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | (4) )) {
|
||||
// this needed to be emitted with a 32-bit immediate.
|
||||
was = (NIns*) at[1];
|
||||
} else {
|
||||
// nope, just a regular PC-relative B; calculate the destination address
|
||||
// based on at and the offset.
|
||||
NanoAssert((at[0] & 0xff000000) == (COND_AL | (0xA<<24)));
|
||||
was = (NIns*) (((intptr_t)at + 8) + (intptr_t)((at[0] & 0xffffff) << 2));
|
||||
}
|
||||
// Determine how the existing branch was emitted so we can report the
|
||||
// original destination. Note that this is only useful for debug purposes;
|
||||
// no real code uses this result.
|
||||
debug_only(
|
||||
if (at[0] == (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | (4) )) {
|
||||
// The existing branch looks like this:
|
||||
// at[0] LDR pc, [addr]
|
||||
// at[1] addr: target
|
||||
was = (NIns*) at[1];
|
||||
} else if ((at[0] && 0xff000000) == (NIns)( COND_AL | (0xA<<24))) {
|
||||
// The existing branch looks like this:
|
||||
// at[0] B target
|
||||
// at[1] BKPT (dummy instruction).
|
||||
was = (NIns*) (((intptr_t)at + 8) + (intptr_t)((at[0] & 0xffffff) << 2));
|
||||
} else {
|
||||
// The existing code is not a branch. This can occur, for example,
|
||||
// when patching exit code generated by nFragExit. Exit branches to
|
||||
// an epilogue load a value into R2 (using LDi), but this is not
|
||||
// required for other exit branches so the new branch can be
|
||||
// emitted over the top of the LDi sequence. It would be nice to
|
||||
// assert that we're looking at an LDi sequence, but this is not
|
||||
// trivial because the output of LDi is both platform- and
|
||||
// context-dependent.
|
||||
was = (NIns*)-1; // Return an obviously incorrect target address.
|
||||
}
|
||||
);
|
||||
|
||||
// let's see how we have to emit it
|
||||
intptr_t offs = PC_OFFSET_FROM(target, at);
|
||||
|
@ -1017,19 +1047,23 @@ Assembler::underrunProtect(int bytes)
|
|||
void
|
||||
Assembler::JMP_far(NIns* addr)
|
||||
{
|
||||
// we may have to stick an immediate into the stream, so always
|
||||
// reserve space
|
||||
// Even if a simple branch is all that is required, this function must emit
|
||||
// two words so that the branch can be arbitrarily patched later on.
|
||||
underrunProtect(8);
|
||||
|
||||
intptr_t offs = PC_OFFSET_FROM(addr,_nIns-2);
|
||||
|
||||
if (isS24(offs>>2)) {
|
||||
// Emit a BKPT to ensure that we reserve enough space for a full 32-bit
|
||||
// branch patch later on. The BKPT should never be executed.
|
||||
BKPT_nochk();
|
||||
|
||||
// B [PC+offs]
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((offs>>2) & 0xFFFFFF) );
|
||||
|
||||
asm_output("b %p", addr);
|
||||
} else {
|
||||
// the address
|
||||
// Insert the target address as a constant in the instruction stream.
|
||||
*(--_nIns) = (NIns)((addr));
|
||||
// ldr pc, [pc - #4] // load the address into pc, reading it from [pc-4] (i.e.,
|
||||
// the next instruction)
|
||||
|
|
Загрузка…
Ссылка в новой задаче