Improve exit code efficiency on ARM. (491678, r=vladimir)

2009-06-24 08:17:44 +01:00 · 2009-06-24 08:17:44 +01:00 · 1ac546f87f
--- a/js/src/nanojit/NativeARM.cpp
+++ b/js/src/nanojit/NativeARM.cpp
@ -169,27 +169,37 @@ Assembler::genPrologue()
 void
 Assembler::nFragExit(LInsp guard)
 {
-    SideExit* exit = guard->record()->exit;
-    Fragment *frag = exit->target;
-    GuardRecord *lr;
+    SideExit *  exit = guard->record()->exit;
+    Fragment *  frag = exit->target;

-    if (frag && frag->fragEntry) {
+    bool        target_is_known = frag && frag->fragEntry;
+
+    if (target_is_known) {
+        // The target exists so we can simply emit a branch to its location.
        JMP_far(frag->fragEntry);
-        lr = 0;
    } else {
-        // target doesn't exit yet.  emit jump to epilog, and set up to patch later.
-        lr = guard->record();
+        // The target doesn't exist yet, so emit a jump to the epilogue. If the
+        // target is created later on, the jump will be patched.

-        // jump to the epilogue; JMP_far will insert an extra dummy insn for later
-        // patching.
+        GuardRecord *   gr = guard->record();
+
+        // Jump to the epilogue. This may get patched later, but JMP_far always
+        // emits two instructions even when only one is required, so patching
+        // will work correctly.
        JMP_far(_epilogue);

-        // stick the jmp pointer to the start of the sequence
-        lr->jmp = _nIns;
-    }
+        // Load the guard record pointer into R2. We want it in R0 but we can't
+        // do this at this stage because R0 is used for something else.
+        // I don't understand why I can't load directly into R0. It works for
+        // the JavaScript JIT but not for the Regular Expression compiler.
+        // However, I haven't pushed this further as it only saves a single MOV
+        // instruction in genEpilogue.
+        LDi(R2, int(gr));

-    // pop the stack frame first
-    MOV(SP, FP);
+        // Set the jmp pointer to the start of the sequence so that patched
+        // branches can skip the LDi sequence.
+        gr->jmp = _nIns;
+    }

 #ifdef NJ_VERBOSE
    if (_frago->core()->config.show_stats) {
@ -200,11 +210,8 @@ Assembler::nFragExit(LInsp guard)
    }
 #endif

-    // return value is GuardRecord*; note that this goes into
-    // R2, not R0 -- genEpilogue will move it into R0.  Otherwise
-    // we want R0 to have the original value that it had at the
-    // start of trace.
-    LDi(R2, int(lr));
+    // Pop the stack frame.
+    MOV(SP, FP);
 }

 NIns*
@ -221,11 +228,17 @@ Assembler::genEpilogue()

    POP_mask(savingMask); // regs

+    // Pop the stack frame.
+    // As far as I can tell, the generated code doesn't use the stack between
+    // popping the stack frame in nFragExit and getting here and so this MOV
+    // should be redundant. However, removing this seems to break some regular
+    // expression stuff.
    MOV(SP,FP);

-    // this is needed if we jump here from nFragExit
-    MOV(R0,R2); // return LinkRecord*
-
+    // nFragExit loads the guard record pointer into R2, but we need it in R0
+    // so it must be moved here.
+    MOV(R0,R2); // return GuardRecord*
+    
    return _nIns;
 }

@ -522,15 +535,32 @@ Assembler::nPatchBranch(NIns* at, NIns* target)

    NIns* was = 0;

-    if (at[0] == (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | (4) )) {
-        // this needed to be emitted with a 32-bit immediate.
-        was = (NIns*) at[1];
-    } else {
-        // nope, just a regular PC-relative B; calculate the destination address
-        // based on at and the offset.
-        NanoAssert((at[0] & 0xff000000) == (COND_AL | (0xA<<24)));
-        was = (NIns*) (((intptr_t)at + 8) + (intptr_t)((at[0] & 0xffffff) << 2));
-    }
+    // Determine how the existing branch was emitted so we can report the
+    // original destination. Note that this is only useful for debug purposes;
+    // no real code uses this result.
+    debug_only(
+        if (at[0] == (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | (4) )) {
+            // The existing branch looks like this:
+            //  at[0]           LDR pc, [addr]
+            //  at[1]   addr:   target
+            was = (NIns*) at[1];
+        } else if ((at[0] & 0xff000000) == (NIns)( COND_AL | (0xA<<24))) {
+            // The top byte (bitwise-masked) matches an unconditional B:
+            //  at[0]           B target
+            //  at[1]           BKPT (dummy instruction).
+            was = (NIns*) (((intptr_t)at + 8) + (intptr_t)((at[0] & 0xffffff) << 2));
+        } else {
+            // The existing code is not a branch. This can occur, for example,
+            // when patching exit code generated by nFragExit. Exit branches to
+            // an epilogue load a value into R2 (using LDi), but this is not
+            // required for other exit branches so the new branch can be
+            // emitted over the top of the LDi sequence. It would be nice to
+            // assert that we're looking at an LDi sequence, but this is not
+            // trivial because the output of LDi is both platform- and
+            // context-dependent.
+            was = (NIns*)-1;    // Return an obviously incorrect target address.
+        }
+    );

    // let's see how we have to emit it
    intptr_t offs = PC_OFFSET_FROM(target, at);
@ -1017,19 +1047,23 @@ Assembler::underrunProtect(int bytes)
 void
 Assembler::JMP_far(NIns* addr)
 {
-    // we may have to stick an immediate into the stream, so always
-    // reserve space
+    // Even if a simple branch is all that is required, this function must emit
+    // two words so that the branch can be arbitrarily patched later on.
    underrunProtect(8);

    intptr_t offs = PC_OFFSET_FROM(addr,_nIns-2);

    if (isS24(offs>>2)) {
+        // Emit a BKPT to ensure that we reserve enough space for a full 32-bit
+        // branch patch later on. The BKPT should never be executed.
        BKPT_nochk();
+
+        // B [PC+offs]
        *(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((offs>>2) & 0xFFFFFF) );

        asm_output("b %p", addr);
    } else {
-        // the address
+        // Insert the target address as a constant in the instruction stream.
        *(--_nIns) = (NIns)((addr));
        // ldr pc, [pc - #4] // load the address into pc, reading it from [pc-4] (e.g.,
        // the next instruction)