From 5725061f5a42636952238a959d52e19e95e0af2a Mon Sep 17 00:00:00 2001
From: Graydon Hoare <graydon@mozilla.com>
Date: Tue, 20 Oct 2009 17:43:13 -0700
Subject: [PATCH] Bug 523262 - further ARM differences from tamarin, r=gal.

--HG--
extra : rebase_source : a39c39c0d6a66886c7a068324187bb3fd50796bd
---
 js/src/nanojit/NativeARM.cpp | 142 ++++++++++++++---------------------
 js/src/nanojit/Nativei386.h  |   2 +
 2 files changed, 57 insertions(+), 87 deletions(-)

diff --git a/js/src/nanojit/NativeARM.cpp b/js/src/nanojit/NativeARM.cpp
index f08132ab71f3..0f447877587f 100644
--- a/js/src/nanojit/NativeARM.cpp
+++ b/js/src/nanojit/NativeARM.cpp
@@ -185,7 +185,7 @@ Assembler::encOp2Imm(uint32_t literal, uint32_t * enc)
     uint32_t    leading_zeroes;
 
     // Components of the operand 2 encoding.
-    uint32_t    rot;
+    int32_t    rot;
     uint32_t    imm8;
 
     // Check the literal to see if it is a simple 8-bit value. I suspect that
@@ -933,7 +933,7 @@ Assembler::nRegisterResetAll(RegAlloc& a)
     a.free =
         rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) |
         rmask(R5) | rmask(R6) | rmask(R7) | rmask(R8) | rmask(R9) |
-        rmask(R10);
+        rmask(R10) | rmask(LR);
     if (ARM_VFP)
         a.free |= FpRegs;
 
@@ -941,78 +941,43 @@ Assembler::nRegisterResetAll(RegAlloc& a)
 }
 
 void
-Assembler::nPatchBranch(NIns* at, NIns* target)
+Assembler::nPatchBranch(NIns* branch, NIns* target)
 {
-    // Patch the jump in a loop, as emitted by JMP_far.
-    // Figure out which, and do the right thing.
+    // Patch the jump in a loop
 
-    NIns* was = 0;
+    int32_t offset = PC_OFFSET_FROM(target, branch);
 
-    // Determine how the existing branch was emitted so we can report the
-    // original destination. Note that this is only useful for debug purposes;
-    // no real code uses this result.
-    debug_only(
-        if (at[0] == (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | (4) )) {
-            // The existing branch looks like this:
-            //  at[0]           LDR pc, [addr]
-            //  at[1]   addr:   target
-            was = (NIns*) at[1];
-        } else if ((at[0] && 0xff000000) == (NIns)( COND_AL | (0xA<<24))) {
-            // The existing branch looks like this:
-            //  at[0]           B target
-            //  at[1]           BKPT (dummy instruction).
-            was = (NIns*) (((intptr_t)at + 8) + (intptr_t)((at[0] & 0xffffff) << 2));
-        } else {
-            // The existing code is not a branch. This can occur, for example,
-            // when patching exit code generated by nFragExit. Exit branches to
-            // an epilogue load a value into R2 (using LDi), but this is not
-            // required for other exit branches so the new branch can be
-            // emitted over the top of the LDi sequence. It would be nice to
-            // assert that we're looking at an LDi sequence, but this is not
-            // trivial because the output of LDi is both platform- and
-            // context-dependent.
-            was = (NIns*)-1;    // Return an obviously incorrect target address.
-        }
-    );
+    //avmplus::AvmLog("---patching branch at 0x%08x to location 0x%08x (%d-0x%08x)\n", branch, target, offset, offset);
 
-    // Assert that the existing placeholder is not conditional.
-    NanoAssert((uint32_t)(at[0] & 0xf0000000) == COND_AL);
-
-    // We only have to patch unconditional branches, but these may take one of
-    // the following patterns:
-    //
-    //  --- Short branch.
-    //          B       ±32MB
-    //
-    //  --- Long branch.
-    //          LDR     PC, #lit
-    //  lit:    #target
-
-    intptr_t offs = PC_OFFSET_FROM(target, at);
-    if (isS24(offs>>2)) {
-        // Emit a simple branch (B) in the first of the two available
-        // instruction addresses.
-        at[0] = (NIns)( COND_AL | (0xA<<24) | ((offs >> 2) & 0xffffff) );
-        // and reset at[1] for good measure
-        at[1] = BKPT_insn;
+    // We have 2 words to work with here -- if offset is in range of a 24-bit
+    // relative jump, emit that; otherwise, we do a pc-relative load into pc.
+    if (isS24(offset>>2)) {
+        // write a new instruction that preserves the condition of what's there.
+        NIns cond = *branch & 0xF0000000;
+        *branch = (NIns)( cond | (0xA<<24) | ((offset>>2) & 0xFFFFFF) );
     } else {
-        // Emit a branch to a pc-relative address, which we'll store right
-        // after this instruction
-        at[0] = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | (4) );
-        // the target address
-        at[1] = (NIns)(target);
-    }
-    VALGRIND_DISCARD_TRANSLATIONS(at, 2*sizeof(NIns));
+        // update const-addr, branch instruction is:
+        // LDRcc pc, [pc, #off-to-const-addr]
+        NanoAssert((*branch & 0x0F7FF000) == 0x051FF000);
 
+        NIns *addr = branch+2;
+        int offset = (*branch & 0xFFF) / sizeof(NIns);
+
+        if (*branch & (1<<23)) {
+            addr += offset;
+        } else {
+            addr -= offset;
+        }
+
+        *addr = (NIns) target;
+    }
+
+    VALGRIND_DISCARD_TRANSLATIONS(branch, 2*sizeof(NIns));
 #if defined(UNDER_CE)
     // we changed the code, so we need to do this (sadly)
     FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
 #elif defined(AVMPLUS_LINUX)
-    __clear_cache((char*)at, (char*)(at+3));
-#endif
-
-#ifdef AVMPLUS_PORTING_API
-    NanoJIT_PortAPI_FlushInstructionCache(at, at+3);
+    __clear_cache((char*)branch, (char*)(branch+3));
 #endif
 }
 
@@ -1267,21 +1232,24 @@ Assembler::asm_quad(LInsp ins)
 {
     //asm_output(">>> asm_quad");
 
-    Reservation *   res = getresv(ins);
-    int             d = disp(res);
-    Register        rr = res->reg;
+    Reservation *res = getresv(ins);
+    int d = disp(res);
+    Register rr = res->reg;
 
     freeRsrcOf(ins, false);
 
     if (ARM_VFP && rr != UnknownReg)
     {
-        if (d)
-            FSTD(rr, FP, d);
+        asm_spill(rr, d, false, true);
 
         underrunProtect(4*4);
         asm_quad_nochk(rr, ins->imm64_0(), ins->imm64_1());
     } else {
         NanoAssert(d);
+        // asm_mmq might spill a reg, so don't call it;
+        // instead do the equivalent directly.
+        //asm_mmq(FP, d, PC, -16);
+
         STR(IP, FP, d+4);
         asm_ld_imm(IP, ins->imm64_1());
         STR(IP, FP, d);
@@ -1357,7 +1325,7 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
     // Find the list of free registers from the allocator's free list and the
     // GpRegs mask. This excludes any floating-point registers that may be on
     // the free list.
-    RegisterMask    free = _allocator.free & GpRegs;
+    RegisterMask    free = _allocator.free & AllowableFlagRegs;
 
     if (free) {
         // There is at least one register on the free list, so grab one for
@@ -1719,11 +1687,6 @@ Assembler::B_cond_chk(ConditionCode _c, NIns* _t, bool _chk)
     int32_t offs = PC_OFFSET_FROM(_t,_nIns-1);
     //nj_dprintf("B_cond_chk target: 0x%08x offset: %d @0x%08x\n", _t, offs, _nIns-1);
 
-    // We don't patch conditional branches, and nPatchBranch can't cope with
-    // them. We should therefore check that they are not generated at this
-    // stage.
-    NanoAssert((_t != 0) || (_c == AL));
-
     // optimistically check if this will fit in 24 bits
     if (_chk && isS24(offs>>2) && (_t != 0)) {
         underrunProtect(4);
@@ -1757,11 +1720,13 @@ Assembler::B_cond_chk(ConditionCode _c, NIns* _t, bool _chk)
     if (isS24(offs>>2) && (_t != 0)) {
         // The underrunProtect for this was done above (if required by _chk).
         *(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) );
+        asm_output("b%s %p", _c == AL ? "" : condNames[_c], (void*)(_t));
     } else if (_c == AL) {
         if(_chk) underrunProtect(8);
         *(--_nIns) = (NIns)(_t);
         *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 );
-    } else if (PC_OFFSET_FROM(_nSlot, _nIns-1) > -0x1000 /*~(NJ_PAGE_SIZE-1)*/) {
+        asm_output("b%s %p", _c == AL ? "" : condNames[_c], (void*)(_t));
+    } else if (PC_OFFSET_FROM(_nSlot, _nIns-1) > -0x1000) {
         if(_chk) underrunProtect(8);
         *(_nSlot++) = (NIns)(_t);
         offs = PC_OFFSET_FROM(_nSlot-1,_nIns-1);
@@ -1779,9 +1744,8 @@ Assembler::B_cond_chk(ConditionCode _c, NIns* _t, bool _chk)
         *(--_nIns) = (NIns)( COND_AL | (0xA<<24) | 0x0 );
         // Emit the conditional branch.
         *(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 );
+        asm_output("b%s %p", _c == AL ? "" : condNames[_c], (void*)(_t));
     }
-
-    asm_output("b%s %p", condNames[_c], (void*)(_t));
 }
 
 /*
@@ -1868,9 +1832,12 @@ Assembler::asm_fcmp(LInsp ins)
 
     NanoAssert(op >= LIR_feq && op <= LIR_fge);
 
+    Reservation *rA, *rB;
+    findRegFor2(FpRegs, lhs, rA, rhs, rB);
+    Register ra = rA->reg;
+    Register rb = rB->reg;
+
     int e_bit = (op != LIR_feq);
-    Register ra = findRegFor(lhs, FpRegs);
-    Register rb = findRegFor(rhs, FpRegs);
 
     // do the comparison and get results loaded in ARM status register
     FMSTAT();
@@ -1894,13 +1861,12 @@ Assembler::asm_prep_fcall(Reservation*, LInsp)
      * which is clearly broken.
      *
      * This is not a problem for non-floating point calls, because the
-     * restoring of spilled data into R0 is done via a call to
-     * prepResultReg(R0) at the same point in the sequence as this function is
-     * called, meaning that evictScratchRegs() will not modify R0. However,
-     * prepResultReg is not aware of the concept of using a register pair
-     * (R0,R1) for the result of a single operation, so it can only be used
-     * here with the ultimate VFP register, and not R0/R1, which potentially
-     * allows for R0/R1 to get corrupted as described.
+     * restoring of spilled data into R0 is done via a call to prepResultReg(R0)
+     * at the same point in the sequence as this function is called, meaning that
+     * evictScratchRegs() will not modify R0. However, prepResultReg is not aware
+     * of the concept of using a register pair (R0,R1) for the result of a single
+     * operation, so it can only be used here with the ultimate VFP register, and
+     * not R0/R1, which potentially allows for R0/R1 to get corrupted as described.
      */
     return UnknownReg;
 }
@@ -2019,6 +1985,7 @@ Assembler::asm_cmpi(Register r, int32_t imm)
         if (imm > -256) {
             ALUi(AL, cmn, 1, 0, r, -imm);
         } else {
+            underrunProtect(4 + LD32_size);
             CMP(r, IP);
             asm_ld_imm(IP, imm);
         }
@@ -2026,6 +1993,7 @@ Assembler::asm_cmpi(Register r, int32_t imm)
         if (imm < 256) {
             ALUi(AL, cmp, 1, 0, r, imm);
         } else {
+            underrunProtect(4 + LD32_size);
             CMP(r, IP);
             asm_ld_imm(IP, imm);
         }
diff --git a/js/src/nanojit/Nativei386.h b/js/src/nanojit/Nativei386.h
index 274a1ba4f1b1..35106fb890cb 100644
--- a/js/src/nanojit/Nativei386.h
+++ b/js/src/nanojit/Nativei386.h
@@ -189,6 +189,8 @@ namespace nanojit
     _nIns -= 4;     \
     *((int32_t*)_nIns) = (int32_t)(i)
 
+// XXX rearrange NanoAssert() expression to workaround apparent gcc 4.3 bug:
+// XXX "error: logical && with non-zero constant will always evaluate as true"
 #define MODRMs(r,d,b,l,i) \
         NanoAssert(unsigned(i)<8 && unsigned(b)<8 && unsigned(r)<8); \
         if ((d) == 0 && (b) != EBP) { \