Backout the last NJ-to-TM merge (TM revisions b44daa2c0503 to b3167f8f9459, inclusive) because it caused lots of oranges. r=me.

This commit is contained in:
Nicholas Nethercote 2010-06-03 21:28:18 -07:00
Parent b89b4107c3
Commit 864fd5ca21
31 changed files: 320 additions and 850 deletions

View file

@ -329,11 +329,9 @@ private:
LIns *assemble_ret(ReturnType rt);
LIns *assemble_guard(bool isCond);
LIns *assemble_guard_xov();
LIns *assemble_jump_jov();
void bad(const string &msg);
void nyi(const string &opname);
void extract_any_label(string &lab, char lab_delim);
void resolve_forward_jumps(string &lab, LIns *ins);
void endFragment();
};
@ -747,6 +745,7 @@ FragmentAssembler::createGuardRecord(LasmSideExit *exit)
return rec;
}
LIns *
FragmentAssembler::assemble_guard(bool isCond)
{
@ -781,29 +780,6 @@ FragmentAssembler::assemble_guard_xov()
return mLir->insGuardXov(mOpcode, ref(mTokens[0]), ref(mTokens[1]), guard);
}
LIns *
FragmentAssembler::assemble_jump_jov()
{
need(3);
LIns *a = ref(mTokens[0]);
LIns *b = ref(mTokens[1]);
string name = mTokens[2];
if (mLabels.find(name) != mLabels.end()) {
LIns *target = ref(name);
return mLir->insBranchJov(mOpcode, a, b, target);
} else {
LIns *ins = mLir->insBranchJov(mOpcode, a, b, NULL);
#ifdef __SUNPRO_CC
mFwdJumps.insert(make_pair<const string, LIns *>(name, ins));
#else
mFwdJumps.insert(make_pair(name, ins));
#endif
return ins;
}
}
void
FragmentAssembler::endFragment()
{
@ -882,22 +858,6 @@ FragmentAssembler::extract_any_label(string &lab, char lab_delim)
}
}
void
FragmentAssembler::resolve_forward_jumps(string &lab, LIns *ins)
{
typedef multimap<string, LIns *> mulmap;
#ifdef __SUNPRO_CC
typedef mulmap::iterator ci;
#else
typedef mulmap::const_iterator ci;
#endif
pair<ci, ci> range = mFwdJumps.equal_range(lab);
for (ci i = range.first; i != range.second; ++i) {
i->second->setTarget(ins);
}
mFwdJumps.erase(lab);
}
void
FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, const LirToken *firstToken)
{
@ -937,7 +897,17 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
/* Save label and do any back-patching of deferred forward-jumps. */
if (!lab.empty()) {
ins = mLir->ins0(LIR_label);
resolve_forward_jumps(lab, ins);
typedef multimap<string, LIns *> mulmap;
#ifdef __SUNPRO_CC
typedef mulmap::iterator ci;
#else
typedef mulmap::const_iterator ci;
#endif
pair<ci, ci> range = mFwdJumps.equal_range(lab);
for (ci i = range.first; i != range.second; ++i) {
i->second->setTarget(ins);
}
mFwdJumps.erase(lab);
lab.clear();
}
extract_any_label(lab, '=');
@ -1131,14 +1101,6 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
ins = assemble_guard_xov();
break;
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
CASE64(LIR_addjovq:)
CASE64(LIR_subjovq:)
ins = assemble_jump_jov();
break;
case LIR_calli:
CASESF(LIR_hcalli:)
case LIR_calld:
@ -1155,12 +1117,6 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
break;
case LIR_label:
ins = mLir->ins0(LIR_label);
if (!lab.empty()) {
resolve_forward_jumps(lab, ins);
}
break;
case LIR_file:
case LIR_line:
case LIR_xtbl:

View file

@ -1,14 +0,0 @@
ptr = allocp 8
a = immi 2147483647
b = immi 0
c = addjovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res
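For orientation, the deleted test above exercises LIR_addjovi: add two 32-bit integers and branch to the 'ovf' label if the addition overflows; 2147483647 + 0 does not overflow, so the expected output (next file) is 2147483647. A rough C++ equivalent, offered only as an illustrative sketch (the __builtin_add_overflow intrinsic is a GCC/Clang feature and is not part of this commit):

#include <cstdint>
#include <cstdio>

int main() {
    int32_t a = 2147483647, b = 0, res;
    if (__builtin_add_overflow(a, b, &res))   // corresponds to the 'ovf' branch in the LIR test
        res = 12345678;
    std::printf("Output is: %d\n", res);      // prints "Output is: 2147483647" here
    return 0;
}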

View file

@ -1 +0,0 @@
Output is: 2147483647

View file

@ -1,14 +0,0 @@
ptr = allocp 8
a = immi 2147483647
b = immi 1
c = addjovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res

View file

@ -1 +0,0 @@
Output is: 12345678

View file

@ -1,15 +0,0 @@
ptr = allocp 8
a = immi 65536
b = immi 32767
c = muljovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res

View file

@ -1 +0,0 @@
Output is: 2147418112

View file

@ -1,14 +0,0 @@
ptr = allocp 8
a = immi 65536
b = immi 32768
c = muljovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res

View file

@ -1 +0,0 @@
Output is: 12345678

View file

@ -1,15 +0,0 @@
ptr = allocp 8
a = immi -2147483647
b = immi 1
c = subjovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res

View file

@ -1 +0,0 @@
Output is: -2147483648

View file

@ -1,14 +0,0 @@
ptr = allocp 8
a = immi -2147483647
b = immi 2
c = subjovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res

View file

@ -1 +0,0 @@
Output is: 12345678

View file

@ -1 +1 @@
10b865fa5d413277c2e11c211302f75d87029f06
f96ca24c15235dba3282e3649356b6a7acf5d2e2

View file

@ -1216,8 +1216,6 @@ namespace nanojit
return;
}
// Changes to the logic below will likely need to be propagated to Assembler::asm_jov().
countlir_jcc();
LInsp to = ins->getTarget();
LabelState *label = _labels.get(to);
@ -1243,37 +1241,6 @@ namespace nanojit
}
}
void Assembler::asm_jov(LInsp ins, InsList& pending_lives)
{
// The caller is responsible for countlir_* profiling, unlike
// asm_jcc above. The reason for this is that asm_jov may not
// be called if the instruction is dead, and it is our convention
// to count such instructions anyway.
LOpcode op = ins->opcode();
LInsp to = ins->getTarget();
LabelState *label = _labels.get(to);
if (label && label->addr) {
// forward jump to known label. need to merge with label's register state.
unionRegisterState(label->regs);
asm_branch_ov(op, label->addr);
}
else {
// back edge.
handleLoopCarriedExprs(pending_lives);
if (!label) {
// evict all registers, most conservative approach.
evictAllActiveRegs();
_labels.add(to, 0, _allocator);
}
else {
// evict all registers, most conservative approach.
intersectRegisterState(label->regs);
}
NIns *branch = asm_branch_ov(op, 0);
_patches.put(branch,to);
}
}
void Assembler::asm_x(LInsp ins)
{
verbose_only( _thisfrag->nStaticExits++; )
@ -1540,7 +1507,6 @@ namespace nanojit
#if defined NANOJIT_64BIT
case LIR_addq:
case LIR_subq:
case LIR_andq:
case LIR_lshq:
case LIR_rshuq:
@ -1801,7 +1767,7 @@ namespace nanojit
case LIR_addxovi:
case LIR_subxovi:
case LIR_mulxovi:
case LIR_mulxovi: {
verbose_only( _thisfrag->nStaticExits++; )
countlir_xcc();
countlir_alu();
@ -1809,37 +1775,11 @@ namespace nanojit
ins->oprnd2()->setResultLive();
if (ins->isExtant()) {
NIns* exit = asm_exit(ins); // does intersectRegisterState()
asm_branch_ov(op, exit);
asm_branch_xov(op, exit);
asm_arith(ins);
}
break;
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
countlir_jcc();
countlir_alu();
ins->oprnd1()->setResultLive();
ins->oprnd2()->setResultLive();
if (ins->isExtant()) {
asm_jov(ins, pending_lives);
asm_arith(ins);
}
break;
#ifdef NANOJIT_64BIT
case LIR_addjovq:
case LIR_subjovq:
countlir_jcc();
countlir_alu();
ins->oprnd1()->setResultLive();
ins->oprnd2()->setResultLive();
if (ins->isExtant()) {
asm_jov(ins, pending_lives);
asm_qbinop(ins);
}
break;
#endif
}
case LIR_eqd:
case LIR_led:

View file

@ -412,7 +412,6 @@ namespace nanojit
void asm_mmq(Register rd, int dd, Register rs, int ds);
void asm_jmp(LInsp ins, InsList& pending_lives);
void asm_jcc(LInsp ins, InsList& pending_lives);
void asm_jov(LInsp ins, InsList& pending_lives);
void asm_x(LInsp ins);
void asm_xcc(LInsp ins);
NIns* asm_exit(LInsp guard);
@ -459,7 +458,7 @@ namespace nanojit
void asm_call(LInsp);
Register asm_binop_rhs_reg(LInsp ins);
NIns* asm_branch(bool branchOnFalse, LInsp cond, NIns* targ);
NIns* asm_branch_ov(LOpcode op, NIns* targ);
void asm_branch_xov(LOpcode op, NIns* targ);
void asm_switch(LIns* ins, NIns* target);
void asm_jtbl(LIns* ins, NIns** table);
void emitJumpTable(SwitchInfo* si, NIns* target);

View file

@ -428,18 +428,6 @@ extern "C" void sync_instruction_memory(caddr_t v, u_int len);
}
}
#ifdef PERFM
// This method is used only for profiling purposes.
// See CodegenLIR::emitMD() in Tamarin for an example.
size_t CodeAlloc::size(const CodeList* blocks) {
size_t size = 0;
for (const CodeList* b = blocks; b != 0; b = b->next)
size += int((uintptr_t)b->end - (uintptr_t)b);
return size;
}
#endif
size_t CodeAlloc::size() {
return totalAllocated;
}

View file

@ -188,11 +188,6 @@ namespace nanojit
/** add a block previously returned by alloc(), to code */
static void add(CodeList* &code, NIns* start, NIns* end);
/** return the number of bytes in all the code blocks in "code", including block overhead */
#ifdef PERFM
static size_t size(const CodeList* code);
#endif
/** return the total number of bytes held by this CodeAlloc. */
size_t size();

View file

@ -327,11 +327,6 @@ namespace nanojit
return ins2(op, condition, toLabel);
}
LInsp LirBufWriter::insBranchJov(LOpcode op, LInsp a, LInsp b, LInsp toLabel)
{
return ins3(op, a, b, toLabel);
}
LIns* LirBufWriter::insJtbl(LIns* index, uint32_t size)
{
LInsJtbl* insJtbl = (LInsJtbl*) _buf->makeRoom(sizeof(LInsJtbl));
@ -542,14 +537,6 @@ namespace nanojit
if (oprnd->isImmQ())
return insImmI(oprnd->immQlo());
break;
case LIR_i2q:
if (oprnd->isImmI())
return insImmQ(int64_t(int32_t(oprnd->immI())));
break;
case LIR_ui2uq:
if (oprnd->isImmI())
return insImmQ(uint64_t(uint32_t(oprnd->immI())));
break;
#endif
#if NJ_SOFTFLOAT_SUPPORTED
case LIR_dlo2i:
@ -811,11 +798,7 @@ namespace nanojit
return oprnd1;
case LIR_andi:
case LIR_muli:
case LIR_ltui: // unsigned < 0 -> always false
// note that we know that oprnd2 == insImmI(0), so just return that
return oprnd2;
case LIR_gtui: // unsigned >= 0 -> always true
return insImmI(1);
case LIR_eqi:
if (oprnd1->isop(LIR_ori) &&
oprnd1->oprnd2()->isImmI() &&
@ -826,40 +809,19 @@ namespace nanojit
default:
;
}
} else if (c == -1) {
} else if (c == -1 || (c == 1 && oprnd1->isCmp())) {
switch (v) {
case LIR_ori:
// x | -1 = -1
// x | -1 = -1, cmp | 1 = 1
return oprnd2;
case LIR_andi:
// x & -1 = x
// x & -1 = x, cmp & 1 = cmp
return oprnd1;
case LIR_gtui:
// u32 > 0xffffffff -> always false
return insImmI(0);
default:
;
}
} else if (c == 1) {
if (oprnd1->isCmp()) {
switch (v) {
case LIR_ori:
// cmp | 1 = 1
// note that we know that oprnd2 == insImmI(1), so just return that
return oprnd2;
case LIR_andi:
// cmp & 1 = cmp
return oprnd1;
case LIR_gtui:
// (0|1) > 1 -> always false
return insImmI(0);
default:
;
}
} else if (v == LIR_muli) {
// x * 1 = x
return oprnd1;
}
} else if (c == 1 && v == LIR_muli) {
return oprnd1;
}
}
@ -925,31 +887,18 @@ namespace nanojit
return out->insGuard(v, c, gr);
}
// Simplify operator if possible. Always return NULL if overflow is possible.
LIns* ExprFilter::simplifyOverflowArith(LOpcode op, LInsp *opnd1, LInsp *opnd2)
LIns* ExprFilter::insGuardXov(LOpcode op, LInsp oprnd1, LInsp oprnd2, GuardRecord *gr)
{
LInsp oprnd1 = *opnd1;
LInsp oprnd2 = *opnd2;
if (oprnd1->isImmI() && oprnd2->isImmI()) {
int32_t c1 = oprnd1->immI();
int32_t c2 = oprnd2->immI();
double d = 0.0;
// The code below attempts to perform the operation while
// detecting overflow. For multiplication, we may unnecessarily
// infer a possible overflow due to the insufficient integer
// range of the double type.
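// Worked example (illustrative, using the constants from the deleted
// jov tests above): c1 = 65536, c2 = 32767 gives d = 2147418112.0 and
// int32_t(d) == d, so the operation folds to a constant; c1 = 65536,
// c2 = 32768 gives d = 2147483648.0, which lies outside the int32_t
// range, so the r == d check fails and NULL is returned to signal a
// possible overflow.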
switch (op) {
case LIR_addjovi:
case LIR_addxovi: d = double(c1) + double(c2); break;
case LIR_subjovi:
case LIR_subxovi: d = double(c1) - double(c2); break;
case LIR_muljovi:
case LIR_mulxovi: d = double(c1) * double(c2); break;
default: NanoAssert(0); break;
default: NanoAssert(0); break;
}
int32_t r = int32_t(d);
if (r == d)
@ -957,20 +906,14 @@ namespace nanojit
} else if (oprnd1->isImmI() && !oprnd2->isImmI()) {
switch (op) {
case LIR_addjovi:
case LIR_addxovi:
case LIR_muljovi:
case LIR_mulxovi: {
// swap operands, moving const to rhs
// move const to rhs
LIns* t = oprnd2;
oprnd2 = oprnd1;
oprnd1 = t;
// swap actual arguments in caller as well
*opnd1 = oprnd1;
*opnd2 = oprnd2;
break;
}
case LIR_subjovi:
case LIR_subxovi:
break;
default:
@ -982,31 +925,19 @@ namespace nanojit
int c = oprnd2->immI();
if (c == 0) {
switch (op) {
case LIR_addjovi:
case LIR_addxovi:
case LIR_subjovi:
case LIR_subxovi:
return oprnd1;
case LIR_muljovi:
case LIR_mulxovi:
return oprnd2;
default:
;
}
} else if (c == 1 && (op == LIR_muljovi || op == LIR_mulxovi)) {
} else if (c == 1 && op == LIR_mulxovi) {
return oprnd1;
}
}
return NULL;
}
LIns* ExprFilter::insGuardXov(LOpcode op, LInsp oprnd1, LInsp oprnd2, GuardRecord *gr)
{
LIns* simplified = simplifyOverflowArith(op, &oprnd1, &oprnd2);
if (simplified)
return simplified;
return out->insGuardXov(op, oprnd1, oprnd2, gr);
}
@ -1038,15 +969,6 @@ namespace nanojit
return out->insBranch(v, c, t);
}
LIns* ExprFilter::insBranchJov(LOpcode op, LInsp oprnd1, LInsp oprnd2, LIns* target)
{
LIns* simplified = simplifyOverflowArith(op, &oprnd1, &oprnd2);
if (simplified)
return simplified;
return out->insBranchJov(op, oprnd1, oprnd2, target);
}
LIns* ExprFilter::insLoad(LOpcode op, LIns* base, int32_t off, AccSet accSet) {
if (base->isImmP() && !isS8(off)) {
// if the effective address is constant, then transform:
@ -1394,18 +1316,12 @@ namespace nanojit
case LIR_addxovi:
case LIR_subxovi:
case LIR_mulxovi:
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
CASE86(LIR_divi:)
case LIR_addd:
case LIR_subd:
case LIR_muld:
case LIR_divd:
CASE64(LIR_addq:)
CASE64(LIR_subq:)
CASE64(LIR_addjovq:)
CASE64(LIR_subjovq:)
case LIR_andi:
case LIR_ori:
case LIR_xori:
@ -1786,19 +1702,8 @@ namespace nanojit
formatGuardXov(buf, i);
break;
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
CASE64(LIR_addjovq:)
CASE64(LIR_subjovq:)
VMPI_snprintf(s, n, "%s = %s %s, %s ; ovf -> %s", formatRef(&b1, i), lirNames[op],
formatRef(&b2, i->oprnd1()),
formatRef(&b3, i->oprnd2()),
i->oprnd3() ? formatRef(&b4, i->oprnd3()) : "unpatched");
break;
case LIR_addi: CASE64(LIR_addq:)
case LIR_subi: CASE64(LIR_subq:)
case LIR_subi:
case LIR_muli:
CASE86(LIR_divi:)
case LIR_addd:
@ -1810,7 +1715,7 @@ namespace nanojit
case LIR_xori: CASE64(LIR_xorq:)
case LIR_lshi: CASE64(LIR_lshq:)
case LIR_rshi: CASE64(LIR_rshq:)
case LIR_rshui: CASE64(LIR_rshuq:)
case LIR_rshui: CASE64(LIR_rshuq:)
case LIR_eqi: CASE64(LIR_eqq:)
case LIR_lti: CASE64(LIR_ltq:)
case LIR_lei: CASE64(LIR_leq:)
@ -2496,8 +2401,6 @@ namespace nanojit
return ins;
}
// There is no CseFilter::insBranchJov(), as LIR_*jov* are not CSEable.
LInsp CseFilter::insCall(const CallInfo *ci, LInsp args[])
{
LInsp ins;
@ -3023,7 +2926,6 @@ namespace nanojit
case LIR_orq:
case LIR_xorq:
case LIR_addq:
case LIR_subq:
case LIR_eqq:
case LIR_ltq:
case LIR_gtq:
@ -3247,39 +3149,6 @@ namespace nanojit
return out->insBranch(op, cond, to);
}
LIns* ValidateWriter::insBranchJov(LOpcode op, LIns* a, LIns* b, LIns* to)
{
int nArgs = 2;
LTy formals[2];
LIns* args[2] = { a, b };
switch (op) {
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
formals[0] = LTy_I;
formals[1] = LTy_I;
break;
#ifdef NANOJIT_64BIT
case LIR_addjovq:
case LIR_subjovq:
formals[0] = LTy_Q;
formals[1] = LTy_Q;
break;
#endif
default:
NanoAssert(0);
}
// We check that target is a label in ValidateReader because it may
// not have been set here.
typeCheckArgs(op, nArgs, formals, args);
return out->insBranchJov(op, a, b, to);
}
LIns* ValidateWriter::insAlloc(int32_t size)
{
return out->insAlloc(size);
@ -3311,15 +3180,6 @@ namespace nanojit
case LIR_j:
NanoAssert(ins->getTarget() && ins->oprnd2()->isop(LIR_label));
break;
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
CASE64(LIR_addjovq:)
CASE64(LIR_subjovq:)
NanoAssert(ins->getTarget() && ins->oprnd3()->isop(LIR_label));
break;
case LIR_jtbl: {
uint32_t tableSize = ins->getTableSize();
NanoAssert(tableSize > 0);

View file

@ -62,41 +62,39 @@ namespace nanojit
// Pointer-sized synonyms.
LIR_paramp = PTR_SIZE(LIR_parami, LIR_paramq),
LIR_paramp = PTR_SIZE(LIR_parami, LIR_paramq),
LIR_retp = PTR_SIZE(LIR_reti, LIR_retq),
LIR_retp = PTR_SIZE(LIR_reti, LIR_retq),
LIR_livep = PTR_SIZE(LIR_livei, LIR_liveq),
LIR_livep = PTR_SIZE(LIR_livei, LIR_liveq),
LIR_ldp = PTR_SIZE(LIR_ldi, LIR_ldq),
LIR_ldp = PTR_SIZE(LIR_ldi, LIR_ldq),
LIR_stp = PTR_SIZE(LIR_sti, LIR_stq),
LIR_stp = PTR_SIZE(LIR_sti, LIR_stq),
LIR_callp = PTR_SIZE(LIR_calli, LIR_callq),
LIR_callp = PTR_SIZE(LIR_calli, LIR_callq),
LIR_eqp = PTR_SIZE(LIR_eqi, LIR_eqq),
LIR_ltp = PTR_SIZE(LIR_lti, LIR_ltq),
LIR_gtp = PTR_SIZE(LIR_gti, LIR_gtq),
LIR_lep = PTR_SIZE(LIR_lei, LIR_leq),
LIR_gep = PTR_SIZE(LIR_gei, LIR_geq),
LIR_ltup = PTR_SIZE(LIR_ltui, LIR_ltuq),
LIR_gtup = PTR_SIZE(LIR_gtui, LIR_gtuq),
LIR_leup = PTR_SIZE(LIR_leui, LIR_leuq),
LIR_geup = PTR_SIZE(LIR_geui, LIR_geuq),
LIR_eqp = PTR_SIZE(LIR_eqi, LIR_eqq),
LIR_ltp = PTR_SIZE(LIR_lti, LIR_ltq),
LIR_gtp = PTR_SIZE(LIR_gti, LIR_gtq),
LIR_lep = PTR_SIZE(LIR_lei, LIR_leq),
LIR_gep = PTR_SIZE(LIR_gei, LIR_geq),
LIR_ltup = PTR_SIZE(LIR_ltui, LIR_ltuq),
LIR_gtup = PTR_SIZE(LIR_gtui, LIR_gtuq),
LIR_leup = PTR_SIZE(LIR_leui, LIR_leuq),
LIR_geup = PTR_SIZE(LIR_geui, LIR_geuq),
LIR_addp = PTR_SIZE(LIR_addi, LIR_addq),
LIR_subp = PTR_SIZE(LIR_subi, LIR_subq),
LIR_addjovp = PTR_SIZE(LIR_addjovi, LIR_addjovq),
LIR_addp = PTR_SIZE(LIR_addi, LIR_addq),
LIR_andp = PTR_SIZE(LIR_andi, LIR_andq),
LIR_orp = PTR_SIZE(LIR_ori, LIR_orq),
LIR_xorp = PTR_SIZE(LIR_xori, LIR_xorq),
LIR_andp = PTR_SIZE(LIR_andi, LIR_andq),
LIR_orp = PTR_SIZE(LIR_ori, LIR_orq),
LIR_xorp = PTR_SIZE(LIR_xori, LIR_xorq),
LIR_lshp = PTR_SIZE(LIR_lshi, LIR_lshq),
LIR_rshp = PTR_SIZE(LIR_rshi, LIR_rshq),
LIR_rshup = PTR_SIZE(LIR_rshui, LIR_rshuq),
LIR_lshp = PTR_SIZE(LIR_lshi, LIR_lshq),
LIR_rshp = PTR_SIZE(LIR_rshi, LIR_rshq),
LIR_rshup = PTR_SIZE(LIR_rshui, LIR_rshuq),
LIR_cmovp = PTR_SIZE(LIR_cmovi, LIR_cmovq)
LIR_cmovp = PTR_SIZE(LIR_cmovi, LIR_cmovq)
};
// 32-bit integer comparisons must be contiguous, as must 64-bit integer
@ -323,7 +321,7 @@ namespace nanojit
uintptr_t _address;
uint32_t _typesig:27; // 9 3-bit fields indicating arg type, by ARGTYPE above (including ret type): a1 a2 a3 a4 a5 ret
AbiKind _abi:3;
uint32_t _isPure:1; // _isPure=1 means no side-effects, result only depends on args
uint8_t _isPure:1; // _isPure=1 means no side-effects, result only depends on args
AccSet _storeAccSet; // access regions stored by the function
verbose_only ( const char* _name; )
@ -871,13 +869,6 @@ namespace nanojit
isop(LIR_xbarrier) || isop(LIR_xtbl) ||
isop(LIR_addxovi) || isop(LIR_subxovi) || isop(LIR_mulxovi);
}
bool isJov() const {
return
#ifdef NANOJIT_64BIT
isop(LIR_addjovq) || isop(LIR_subjovq) ||
#endif
isop(LIR_addjovi) || isop(LIR_subjovi) || isop(LIR_muljovi);
}
// True if the instruction is a 32-bit integer immediate.
bool isImmI() const {
return isop(LIR_immi);
@ -920,7 +911,7 @@ namespace nanojit
}
bool isBranch() const {
return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl) || isJov();
return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl);
}
LTy retType() const {
@ -1013,7 +1004,7 @@ namespace nanojit
LIns* getLIns() { return &ins; };
};
// 3-operand form. Used for conditional moves, jov branches, and xov guards.
// 3-operand form. Used for conditional moves and xov guards.
class LInsOp3
{
private:
@ -1277,19 +1268,13 @@ namespace nanojit
LIns* LIns::getTarget() const {
NanoAssert(isBranch() && !isop(LIR_jtbl));
if (isJov())
return oprnd3();
else
return oprnd2();
return oprnd2();
}
void LIns::setTarget(LIns* label) {
NanoAssert(label && label->isop(LIR_label));
NanoAssert(isBranch() && !isop(LIR_jtbl));
if (isJov())
toLInsOp3()->oprnd_3 = label;
else
toLInsOp2()->oprnd_2 = label;
toLInsOp2()->oprnd_2 = label;
}
LIns* LIns::getTarget(uint32_t index) const {
@ -1445,12 +1430,9 @@ namespace nanojit
virtual LInsp insGuardXov(LOpcode v, LIns *a, LIns* b, GuardRecord *gr) {
return out->insGuardXov(v, a, b, gr);
}
virtual LInsp insBranch(LOpcode v, LIns* condition, LIns* to) {
virtual LInsp insBranch(LOpcode v, LInsp condition, LInsp to) {
return out->insBranch(v, condition, to);
}
virtual LInsp insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) {
return out->insBranchJov(v, a, b, to);
}
// arg: 0=first, 1=second, ...
// kind: 0=arg 1=saved-reg
virtual LInsp insParam(int32_t arg, int32_t kind) {
@ -1717,10 +1699,6 @@ namespace nanojit
return add_flush(out->insBranch(v, condition, to));
}
LIns* insBranchJov(LOpcode v, LInsp a, LInsp b, LInsp to) {
return add_flush(out->insBranchJov(v, a, b, to));
}
LIns* insJtbl(LIns* index, uint32_t size) {
return add_flush(out->insJtbl(index, size));
}
@ -1778,13 +1756,10 @@ namespace nanojit
LIns* ins1(LOpcode v, LIns* a);
LIns* ins2(LOpcode v, LIns* a, LIns* b);
LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
LIns* insGuard(LOpcode, LIns* cond, GuardRecord *);
LIns* insGuard(LOpcode, LIns *cond, GuardRecord *);
LIns* insGuardXov(LOpcode, LIns* a, LIns* b, GuardRecord *);
LIns* insBranch(LOpcode, LIns* cond, LIns* target);
LIns* insBranchJov(LOpcode, LIns* a, LIns* b, LIns* target);
LIns* insBranch(LOpcode, LIns *cond, LIns *target);
LIns* insLoad(LOpcode op, LInsp base, int32_t off, AccSet accSet);
private:
LIns* simplifyOverflowArith(LOpcode op, LInsp *opnd1, LInsp *opnd2);
};
class CseFilter: public LirWriter
@ -1979,7 +1954,6 @@ namespace nanojit
LInsp insGuard(LOpcode op, LInsp cond, GuardRecord *gr);
LInsp insGuardXov(LOpcode op, LInsp a, LInsp b, GuardRecord *gr);
LInsp insBranch(LOpcode v, LInsp condition, LInsp to);
LInsp insBranchJov(LOpcode v, LInsp a, LInsp b, LInsp to);
LInsp insAlloc(int32_t size);
LInsp insJtbl(LIns* index, uint32_t size);
};
@ -2127,7 +2101,6 @@ namespace nanojit
LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr);
LIns* insGuardXov(LOpcode v, LIns* a, LIns* b, GuardRecord* gr);
LIns* insBranch(LOpcode v, LIns* condition, LIns* to);
LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
LIns* insAlloc(int32_t size);
LIns* insJtbl(LIns* index, uint32_t size);
};

View file

@ -270,30 +270,30 @@ OP___(rshi, 90, Op2, I, 1) // right shift int (>>)
OP___(rshui, 91, Op2, I, 1) // right shift unsigned int (>>>)
OP_64(addq, 92, Op2, Q, 1) // add quad
OP_64(subq, 93, Op2, Q, 1) // subtract quad
OP_64(andq, 94, Op2, Q, 1) // bitwise-AND quad
OP_64(orq, 95, Op2, Q, 1) // bitwise-OR quad
OP_64(xorq, 96, Op2, Q, 1) // bitwise-XOR quad
OP_64(andq, 93, Op2, Q, 1) // bitwise-AND quad
OP_64(orq, 94, Op2, Q, 1) // bitwise-OR quad
OP_64(xorq, 95, Op2, Q, 1) // bitwise-XOR quad
OP_64(lshq, 97, Op2, Q, 1) // left shift quad; 2nd operand is an int
OP_64(rshq, 98, Op2, Q, 1) // right shift quad; 2nd operand is an int
OP_64(rshuq, 99, Op2, Q, 1) // right shift unsigned quad; 2nd operand is an int
OP_64(lshq, 96, Op2, Q, 1) // left shift quad; 2nd operand is an int
OP_64(rshq, 97, Op2, Q, 1) // right shift quad; 2nd operand is an int
OP_64(rshuq, 98, Op2, Q, 1) // right shift unsigned quad; 2nd operand is an int
OP___(negd, 100, Op1, D, 1) // negate double
OP___(addd, 101, Op2, D, 1) // add double
OP___(subd, 102, Op2, D, 1) // subtract double
OP___(muld, 103, Op2, D, 1) // multiply double
OP___(divd, 104, Op2, D, 1) // divide double
OP___(negd, 99, Op1, D, 1) // negate double
OP___(addd, 100, Op2, D, 1) // add double
OP___(subd, 101, Op2, D, 1) // subtract double
OP___(muld, 102, Op2, D, 1) // multiply double
OP___(divd, 103, Op2, D, 1) // divide double
// LIR_modd is just a place-holder opcode, ie. the back-ends cannot generate
// code for it. It's used in TraceMonkey briefly but is always demoted to a
// LIR_modl or converted to a function call before Nanojit has to do anything
// serious with it.
OP___(modd, 105, Op2, D, 1) // modulo double
OP___(modd, 104, Op2, D, 1) // modulo double
OP___(cmovi, 106, Op3, I, 1) // conditional move int
OP_64(cmovq, 107, Op3, Q, 1) // conditional move quad
OP___(cmovi, 105, Op3, I, 1) // conditional move int
OP_64(cmovq, 106, Op3, Q, 1) // conditional move quad
OP_UN(107)
OP_UN(108)
//---------------------------------------------------------------------------
@ -307,36 +307,35 @@ OP___(i2d, 112, Op1, D, 1) // convert int to double
OP___(ui2d, 113, Op1, D, 1) // convert unsigned int to double
OP___(d2i, 114, Op1, I, 1) // convert double to int (no exceptions raised, platform rounding rules)
OP_UN(115)
OP_UN(116)
//---------------------------------------------------------------------------
// Overflow arithmetic
//---------------------------------------------------------------------------
// These all exit if overflow occurred. The result is valid on either path.
OP___(addxovi, 115, Op3, I, 1) // add int and exit on overflow
OP___(subxovi, 116, Op3, I, 1) // subtract int and exit on overflow
OP___(mulxovi, 117, Op3, I, 1) // multiply int and exit on overflow
// These all exit if overflow occurred. The result is valid on either path.
OP___(addxovi, 117, Op3, I, 1) // add int and exit on overflow
OP___(subxovi, 118, Op3, I, 1) // sub int and exit on overflow
OP___(mulxovi, 119, Op3, I, 1) // multiply int and exit on overflow
// These all branch if overflow occurred. The result is valid on either path.
OP___(addjovi, 118, Op3, I, 1) // add int and branch on overflow
OP___(subjovi, 119, Op3, I, 1) // subtract int and branch on overflow
OP___(muljovi, 120, Op3, I, 1) // multiply int and branch on overflow
OP_64(addjovq, 121, Op3, Q, 1) // add quad and branch on overflow
OP_64(subjovq, 122, Op3, Q, 1) // subtract quad and branch on overflow
OP_UN(120)
//---------------------------------------------------------------------------
// SoftFloat
//---------------------------------------------------------------------------
OP_SF(dlo2i, 123, Op1, I, 1) // get the low 32 bits of a double as an int
OP_SF(dhi2i, 124, Op1, I, 1) // get the high 32 bits of a double as an int
OP_SF(ii2d, 125, Op2, D, 1) // join two ints (1st arg is low bits, 2nd is high)
OP_SF(dlo2i, 121, Op1, I, 1) // get the low 32 bits of a double as an int
OP_SF(dhi2i, 122, Op1, I, 1) // get the high 32 bits of a double as an int
OP_SF(ii2d, 123, Op2, D, 1) // join two ints (1st arg is low bits, 2nd is high)
// LIR_hcalli is a hack that's only used on 32-bit platforms that use
// SoftFloat. Its operand is always a LIR_calli, but one that specifies a
// function that returns a double. It indicates that the double result is
// returned via two 32-bit integer registers. The result is always used as the
// second operand of a LIR_ii2d.
OP_SF(hcalli, 126, Op1, I, 1)
OP_SF(hcalli, 124, Op1, I, 1)
OP_UN(125)
OP_UN(126)
OP_UN(127)
#undef OP_UN

View file

@ -23,7 +23,6 @@
* Contributor(s):
* Adobe AS3 Team
* Vladimir Vukicevic <vladimir@pobox.com>
* Jacob Bramley <Jacob.Bramley@arm.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -270,13 +269,6 @@ Assembler::asm_add_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
NanoAssert(IsGpReg(rn));
NanoAssert((stat & 1) == stat);
// As a special case to simplify code elsewhere, emit nothing where we
// don't want to update the flags (stat == 0), the second operand is 0 and
// (rd == rn). Such instructions are effectively NOPs.
if ((imm == 0) && (stat == 0) && (rd == rn)) {
return;
}
// Try to encode the value directly as an operand 2 immediate value, then
// fall back to loading the value into a register.
if (encOp2Imm(imm, &op2imm)) {
@ -317,13 +309,6 @@ Assembler::asm_sub_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
NanoAssert(IsGpReg(rd));
NanoAssert(IsGpReg(rn));
NanoAssert((stat & 1) == stat);
// As a special case to simplify code elsewhere, emit nothing where we
// don't want to update the flags (stat == 0), the second operand is 0 and
// (rd == rn). Such instructions are effectively NOPs.
if ((imm == 0) && (stat == 0) && (rd == rn)) {
return;
}
// Try to encode the value directly as an operand 2 immediate value, then
// fall back to loading the value into a register.
@ -786,7 +771,7 @@ Assembler::asm_stkarg(LInsp arg, int stkd)
if (!_config.arm_vfp || !isF64) {
NanoAssert(IsGpReg(rr));
asm_str(rr, SP, stkd);
STR(rr, SP, stkd);
} else {
// According to the comments in asm_arg_64, LIR_ii2d
// can have a 64-bit argument even if VFP is disabled. However,
@ -809,7 +794,7 @@ Assembler::asm_stkarg(LInsp arg, int stkd)
// memory for it and then copy it onto the stack.
int d = findMemFor(arg);
if (!isF64) {
asm_str(IP, SP, stkd);
STR(IP, SP, stkd);
if (arg->isop(LIR_allocp)) {
asm_add_imm(IP, FP, d);
} else {
@ -821,9 +806,9 @@ Assembler::asm_stkarg(LInsp arg, int stkd)
NanoAssert((stkd & 7) == 0);
#endif
asm_str(IP, SP, stkd+4);
STR(IP, SP, stkd+4);
LDR(IP, FP, d+4);
asm_str(IP, SP, stkd);
STR(IP, SP, stkd);
LDR(IP, FP, d);
}
}
@ -891,8 +876,8 @@ Assembler::asm_call(LInsp ins)
// The result doesn't have a register allocated, so store the
// result (in R0,R1) directly to its stack slot.
asm_str(R0, FP, d+0);
asm_str(R1, FP, d+4);
STR(R0, FP, d+0);
STR(R1, FP, d+4);
} else {
NanoAssert(IsFpReg(rr));
@ -1199,11 +1184,11 @@ Assembler::asm_qjoin(LIns *ins)
LIns* hi = ins->oprnd2();
Register r = findRegFor(hi, GpRegs);
asm_str(r, FP, d+4);
STR(r, FP, d+4);
// okay if r gets recycled.
r = findRegFor(lo, GpRegs);
asm_str(r, FP, d);
STR(r, FP, d);
deprecated_freeRsrcOf(ins); // if we had a reg in use, emit a ST to flush it to mem
}
@ -1272,11 +1257,6 @@ Assembler::canRemat(LIns* ins)
void
Assembler::asm_restore(LInsp i, Register r)
{
// The following registers should never be restored:
NanoAssert(r != PC);
NanoAssert(r != IP);
NanoAssert(r != SP);
if (i->isop(LIR_allocp)) {
asm_add_imm(r, FP, deprecated_disp(i));
} else if (i->isImmI()) {
@ -1328,10 +1308,8 @@ Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
(void) pop;
(void) quad;
NanoAssert(d);
// The following registers should never be spilled:
NanoAssert(rr != PC);
NanoAssert(rr != IP);
NanoAssert(rr != SP);
// fixme: bug 556175 this code doesn't appear to handle
// values of d outside the 12-bit range.
if (_config.arm_vfp && IsFpReg(rr)) {
if (isS8(d >> 2)) {
FSTD(rr, FP, d);
@ -1341,20 +1319,17 @@ Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
}
} else {
NIns merged;
// asm_str always succeeds, but returns '1' to indicate that it emitted
// a simple, easy-to-merge STR.
if (asm_str(rr, FP, d)) {
// See if we can merge this store into an immediately following
// one, by creating or extending a STM instruction.
if (/* is it safe to poke _nIns[1] ? */
does_next_instruction_exist(_nIns, codeStart, codeEnd,
exitStart, exitEnd)
&& /* can we merge _nIns[0] into _nIns[1] ? */
do_peep_2_1(&merged, _nIns[0], _nIns[1])) {
_nIns[1] = merged;
_nIns++;
verbose_only( asm_output("merge next into STMDB"); )
}
STR(rr, FP, d);
// See if we can merge this store into an immediately following
// one, by creating or extending a STM instruction.
if (/* is it safe to poke _nIns[1] ? */
does_next_instruction_exist(_nIns, codeStart, codeEnd,
exitStart, exitEnd)
&& /* can we merge _nIns[0] into _nIns[1] ? */
do_peep_2_1(&merged, _nIns[0], _nIns[1])) {
_nIns[1] = merged;
_nIns++;
verbose_only( asm_output("merge next into STMDB"); )
}
}
}
@ -1457,9 +1432,9 @@ Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
underrunProtect(LD32_size*2 + 8);
// XXX use another reg, get rid of dependency
asm_str(IP, rb, dr);
STR(IP, rb, dr);
asm_ld_imm(IP, value->immDlo(), false);
asm_str(IP, rb, dr+4);
STR(IP, rb, dr+4);
asm_ld_imm(IP, value->immDhi(), false);
return;
@ -1506,9 +1481,9 @@ Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
underrunProtect(LD32_size*2 + 8);
// XXX use another reg, get rid of dependency
asm_str(IP, rb, dr);
STR(IP, rb, dr);
asm_ld_imm(IP, value->immDlo(), false);
asm_str(IP, rb, dr+4);
STR(IP, rb, dr+4);
asm_ld_imm(IP, value->immDhi(), false);
return;
@ -1580,6 +1555,8 @@ Assembler::asm_immd_nochk(Register rr, int32_t immDlo, int32_t immDhi)
void
Assembler::asm_immd(LInsp ins)
{
//asm_output(">>> asm_immd");
int d = deprecated_disp(ins);
Register rr = ins->deprecated_getReg();
@ -1593,12 +1570,17 @@ Assembler::asm_immd(LInsp ins)
asm_immd_nochk(rr, ins->immDlo(), ins->immDhi());
} else {
NanoAssert(d);
// asm_mmq might spill a reg, so don't call it;
// instead do the equivalent directly.
//asm_mmq(FP, d, PC, -16);
asm_str(IP, FP, d+4);
STR(IP, FP, d+4);
asm_ld_imm(IP, ins->immDhi());
asm_str(IP, FP, d);
STR(IP, FP, d);
asm_ld_imm(IP, ins->immDlo());
}
//asm_output("<<< asm_immd");
}
void
@ -1656,9 +1638,6 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
// STR ip, [rd, #dd]
// LDR ip, [rs, #(ds+4)]
// STR ip, [rd, #(dd+4)]
//
// Note that if rs+4 or rd+4 is outside the LDR or STR range, extra
// instructions will be emitted as required to make the code work.
// Ensure that the PC is not used as either base register. The instruction
// generation macros call underrunProtect, and a side effect of this is
@ -1667,50 +1646,18 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
NanoAssert(rs != PC);
NanoAssert(rd != PC);
// We use IP as a swap register, so check that it isn't used for something
// else by the caller.
NanoAssert(rs != IP);
NanoAssert(rd != IP);
// Find the list of free registers from the allocator's free list and the
// GpRegs mask. This excludes any floating-point registers that may be on
// the free list.
RegisterMask free = _allocator.free & AllowableFlagRegs;
// Ensure that ds and dd are within the +/-4095 offset range of STR and
// LDR. If either is out of range, adjust and modify rd or rs so that the
// load works correctly.
// The modification here is performed after the LDR/STR block (because code
// is emitted backwards), so this one is the reverse operation.
int32_t dd_adj = 0;
int32_t ds_adj = 0;
if ((dd+4) >= 0x1000) {
dd_adj = ((dd+4) & ~0xfff);
} else if (dd <= -0x1000) {
dd_adj = -((-dd) & ~0xfff);
}
if ((ds+4) >= 0x1000) {
ds_adj = ((ds+4) & ~0xfff);
} else if (ds <= -0x1000) {
ds_adj = -((-ds) & ~0xfff);
}
// These will emit no code if d*_adj is 0.
asm_sub_imm(rd, rd, dd_adj);
asm_sub_imm(rs, rs, ds_adj);
ds -= ds_adj;
dd -= dd_adj;
if (free) {
// There is at least one register on the free list, so grab one for
// temporary use. There is no need to allocate it explicitly because
// we won't need it after this function returns.
// The CountLeadingZeroes utility can be used to quickly find a set bit
// in the free mask.
// The CountLeadingZeroes can be used to quickly find a set bit in the
// free mask.
Register rr = (Register)(31-CountLeadingZeroes(free));
// Note: Not every register in GpRegs is usable here. However, these
@ -1722,6 +1669,7 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
NanoAssert((free & rmask(FP)) == 0);
// Emit the actual instruction sequence.
STR(IP, rd, dd+4);
STR(rr, rd, dd);
LDR(IP, rs, ds+4);
@ -1733,10 +1681,6 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
STR(IP, rd, dd);
LDR(IP, rs, ds);
}
// Re-adjust the base registers. (These will emit no code if d*_adj is 0.)
asm_add_imm(rd, rd, dd_adj);
asm_add_imm(rs, rs, ds_adj);
}
// Increment the 32-bit profiling counter at pCtr, without
@ -1984,72 +1928,6 @@ Assembler::asm_ldr_chk(Register d, Register b, int32_t off, bool chk)
asm_output("ldr %s, [%s, #%d]",gpn(d),gpn(b),(off));
}
// Emit a store, using a register base and an arbitrary immediate offset. This
// behaves like a STR instruction, but doesn't care about the offset range, and
// emits one of the following instruction sequences:
//
// ----
// STR rt, [rr, #offset]
// ----
// asm_add_imm ip, rr, #(offset & ~0xfff)
// STR rt, [ip, #(offset & 0xfff)]
// ----
// # This one's fairly horrible, but should be rare.
// asm_add_imm rr, rr, #(offset & ~0xfff)
// STR rt, [ip, #(offset & 0xfff)]
// asm_sub_imm rr, rr, #(offset & ~0xfff)
// ----
// SUB-based variants (for negative offsets) are also supported.
// ----
//
// The return value is 1 if a simple STR could be emitted, or 0 if the required
// sequence was more complex.
int32_t
Assembler::asm_str(Register rt, Register rr, int32_t offset)
{
// We can't do PC-relative stores, and we can't store the PC value, because
// we use macros (such as STR) which call underrunProtect, and this can
// push _nIns to a new page, thus making any PC value impractical to
// predict.
NanoAssert(rr != PC);
NanoAssert(rt != PC);
if (offset >= 0) {
// The offset is positive, so use ADD (and variants).
if (isU12(offset)) {
STR(rt, rr, offset);
return 1;
}
if (rt != IP) {
STR(rt, IP, offset & 0xfff);
asm_add_imm(IP, rr, offset & ~0xfff);
} else {
int32_t adj = offset & ~0xfff;
asm_sub_imm(rr, rr, adj);
STR(rt, rr, offset-adj);
asm_add_imm(rr, rr, adj);
}
} else {
// The offset is negative, so use SUB (and variants).
if (isU12(-offset)) {
STR(rt, rr, offset);
return 1;
}
if (rt != IP) {
STR(rt, IP, -((-offset) & 0xfff));
asm_sub_imm(IP, rr, (-offset) & ~0xfff);
} else {
int32_t adj = -((-offset) & 0xfff);
asm_add_imm(IP, rr, adj);
STR(rt, rr, offset-adj);
asm_sub_imm(rr, rr, adj);
}
}
return 0;
}
// Emit the code required to load an immediate value (imm) into general-purpose
// register d. Optimal (MOV-based) mechanisms are used if the immediate can be
// encoded using ARM's operand 2 encoding. Otherwise, a slot is used on the
@ -2391,7 +2269,7 @@ Assembler::asm_branch(bool branchOnFalse, LInsp cond, NIns* targ)
return at;
}
NIns* Assembler::asm_branch_ov(LOpcode op, NIns* target)
void Assembler::asm_branch_xov(LOpcode op, NIns* target)
{
// Because MUL can't set the V flag, we use SMULL and CMP to set the Z flag
// to detect overflow on multiply. Thus, if we have a LIR_mulxovi, we must
@ -2400,7 +2278,6 @@ NIns* Assembler::asm_branch_ov(LOpcode op, NIns* target)
// Emit a suitable branch instruction.
B_cond(cc, target);
return _nIns;
}
void

View file

@ -23,7 +23,6 @@
* Contributor(s):
* Adobe AS3 Team
* Vladimir Vukicevic <vladimir@pobox.com>
* Jacob Bramley <Jacob.Bramley@arm.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -66,7 +65,12 @@ namespace nanojit
// only d0-d6 are actually used; we'll use d7 as s14-s15 for i2d/u2f/etc.
#define NJ_VFP_MAX_REGISTERS 8
#define NJ_MAX_REGISTERS (11 + NJ_VFP_MAX_REGISTERS)
#define NJ_MAX_STACK_ENTRY 4096
// fixme: bug 556175: this can't be over 1024, because
// the ARM backend cannot support more than 12-bit displacements
// in a single load/store instruction, for spilling. see asm_spill().
#define NJ_MAX_STACK_ENTRY 1024
#define NJ_MAX_PARAMETERS 16
#define NJ_ALIGN_STACK 8
@ -225,7 +229,6 @@ verbose_only( extern const char* shiftNames[]; )
void asm_stkarg(LInsp p, int stkd); \
void asm_cmpi(Register, int32_t imm); \
void asm_ldr_chk(Register d, Register b, int32_t off, bool chk); \
int32_t asm_str(Register rt, Register rr, int32_t off); \
void asm_cmp(LIns *cond); \
void asm_cmpd(LIns *cond); \
void asm_ld_imm(Register d, int32_t imm, bool chk = true); \
@ -607,8 +610,8 @@ enum {
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if (_off < 0) { \
NanoAssert(isU12(-(_off))); \
*(--_nIns) = (NIns)( COND_AL | (0x55<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xfff) ); \
NanoAssert(isU12(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x55<<20) | ((_n)<<16) | ((_d)<<12) | ((-_off)&0xfff) ); \
} else { \
NanoAssert(isU12(_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x5D<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xfff) ); \
@ -622,8 +625,8 @@ enum {
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if (_off < 0) { \
NanoAssert(isU8(-(_off))); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((-(_off))&0xf0)<<4) | ((-(_off))&0xf) ); \
NanoAssert(isU8(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
} else { \
NanoAssert(isU8(_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
@ -638,8 +641,8 @@ enum {
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if (_off < 0) { \
NanoAssert(isU8(-(_off))); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((-(_off))&0xf0)<<4) | ((-(_off))&0xf) ); \
NanoAssert(isU8(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
} else { \
NanoAssert(isU8(_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
@ -654,8 +657,8 @@ enum {
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if (_off < 0) { \
NanoAssert(isU8(-(_off))); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((-(_off))&0xf0)<<4) | ((-(_off))&0xf) ); \
NanoAssert(isU8(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
} else { \
NanoAssert(isU8(_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
@ -666,7 +669,7 @@ enum {
// Valid offset for STR and STRB is +/- 4095, STRH only has +/- 255
#define STR(_d,_n,_off) do { \
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
NanoAssert(isU12(_off) || isU12(-(_off))); \
NanoAssert(isU12(_off) || isU12(-_off)); \
underrunProtect(4); \
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
@ -675,7 +678,7 @@ enum {
#define STRB(_d,_n,_off) do { \
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
NanoAssert(isU12(_off) || isU12(-(_off))); \
NanoAssert(isU12(_off) || isU12(-_off)); \
underrunProtect(4); \
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x54<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x5C<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
@ -687,7 +690,7 @@ enum {
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if ((_off)<0) { \
NanoAssert(isU8(-(_off))); \
NanoAssert(isU8(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x14<<20) | ((_n)<<16) | ((_d)<<12) | (((-(_off))&0xF0)<<4) | (0xB<<4) | ((-(_off))&0xF) ); \
} else { \
NanoAssert(isU8(_off)); \

View file

@ -954,7 +954,6 @@ namespace nanojit
// MIPS arith immediate ops sign-extend the imm16 value
switch (op) {
case LIR_addxovi:
case LIR_addjovi:
SLT(AT, rr, ra);
ADDIU(rr, ra, rhsc);
goto done;
@ -962,7 +961,6 @@ namespace nanojit
ADDIU(rr, ra, rhsc);
goto done;
case LIR_subxovi:
case LIR_subjovi:
if (isS16(-rhsc)) {
SLT(AT, ra, rr);
ADDIU(rr, ra, -rhsc);
@ -976,7 +974,6 @@ namespace nanojit
}
break;
case LIR_mulxovi:
case LIR_muljovi:
case LIR_muli:
// FIXME: optimise constant multiply by 2^n
// if ((rhsc & (rhsc-1)) == 0)
@ -1027,7 +1024,6 @@ namespace nanojit
switch (op) {
case LIR_addxovi:
case LIR_addjovi:
SLT(AT, rr, ra);
ADDU(rr, ra, rb);
break;
@ -1044,7 +1040,6 @@ namespace nanojit
XOR(rr, ra, rb);
break;
case LIR_subxovi:
case LIR_subjovi:
SLT(AT,ra,rr);
SUBU(rr, ra, rb);
break;
@ -1064,7 +1059,6 @@ namespace nanojit
ANDI(rb, rb, 31);
break;
case LIR_mulxovi:
case LIR_muljovi:
t = registerAllocTmp(allow);
// Overflow indication required
// Do a 32x32 signed multiply generating a 64 bit result
@ -1487,15 +1481,14 @@ namespace nanojit
return patch;
}
NIns* Assembler::asm_branch_ov(LOpcode op, NIns* target)
void Assembler::asm_branch_xov(LOpcode op, NIns* target)
{
USE(op);
NanoAssert(target != NULL);
NIns* patch = asm_bxx(true, LIR_eqi, AT, ZERO, target);
(void) asm_bxx(true, LIR_eqi, AT, ZERO, target);
TAG("asm_branch_ov(op=%s, target=%p)", lirNames[op], target);
return patch;
TAG("asm_branch_xov(op=%s, target=%p)", lirNames[op], target);
}
NIns* Assembler::asm_branch(bool branchOnFalse, LIns *cond, NIns * const targ)

View file

@ -545,9 +545,8 @@ namespace nanojit
return _nIns;
}
NIns* Assembler::asm_branch_ov(LOpcode, NIns*) {
TODO(asm_branch_ov);
return _nIns;
void Assembler::asm_branch_xov(LOpcode, NIns*) {
TODO(asm_branch_xov);
}
void Assembler::asm_cmp(LOpcode condop, LIns *a, LIns *b, ConditionRegister cr) {

View file

@ -535,15 +535,13 @@ namespace nanojit
return at;
}
NIns* Assembler::asm_branch_ov(LOpcode, NIns* targ)
void Assembler::asm_branch_xov(LOpcode, NIns* targ)
{
NIns* at = 0;
underrunProtect(32);
intptr_t tt = ((intptr_t)targ - (intptr_t)_nIns + 8) >> 2;
// !targ means that it needs patch.
if( !(isIMM22((int32_t)tt)) || !targ ) {
JMP_long_nocheck((intptr_t)targ);
at = _nIns;
NOP();
BA(0, 5);
tt = 4;
@ -551,7 +549,6 @@ namespace nanojit
NOP();
BVS(0, tt);
return at;
}
void Assembler::asm_cmp(LIns *cond)
@ -875,8 +872,9 @@ namespace nanojit
LIns *rhs = ins->oprnd2();
RegisterMask allow = FpRegs;
Register ra, rb;
findRegFor2(allow, lhs, ra, allow, rhs, rb);
Register ra = findRegFor(lhs, FpRegs);
Register rb = (rhs == lhs) ? ra : findRegFor(rhs, FpRegs);
Register rr = deprecated_prepResultReg(ins, allow);
if (op == LIR_addd)

View file

@ -700,7 +700,7 @@ namespace nanojit
int32_t imm = getImm32(b);
LOpcode op = ins->opcode();
Register rr, ra;
if (op == LIR_muli || op == LIR_muljovi || op == LIR_mulxovi) {
if (op == LIR_muli || op == LIR_mulxovi) {
// Special case: imul-by-imm has true 3-addr form. So we don't
// need the MR(rr, ra) after the IMULI.
beginOp1Regs(ins, GpRegs, rr, ra);
@ -714,18 +714,13 @@ namespace nanojit
switch (ins->opcode()) {
default: TODO(arith_imm8);
case LIR_addi:
case LIR_addjovi:
case LIR_addxovi: ADDLR8(rr, imm); break; // XXX: bug 547125: could use LEA for LIR_addi
case LIR_andi: ANDLR8(rr, imm); break;
case LIR_ori: ORLR8( rr, imm); break;
case LIR_subi:
case LIR_subjovi:
case LIR_subxovi: SUBLR8(rr, imm); break;
case LIR_xori: XORLR8(rr, imm); break;
case LIR_addq:
case LIR_addjovq: ADDQR8(rr, imm); break;
case LIR_subq:
case LIR_subjovq: SUBQR8(rr, imm); break;
case LIR_addq: ADDQR8(rr, imm); break;
case LIR_andq: ANDQR8(rr, imm); break;
case LIR_orq: ORQR8( rr, imm); break;
case LIR_xorq: XORQR8(rr, imm); break;
@ -734,18 +729,13 @@ namespace nanojit
switch (ins->opcode()) {
default: TODO(arith_imm);
case LIR_addi:
case LIR_addjovi:
case LIR_addxovi: ADDLRI(rr, imm); break; // XXX: bug 547125: could use LEA for LIR_addi
case LIR_andi: ANDLRI(rr, imm); break;
case LIR_ori: ORLRI( rr, imm); break;
case LIR_subi:
case LIR_subjovi:
case LIR_subxovi: SUBLRI(rr, imm); break;
case LIR_xori: XORLRI(rr, imm); break;
case LIR_addq:
case LIR_addjovq: ADDQRI(rr, imm); break;
case LIR_subq:
case LIR_subjovq: SUBQRI(rr, imm); break;
case LIR_addq: ADDQRI(rr, imm); break;
case LIR_andq: ANDQRI(rr, imm); break;
case LIR_orq: ORQRI( rr, imm); break;
case LIR_xorq: XORQRI(rr, imm); break;
@ -844,23 +834,17 @@ namespace nanojit
default: TODO(asm_arith);
case LIR_ori: ORLRR(rr, rb); break;
case LIR_subi:
case LIR_subjovi:
case LIR_subxovi: SUBRR(rr, rb); break;
case LIR_addi:
case LIR_addjovi:
case LIR_addxovi: ADDRR(rr, rb); break; // XXX: bug 547125: could use LEA for LIR_addi
case LIR_andi: ANDRR(rr, rb); break;
case LIR_xori: XORRR(rr, rb); break;
case LIR_muli:
case LIR_muljovi:
case LIR_mulxovi: IMUL(rr, rb); break;
case LIR_xorq: XORQRR(rr, rb); break;
case LIR_orq: ORQRR(rr, rb); break;
case LIR_andq: ANDQRR(rr, rb); break;
case LIR_addq:
case LIR_addjovq: ADDQRR(rr, rb); break;
case LIR_subq:
case LIR_subjovq: SUBQRR(rr, rb); break;
case LIR_addq: ADDQRR(rr, rb); break;
}
if (rr != ra)
MR(rr, ra);
@ -1220,7 +1204,7 @@ namespace nanojit
return patch;
}
NIns* Assembler::asm_branch_ov(LOpcode, NIns* target) {
void Assembler::asm_branch_xov(LOpcode, NIns* target) {
if (target && !isTargetWithinS32(target)) {
setError(ConditionalBranchTooFar);
NanoAssert(0);
@ -1231,7 +1215,6 @@ namespace nanojit
JO8(8, target);
else
JO( 8, target);
return _nIns;
}
// WARNING: this function cannot generate code that will affect the

View file

@ -1492,10 +1492,9 @@ namespace nanojit
return at;
}
NIns* Assembler::asm_branch_ov(LOpcode, NIns* target)
void Assembler::asm_branch_xov(LOpcode, NIns* target)
{
JO(target);
return _nIns;
}
void Assembler::asm_switch(LIns* ins, NIns* exit)
@ -1714,7 +1713,6 @@ namespace nanojit
evictIfActive(EDX);
break;
case LIR_muli:
case LIR_muljovi:
case LIR_mulxovi:
isConstRhs = false;
if (lhs != rhs) {
@ -1752,13 +1750,10 @@ namespace nanojit
switch (op) {
case LIR_addi:
case LIR_addjovi:
case LIR_addxovi: ADD(rr, rb); break; // XXX: bug 547125: could use LEA for LIR_addi
case LIR_subi:
case LIR_subjovi:
case LIR_subxovi: SUB(rr, rb); break;
case LIR_muli:
case LIR_muljovi:
case LIR_mulxovi: MUL(rr, rb); break;
case LIR_andi: AND(rr, rb); break;
case LIR_ori: OR( rr, rb); break;
@ -1781,10 +1776,8 @@ namespace nanojit
LEA(rr, c, ra);
ra = rr; // suppress mov
break;
case LIR_addjovi:
case LIR_addxovi: ADDi(rr, c); break;
case LIR_subi:
case LIR_subjovi:
case LIR_subxovi: SUBi(rr, c); break;
case LIR_andi: ANDi(rr, c); break;
case LIR_ori: ORi( rr, c); break;
@ -2217,7 +2210,7 @@ namespace nanojit
}
} else {
debug_only( Register rr = ) prepareResultReg(ins, x87Regs);
verbose_only( Register rr = ) prepareResultReg(ins, x87Regs);
NanoAssert(FST0 == rr);
NanoAssert(!lhs->isInReg() || FST0 == lhs->getReg());

View file

@ -148,7 +148,9 @@ namespace nanojit
}
#ifdef AVMPLUS_VERBOSE
#define NJ_VERBOSE 1
#ifndef NJ_VERBOSE_DISABLED
#define NJ_VERBOSE 1
#endif
#endif
#ifdef NJ_NO_VARIADIC_MACROS

View file

@ -143,7 +143,6 @@ static void dumpProfile (void)
entries = reverse(entries);
vprof_printf ("event avg [min : max] total count\n");
for (e = entries; e; e = e->next) {
if (e->count == 0) continue; // ignore entries with zero count.
vprof_printf ("%s", e->file);
if (e->line >= 0) {
vprof_printf (":%d", e->line);
@ -180,6 +179,23 @@ static void dumpProfile (void)
entries = reverse(entries);
}
int _profileEntryValue (void* id, int64_t value)
{
entry_t e = (entry_t) id;
long* lock = &(e->lock);
LOCK (lock);
e->value = value;
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
if (e->func) e->func (e);
UNLOCK (lock);
return 0;
}
inline static entry_t findEntry (char* file, int line)
{
for (entry_t e = entries; e; e = e->next) {
@ -190,11 +206,7 @@ inline static entry_t findEntry (char* file, int line)
return NULL;
}
// Initialize the location pointed to by 'id' to a new value profile entry
// associated with 'file' and 'line', or do nothing if already initialized.
// An optional final argument provides a user-defined probe function.
int initValueProfile(void** id, char* file, int line, ...)
int profileValue(void** id, char* file, int line, int64_t value, ...)
{
DO_LOCK (&glock);
entry_t e = (entry_t) *id;
@ -208,7 +220,7 @@ int initValueProfile(void** id, char* file, int line, ...)
if (e) {
*id = e;
}
}
}
if (e == NULL) {
va_list va;
@ -216,58 +228,72 @@ int initValueProfile(void** id, char* file, int line, ...)
e->lock = LOCK_IS_FREE;
e->file = file;
e->line = line;
e->value = 0;
e->sum = 0;
e->count = 0;
e->min = 0;
e->max = 0;
// optional probe function argument
va_start (va, line);
e->value = value;
e->sum = value;
e->count = 1;
e->min = value;
e->max = value;
va_start (va, value);
e->func = (void (__cdecl*)(void*)) va_arg (va, void*);
va_end (va);
e->h = NULL;
e->genptr = NULL;
VMPI_memset (&e->ivar, 0, sizeof(e->ivar));
VMPI_memset (&e->i64var, 0, sizeof(e->i64var));
VMPI_memset (&e->dvar, 0, sizeof(e->dvar));
e->next = entries;
entries = e;
if (e->func) e->func (e);
*id = e;
} else {
long* lock = &(e->lock);
LOCK (lock);
e->value = value;
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
if (e->func) e->func (e);
UNLOCK (lock);
}
DO_UNLOCK (&glock);
return 0;
}
// Record a value profile event.
int profileValue(void* id, int64_t value)
int _histEntryValue (void* id, int64_t value)
{
entry_t e = (entry_t) id;
long* lock = &(e->lock);
hist_t h = e->h;
int nbins = h->nbins;
int64_t* lb = h->lb;
int b;
for (b = 0; b < nbins; b ++) {
if (value < lb[b]) break;
}
LOCK (lock);
e->value = value;
if (e->count == 0) {
e->sum = value;
e->count = 1;
e->min = value;
e->max = value;
} else {
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
}
if (e->func) e->func (e);
e->value = value;
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
h->count[b] ++;
UNLOCK (lock);
return 0;
}
// Initialize the location pointed to by 'id' to a new histogram profile entry
// associated with 'file' and 'line', or do nothing if already initialized.
int initHistProfile(void** id, char* file, int line, int nbins, ...)
int histValue(void** id, char* file, int line, int64_t value, int nbins, ...)
{
DO_LOCK (&glock);
entry_t e = (entry_t) *id;
@ -293,11 +319,11 @@ int initHistProfile(void** id, char* file, int line, int nbins, ...)
e->lock = LOCK_IS_FREE;
e->file = file;
e->line = line;
e->value = 0;
e->sum = 0;
e->count = 0;
e->min = 0;
e->max = 0;
e->value = value;
e->sum = value;
e->count = 1;
e->min = value;
e->max = value;
e->func = NULL;
e->h = h = (hist_t) malloc (sizeof(hist));
n = 1+MAX(nbins,0);
@ -317,60 +343,51 @@ int initHistProfile(void** id, char* file, int line, int nbins, ...)
lb[b] = MAXINT64;
va_end (va);
for (b = 0; b < nbins; b ++) {
if (value < lb[b]) break;
}
h->count[b] ++;
e->genptr = NULL;
VMPI_memset (&e->ivar, 0, sizeof(e->ivar));
VMPI_memset (&e->i64var, 0, sizeof(e->i64var));
VMPI_memset (&e->dvar, 0, sizeof(e->dvar));
e->next = entries;
entries = e;
*id = e;
} else {
int b;
long* lock = &(e->lock);
hist_t h=e->h;
int64_t* lb = h->lb;
LOCK (lock);
e->value = value;
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
for (b = 0; b < nbins; b ++) {
if (value < lb[b]) break;
}
h->count[b] ++;
UNLOCK (lock);
}
DO_UNLOCK (&glock);
return 0;
}
// Record a histogram profile event.
int histValue(void* id, int64_t value)
{
entry_t e = (entry_t) id;
long* lock = &(e->lock);
hist_t h = e->h;
int nbins = h->nbins;
int64_t* lb = h->lb;
int b;
LOCK (lock);
e->value = value;
if (e->count == 0) {
e->sum = value;
e->count = 1;
e->min = value;
e->max = value;
} else {
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
}
for (b = 0; b < nbins; b ++) {
if (value < lb[b]) break;
}
h->count[b] ++;
UNLOCK (lock);
return 0;
}
#if defined(_MSC_VER) && defined(_M_IX86)
uint64_t readTimestampCounter()
inline uint64_t _rdtsc()
{
// read the cpu cycle counter. 1 tick = 1 cycle on IA32
_asm rdtsc;
}
#elif defined(__GNUC__) && (__i386__ || __x86_64__)
uint64_t readTimestampCounter()
inline uint64_t _rdtsc()
{
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
@ -378,6 +395,16 @@ uint64_t readTimestampCounter()
}
#else
// add stub for platforms without it, so fat builds don't fail
uint64_t readTimestampCounter() { return 0; }
inline uint64_t _rdtsc() { return 0; }
#endif
void* _tprof_before_id=0;
static uint64_t _tprof_before = 0;
int64_t _tprof_time()
{
uint64_t now = _rdtsc();
uint64_t v = _tprof_before ? now-_tprof_before : 0;
_tprof_before = now;
return v/2600; // v = microseconds on a 2.6ghz cpu
}

View file

@ -105,11 +105,12 @@
extern "C" {
#endif
int initValueProfile(void** id, char* file, int line, ...);
int profileValue(void* id, int64_t value);
int initHistProfile(void** id, char* file, int line, int nbins, ...);
int histValue(void* id, int64_t value);
uint64_t readTimestampCounter();
int profileValue (void** id, char* file, int line, int64_t value, ...);
int _profileEntryValue (void* id, int64_t value);
int histValue(void** id, char* file, int line, int64_t value, int nbins, ...);
int _histEntryValue (void* id, int64_t value);
int64_t _tprof_time();
extern void* _tprof_before_id;
#ifdef __cplusplus
}
@ -123,97 +124,73 @@ uint64_t readTimestampCounter();
#define _vprof(v,...)
#define _hprof(h,n,...)
#define _nhprof(e,v,n,...)
#define _ntprof_begin(e)
#define _ntprof_end(e)
#define _jvprof_init(id,...)
#define _jnvprof_init(id,e,...)
#define _jvprof(id,v)
#define _ntprof(e)
#define _tprof_end()
#endif // ! VMCFG_SYMBIAN
#else
// Historical/compatibility note:
// The macros below were originally written using conditional expressions, not if/else. The original author
// said that this was done to allow _vprof and _nvprof to be used in an expression context, but the old code
// had already wrapped the macro bodies in { }, so it is not clear how this could have worked. At present,
// the profiling macros must appear in a statement context only.
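// For example, with the current { ... } bodies a call such as
//     if (enabled) _vprof(n); else skip();
// does not compile: the ';' after the expanded '}' terminates the 'if' and
// leaves the 'else' dangling, so the macros must be used as stand-alone
// statements. (Illustrative example, not part of the original header.)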
#define _vprof(v,...) \
do { \
{ \
static void* id = 0; \
if (id == 0) \
initValueProfile(&id, __FILE__, __LINE__, ##__VA_ARGS__, NULL); \
profileValue(id, (int64_t) (v)); \
} while (0)
(id != 0) ? \
_profileEntryValue (id, (int64_t) (v)) \
: \
profileValue (&id, __FILE__, __LINE__, (int64_t) (v), ##__VA_ARGS__, NULL) \
;\
}
#define _nvprof(e,v) \
do { \
{ \
static void* id = 0; \
if (id == 0) \
initValueProfile(&id, (char*) (e), -1, NULL); \
profileValue(id, (int64_t) (v)); \
} while (0)
(id != 0) ? \
_profileEntryValue (id, (int64_t) (v)) \
: \
profileValue (&id, (char*) (e), -1, (int64_t) (v), NULL) \
; \
}
#define _hprof(v,n,...) \
do { \
{ \
static void* id = 0; \
if (id == 0) \
initHistProfile(&id, __FILE__, __LINE__, (int) (n), ##__VA_ARGS__); \
histValue(id, (int64_t) (v)); \
} while (0)
(id != 0) ? \
_histEntryValue (id, (int64_t) (v)) \
: \
histValue (&id, __FILE__, __LINE__, (int64_t) (v), (int) (n), ##__VA_ARGS__) \
; \
}
#define _nhprof(e,v,n,...) \
do { \
{ \
static void* id = 0; \
if (id == 0) \
initHistProfile(&id, (char*) (e), -1, (int) (n), ##__VA_ARGS__); \
histValue(id, (int64_t) (v)); \
} while (0)
(id != 0) ? \
_histEntryValue (id, (int64_t) (v)) \
: \
histValue (&id, (char*) (e), -1, (int64_t) (v), (int) (n), ##__VA_ARGS__) \
; \
}
// Profile execution time between _ntprof_begin(e) and _ntprof_end(e).
// The tag 'e' must match at the beginning and end of the region to
// be timed. Regions may be nested or overlap arbitrarily, as it is
// the tag alone that defines the begin/end correspondence.
#define _ntprof_begin(e) \
do { \
#define _ntprof(e) \
{ \
uint64_t v = _tprof_time();\
(_tprof_before_id != 0) ? \
_profileEntryValue(_tprof_before_id, v)\
: 0;\
static void* id = 0; \
if (id == 0) \
initValueProfile(&id, (char*)(e), -1, NULL); \
((entry_t)id)->i64var[0] = readTimestampCounter(); \
} while (0)
(id != 0) ? \
_profileEntryValue (id, (int64_t) 0) \
: \
profileValue (&id, (char*)(e), -1, (int64_t) 0, NULL) \
;\
_tprof_before_id = id;\
}
// Assume 2.6 Ghz CPU
#define TICKS_PER_USEC 2600
#define _ntprof_end(e) \
do { \
static void* id = 0; \
uint64_t stop = readTimestampCounter(); \
if (id == 0) \
initValueProfile(&id, (char*)(e), -1, NULL); \
uint64_t start = ((entry_t)id)->i64var[0]; \
uint64_t usecs = (stop - start) / TICKS_PER_USEC; \
profileValue(id, usecs); \
} while (0)
// These macros separate the creation of a profile record from its later usage.
// They are intended for profiling JIT-generated code. Once created, the JIT can
// bind a pointer to the profile record into the generated code, which can then
// record profile events during execution.
#define _jvprof_init(id,...) \
if (*(id) == 0) \
initValueProfile((id), __FILE__, __LINE__, ##__VA_ARGS__, NULL)
#define _jnvprof_init(id,e,...) \
if (*(id) == 0) \
initValueProfile((id), (char*) (e), -1, ##__VA_ARGS__, NULL)
// Calls to the _jvprof macro must be wrapped in an actual function
// in order to be invoked from JIT-compiled code.
#define _jvprof(id,v) \
profileValue((id), (int64_t) (v))
#define _tprof_end() \
{\
uint64_t v = _tprof_time();\
if (_tprof_before_id)\
_profileEntryValue(_tprof_before_id, v);\
_tprof_before_id = 0;\
}
#endif
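For context, the value-profiling entry points keep the same call-site shape on both sides of this backout: _vprof records a value keyed by __FILE__/__LINE__, and _nvprof records a value under an explicit name. A minimal usage sketch, illustrative only (the event name "fragment-size" and the helper function are made up for this example):

#include "vprof.h"

static void recordFragmentSize(int nInstructions)
{
    _nvprof("fragment-size", nInstructions);  // value profile keyed by the given name
    _vprof(nInstructions);                    // value profile keyed by __FILE__/__LINE__
}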