Bug 609121 - nanojit: handle small immediates specially in CseFilter. r=wmaddox.

--HG-- extra : convert_revision : 04d7771f3f85877cf12395ffecfc4f2f6d4a0b50
2010-11-10 14:40:07 -08:00 · 2010-11-10 14:40:07 -08:00 · 9e273662fc
--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@@ -167,7 +167,6 @@ namespace nanojit
        // clear the stats, etc
        _unused = 0;
        _limit = 0;
-        _bytesAllocated = 0;
        _stats.lir = 0;
        for (int i = 0; i < NumSavedRegs; ++i)
            savedRegs[i] = NULL;
@@ -186,11 +185,6 @@ namespace nanojit
        return _stats.lir;
    }

-    size_t LirBuffer::byteCount()
-    {
-        return _bytesAllocated - (_limit - _unused);
-    }
-
    // Allocate a new page, and write the first instruction to it -- a skip
    // linking to last instruction of the previous page.
    void LirBuffer::moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk)
@@ -2070,23 +2064,25 @@ namespace nanojit
          suspended(false)
    {

-        m_findNL[LInsImmI] = &CseFilter::findImmI;
-        m_findNL[LInsImmQ] = PTR_SIZE(NULL, &CseFilter::findImmQ);
-        m_findNL[LInsImmD] = &CseFilter::findImmD;
-        m_findNL[LIns1]    = &CseFilter::find1;
-        m_findNL[LIns2]    = &CseFilter::find2;
-        m_findNL[LIns3]    = &CseFilter::find3;
-        m_findNL[LInsCall] = &CseFilter::findCall;
+        m_findNL[NLImmISmall] = &CseFilter::findImmISmall;
+        m_findNL[NLImmILarge] = &CseFilter::findImmILarge;
+        m_findNL[NLImmQ]      = PTR_SIZE(NULL, &CseFilter::findImmQ);
+        m_findNL[NLImmD]      = &CseFilter::findImmD;
+        m_findNL[NL1]         = &CseFilter::find1;
+        m_findNL[NL2]         = &CseFilter::find2;
+        m_findNL[NL3]         = &CseFilter::find3;
+        m_findNL[NLCall]      = &CseFilter::findCall;

-        m_capNL[LInsImmI]  = 128;
-        m_capNL[LInsImmQ]  = PTR_SIZE(0, 16);
-        m_capNL[LInsImmD]  = 16;
-        m_capNL[LIns1]     = 256;
-        m_capNL[LIns2]     = 512;
-        m_capNL[LIns3]     = 16;
-        m_capNL[LInsCall]  = 64;
+        m_capNL[NLImmISmall]  = 17;   // covers 0..16, which is over half the cases for TraceMonkey
+        m_capNL[NLImmILarge]  = 64;
+        m_capNL[NLImmQ]       = PTR_SIZE(0, 16);
+        m_capNL[NLImmD]       = 16;
+        m_capNL[NL1]          = 256;
+        m_capNL[NL2]          = 512;
+        m_capNL[NL3]          = 16;
+        m_capNL[NLCall]       = 64;

-        for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind)) {
+        for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind)) {
            m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]];
            m_usedNL[nlkind] = 1; // Force memset in clearAll().
        }
@@ -2162,7 +2158,7 @@ namespace nanojit
    }

    void CseFilter::clearAll() {
-        for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind))
+        for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind))
            clearNL(nlkind);

        // Note that this clears the CONST and MULTIPLE load tables as well.
@@ -2216,6 +2212,7 @@ namespace nanojit

    void CseFilter::growNL(NLKind nlkind)
    {
+        NanoAssert(nlkind != NLImmISmall);
        const uint32_t oldcap = m_capNL[nlkind];
        m_capNL[nlkind] <<= 1;
        LIns** oldlist = m_listNL[nlkind];
@@ -2248,6 +2245,16 @@ namespace nanojit
        }
    }

+    void CseFilter::addNLImmISmall(LIns* ins, uint32_t k)
+    {  // Stores 'ins' in slot 'k' of the NLImmISmall table; 'k' is the index produced by findImmISmall().
+        if (suspended) return;  // nothing is recorded while 'suspended' is set
+        NLKind nlkind = NLImmISmall;
+        NanoAssert(k < m_capNL[nlkind]);  // 'k' must lie inside the table
+        NanoAssert(!m_listNL[nlkind][k]);  // the slot must still be empty
+        m_usedNL[nlkind]++;
+        m_listNL[nlkind][k] = ins;
+    }
+
    void CseFilter::addNL(NLKind nlkind, LIns* ins, uint32_t k)
    {
        if (suspended) return;
@@ -2271,9 +2278,26 @@ namespace nanojit
        }
    }

-    inline LIns* CseFilter::findImmI(int32_t a, uint32_t &k)
+    inline LIns* CseFilter::findImmISmall(int32_t a, uint32_t &k)
    {
-        NLKind nlkind = LInsImmI;
+        // This one is a direct array lookup rather than a hashtable lookup: the immediate itself is the slot index.
+        NLKind nlkind = NLImmISmall;
+        k = a;  // no bounds check here; insImmI() only routes values with 0 <= a < m_capNL[NLImmISmall] to this function
+        LIns* ins = m_listNL[nlkind][k];
+        NanoAssert(!ins || ins->isImmI(a));  // an occupied slot must hold exactly the immediate 'a'
+        return ins;  // null when slot 'k' is empty; the caller then creates the instruction and adds it at 'k'
+    }
+
+    uint32_t CseFilter::findImmISmall(LIns* ins)
+    {  // LIns* overload matching find_t; this is the form stored in m_findNL[NLImmISmall].
+        uint32_t k;
+        findImmISmall(ins->immI(), k);  // only the slot index 'k' is wanted; the returned LIns* is ignored
+        return k;
+    }
+
+    inline LIns* CseFilter::findImmILarge(int32_t a, uint32_t &k)
+    {
+        NLKind nlkind = NLImmILarge;
        const uint32_t bitmask = m_capNL[nlkind] - 1;
        k = hashImmI(a) & bitmask;
        uint32_t n = 1;
@@ -2296,17 +2320,17 @@ namespace nanojit
        }
    }

-    uint32_t CseFilter::findImmI(LIns* ins)
+    uint32_t CseFilter::findImmILarge(LIns* ins)  // LIns* overload matching find_t; stored in m_findNL[NLImmILarge]
    {
        uint32_t k;
-        findImmI(ins->immI(), k);
+        findImmILarge(ins->immI(), k);  // only the slot index 'k' is wanted; the returned LIns* is ignored
        return k;
    }

 #ifdef NANOJIT_64BIT
    inline LIns* CseFilter::findImmQ(uint64_t a, uint32_t &k)
    {
-        NLKind nlkind = LInsImmQ;
+        NLKind nlkind = NLImmQ;
        const uint32_t bitmask = m_capNL[nlkind] - 1;
        k = hashImmQorD(a) & bitmask;
        uint32_t n = 1;
@@ -2332,7 +2356,7 @@ namespace nanojit

    inline LIns* CseFilter::findImmD(uint64_t a, uint32_t &k)
    {
-        NLKind nlkind = LInsImmD;
+        NLKind nlkind = NLImmD;
        const uint32_t bitmask = m_capNL[nlkind] - 1;
        k = hashImmQorD(a) & bitmask;
        uint32_t n = 1;
@@ -2357,7 +2381,7 @@ namespace nanojit

    inline LIns* CseFilter::find1(LOpcode op, LIns* a, uint32_t &k)
    {
-        NLKind nlkind = LIns1;
+        NLKind nlkind = NL1;
        const uint32_t bitmask = m_capNL[nlkind] - 1;
        k = hash1(op, a) & bitmask;
        uint32_t n = 1;
@@ -2381,7 +2405,7 @@ namespace nanojit

    inline LIns* CseFilter::find2(LOpcode op, LIns* a, LIns* b, uint32_t &k)
    {
-        NLKind nlkind = LIns2;
+        NLKind nlkind = NL2;
        const uint32_t bitmask = m_capNL[nlkind] - 1;
        k = hash2(op, a, b) & bitmask;
        uint32_t n = 1;
@@ -2405,7 +2429,7 @@ namespace nanojit

    inline LIns* CseFilter::find3(LOpcode op, LIns* a, LIns* b, LIns* c, uint32_t &k)
    {
-        NLKind nlkind = LIns3;
+        NLKind nlkind = NL3;
        const uint32_t bitmask = m_capNL[nlkind] - 1;
        k = hash3(op, a, b, c) & bitmask;
        uint32_t n = 1;
@@ -2466,7 +2490,7 @@ namespace nanojit

    inline LIns* CseFilter::findCall(const CallInfo *ci, uint32_t argc, LIns* args[], uint32_t &k)
    {
-        NLKind nlkind = LInsCall;
+        NLKind nlkind = NLCall;
        const uint32_t bitmask = m_capNL[nlkind] - 1;
        k = hashCall(ci, argc, args) & bitmask;
        uint32_t n = 1;
@@ -2496,10 +2520,19 @@ namespace nanojit
    LIns* CseFilter::insImmI(int32_t imm)
    {
        uint32_t k;
-        LIns* ins = findImmI(imm, k);
-        if (!ins) {
-            ins = out->insImmI(imm);
-            addNL(LInsImmI, ins, k);
+        LIns* ins;
+        if (0 <= imm && imm < int32_t(m_capNL[NLImmISmall])) {
+            ins = findImmISmall(imm, k);
+            if (!ins) {
+                ins = out->insImmI(imm);
+                addNLImmISmall(ins, k);
+            }
+        } else {
+            ins = findImmILarge(imm, k);
+            if (!ins) {
+                ins = out->insImmI(imm);
+                addNL(NLImmILarge, ins, k);
+            }
        }
        // We assume that downstream stages do not modify the instruction, so
        // that we can insert 'ins' into slot 'k'.  Check this.
@@ -2514,7 +2547,7 @@ namespace nanojit
        LIns* ins = findImmQ(q, k);
        if (!ins) {
            ins = out->insImmQ(q);
-            addNL(LInsImmQ, ins, k);
+            addNL(NLImmQ, ins, k);
        }
        NanoAssert(ins->isop(LIR_immq) && ins->immQ() == q);
        return ins;
@@ -2534,7 +2567,7 @@ namespace nanojit
        LIns* ins = findImmD(u.u64, k);
        if (!ins) {
            ins = out->insImmD(d);
-            addNL(LInsImmD, ins, k);
+            addNL(NLImmD, ins, k);
        }
        NanoAssert(ins->isop(LIR_immd) && ins->immDasQ() == u.u64);
        return ins;
@@ -2555,7 +2588,7 @@ namespace nanojit
            ins = find1(op, a, k);
            if (!ins) {
                ins = out->ins1(op, a);
-                addNL(LIns1, ins, k);
+                addNL(NL1, ins, k);
            }
        } else {
            ins = out->ins1(op, a);
@@ -2572,7 +2605,7 @@ namespace nanojit
        ins = find2(op, a, b, k);
        if (!ins) {
            ins = out->ins2(op, a, b);
-            addNL(LIns2, ins, k);
+            addNL(NL2, ins, k);
        } else if (ins->isCmp()) {
            if (knownCmpValues.containsKey(ins)) {
                // We've seen this comparison before, and it was previously
@@ -2594,7 +2627,7 @@ namespace nanojit
        LIns* ins = find3(op, a, b, c, k);
        if (!ins) {
            ins = out->ins3(op, a, b, c);
-            addNL(LIns3, ins, k);
+            addNL(NL3, ins, k);
        }
        NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c);
        return ins;
@@ -2694,7 +2727,7 @@ namespace nanojit
            ins = find1(op, c, k);
            if (!ins) {
                ins = out->insGuard(op, c, gr);
-                addNL(LIns1, ins, k);
+                addNL(NL1, ins, k);
            }
            // After this guard, we know that 'c's result was true (if
            // op==LIR_xf) or false (if op==LIR_xt), else we would have
@@ -2719,7 +2752,7 @@ namespace nanojit
        LIns* ins = find2(op, a, b, k);
        if (!ins) {
            ins = out->insGuardXov(op, a, b, gr);
-            addNL(LIns2, ins, k);
+            addNL(NL2, ins, k);
        }
        NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
        return ins;
@@ -2737,7 +2770,7 @@ namespace nanojit
            ins = findCall(ci, argc, args, k);
            if (!ins) {
                ins = out->insCall(ci, args);
-                addNL(LInsCall, ins, k);
+                addNL(NLCall, ins, k);
            }
        } else {
            // We only need to worry about aliasing if !ci->_isPure.
--- a/js/src/nanojit/LIR.h
+++ b/js/src/nanojit/LIR.h
@@ -1924,18 +1924,19 @@ namespace nanojit
            // We divide instruction kinds into groups.  LIns0 isn't present
            // because we don't need to record any 0-ary instructions.  Loads
            // aren't here, they're handled separately.
-            LInsImmI = 0,
-            LInsImmQ = 1,   // only occurs on 64-bit platforms
-            LInsImmD = 2,
-            LIns1    = 3,
-            LIns2    = 4,
-            LIns3    = 5,
-            LInsCall = 6,
+            NLImmISmall = 0,
+            NLImmILarge = 1,
+            NLImmQ      = 2,   // only occurs on 64-bit platforms
+            NLImmD      = 3,
+            NL1         = 4,
+            NL2         = 5,
+            NL3         = 6,
+            NLCall      = 7,

-            LInsFirst = 0,
-            LInsLast = 6,
+            NLFirst = 0,
+            NLLast = 7,
            // Need a value after "last" to outsmart compilers that insist last+1 is impossible.
-            LInsInvalid = 7
+            NLInvalid = 8
        };
        #define nextNLKind(kind)  NLKind(kind+1)

@@ -1948,11 +1949,11 @@ namespace nanojit
        //     Don't start m_capNL too small, or we'll waste time growing and rehashing.
        //     Don't start m_capNL too large, will waste memory.
        //
-        LIns**      m_listNL[LInsLast + 1];
-        uint32_t    m_capNL[ LInsLast + 1];
-        uint32_t    m_usedNL[LInsLast + 1];
+        LIns**      m_listNL[NLLast + 1];
+        uint32_t    m_capNL[ NLLast + 1];
+        uint32_t    m_usedNL[NLLast + 1];
        typedef uint32_t (CseFilter::*find_t)(LIns*);
-        find_t      m_findNL[LInsLast + 1];
+        find_t      m_findNL[NLLast + 1];

        // Similarly, for loads, there is one table for each CseAcc.  A CseAcc
        // is like a normal access region, but there are two extra possible
@@ -2021,7 +2022,8 @@ namespace nanojit
        static uint32_t hashCall(const CallInfo *call, uint32_t argc, LIns* args[]);

        // These versions are used before an LIns has been created.
-        LIns* findImmI(int32_t a, uint32_t &k);
+        LIns* findImmISmall(int32_t a, uint32_t &k);
+        LIns* findImmILarge(int32_t a, uint32_t &k);
 #ifdef NANOJIT_64BIT
        LIns* findImmQ(uint64_t a, uint32_t &k);
 #endif
@@ -2036,7 +2038,8 @@ namespace nanojit
        // These versions are used after an LIns has been created; they are
        // used for rehashing after growing.  They just call onto the
        // multi-arg versions above.
-        uint32_t findImmI(LIns* ins);
+        uint32_t findImmISmall(LIns* ins);
+        uint32_t findImmILarge(LIns* ins);
 #ifdef NANOJIT_64BIT
        uint32_t findImmQ(LIns* ins);
 #endif
@@ -2050,6 +2053,7 @@ namespace nanojit
        void growNL(NLKind kind);
        void growL(CseAcc cseAcc);

+        void addNLImmISmall(LIns* ins, uint32_t k);
        // 'k' is the index found by findXYZ().
        void addNL(NLKind kind, LIns* ins, uint32_t k);
        void addL(LIns* ins, uint32_t k);
@@ -2096,7 +2100,6 @@ namespace nanojit
            verbose_only(LInsPrinter* printer;)

            int32_t insCount();
-            size_t  byteCount();

            // stats
            struct
@@ -2123,7 +2126,6 @@ namespace nanojit
            Allocator&  _allocator;
            uintptr_t   _unused;   // next unused instruction slot in the current LIR chunk
            uintptr_t   _limit;    // one past the last usable byte of the current LIR chunk
-            size_t      _bytesAllocated;
    };

    class LirBufWriter : public LirWriter