Bug 623428 - TM: avoid bloat caused by multiple mReserve arrays in VMAllocator (NJ-specific part). r=edwsmith.

--HG-- extra : convert_revision : f6016c7c7cd415a26dad9cf39d34141b8b482d43
2011-01-18 14:58:34 -08:00 · 2011-01-18 14:58:34 -08:00 · 40e5063705
--- a/js/src/lirasm/lirasm.cpp
+++ b/js/src/lirasm/lirasm.cpp
@ -64,7 +64,7 @@ using namespace std;
 /* Allocator SPI implementation. */

 void*
-nanojit::Allocator::allocChunk(size_t nbytes)
+nanojit::Allocator::allocChunk(size_t nbytes, bool /*fallible*/)
 {
    void *p = malloc(nbytes);
    if (!p)
--- a/js/src/nanojit/Allocator.cpp
+++ b/js/src/nanojit/Allocator.cpp
@ -68,28 +68,35 @@ namespace nanojit
        postReset();
    }

-    void* Allocator::allocSlow(size_t nbytes)
+    void* Allocator::allocSlow(size_t nbytes, bool fallible)
    {
        NanoAssert((nbytes & 7) == 0);
-        fill(nbytes);
-        NanoAssert(current_top + nbytes <= current_limit);
-        void* p = current_top;
-        current_top += nbytes;
-        return p;
+        if (fill(nbytes, fallible)) {
+            NanoAssert(current_top + nbytes <= current_limit);
+            void* p = current_top;
+            current_top += nbytes;
+            return p;
+        }
+        return NULL;
    }

-    void Allocator::fill(size_t nbytes)
+    bool Allocator::fill(size_t nbytes, bool fallible)
    {
-        const size_t minChunk = 2000;
-        if (nbytes < minChunk)
-            nbytes = minChunk;
+        if (nbytes < MIN_CHUNK_SZB)
+            nbytes = MIN_CHUNK_SZB;
        size_t chunkbytes = sizeof(Chunk) + nbytes - sizeof(int64_t);
-        void* mem = allocChunk(chunkbytes);
-        Chunk* chunk = (Chunk*) mem;
-        chunk->prev = current_chunk;
-        current_chunk = chunk;
-        current_top = (char*)chunk->data;
-        current_limit = (char*)mem + chunkbytes;
+        void* mem = allocChunk(chunkbytes, fallible);
+        if (mem) {
+            Chunk* chunk = (Chunk*) mem;
+            chunk->prev = current_chunk;
+            current_chunk = chunk;
+            current_top = (char*)chunk->data;
+            current_limit = (char*)mem + chunkbytes;
+            return true;
+        } else {
+            NanoAssert(fallible);
+            return false;
+        }
    }
 }

--- a/js/src/nanojit/Allocator.h
+++ b/js/src/nanojit/Allocator.h
@ -46,30 +46,53 @@ namespace nanojit
     * Allocator is a bump-pointer allocator with an SPI for getting more
     * memory from embedder-implemented allocator, such as malloc()/free().
     *
-     * allocations never return NULL.  The implementation of allocChunk()
+     * alloc() never returns NULL.  The implementation of allocChunk()
     * is expected to perform a longjmp or exception when an allocation can't
-     * proceed.
+     * proceed.  fallibleAlloc() (and allocChunk() when called with
+     * fallible=true) may return NULL.  Use them for large allocations whose
+     * failure can be handled without too much difficulty.
     */
    class Allocator {
    public:
        Allocator();
        ~Allocator();
+
+        // Usable bytes in a minimum-sized chunk;  each chunk also carries a
+        // few extra bytes of header for bookkeeping.
+        static const size_t MIN_CHUNK_SZB = 2000;
+
        void reset();

        /** alloc memory, never return null. */
        void* alloc(size_t nbytes) {
+            void* p;
            nbytes = (nbytes + 7) & ~7; // round up
            if (current_top + nbytes <= current_limit) {
-                void *p = current_top;
+                p = current_top;
                current_top += nbytes;
-                return p;
+            } else {
+                p = allocSlow(nbytes, /* fallible = */false);
+                NanoAssert(p);
            }
-            return allocSlow(nbytes);
+            return p;
+        }
+
+        /** alloc memory, maybe return null. */
+        void* fallibleAlloc(size_t nbytes) {
+            void* p;
+            nbytes = (nbytes + 7) & ~7; // round up
+            if (current_top + nbytes <= current_limit) {
+                p = current_top;
+                current_top += nbytes;
+            } else {
+                p = allocSlow(nbytes, /* fallible = */true);
+            }
+            return p;
        }

    protected:
-        void* allocSlow(size_t nbytes);
-        void fill(size_t minbytes);
+        void* allocSlow(size_t nbytes, bool fallible = false);
+        bool fill(size_t minbytes, bool fallible);

        class Chunk {
        public:
@ -84,7 +107,7 @@ namespace nanojit
        // allocator SPI

        /** allocate another block from a host provided allocator */
-        void* allocChunk(size_t nbytes);
+        void* allocChunk(size_t nbytes, bool fallible);

        /** free back to the same allocator */
        void freeChunk(void*);
--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@ -2061,9 +2061,9 @@ namespace nanojit
          storesSinceLastLoad(ACCSET_NONE),
          alloc(alloc),
          knownCmpValues(alloc),
-          suspended(false)
+          suspended(false),
+          initOOM(false)
    {
-
        m_findNL[NLImmISmall] = &CseFilter::findImmISmall;
        m_findNL[NLImmILarge] = &CseFilter::findImmILarge;
        m_findNL[NLImmQ]      = PTR_SIZE(NULL, &CseFilter::findImmQ);
@ -2082,15 +2082,26 @@ namespace nanojit
        m_capNL[NL3]          = 16;
        m_capNL[NLCall]       = 64;

+        // The largish allocations are fallible, the small ones are
+        // infallible.  See the comment on initOOM's declaration for why.
+
        for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind)) {
-            m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]];
+            m_listNL[nlkind] = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capNL[nlkind]);
+            if (!m_listNL[nlkind]) {
+                initOOM = true;
+                return;
+            }
            m_usedNL[nlkind] = 1; // Force memset in clearAll().
        }

        // Note that this allocates the CONST and MULTIPLE tables as well.
        for (CseAcc a = 0; a < CSE_NUM_USED_ACCS; a++) {
            m_capL[a] = 16;
-            m_listL[a] = new (alloc) LIns*[m_capL[a]];
+            m_listL[a] = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capL[a]);
+            if (!m_listL[a]) {
+                initOOM = true;
+                return;
+            }
            m_usedL[a] = 1; // Force memset(0) in first clearAll().
        }

@ -2210,43 +2221,61 @@ namespace nanojit
        return hashfinish(hash);
    }

-    void CseFilter::growNL(NLKind nlkind)
+    bool CseFilter::growNL(NLKind nlkind)
    {
        NanoAssert(nlkind != NLImmISmall);
        const uint32_t oldcap = m_capNL[nlkind];
        m_capNL[nlkind] <<= 1;
-        LIns** oldlist = m_listNL[nlkind];
-        m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]];
-        VMPI_memset(m_listNL[nlkind], 0, m_capNL[nlkind] * sizeof(LIns*));
-        find_t find = m_findNL[nlkind];
-        for (uint32_t i = 0; i < oldcap; i++) {
-            LIns* ins = oldlist[i];
-            if (!ins) continue;
-            uint32_t j = (this->*find)(ins);
-            NanoAssert(!m_listNL[nlkind][j]);
-            m_listNL[nlkind][j] = ins;
+        // We make this allocation fallible because it's potentially large and
+        // easy to recover from.  If it fails, we won't add any more
+        // instructions to the table and some CSE opportunities may be missed.
+        LIns** tmp = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capNL[nlkind]);
+        if (tmp) {
+            LIns** oldlist = m_listNL[nlkind];
+            m_listNL[nlkind] = tmp;
+            VMPI_memset(m_listNL[nlkind], 0, m_capNL[nlkind] * sizeof(LIns*));
+            find_t find = m_findNL[nlkind];
+            for (uint32_t i = 0; i < oldcap; i++) {
+                LIns* ins = oldlist[i];
+                if (!ins) continue;
+                uint32_t j = (this->*find)(ins);
+                NanoAssert(!m_listNL[nlkind][j]);
+                m_listNL[nlkind][j] = ins;
+            }
+            return true;
+        } else {
+            m_capNL[nlkind] = oldcap;
+            return false;
        }
    }

-    void CseFilter::growL(CseAcc cseAcc)
+    bool CseFilter::growL(CseAcc cseAcc)
    {
        const uint32_t oldcap = m_capL[cseAcc];
        m_capL[cseAcc] <<= 1;
-        LIns** oldlist = m_listL[cseAcc];
-        m_listL[cseAcc] = new (alloc) LIns*[m_capL[cseAcc]];
-        VMPI_memset(m_listL[cseAcc], 0, m_capL[cseAcc] * sizeof(LIns*));
-        find_t find = &CseFilter::findLoad;
-        for (uint32_t i = 0; i < oldcap; i++) {
-            LIns* ins = oldlist[i];
-            if (!ins) continue;
-            uint32_t j = (this->*find)(ins);
-            NanoAssert(!m_listL[cseAcc][j]);
-            m_listL[cseAcc][j] = ins;
+        LIns** tmp = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capL[cseAcc]);
+        if (tmp) {
+            LIns** oldlist = m_listL[cseAcc];
+            m_listL[cseAcc] = tmp;
+            VMPI_memset(m_listL[cseAcc], 0, m_capL[cseAcc] * sizeof(LIns*));
+            find_t find = &CseFilter::findLoad;
+            for (uint32_t i = 0; i < oldcap; i++) {
+                LIns* ins = oldlist[i];
+                if (!ins) continue;
+                uint32_t j = (this->*find)(ins);
+                NanoAssert(!m_listL[cseAcc][j]);
+                m_listL[cseAcc][j] = ins;
+            }
+            return true;
+        } else {
+            m_capL[cseAcc] = oldcap;
+            return false;
        }
    }

    void CseFilter::addNLImmISmall(LIns* ins, uint32_t k)
    {
+        NanoAssert(!initOOM);
        if (suspended) return;
        NLKind nlkind = NLImmISmall;
        NanoAssert(k < m_capNL[nlkind]);
@ -2257,24 +2286,36 @@ namespace nanojit

    void CseFilter::addNL(NLKind nlkind, LIns* ins, uint32_t k)
    {
+        NanoAssert(!initOOM);
        if (suspended) return;
        NanoAssert(!m_listNL[nlkind][k]);
        m_usedNL[nlkind]++;
        m_listNL[nlkind][k] = ins;
        if ((m_usedNL[nlkind] * 4) >= (m_capNL[nlkind] * 3)) {  // load factor of 0.75
-            growNL(nlkind);
+            bool ok = growNL(nlkind);
+            if (!ok) {
+                // OOM: undo the insertion.
+                m_usedNL[nlkind]--;
+                m_listNL[nlkind][k] = NULL;
+            }
        }
    }

    void CseFilter::addL(LIns* ins, uint32_t k)
    {
+        NanoAssert(!initOOM);
        if (suspended) return;
        CseAcc cseAcc = miniAccSetToCseAcc(ins->miniAccSet(), ins->loadQual());
        NanoAssert(!m_listL[cseAcc][k]);
        m_usedL[cseAcc]++;
        m_listL[cseAcc][k] = ins;
        if ((m_usedL[cseAcc] * 4) >= (m_capL[cseAcc] * 3)) {  // load factor of 0.75
-            growL(cseAcc);
+            bool ok = growL(cseAcc);
+            if (!ok) {
+                // OOM: undo the insertion.
+                m_usedL[cseAcc]--;
+                m_listL[cseAcc][k] = NULL;
+            }
        }
    }

--- a/js/src/nanojit/LIR.h
+++ b/js/src/nanojit/LIR.h
@ -2054,8 +2054,9 @@ namespace nanojit
        uint32_t findCall(LIns* ins);
        uint32_t findLoad(LIns* ins);

-        void growNL(NLKind kind);
-        void growL(CseAcc cseAcc);
+        // These return false if they failed to grow due to OOM.
+        bool growNL(NLKind kind);
+        bool growL(CseAcc cseAcc);

        void addNLImmISmall(LIns* ins, uint32_t k);
        // 'k' is the index found by findXYZ().
@ -2069,6 +2070,17 @@ namespace nanojit
    public:
        CseFilter(LirWriter *out, uint8_t embNumUsedAccs, Allocator&);

+        // CseFilter does some largish fallible allocations at start-up.  If
+        // they fail, the constructor sets this field to 'true'.  It should be
+        // checked after creation, and if set the CseFilter cannot be used.
+        // (But the check can be skipped if allocChunk() always succeeds.)
+        //
+        // FIXME: This fallibility is a sop to TraceMonkey's implementation of
+        // infallible malloc -- by avoiding some largish infallible
+        // allocations, it reduces the size of the reserve space needed.
+        // Bug 624590 is open to fix this.
+        bool initOOM;
+
        LIns* insImmI(int32_t imm);
 #ifdef NANOJIT_64BIT
        LIns* insImmQ(uint64_t q);
@ -2116,13 +2128,13 @@ namespace nanojit
            LIns *state, *param1, *sp, *rp;
            LIns* savedRegs[NumSavedRegs+1]; // Allocate an extra element in case NumSavedRegs == 0

-        protected:
-            friend class LirBufWriter;
-
            /** Each chunk is just a raw area of LIns instances, with no header
                and no more than 8-byte alignment.  The chunk size is somewhat arbitrary. */
            static const size_t CHUNK_SZB = 8000;

+        protected:
+            friend class LirBufWriter;
+
            /** Get CHUNK_SZB more memory for LIR instructions. */
            void        chunkAlloc();
            void        moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk);