diff --git a/js/src/nanojit/LIR.cpp b/js/src/nanojit/LIR.cpp index 46edd35fd4cd..df81bbb57e35 100644 --- a/js/src/nanojit/LIR.cpp +++ b/js/src/nanojit/LIR.cpp @@ -167,7 +167,6 @@ namespace nanojit // clear the stats, etc _unused = 0; _limit = 0; - _bytesAllocated = 0; _stats.lir = 0; for (int i = 0; i < NumSavedRegs; ++i) savedRegs[i] = NULL; @@ -186,11 +185,6 @@ namespace nanojit return _stats.lir; } - size_t LirBuffer::byteCount() - { - return _bytesAllocated - (_limit - _unused); - } - // Allocate a new page, and write the first instruction to it -- a skip // linking to last instruction of the previous page. void LirBuffer::moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk) @@ -2070,23 +2064,25 @@ namespace nanojit suspended(false) { - m_findNL[LInsImmI] = &CseFilter::findImmI; - m_findNL[LInsImmQ] = PTR_SIZE(NULL, &CseFilter::findImmQ); - m_findNL[LInsImmD] = &CseFilter::findImmD; - m_findNL[LIns1] = &CseFilter::find1; - m_findNL[LIns2] = &CseFilter::find2; - m_findNL[LIns3] = &CseFilter::find3; - m_findNL[LInsCall] = &CseFilter::findCall; + m_findNL[NLImmISmall] = &CseFilter::findImmISmall; + m_findNL[NLImmILarge] = &CseFilter::findImmILarge; + m_findNL[NLImmQ] = PTR_SIZE(NULL, &CseFilter::findImmQ); + m_findNL[NLImmD] = &CseFilter::findImmD; + m_findNL[NL1] = &CseFilter::find1; + m_findNL[NL2] = &CseFilter::find2; + m_findNL[NL3] = &CseFilter::find3; + m_findNL[NLCall] = &CseFilter::findCall; - m_capNL[LInsImmI] = 128; - m_capNL[LInsImmQ] = PTR_SIZE(0, 16); - m_capNL[LInsImmD] = 16; - m_capNL[LIns1] = 256; - m_capNL[LIns2] = 512; - m_capNL[LIns3] = 16; - m_capNL[LInsCall] = 64; + m_capNL[NLImmISmall] = 17; // covers 0..16, which is over half the cases for TraceMonkey + m_capNL[NLImmILarge] = 64; + m_capNL[NLImmQ] = PTR_SIZE(0, 16); + m_capNL[NLImmD] = 16; + m_capNL[NL1] = 256; + m_capNL[NL2] = 512; + m_capNL[NL3] = 16; + m_capNL[NLCall] = 64; - for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind)) { + for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind)) { m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]]; m_usedNL[nlkind] = 1; // Force memset in clearAll(). } @@ -2162,7 +2158,7 @@ namespace nanojit } void CseFilter::clearAll() { - for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind)) + for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind)) clearNL(nlkind); // Note that this clears the CONST and MULTIPLE load tables as well. @@ -2216,6 +2212,7 @@ namespace nanojit void CseFilter::growNL(NLKind nlkind) { + NanoAssert(nlkind != NLImmISmall); const uint32_t oldcap = m_capNL[nlkind]; m_capNL[nlkind] <<= 1; LIns** oldlist = m_listNL[nlkind]; @@ -2248,6 +2245,16 @@ namespace nanojit } } + void CseFilter::addNLImmISmall(LIns* ins, uint32_t k) + { + if (suspended) return; + NLKind nlkind = NLImmISmall; + NanoAssert(k < m_capNL[nlkind]); + NanoAssert(!m_listNL[nlkind][k]); + m_usedNL[nlkind]++; + m_listNL[nlkind][k] = ins; + } + void CseFilter::addNL(NLKind nlkind, LIns* ins, uint32_t k) { if (suspended) return; @@ -2271,9 +2278,26 @@ namespace nanojit } } - inline LIns* CseFilter::findImmI(int32_t a, uint32_t &k) + inline LIns* CseFilter::findImmISmall(int32_t a, uint32_t &k) { - NLKind nlkind = LInsImmI; + // This one is a direct array lookup rather than a hashtable lookup. + NLKind nlkind = NLImmISmall; + k = a; + LIns* ins = m_listNL[nlkind][k]; + NanoAssert(!ins || ins->isImmI(a)); + return ins; + } + + uint32_t CseFilter::findImmISmall(LIns* ins) + { + uint32_t k; + findImmISmall(ins->immI(), k); + return k; + } + + inline LIns* CseFilter::findImmILarge(int32_t a, uint32_t &k) + { + NLKind nlkind = NLImmILarge; const uint32_t bitmask = m_capNL[nlkind] - 1; k = hashImmI(a) & bitmask; uint32_t n = 1; @@ -2296,17 +2320,17 @@ namespace nanojit } } - uint32_t CseFilter::findImmI(LIns* ins) + uint32_t CseFilter::findImmILarge(LIns* ins) { uint32_t k; - findImmI(ins->immI(), k); + findImmILarge(ins->immI(), k); return k; } #ifdef NANOJIT_64BIT inline LIns* CseFilter::findImmQ(uint64_t a, uint32_t &k) { - NLKind nlkind = LInsImmQ; + NLKind nlkind = NLImmQ; const uint32_t bitmask = m_capNL[nlkind] - 1; k = hashImmQorD(a) & bitmask; uint32_t n = 1; @@ -2332,7 +2356,7 @@ namespace nanojit inline LIns* CseFilter::findImmD(uint64_t a, uint32_t &k) { - NLKind nlkind = LInsImmD; + NLKind nlkind = NLImmD; const uint32_t bitmask = m_capNL[nlkind] - 1; k = hashImmQorD(a) & bitmask; uint32_t n = 1; @@ -2357,7 +2381,7 @@ namespace nanojit inline LIns* CseFilter::find1(LOpcode op, LIns* a, uint32_t &k) { - NLKind nlkind = LIns1; + NLKind nlkind = NL1; const uint32_t bitmask = m_capNL[nlkind] - 1; k = hash1(op, a) & bitmask; uint32_t n = 1; @@ -2381,7 +2405,7 @@ namespace nanojit inline LIns* CseFilter::find2(LOpcode op, LIns* a, LIns* b, uint32_t &k) { - NLKind nlkind = LIns2; + NLKind nlkind = NL2; const uint32_t bitmask = m_capNL[nlkind] - 1; k = hash2(op, a, b) & bitmask; uint32_t n = 1; @@ -2405,7 +2429,7 @@ namespace nanojit inline LIns* CseFilter::find3(LOpcode op, LIns* a, LIns* b, LIns* c, uint32_t &k) { - NLKind nlkind = LIns3; + NLKind nlkind = NL3; const uint32_t bitmask = m_capNL[nlkind] - 1; k = hash3(op, a, b, c) & bitmask; uint32_t n = 1; @@ -2466,7 +2490,7 @@ namespace nanojit inline LIns* CseFilter::findCall(const CallInfo *ci, uint32_t argc, LIns* args[], uint32_t &k) { - NLKind nlkind = LInsCall; + NLKind nlkind = NLCall; const uint32_t bitmask = m_capNL[nlkind] - 1; k = hashCall(ci, argc, args) & bitmask; uint32_t n = 1; @@ -2496,10 +2520,19 @@ namespace nanojit LIns* CseFilter::insImmI(int32_t imm) { uint32_t k; - LIns* ins = findImmI(imm, k); - if (!ins) { - ins = out->insImmI(imm); - addNL(LInsImmI, ins, k); + LIns* ins; + if (0 <= imm && imm < int32_t(m_capNL[NLImmISmall])) { + ins = findImmISmall(imm, k); + if (!ins) { + ins = out->insImmI(imm); + addNLImmISmall(ins, k); + } + } else { + ins = findImmILarge(imm, k); + if (!ins) { + ins = out->insImmI(imm); + addNL(NLImmILarge, ins, k); + } } // We assume that downstream stages do not modify the instruction, so // that we can insert 'ins' into slot 'k'. Check this. @@ -2514,7 +2547,7 @@ namespace nanojit LIns* ins = findImmQ(q, k); if (!ins) { ins = out->insImmQ(q); - addNL(LInsImmQ, ins, k); + addNL(NLImmQ, ins, k); } NanoAssert(ins->isop(LIR_immq) && ins->immQ() == q); return ins; @@ -2534,7 +2567,7 @@ namespace nanojit LIns* ins = findImmD(u.u64, k); if (!ins) { ins = out->insImmD(d); - addNL(LInsImmD, ins, k); + addNL(NLImmD, ins, k); } NanoAssert(ins->isop(LIR_immd) && ins->immDasQ() == u.u64); return ins; @@ -2555,7 +2588,7 @@ namespace nanojit ins = find1(op, a, k); if (!ins) { ins = out->ins1(op, a); - addNL(LIns1, ins, k); + addNL(NL1, ins, k); } } else { ins = out->ins1(op, a); @@ -2572,7 +2605,7 @@ namespace nanojit ins = find2(op, a, b, k); if (!ins) { ins = out->ins2(op, a, b); - addNL(LIns2, ins, k); + addNL(NL2, ins, k); } else if (ins->isCmp()) { if (knownCmpValues.containsKey(ins)) { // We've seen this comparison before, and it was previously @@ -2594,7 +2627,7 @@ namespace nanojit LIns* ins = find3(op, a, b, c, k); if (!ins) { ins = out->ins3(op, a, b, c); - addNL(LIns3, ins, k); + addNL(NL3, ins, k); } NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c); return ins; @@ -2694,7 +2727,7 @@ namespace nanojit ins = find1(op, c, k); if (!ins) { ins = out->insGuard(op, c, gr); - addNL(LIns1, ins, k); + addNL(NL1, ins, k); } // After this guard, we know that 'c's result was true (if // op==LIR_xf) or false (if op==LIR_xt), else we would have @@ -2719,7 +2752,7 @@ namespace nanojit LIns* ins = find2(op, a, b, k); if (!ins) { ins = out->insGuardXov(op, a, b, gr); - addNL(LIns2, ins, k); + addNL(NL2, ins, k); } NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b); return ins; @@ -2737,7 +2770,7 @@ namespace nanojit ins = findCall(ci, argc, args, k); if (!ins) { ins = out->insCall(ci, args); - addNL(LInsCall, ins, k); + addNL(NLCall, ins, k); } } else { // We only need to worry about aliasing if !ci->_isPure. diff --git a/js/src/nanojit/LIR.h b/js/src/nanojit/LIR.h index 744f27bce8e7..a52caedc0a09 100644 --- a/js/src/nanojit/LIR.h +++ b/js/src/nanojit/LIR.h @@ -1924,18 +1924,19 @@ namespace nanojit // We divide instruction kinds into groups. LIns0 isn't present // because we don't need to record any 0-ary instructions. Loads // aren't here, they're handled separately. - LInsImmI = 0, - LInsImmQ = 1, // only occurs on 64-bit platforms - LInsImmD = 2, - LIns1 = 3, - LIns2 = 4, - LIns3 = 5, - LInsCall = 6, + NLImmISmall = 0, + NLImmILarge = 1, + NLImmQ = 2, // only occurs on 64-bit platforms + NLImmD = 3, + NL1 = 4, + NL2 = 5, + NL3 = 6, + NLCall = 7, - LInsFirst = 0, - LInsLast = 6, + NLFirst = 0, + NLLast = 7, // Need a value after "last" to outsmart compilers that insist last+1 is impossible. - LInsInvalid = 7 + NLInvalid = 8 }; #define nextNLKind(kind) NLKind(kind+1) @@ -1948,11 +1949,11 @@ namespace nanojit // Don't start m_capNL too small, or we'll waste time growing and rehashing. // Don't start m_capNL too large, will waste memory. // - LIns** m_listNL[LInsLast + 1]; - uint32_t m_capNL[ LInsLast + 1]; - uint32_t m_usedNL[LInsLast + 1]; + LIns** m_listNL[NLLast + 1]; + uint32_t m_capNL[ NLLast + 1]; + uint32_t m_usedNL[NLLast + 1]; typedef uint32_t (CseFilter::*find_t)(LIns*); - find_t m_findNL[LInsLast + 1]; + find_t m_findNL[NLLast + 1]; // Similarly, for loads, there is one table for each CseAcc. A CseAcc // is like a normal access region, but there are two extra possible @@ -2021,7 +2022,8 @@ namespace nanojit static uint32_t hashCall(const CallInfo *call, uint32_t argc, LIns* args[]); // These versions are used before an LIns has been created. - LIns* findImmI(int32_t a, uint32_t &k); + LIns* findImmISmall(int32_t a, uint32_t &k); + LIns* findImmILarge(int32_t a, uint32_t &k); #ifdef NANOJIT_64BIT LIns* findImmQ(uint64_t a, uint32_t &k); #endif @@ -2036,7 +2038,8 @@ namespace nanojit // These versions are used after an LIns has been created; they are // used for rehashing after growing. They just call onto the // multi-arg versions above. - uint32_t findImmI(LIns* ins); + uint32_t findImmISmall(LIns* ins); + uint32_t findImmILarge(LIns* ins); #ifdef NANOJIT_64BIT uint32_t findImmQ(LIns* ins); #endif @@ -2050,6 +2053,7 @@ namespace nanojit void growNL(NLKind kind); void growL(CseAcc cseAcc); + void addNLImmISmall(LIns* ins, uint32_t k); // 'k' is the index found by findXYZ(). void addNL(NLKind kind, LIns* ins, uint32_t k); void addL(LIns* ins, uint32_t k); @@ -2096,7 +2100,6 @@ namespace nanojit verbose_only(LInsPrinter* printer;) int32_t insCount(); - size_t byteCount(); // stats struct @@ -2123,7 +2126,6 @@ namespace nanojit Allocator& _allocator; uintptr_t _unused; // next unused instruction slot in the current LIR chunk uintptr_t _limit; // one past the last usable byte of the current LIR chunk - size_t _bytesAllocated; }; class LirBufWriter : public LirWriter