Bug 623428 - TM: avoid bloat caused by multiple mReserve arrays in VMAllocator (NJ-specific part). r=edwsmith.

--HG--
extra : convert_revision : f6016c7c7cd415a26dad9cf39d34141b8b482d43
Nicholas Nethercote 2011-01-18 14:58:34 -08:00
Parent a38a7f4a07
Commit 40e5063705
5 changed files with 141 additions and 58 deletions

View file

@@ -64,7 +64,7 @@ using namespace std;
 /* Allocator SPI implementation. */
 
 void*
-nanojit::Allocator::allocChunk(size_t nbytes)
+nanojit::Allocator::allocChunk(size_t nbytes, bool /*fallible*/)
 {
     void *p = malloc(nbytes);
     if (!p)

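The hunk above keeps the new parameter name commented out: this embedder simply aborts on OOM, so its fallible requests never actually return NULL (which is why the header comment later notes the initOOM check may be skipped when allocChunk() always succeeds). An embedder that wants fallibleAlloc() to be genuinely fallible would honour the flag along these lines — a minimal sketch, not part of the patch, with oomAbort() an assumed never-returning helper:

    /* Hypothetical fallible-aware SPI implementation. */
    void*
    nanojit::Allocator::allocChunk(size_t nbytes, bool fallible)
    {
        void *p = malloc(nbytes);
        if (!p) {
            if (fallible)
                return NULL;   // caller has promised to handle a NULL result
            oomAbort();        // infallible request: must not return
        }
        return p;
    }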
View file

@@ -68,28 +68,35 @@ namespace nanojit
         postReset();
     }
 
-    void* Allocator::allocSlow(size_t nbytes)
+    void* Allocator::allocSlow(size_t nbytes, bool fallible)
     {
         NanoAssert((nbytes & 7) == 0);
-        fill(nbytes);
-        NanoAssert(current_top + nbytes <= current_limit);
-        void* p = current_top;
-        current_top += nbytes;
-        return p;
+        if (fill(nbytes, fallible)) {
+            NanoAssert(current_top + nbytes <= current_limit);
+            void* p = current_top;
+            current_top += nbytes;
+            return p;
+        }
+        return NULL;
     }
 
-    void Allocator::fill(size_t nbytes)
+    bool Allocator::fill(size_t nbytes, bool fallible)
     {
-        const size_t minChunk = 2000;
-        if (nbytes < minChunk)
-            nbytes = minChunk;
+        if (nbytes < MIN_CHUNK_SZB)
+            nbytes = MIN_CHUNK_SZB;
         size_t chunkbytes = sizeof(Chunk) + nbytes - sizeof(int64_t);
-        void* mem = allocChunk(chunkbytes);
-        Chunk* chunk = (Chunk*) mem;
-        chunk->prev = current_chunk;
-        current_chunk = chunk;
-        current_top = (char*)chunk->data;
-        current_limit = (char*)mem + chunkbytes;
+        void* mem = allocChunk(chunkbytes, fallible);
+        if (mem) {
+            Chunk* chunk = (Chunk*) mem;
+            chunk->prev = current_chunk;
+            current_chunk = chunk;
+            current_top = (char*)chunk->data;
+            current_limit = (char*)mem + chunkbytes;
+            return true;
+        } else {
+            NanoAssert(fallible);
+            return false;
+        }
     }
 
}

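A note on the size arithmetic in fill(): chunkbytes = sizeof(Chunk) + nbytes - sizeof(int64_t) buys nbytes of usable payload because the header's trailing 8-byte field doubles as the start of that payload. The layout this assumes, abridged from the Chunk class whose declaration is cut off at the end of the Allocator.h hunk below, is roughly:

    // Assumed chunk header layout (sketch). 'data' is typed int64_t so the
    // payload beginning there is 8-byte aligned; fill() subtracts
    // sizeof(int64_t) because those 8 bytes are counted inside sizeof(Chunk).
    class Chunk {
    public:
        Chunk*  prev;    // chunks form a singly-linked list, walked by reset()
        int64_t data[1]; // first 8 bytes of the usable payload
    };

So for the minimum request, the allocator asks the SPI for sizeof(Chunk) + MIN_CHUNK_SZB - 8 bytes in a single chunk.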
View file

@@ -46,30 +46,53 @@ namespace nanojit
      * Allocator is a bump-pointer allocator with an SPI for getting more
      * memory from embedder-implemented allocator, such as malloc()/free().
      *
-     * allocations never return NULL. The implementation of allocChunk()
+     * alloc() never returns NULL. The implementation of allocChunk()
      * is expected to perform a longjmp or exception when an allocation can't
-     * proceed.
+     * proceed. fallibleAlloc() (and fallibleAllocChunk()) may return NULL.
+     * They should be used for large allocations whose failure can be handled
+     * without too much difficulty.
      */
     class Allocator {
     public:
         Allocator();
         ~Allocator();
+
+        // Usable space in the minimum chunk size; there are also a few bytes
+        // used for administration.
+        static const size_t MIN_CHUNK_SZB = 2000;
+
         void reset();
 
         /** alloc memory, never return null. */
         void* alloc(size_t nbytes) {
+            void* p;
             nbytes = (nbytes + 7) & ~7; // round up
             if (current_top + nbytes <= current_limit) {
-                void *p = current_top;
+                p = current_top;
                 current_top += nbytes;
-                return p;
+            } else {
+                p = allocSlow(nbytes, /* fallible = */false);
+                NanoAssert(p);
             }
-            return allocSlow(nbytes);
+            return p;
         }
 
+        /** alloc memory, maybe return null. */
+        void* fallibleAlloc(size_t nbytes) {
+            void* p;
+            nbytes = (nbytes + 7) & ~7; // round up
+            if (current_top + nbytes <= current_limit) {
+                p = current_top;
+                current_top += nbytes;
+            } else {
+                p = allocSlow(nbytes, /* fallible = */true);
+            }
+            return p;
+        }
+
     protected:
-        void* allocSlow(size_t nbytes);
-        void fill(size_t minbytes);
+        void* allocSlow(size_t nbytes, bool fallible = false);
+        bool fill(size_t minbytes, bool fallible);
 
         class Chunk {
         public:
@@ -84,7 +107,7 @@ namespace nanojit
         // allocator SPI
 
         /** allocate another block from a host provided allocator */
-        void* allocChunk(size_t nbytes);
+        void* allocChunk(size_t nbytes, bool fallible);
 
         /** free back to the same allocator */
         void freeChunk(void*);

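Taken together, the header now offers two entry points with a clear division of labour: alloc() for small bookkeeping that must succeed, fallibleAlloc() for large tables whose loss is tolerable. A hypothetical caller, with names and sizes that are illustrative rather than from the patch:

    // Sketch contrasting the two entry points.
    static void example(nanojit::Allocator& alloc)
    {
        // Large and recoverable: fallibleAlloc() may return NULL.
        int32_t* table = (int32_t*)alloc.fallibleAlloc(4096 * sizeof(int32_t));
        if (!table) {
            // Degrade gracefully, e.g. run without the optional table.
        }

        // Small and must-succeed: alloc() never returns NULL; on OOM the
        // embedder's allocChunk() longjmps, throws, or aborts instead,
        // so no NULL check is needed here.
        int32_t* counter = (int32_t*)alloc.alloc(sizeof(int32_t));
        *counter = 0;
    }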
View file

@@ -2061,9 +2061,9 @@ namespace nanojit
           storesSinceLastLoad(ACCSET_NONE),
           alloc(alloc),
           knownCmpValues(alloc),
-          suspended(false)
+          suspended(false),
+          initOOM(false)
     {
         m_findNL[NLImmISmall] = &CseFilter::findImmISmall;
         m_findNL[NLImmILarge] = &CseFilter::findImmILarge;
         m_findNL[NLImmQ]      = PTR_SIZE(NULL, &CseFilter::findImmQ);
@@ -2082,15 +2082,26 @@
         m_capNL[NL3]    = 16;
         m_capNL[NLCall] = 64;
 
+        // The largish allocations are fallible, the small ones are
+        // infallible. See the comment on initOOM's declaration for why.
+
         for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind)) {
-            m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]];
+            m_listNL[nlkind] = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capNL[nlkind]);
+            if (!m_listNL[nlkind]) {
+                initOOM = true;
+                return;
+            }
             m_usedNL[nlkind] = 1; // Force memset in clearAll().
         }
 
         // Note that this allocates the CONST and MULTIPLE tables as well.
         for (CseAcc a = 0; a < CSE_NUM_USED_ACCS; a++) {
             m_capL[a] = 16;
-            m_listL[a] = new (alloc) LIns*[m_capL[a]];
+            m_listL[a] = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capL[a]);
+            if (!m_listL[a]) {
+                initOOM = true;
+                return;
+            }
             m_usedL[a] = 1; // Force memset(0) in first clearAll().
         }
@@ -2210,43 +2221,61 @@
         return hashfinish(hash);
     }
 
-    void CseFilter::growNL(NLKind nlkind)
+    bool CseFilter::growNL(NLKind nlkind)
     {
         NanoAssert(nlkind != NLImmISmall);
         const uint32_t oldcap = m_capNL[nlkind];
         m_capNL[nlkind] <<= 1;
-        LIns** oldlist = m_listNL[nlkind];
-        m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]];
-        VMPI_memset(m_listNL[nlkind], 0, m_capNL[nlkind] * sizeof(LIns*));
-        find_t find = m_findNL[nlkind];
-        for (uint32_t i = 0; i < oldcap; i++) {
-            LIns* ins = oldlist[i];
-            if (!ins) continue;
-            uint32_t j = (this->*find)(ins);
-            NanoAssert(!m_listNL[nlkind][j]);
-            m_listNL[nlkind][j] = ins;
+        // We make this allocation fallible because it's potentially large and
+        // easy to recover from. If it fails, we won't add any more
+        // instructions to the table and some CSE opportunities may be missed.
+        LIns** tmp = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capNL[nlkind]);
+        if (tmp) {
+            LIns** oldlist = m_listNL[nlkind];
+            m_listNL[nlkind] = tmp;
+            VMPI_memset(m_listNL[nlkind], 0, m_capNL[nlkind] * sizeof(LIns*));
+            find_t find = m_findNL[nlkind];
+            for (uint32_t i = 0; i < oldcap; i++) {
+                LIns* ins = oldlist[i];
+                if (!ins) continue;
+                uint32_t j = (this->*find)(ins);
+                NanoAssert(!m_listNL[nlkind][j]);
+                m_listNL[nlkind][j] = ins;
+            }
+            return true;
+        } else {
+            m_capNL[nlkind] = oldcap;
+            return false;
         }
     }
 
-    void CseFilter::growL(CseAcc cseAcc)
+    bool CseFilter::growL(CseAcc cseAcc)
     {
         const uint32_t oldcap = m_capL[cseAcc];
         m_capL[cseAcc] <<= 1;
-        LIns** oldlist = m_listL[cseAcc];
-        m_listL[cseAcc] = new (alloc) LIns*[m_capL[cseAcc]];
-        VMPI_memset(m_listL[cseAcc], 0, m_capL[cseAcc] * sizeof(LIns*));
-        find_t find = &CseFilter::findLoad;
-        for (uint32_t i = 0; i < oldcap; i++) {
-            LIns* ins = oldlist[i];
-            if (!ins) continue;
-            uint32_t j = (this->*find)(ins);
-            NanoAssert(!m_listL[cseAcc][j]);
-            m_listL[cseAcc][j] = ins;
+        LIns** tmp = (LIns**)alloc.fallibleAlloc(sizeof(LIns*) * m_capL[cseAcc]);
+        if (tmp) {
+            LIns** oldlist = m_listL[cseAcc];
+            m_listL[cseAcc] = tmp;
+            VMPI_memset(m_listL[cseAcc], 0, m_capL[cseAcc] * sizeof(LIns*));
+            find_t find = &CseFilter::findLoad;
+            for (uint32_t i = 0; i < oldcap; i++) {
+                LIns* ins = oldlist[i];
+                if (!ins) continue;
+                uint32_t j = (this->*find)(ins);
+                NanoAssert(!m_listL[cseAcc][j]);
+                m_listL[cseAcc][j] = ins;
+            }
+            return true;
+        } else {
+            m_capL[cseAcc] = oldcap;
+            return false;
         }
     }
 
     void CseFilter::addNLImmISmall(LIns* ins, uint32_t k)
     {
+        NanoAssert(!initOOM);
         if (suspended) return;
         NLKind nlkind = NLImmISmall;
         NanoAssert(k < m_capNL[nlkind]);
@@ -2257,24 +2286,36 @@
     void CseFilter::addNL(NLKind nlkind, LIns* ins, uint32_t k)
     {
+        NanoAssert(!initOOM);
         if (suspended) return;
         NanoAssert(!m_listNL[nlkind][k]);
         m_usedNL[nlkind]++;
         m_listNL[nlkind][k] = ins;
         if ((m_usedNL[nlkind] * 4) >= (m_capNL[nlkind] * 3)) { // load factor of 0.75
-            growNL(nlkind);
+            bool ok = growNL(nlkind);
+            if (!ok) {
+                // OOM: undo the insertion.
+                m_usedNL[nlkind]--;
+                m_listNL[nlkind][k] = NULL;
+            }
         }
     }
 
     void CseFilter::addL(LIns* ins, uint32_t k)
     {
+        NanoAssert(!initOOM);
         if (suspended) return;
         CseAcc cseAcc = miniAccSetToCseAcc(ins->miniAccSet(), ins->loadQual());
         NanoAssert(!m_listL[cseAcc][k]);
         m_usedL[cseAcc]++;
         m_listL[cseAcc][k] = ins;
         if ((m_usedL[cseAcc] * 4) >= (m_capL[cseAcc] * 3)) { // load factor of 0.75
-            growL(cseAcc);
+            bool ok = growL(cseAcc);
+            if (!ok) {
+                // OOM: undo the insertion.
+                m_usedL[cseAcc]--;
+                m_listL[cseAcc][k] = NULL;
+            }
         }
     }

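With the constructor now fallible, the embedder is expected to test initOOM before interposing the filter. A hedged sketch of a call site follows; the surrounding names are illustrative, and the real call site is in the TraceMonkey half of the bug:

    // Only wire in the CSE filter if its start-up tables allocated.
    // CSE is purely an optimization, so on OOM we keep the plain writer;
    // the half-built filter is arena-allocated and is reclaimed when
    // 'alloc' is reset.
    LirWriter* lir = new (alloc) LirBufWriter(lirbuf, config);
    CseFilter* cse = new (alloc) CseFilter(lir, numUsedAccs, alloc);
    if (!cse->initOOM)
        lir = cse;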
View file

@@ -2054,8 +2054,9 @@
         uint32_t findCall(LIns* ins);
         uint32_t findLoad(LIns* ins);
 
-        void growNL(NLKind kind);
-        void growL(CseAcc cseAcc);
+        // These return false if they failed to grow due to OOM.
+        bool growNL(NLKind kind);
+        bool growL(CseAcc cseAcc);
 
         void addNLImmISmall(LIns* ins, uint32_t k);
         // 'k' is the index found by findXYZ().
@@ -2069,6 +2070,17 @@
     public:
         CseFilter(LirWriter *out, uint8_t embNumUsedAccs, Allocator&);
 
+        // CseFilter does some largish fallible allocations at start-up. If
+        // they fail, the constructor sets this field to 'true'. It should be
+        // checked after creation, and if set the CseFilter cannot be used.
+        // (But the check can be skipped if allocChunk() always succeeds.)
+        //
+        // FIXME: This fallibility is a sop to TraceMonkey's implementation of
+        // infallible malloc -- by avoiding some largish infallible
+        // allocations, it reduces the size of the reserve space needed.
+        // Bug 624590 is open to fix this.
+        bool initOOM;
+
         LIns* insImmI(int32_t imm);
 #ifdef NANOJIT_64BIT
         LIns* insImmQ(uint64_t q);
@@ -2116,13 +2128,13 @@
         LIns *state, *param1, *sp, *rp;
         LIns* savedRegs[NumSavedRegs+1]; // Allocate an extra element in case NumSavedRegs == 0
 
-    protected:
-        friend class LirBufWriter;
-
         /** Each chunk is just a raw area of LIns instances, with no header
             and no more than 8-byte alignment. The chunk size is somewhat arbitrary. */
         static const size_t CHUNK_SZB = 8000;
 
+    protected:
+        friend class LirBufWriter;
+
         /** Get CHUNK_SZB more memory for LIR instructions. */
         void chunkAlloc();
         void moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk);
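The visibility shuffle in the last hunk moves CHUNK_SZB out of LirBuffer's protected section, and Allocator now exposes MIN_CHUNK_SZB; both serve the bug's goal of letting the embedder size one shared OOM reserve instead of keeping an mReserve array per VMAllocator. A speculative sketch of that sizing (the actual VM-side arithmetic lives in the TraceMonkey half of bug 623428, not in this patch):

    // Assumed reserve sizing: enough for the largest single chunk request
    // either allocator can make infallibly; the exact formula is illustrative.
    static const size_t kReserveSzB =
        nanojit::LirBuffer::CHUNK_SZB + nanojit::Allocator::MIN_CHUNK_SZB;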