Bug 609121 - nanojit: handle small immediates specially in CseFilter. r=wmaddox.

--HG--
extra : convert_revision : 04d7771f3f85877cf12395ffecfc4f2f6d4a0b50
This commit is contained in:
Nicholas Nethercote 2010-11-10 14:40:07 -08:00
Родитель 4cd0a47289
Коммит 9e273662fc
2 изменённых файлов: 97 добавлений и 62 удалений

Просмотреть файл

@ -167,7 +167,6 @@ namespace nanojit
// clear the stats, etc
_unused = 0;
_limit = 0;
_bytesAllocated = 0;
_stats.lir = 0;
for (int i = 0; i < NumSavedRegs; ++i)
savedRegs[i] = NULL;
@ -186,11 +185,6 @@ namespace nanojit
return _stats.lir;
}
// Returns the total bytes allocated for this buffer minus the part of the
// current chunk that has not been used yet (the gap between _unused and
// _limit).
size_t LirBuffer::byteCount()
{
    const uintptr_t unusedTail = _limit - _unused;
    return _bytesAllocated - unusedTail;
}
// Allocate a new page, and write the first instruction to it -- a skip
// linking to last instruction of the previous page.
void LirBuffer::moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk)
@ -2070,23 +2064,25 @@ namespace nanojit
suspended(false)
{
m_findNL[LInsImmI] = &CseFilter::findImmI;
m_findNL[LInsImmQ] = PTR_SIZE(NULL, &CseFilter::findImmQ);
m_findNL[LInsImmD] = &CseFilter::findImmD;
m_findNL[LIns1] = &CseFilter::find1;
m_findNL[LIns2] = &CseFilter::find2;
m_findNL[LIns3] = &CseFilter::find3;
m_findNL[LInsCall] = &CseFilter::findCall;
m_findNL[NLImmISmall] = &CseFilter::findImmISmall;
m_findNL[NLImmILarge] = &CseFilter::findImmILarge;
m_findNL[NLImmQ] = PTR_SIZE(NULL, &CseFilter::findImmQ);
m_findNL[NLImmD] = &CseFilter::findImmD;
m_findNL[NL1] = &CseFilter::find1;
m_findNL[NL2] = &CseFilter::find2;
m_findNL[NL3] = &CseFilter::find3;
m_findNL[NLCall] = &CseFilter::findCall;
m_capNL[LInsImmI] = 128;
m_capNL[LInsImmQ] = PTR_SIZE(0, 16);
m_capNL[LInsImmD] = 16;
m_capNL[LIns1] = 256;
m_capNL[LIns2] = 512;
m_capNL[LIns3] = 16;
m_capNL[LInsCall] = 64;
m_capNL[NLImmISmall] = 17; // covers 0..16, which is over half the cases for TraceMonkey
m_capNL[NLImmILarge] = 64;
m_capNL[NLImmQ] = PTR_SIZE(0, 16);
m_capNL[NLImmD] = 16;
m_capNL[NL1] = 256;
m_capNL[NL2] = 512;
m_capNL[NL3] = 16;
m_capNL[NLCall] = 64;
for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind)) {
for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind)) {
m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]];
m_usedNL[nlkind] = 1; // Force memset in clearAll().
}
@ -2162,7 +2158,7 @@ namespace nanojit
}
void CseFilter::clearAll() {
for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind))
for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind))
clearNL(nlkind);
// Note that this clears the CONST and MULTIPLE load tables as well.
@ -2216,6 +2212,7 @@ namespace nanojit
void CseFilter::growNL(NLKind nlkind)
{
NanoAssert(nlkind != NLImmISmall);
const uint32_t oldcap = m_capNL[nlkind];
m_capNL[nlkind] <<= 1;
LIns** oldlist = m_listNL[nlkind];
@ -2248,6 +2245,16 @@ namespace nanojit
}
}
// Records 'ins' in slot 'k' of the small-immediate table and bumps that
// table's usage count.  Nothing is recorded while the filter is suspended.
// The slot must lie within the table's capacity and must currently be empty.
void CseFilter::addNLImmISmall(LIns* ins, uint32_t k)
{
    if (!suspended) {
        const NLKind kind = NLImmISmall;
        NanoAssert(k < m_capNL[kind]);
        LIns** table = m_listNL[kind];
        NanoAssert(!table[k]);
        m_usedNL[kind]++;
        table[k] = ins;
    }
}
void CseFilter::addNL(NLKind nlkind, LIns* ins, uint32_t k)
{
if (suspended) return;
@ -2271,9 +2278,26 @@ namespace nanojit
}
}
inline LIns* CseFilter::findImmI(int32_t a, uint32_t &k)
inline LIns* CseFilter::findImmISmall(int32_t a, uint32_t &k)
{
NLKind nlkind = LInsImmI;
// This one is a direct array lookup rather than a hashtable lookup.
NLKind nlkind = NLImmISmall;
k = a;
LIns* ins = m_listNL[nlkind][k];
NanoAssert(!ins || ins->isImmI(a));
return ins;
}
// Slot-only lookup for an already-created immediate instruction: forwards
// the instruction's 32-bit immediate to the (value, slot) overload and
// returns the slot index it computes.  The instruction found there, if any,
// is ignored.
uint32_t CseFilter::findImmISmall(LIns* ins)
{
    uint32_t slot;
    findImmISmall(ins->immI(), slot);
    return slot;
}
inline LIns* CseFilter::findImmILarge(int32_t a, uint32_t &k)
{
NLKind nlkind = NLImmILarge;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hashImmI(a) & bitmask;
uint32_t n = 1;
@ -2296,17 +2320,17 @@ namespace nanojit
}
}
uint32_t CseFilter::findImmI(LIns* ins)
uint32_t CseFilter::findImmILarge(LIns* ins)
{
uint32_t k;
findImmI(ins->immI(), k);
findImmILarge(ins->immI(), k);
return k;
}
#ifdef NANOJIT_64BIT
inline LIns* CseFilter::findImmQ(uint64_t a, uint32_t &k)
{
NLKind nlkind = LInsImmQ;
NLKind nlkind = NLImmQ;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hashImmQorD(a) & bitmask;
uint32_t n = 1;
@ -2332,7 +2356,7 @@ namespace nanojit
inline LIns* CseFilter::findImmD(uint64_t a, uint32_t &k)
{
NLKind nlkind = LInsImmD;
NLKind nlkind = NLImmD;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hashImmQorD(a) & bitmask;
uint32_t n = 1;
@ -2357,7 +2381,7 @@ namespace nanojit
inline LIns* CseFilter::find1(LOpcode op, LIns* a, uint32_t &k)
{
NLKind nlkind = LIns1;
NLKind nlkind = NL1;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hash1(op, a) & bitmask;
uint32_t n = 1;
@ -2381,7 +2405,7 @@ namespace nanojit
inline LIns* CseFilter::find2(LOpcode op, LIns* a, LIns* b, uint32_t &k)
{
NLKind nlkind = LIns2;
NLKind nlkind = NL2;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hash2(op, a, b) & bitmask;
uint32_t n = 1;
@ -2405,7 +2429,7 @@ namespace nanojit
inline LIns* CseFilter::find3(LOpcode op, LIns* a, LIns* b, LIns* c, uint32_t &k)
{
NLKind nlkind = LIns3;
NLKind nlkind = NL3;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hash3(op, a, b, c) & bitmask;
uint32_t n = 1;
@ -2466,7 +2490,7 @@ namespace nanojit
inline LIns* CseFilter::findCall(const CallInfo *ci, uint32_t argc, LIns* args[], uint32_t &k)
{
NLKind nlkind = LInsCall;
NLKind nlkind = NLCall;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hashCall(ci, argc, args) & bitmask;
uint32_t n = 1;
@ -2496,10 +2520,19 @@ namespace nanojit
LIns* CseFilter::insImmI(int32_t imm)
{
uint32_t k;
LIns* ins = findImmI(imm, k);
if (!ins) {
ins = out->insImmI(imm);
addNL(LInsImmI, ins, k);
LIns* ins;
if (0 <= imm && imm < int32_t(m_capNL[NLImmISmall])) {
ins = findImmISmall(imm, k);
if (!ins) {
ins = out->insImmI(imm);
addNLImmISmall(ins, k);
}
} else {
ins = findImmILarge(imm, k);
if (!ins) {
ins = out->insImmI(imm);
addNL(NLImmILarge, ins, k);
}
}
// We assume that downstream stages do not modify the instruction, so
// that we can insert 'ins' into slot 'k'. Check this.
@ -2514,7 +2547,7 @@ namespace nanojit
LIns* ins = findImmQ(q, k);
if (!ins) {
ins = out->insImmQ(q);
addNL(LInsImmQ, ins, k);
addNL(NLImmQ, ins, k);
}
NanoAssert(ins->isop(LIR_immq) && ins->immQ() == q);
return ins;
@ -2534,7 +2567,7 @@ namespace nanojit
LIns* ins = findImmD(u.u64, k);
if (!ins) {
ins = out->insImmD(d);
addNL(LInsImmD, ins, k);
addNL(NLImmD, ins, k);
}
NanoAssert(ins->isop(LIR_immd) && ins->immDasQ() == u.u64);
return ins;
@ -2555,7 +2588,7 @@ namespace nanojit
ins = find1(op, a, k);
if (!ins) {
ins = out->ins1(op, a);
addNL(LIns1, ins, k);
addNL(NL1, ins, k);
}
} else {
ins = out->ins1(op, a);
@ -2572,7 +2605,7 @@ namespace nanojit
ins = find2(op, a, b, k);
if (!ins) {
ins = out->ins2(op, a, b);
addNL(LIns2, ins, k);
addNL(NL2, ins, k);
} else if (ins->isCmp()) {
if (knownCmpValues.containsKey(ins)) {
// We've seen this comparison before, and it was previously
@ -2594,7 +2627,7 @@ namespace nanojit
LIns* ins = find3(op, a, b, c, k);
if (!ins) {
ins = out->ins3(op, a, b, c);
addNL(LIns3, ins, k);
addNL(NL3, ins, k);
}
NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c);
return ins;
@ -2694,7 +2727,7 @@ namespace nanojit
ins = find1(op, c, k);
if (!ins) {
ins = out->insGuard(op, c, gr);
addNL(LIns1, ins, k);
addNL(NL1, ins, k);
}
// After this guard, we know that 'c's result was true (if
// op==LIR_xf) or false (if op==LIR_xt), else we would have
@ -2719,7 +2752,7 @@ namespace nanojit
LIns* ins = find2(op, a, b, k);
if (!ins) {
ins = out->insGuardXov(op, a, b, gr);
addNL(LIns2, ins, k);
addNL(NL2, ins, k);
}
NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
return ins;
@ -2737,7 +2770,7 @@ namespace nanojit
ins = findCall(ci, argc, args, k);
if (!ins) {
ins = out->insCall(ci, args);
addNL(LInsCall, ins, k);
addNL(NLCall, ins, k);
}
} else {
// We only need to worry about aliasing if !ci->_isPure.

Просмотреть файл

@ -1924,18 +1924,19 @@ namespace nanojit
// We divide instruction kinds into groups. LIns0 isn't present
// because we don't need to record any 0-ary instructions. Loads
// aren't here, they're handled separately.
LInsImmI = 0,
LInsImmQ = 1, // only occurs on 64-bit platforms
LInsImmD = 2,
LIns1 = 3,
LIns2 = 4,
LIns3 = 5,
LInsCall = 6,
NLImmISmall = 0,
NLImmILarge = 1,
NLImmQ = 2, // only occurs on 64-bit platforms
NLImmD = 3,
NL1 = 4,
NL2 = 5,
NL3 = 6,
NLCall = 7,
LInsFirst = 0,
LInsLast = 6,
NLFirst = 0,
NLLast = 7,
// Need a value after "last" to outsmart compilers that insist last+1 is impossible.
LInsInvalid = 7
NLInvalid = 8
};
// Yields the NLKind that follows 'kind' in enumeration order.  The argument
// is parenthesized so that a compound expression (e.g. a conditional) is
// incremented as a whole rather than having '+1' bind to its last operand.
#define nextNLKind(kind) NLKind((kind) + 1)
@ -1948,11 +1949,11 @@ namespace nanojit
// Don't start m_capNL too small, or we'll waste time growing and rehashing.
// Don't start m_capNL too large, or we'll waste memory.
//
LIns** m_listNL[LInsLast + 1];
uint32_t m_capNL[ LInsLast + 1];
uint32_t m_usedNL[LInsLast + 1];
LIns** m_listNL[NLLast + 1];
uint32_t m_capNL[ NLLast + 1];
uint32_t m_usedNL[NLLast + 1];
typedef uint32_t (CseFilter::*find_t)(LIns*);
find_t m_findNL[LInsLast + 1];
find_t m_findNL[NLLast + 1];
// Similarly, for loads, there is one table for each CseAcc. A CseAcc
// is like a normal access region, but there are two extra possible
@ -2021,7 +2022,8 @@ namespace nanojit
static uint32_t hashCall(const CallInfo *call, uint32_t argc, LIns* args[]);
// These versions are used before an LIns has been created.
LIns* findImmI(int32_t a, uint32_t &k);
LIns* findImmISmall(int32_t a, uint32_t &k);
LIns* findImmILarge(int32_t a, uint32_t &k);
#ifdef NANOJIT_64BIT
LIns* findImmQ(uint64_t a, uint32_t &k);
#endif
@ -2036,7 +2038,8 @@ namespace nanojit
// These versions are used after an LIns has been created; they are
// used for rehashing after growing. They just call onto the
// multi-arg versions above.
uint32_t findImmI(LIns* ins);
uint32_t findImmISmall(LIns* ins);
uint32_t findImmILarge(LIns* ins);
#ifdef NANOJIT_64BIT
uint32_t findImmQ(LIns* ins);
#endif
@ -2050,6 +2053,7 @@ namespace nanojit
void growNL(NLKind kind);
void growL(CseAcc cseAcc);
void addNLImmISmall(LIns* ins, uint32_t k);
// 'k' is the index found by findXYZ().
void addNL(NLKind kind, LIns* ins, uint32_t k);
void addL(LIns* ins, uint32_t k);
@ -2096,7 +2100,6 @@ namespace nanojit
verbose_only(LInsPrinter* printer;)
int32_t insCount();
size_t byteCount();
// stats
struct
@ -2123,7 +2126,6 @@ namespace nanojit
Allocator& _allocator;
uintptr_t _unused; // next unused instruction slot in the current LIR chunk
uintptr_t _limit; // one past the last usable byte of the current LIR chunk
size_t _bytesAllocated;
};
class LirBufWriter : public LirWriter