зеркало из https://github.com/mozilla/gecko-dev.git
merge
This commit is contained in:
Коммит
39e1d582a2
|
@ -1 +1 @@
|
|||
f348fd5b02118c7151d991f51d76abe69976952e
|
||||
04d7771f3f85877cf12395ffecfc4f2f6d4a0b50
|
||||
|
|
|
@ -70,8 +70,7 @@ namespace nanojit
|
|||
* - merging paths ( build a graph? ), possibly use external rep to drive codegen
|
||||
*/
|
||||
Assembler::Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc, AvmCore* core, LogControl* logc, const Config& config)
|
||||
: codeList(NULL)
|
||||
, alloc(alloc)
|
||||
: alloc(alloc)
|
||||
, _codeAlloc(codeAlloc)
|
||||
, _dataAlloc(dataAlloc)
|
||||
, _thisfrag(NULL)
|
||||
|
@ -82,6 +81,7 @@ namespace nanojit
|
|||
#if NJ_USES_IMMD_POOL
|
||||
, _immDPool(alloc)
|
||||
#endif
|
||||
, codeList(NULL)
|
||||
, _epilogue(NULL)
|
||||
, _err(None)
|
||||
#if PEDANTIC
|
||||
|
@ -1125,6 +1125,7 @@ namespace nanojit
|
|||
_codeAlloc.free(exitStart, exitEnd);
|
||||
_codeAlloc.free(codeStart, codeEnd);
|
||||
codeList = NULL;
|
||||
_codeAlloc.markAllExec(); // expensive but safe, we mark all code pages R-X
|
||||
}
|
||||
|
||||
void Assembler::endAssembly(Fragment* frag)
|
||||
|
@ -1162,6 +1163,9 @@ namespace nanojit
|
|||
verbose_only( codeBytes -= (_nIns - codeStart) * sizeof(NIns); )
|
||||
#endif
|
||||
|
||||
// note: the code pages are no longer writable from this point onwards
|
||||
_codeAlloc.markExec(codeList);
|
||||
|
||||
// at this point all our new code is in the d-cache and not the i-cache,
|
||||
// so flush the i-cache on cpu's that need it.
|
||||
CodeAlloc::flushICache(codeList);
|
||||
|
@ -1488,13 +1492,10 @@ namespace nanojit
|
|||
{
|
||||
size_t delta = (uintptr_t)priorIns - (uintptr_t)_nIns; // # bytes that have been emitted since last go-around
|
||||
|
||||
if (codeList) {
|
||||
codeList = codeList;
|
||||
}
|
||||
// if no codeList then we know priorIns and _nIns are on same page, otherwise make sure priorIns was not in the previous code block
|
||||
if (!codeList || !codeList->isInBlock(priorIns)) {
|
||||
NanoAssert(delta < VMPI_getVMPageSize()); // sanity check
|
||||
nopInsertTrigger -= delta;
|
||||
nopInsertTrigger -= (int32_t) delta;
|
||||
if (nopInsertTrigger < 0)
|
||||
{
|
||||
nopInsertTrigger = noiseForNopInsertion(_noise);
|
||||
|
|
|
@ -313,8 +313,6 @@ namespace nanojit
|
|||
debug_only( void resourceConsistencyCheck(); )
|
||||
debug_only( void registerConsistencyCheck(); )
|
||||
|
||||
CodeList* codeList; // finished blocks of code.
|
||||
|
||||
private:
|
||||
void gen(LirFilter* toCompile);
|
||||
NIns* genPrologue();
|
||||
|
@ -401,6 +399,7 @@ namespace nanojit
|
|||
// temporarily swap all the code/exit variables below (using
|
||||
// swapCodeChunks()). Afterwards we swap them all back and set
|
||||
// _inExit to false again.
|
||||
CodeList* codeList; // finished blocks of code.
|
||||
bool _inExit, vpad2[3];
|
||||
NIns *codeStart, *codeEnd; // current normal code chunk
|
||||
NIns *exitStart, *exitEnd; // current exit code chunk
|
||||
|
|
|
@ -128,28 +128,20 @@ namespace nanojit
|
|||
}
|
||||
|
||||
void CodeAlloc::alloc(NIns* &start, NIns* &end) {
|
||||
// Reuse a block if possible.
|
||||
if (availblocks) {
|
||||
markBlockWrite(availblocks);
|
||||
CodeList* b = removeBlock(availblocks);
|
||||
b->isFree = false;
|
||||
start = b->start();
|
||||
end = b->end;
|
||||
if (verbose)
|
||||
avmplus::AvmLog("alloc %p-%p %d\n", start, end, int(end-start));
|
||||
return;
|
||||
if (!availblocks) {
|
||||
// no free mem, get more
|
||||
addMem();
|
||||
}
|
||||
// no suitable block found, get more memory
|
||||
void *mem = allocCodeChunk(bytesPerAlloc); // allocations never fail
|
||||
totalAllocated += bytesPerAlloc;
|
||||
NanoAssert(mem != NULL); // see allocCodeChunk contract in CodeAlloc.h
|
||||
_nvprof("alloc page", uintptr_t(mem)>>12);
|
||||
CodeList* b = addMem(mem, bytesPerAlloc);
|
||||
|
||||
// grab a block
|
||||
markBlockWrite(availblocks);
|
||||
CodeList* b = removeBlock(availblocks);
|
||||
b->isFree = false;
|
||||
start = b->start();
|
||||
end = b->end;
|
||||
if (verbose)
|
||||
avmplus::AvmLog("alloc %p-%p %d\n", start, end, int(end-start));
|
||||
avmplus::AvmLog("CodeAlloc(%p).alloc %p-%p %d\n", this, start, end, int(end-start));
|
||||
debug_only(sanity_check();)
|
||||
}
|
||||
|
||||
void CodeAlloc::free(NIns* start, NIns *end) {
|
||||
|
@ -349,11 +341,16 @@ extern "C" void sync_instruction_memory(caddr_t v, u_int len);
|
|||
blocks = b;
|
||||
}
|
||||
|
||||
CodeList* CodeAlloc::addMem(void *mem, size_t bytes) {
|
||||
void CodeAlloc::addMem() {
|
||||
void *mem = allocCodeChunk(bytesPerAlloc); // allocations never fail
|
||||
totalAllocated += bytesPerAlloc;
|
||||
NanoAssert(mem != NULL); // see allocCodeChunk contract in CodeAlloc.h
|
||||
_nvprof("alloc page", uintptr_t(mem)>>12);
|
||||
|
||||
CodeList* b = (CodeList*)mem;
|
||||
b->lower = 0;
|
||||
b->end = (NIns*) (uintptr_t(mem) + bytes - sizeofMinBlock);
|
||||
b->next = 0;
|
||||
b->end = (NIns*) (uintptr_t(mem) + bytesPerAlloc - sizeofMinBlock);
|
||||
b->isFree = true;
|
||||
|
||||
// create a tiny terminator block, add to fragmented list, this way
|
||||
|
@ -370,7 +367,8 @@ extern "C" void sync_instruction_memory(caddr_t v, u_int len);
|
|||
// add terminator to heapblocks list so we can track whole blocks
|
||||
terminator->next = heapblocks;
|
||||
heapblocks = terminator;
|
||||
return b;
|
||||
|
||||
addBlock(availblocks, b); // add to free list
|
||||
}
|
||||
|
||||
CodeList* CodeAlloc::getBlock(NIns* start, NIns* end) {
|
||||
|
@ -509,6 +507,15 @@ extern "C" void sync_instruction_memory(caddr_t v, u_int len);
|
|||
}
|
||||
#endif
|
||||
|
||||
// Loop through a list of blocks marking the chunks executable. If we encounter
|
||||
// multiple blocks in the same chunk, only the first block will cause the
|
||||
// chunk to become executable, the other calls will no-op (isExec flag checked)
|
||||
void CodeAlloc::markExec(CodeList* &blocks) {
|
||||
for (CodeList *b = blocks; b != 0; b = b->next) {
|
||||
markChunkExec(b->terminator);
|
||||
}
|
||||
}
|
||||
|
||||
// Variant of markExec(CodeList*) that walks all heapblocks (i.e. chunks) marking
|
||||
// each one executable. On systems where bytesPerAlloc is low (i.e. have lots
|
||||
// of elements in the list) this can be expensive.
|
||||
|
|
|
@ -43,9 +43,9 @@
|
|||
namespace nanojit
|
||||
{
|
||||
/**
|
||||
* CodeList is a linked list of non-contigous blocks of code. Clients use CodeList*
|
||||
* to point to a list, and each CodeList instance tracks a single contiguous
|
||||
* block of code.
|
||||
* CodeList is a single block of code. The next field is used to
|
||||
* form linked lists of non-contiguous blocks of code. Clients use CodeList*
|
||||
* to point to the first block in a list.
|
||||
*/
|
||||
class CodeList
|
||||
{
|
||||
|
@ -95,13 +95,24 @@ namespace nanojit
|
|||
};
|
||||
|
||||
/**
|
||||
* Code memory allocator.
|
||||
* Long lived manager for many code blocks,
|
||||
* Code memory allocator is a long lived manager for many code blocks that
|
||||
* manages interaction with an underlying code memory allocator,
|
||||
* setting page permissions, api's for allocating and freeing
|
||||
* sets page permissions. CodeAlloc provides APIs for allocating and freeing
|
||||
* individual blocks of code memory (for methods, stubs, or compiled
|
||||
* traces), and also static functions for managing lists of allocated
|
||||
* code.
|
||||
* traces), static functions for managing lists of allocated code, and has
|
||||
* a few pure virtual methods that embedders must implement to provide
|
||||
* memory to the allocator.
|
||||
*
|
||||
* A "chunk" is a region of memory obtained from allocCodeChunk; it must
|
||||
* be page aligned and be a multiple of the system page size.
|
||||
*
|
||||
* A "block" is a region of memory within a chunk. It can be arbitrarily
|
||||
* sized and aligned, but is always contained within a single chunk.
|
||||
* class CodeList represents one block; the members of CodeList track the
|
||||
* extent of the block and support creating lists of blocks.
|
||||
*
|
||||
* The allocator coalesces free blocks when it can, in free(), but never
|
||||
* coalesces chunks.
|
||||
*/
|
||||
class CodeAlloc
|
||||
{
|
||||
|
@ -133,7 +144,7 @@ namespace nanojit
|
|||
static CodeList* getBlock(NIns* start, NIns* end);
|
||||
|
||||
/** add raw memory to the free list */
|
||||
CodeList* addMem(void* mem, size_t bytes);
|
||||
void addMem();
|
||||
|
||||
/** make sure all the higher/lower pointers are correct for every block */
|
||||
void sanity_check();
|
||||
|
@ -142,9 +153,9 @@ namespace nanojit
|
|||
CodeList* firstBlock(CodeList* term);
|
||||
|
||||
//
|
||||
// CodeAlloc's SPI. Implementations must be defined by nanojit embedder.
|
||||
// allocation failures should cause an exception or longjmp; nanojit
|
||||
// intentionally does not check for null.
|
||||
// CodeAlloc's SPI (Service Provider Interface). Implementations must be
|
||||
// defined by nanojit embedder. Allocation failures should cause an exception
|
||||
// or longjmp; nanojit intentionally does not check for null.
|
||||
//
|
||||
|
||||
/** allocate nbytes of memory to hold code. Never return null! */
|
||||
|
@ -203,9 +214,12 @@ namespace nanojit
|
|||
/** print out stats about heap usage */
|
||||
void logStats();
|
||||
|
||||
/** protect all code in this code alloc */
|
||||
/** protect all code managed by this CodeAlloc */
|
||||
void markAllExec();
|
||||
|
||||
/** protect all mem in the block list */
|
||||
void markExec(CodeList* &blocks);
|
||||
|
||||
/** protect an entire chunk */
|
||||
void markChunkExec(CodeList* term);
|
||||
|
||||
|
|
|
@ -167,7 +167,6 @@ namespace nanojit
|
|||
// clear the stats, etc
|
||||
_unused = 0;
|
||||
_limit = 0;
|
||||
_bytesAllocated = 0;
|
||||
_stats.lir = 0;
|
||||
for (int i = 0; i < NumSavedRegs; ++i)
|
||||
savedRegs[i] = NULL;
|
||||
|
@ -186,11 +185,6 @@ namespace nanojit
|
|||
return _stats.lir;
|
||||
}
|
||||
|
||||
size_t LirBuffer::byteCount()
|
||||
{
|
||||
return _bytesAllocated - (_limit - _unused);
|
||||
}
|
||||
|
||||
// Allocate a new page, and write the first instruction to it -- a skip
|
||||
// linking to last instruction of the previous page.
|
||||
void LirBuffer::moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk)
|
||||
|
@ -2070,23 +2064,25 @@ namespace nanojit
|
|||
suspended(false)
|
||||
{
|
||||
|
||||
m_findNL[LInsImmI] = &CseFilter::findImmI;
|
||||
m_findNL[LInsImmQ] = PTR_SIZE(NULL, &CseFilter::findImmQ);
|
||||
m_findNL[LInsImmD] = &CseFilter::findImmD;
|
||||
m_findNL[LIns1] = &CseFilter::find1;
|
||||
m_findNL[LIns2] = &CseFilter::find2;
|
||||
m_findNL[LIns3] = &CseFilter::find3;
|
||||
m_findNL[LInsCall] = &CseFilter::findCall;
|
||||
m_findNL[NLImmISmall] = &CseFilter::findImmISmall;
|
||||
m_findNL[NLImmILarge] = &CseFilter::findImmILarge;
|
||||
m_findNL[NLImmQ] = PTR_SIZE(NULL, &CseFilter::findImmQ);
|
||||
m_findNL[NLImmD] = &CseFilter::findImmD;
|
||||
m_findNL[NL1] = &CseFilter::find1;
|
||||
m_findNL[NL2] = &CseFilter::find2;
|
||||
m_findNL[NL3] = &CseFilter::find3;
|
||||
m_findNL[NLCall] = &CseFilter::findCall;
|
||||
|
||||
m_capNL[LInsImmI] = 128;
|
||||
m_capNL[LInsImmQ] = PTR_SIZE(0, 16);
|
||||
m_capNL[LInsImmD] = 16;
|
||||
m_capNL[LIns1] = 256;
|
||||
m_capNL[LIns2] = 512;
|
||||
m_capNL[LIns3] = 16;
|
||||
m_capNL[LInsCall] = 64;
|
||||
m_capNL[NLImmISmall] = 17; // covers 0..16, which is over half the cases for TraceMonkey
|
||||
m_capNL[NLImmILarge] = 64;
|
||||
m_capNL[NLImmQ] = PTR_SIZE(0, 16);
|
||||
m_capNL[NLImmD] = 16;
|
||||
m_capNL[NL1] = 256;
|
||||
m_capNL[NL2] = 512;
|
||||
m_capNL[NL3] = 16;
|
||||
m_capNL[NLCall] = 64;
|
||||
|
||||
for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind)) {
|
||||
for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind)) {
|
||||
m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]];
|
||||
m_usedNL[nlkind] = 1; // Force memset in clearAll().
|
||||
}
|
||||
|
@ -2162,7 +2158,7 @@ namespace nanojit
|
|||
}
|
||||
|
||||
void CseFilter::clearAll() {
|
||||
for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind))
|
||||
for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind))
|
||||
clearNL(nlkind);
|
||||
|
||||
// Note that this clears the CONST and MULTIPLE load tables as well.
|
||||
|
@ -2216,6 +2212,7 @@ namespace nanojit
|
|||
|
||||
void CseFilter::growNL(NLKind nlkind)
|
||||
{
|
||||
NanoAssert(nlkind != NLImmISmall);
|
||||
const uint32_t oldcap = m_capNL[nlkind];
|
||||
m_capNL[nlkind] <<= 1;
|
||||
LIns** oldlist = m_listNL[nlkind];
|
||||
|
@ -2248,6 +2245,16 @@ namespace nanojit
|
|||
}
|
||||
}
|
||||
|
||||
void CseFilter::addNLImmISmall(LIns* ins, uint32_t k)
|
||||
{
|
||||
if (suspended) return;
|
||||
NLKind nlkind = NLImmISmall;
|
||||
NanoAssert(k < m_capNL[nlkind]);
|
||||
NanoAssert(!m_listNL[nlkind][k]);
|
||||
m_usedNL[nlkind]++;
|
||||
m_listNL[nlkind][k] = ins;
|
||||
}
|
||||
|
||||
void CseFilter::addNL(NLKind nlkind, LIns* ins, uint32_t k)
|
||||
{
|
||||
if (suspended) return;
|
||||
|
@ -2271,9 +2278,26 @@ namespace nanojit
|
|||
}
|
||||
}
|
||||
|
||||
inline LIns* CseFilter::findImmI(int32_t a, uint32_t &k)
|
||||
inline LIns* CseFilter::findImmISmall(int32_t a, uint32_t &k)
|
||||
{
|
||||
NLKind nlkind = LInsImmI;
|
||||
// This one is a direct array lookup rather than a hashtable lookup.
|
||||
NLKind nlkind = NLImmISmall;
|
||||
k = a;
|
||||
LIns* ins = m_listNL[nlkind][k];
|
||||
NanoAssert(!ins || ins->isImmI(a));
|
||||
return ins;
|
||||
}
|
||||
|
||||
uint32_t CseFilter::findImmISmall(LIns* ins)
|
||||
{
|
||||
uint32_t k;
|
||||
findImmISmall(ins->immI(), k);
|
||||
return k;
|
||||
}
|
||||
|
||||
inline LIns* CseFilter::findImmILarge(int32_t a, uint32_t &k)
|
||||
{
|
||||
NLKind nlkind = NLImmILarge;
|
||||
const uint32_t bitmask = m_capNL[nlkind] - 1;
|
||||
k = hashImmI(a) & bitmask;
|
||||
uint32_t n = 1;
|
||||
|
@ -2296,17 +2320,17 @@ namespace nanojit
|
|||
}
|
||||
}
|
||||
|
||||
uint32_t CseFilter::findImmI(LIns* ins)
|
||||
uint32_t CseFilter::findImmILarge(LIns* ins)
|
||||
{
|
||||
uint32_t k;
|
||||
findImmI(ins->immI(), k);
|
||||
findImmILarge(ins->immI(), k);
|
||||
return k;
|
||||
}
|
||||
|
||||
#ifdef NANOJIT_64BIT
|
||||
inline LIns* CseFilter::findImmQ(uint64_t a, uint32_t &k)
|
||||
{
|
||||
NLKind nlkind = LInsImmQ;
|
||||
NLKind nlkind = NLImmQ;
|
||||
const uint32_t bitmask = m_capNL[nlkind] - 1;
|
||||
k = hashImmQorD(a) & bitmask;
|
||||
uint32_t n = 1;
|
||||
|
@ -2332,7 +2356,7 @@ namespace nanojit
|
|||
|
||||
inline LIns* CseFilter::findImmD(uint64_t a, uint32_t &k)
|
||||
{
|
||||
NLKind nlkind = LInsImmD;
|
||||
NLKind nlkind = NLImmD;
|
||||
const uint32_t bitmask = m_capNL[nlkind] - 1;
|
||||
k = hashImmQorD(a) & bitmask;
|
||||
uint32_t n = 1;
|
||||
|
@ -2357,7 +2381,7 @@ namespace nanojit
|
|||
|
||||
inline LIns* CseFilter::find1(LOpcode op, LIns* a, uint32_t &k)
|
||||
{
|
||||
NLKind nlkind = LIns1;
|
||||
NLKind nlkind = NL1;
|
||||
const uint32_t bitmask = m_capNL[nlkind] - 1;
|
||||
k = hash1(op, a) & bitmask;
|
||||
uint32_t n = 1;
|
||||
|
@ -2381,7 +2405,7 @@ namespace nanojit
|
|||
|
||||
inline LIns* CseFilter::find2(LOpcode op, LIns* a, LIns* b, uint32_t &k)
|
||||
{
|
||||
NLKind nlkind = LIns2;
|
||||
NLKind nlkind = NL2;
|
||||
const uint32_t bitmask = m_capNL[nlkind] - 1;
|
||||
k = hash2(op, a, b) & bitmask;
|
||||
uint32_t n = 1;
|
||||
|
@ -2405,7 +2429,7 @@ namespace nanojit
|
|||
|
||||
inline LIns* CseFilter::find3(LOpcode op, LIns* a, LIns* b, LIns* c, uint32_t &k)
|
||||
{
|
||||
NLKind nlkind = LIns3;
|
||||
NLKind nlkind = NL3;
|
||||
const uint32_t bitmask = m_capNL[nlkind] - 1;
|
||||
k = hash3(op, a, b, c) & bitmask;
|
||||
uint32_t n = 1;
|
||||
|
@ -2466,7 +2490,7 @@ namespace nanojit
|
|||
|
||||
inline LIns* CseFilter::findCall(const CallInfo *ci, uint32_t argc, LIns* args[], uint32_t &k)
|
||||
{
|
||||
NLKind nlkind = LInsCall;
|
||||
NLKind nlkind = NLCall;
|
||||
const uint32_t bitmask = m_capNL[nlkind] - 1;
|
||||
k = hashCall(ci, argc, args) & bitmask;
|
||||
uint32_t n = 1;
|
||||
|
@ -2496,10 +2520,19 @@ namespace nanojit
|
|||
LIns* CseFilter::insImmI(int32_t imm)
|
||||
{
|
||||
uint32_t k;
|
||||
LIns* ins = findImmI(imm, k);
|
||||
if (!ins) {
|
||||
ins = out->insImmI(imm);
|
||||
addNL(LInsImmI, ins, k);
|
||||
LIns* ins;
|
||||
if (0 <= imm && imm < int32_t(m_capNL[NLImmISmall])) {
|
||||
ins = findImmISmall(imm, k);
|
||||
if (!ins) {
|
||||
ins = out->insImmI(imm);
|
||||
addNLImmISmall(ins, k);
|
||||
}
|
||||
} else {
|
||||
ins = findImmILarge(imm, k);
|
||||
if (!ins) {
|
||||
ins = out->insImmI(imm);
|
||||
addNL(NLImmILarge, ins, k);
|
||||
}
|
||||
}
|
||||
// We assume that downstream stages do not modify the instruction, so
|
||||
// that we can insert 'ins' into slot 'k'. Check this.
|
||||
|
@ -2514,7 +2547,7 @@ namespace nanojit
|
|||
LIns* ins = findImmQ(q, k);
|
||||
if (!ins) {
|
||||
ins = out->insImmQ(q);
|
||||
addNL(LInsImmQ, ins, k);
|
||||
addNL(NLImmQ, ins, k);
|
||||
}
|
||||
NanoAssert(ins->isop(LIR_immq) && ins->immQ() == q);
|
||||
return ins;
|
||||
|
@ -2534,7 +2567,7 @@ namespace nanojit
|
|||
LIns* ins = findImmD(u.u64, k);
|
||||
if (!ins) {
|
||||
ins = out->insImmD(d);
|
||||
addNL(LInsImmD, ins, k);
|
||||
addNL(NLImmD, ins, k);
|
||||
}
|
||||
NanoAssert(ins->isop(LIR_immd) && ins->immDasQ() == u.u64);
|
||||
return ins;
|
||||
|
@ -2555,7 +2588,7 @@ namespace nanojit
|
|||
ins = find1(op, a, k);
|
||||
if (!ins) {
|
||||
ins = out->ins1(op, a);
|
||||
addNL(LIns1, ins, k);
|
||||
addNL(NL1, ins, k);
|
||||
}
|
||||
} else {
|
||||
ins = out->ins1(op, a);
|
||||
|
@ -2572,7 +2605,7 @@ namespace nanojit
|
|||
ins = find2(op, a, b, k);
|
||||
if (!ins) {
|
||||
ins = out->ins2(op, a, b);
|
||||
addNL(LIns2, ins, k);
|
||||
addNL(NL2, ins, k);
|
||||
} else if (ins->isCmp()) {
|
||||
if (knownCmpValues.containsKey(ins)) {
|
||||
// We've seen this comparison before, and it was previously
|
||||
|
@ -2594,7 +2627,7 @@ namespace nanojit
|
|||
LIns* ins = find3(op, a, b, c, k);
|
||||
if (!ins) {
|
||||
ins = out->ins3(op, a, b, c);
|
||||
addNL(LIns3, ins, k);
|
||||
addNL(NL3, ins, k);
|
||||
}
|
||||
NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c);
|
||||
return ins;
|
||||
|
@ -2694,7 +2727,7 @@ namespace nanojit
|
|||
ins = find1(op, c, k);
|
||||
if (!ins) {
|
||||
ins = out->insGuard(op, c, gr);
|
||||
addNL(LIns1, ins, k);
|
||||
addNL(NL1, ins, k);
|
||||
}
|
||||
// After this guard, we know that 'c's result was true (if
|
||||
// op==LIR_xf) or false (if op==LIR_xt), else we would have
|
||||
|
@ -2719,7 +2752,7 @@ namespace nanojit
|
|||
LIns* ins = find2(op, a, b, k);
|
||||
if (!ins) {
|
||||
ins = out->insGuardXov(op, a, b, gr);
|
||||
addNL(LIns2, ins, k);
|
||||
addNL(NL2, ins, k);
|
||||
}
|
||||
NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
|
||||
return ins;
|
||||
|
@ -2737,7 +2770,7 @@ namespace nanojit
|
|||
ins = findCall(ci, argc, args, k);
|
||||
if (!ins) {
|
||||
ins = out->insCall(ci, args);
|
||||
addNL(LInsCall, ins, k);
|
||||
addNL(NLCall, ins, k);
|
||||
}
|
||||
} else {
|
||||
// We only need to worry about aliasing if !ci->_isPure.
|
||||
|
|
|
@ -1924,18 +1924,19 @@ namespace nanojit
|
|||
// We divide instruction kinds into groups. LIns0 isn't present
|
||||
// because we don't need to record any 0-ary instructions. Loads
|
||||
// aren't here, they're handled separately.
|
||||
LInsImmI = 0,
|
||||
LInsImmQ = 1, // only occurs on 64-bit platforms
|
||||
LInsImmD = 2,
|
||||
LIns1 = 3,
|
||||
LIns2 = 4,
|
||||
LIns3 = 5,
|
||||
LInsCall = 6,
|
||||
NLImmISmall = 0,
|
||||
NLImmILarge = 1,
|
||||
NLImmQ = 2, // only occurs on 64-bit platforms
|
||||
NLImmD = 3,
|
||||
NL1 = 4,
|
||||
NL2 = 5,
|
||||
NL3 = 6,
|
||||
NLCall = 7,
|
||||
|
||||
LInsFirst = 0,
|
||||
LInsLast = 6,
|
||||
NLFirst = 0,
|
||||
NLLast = 7,
|
||||
// Need a value after "last" to outsmart compilers that insist last+1 is impossible.
|
||||
LInsInvalid = 7
|
||||
NLInvalid = 8
|
||||
};
|
||||
#define nextNLKind(kind) NLKind(kind+1)
|
||||
|
||||
|
@ -1948,11 +1949,11 @@ namespace nanojit
|
|||
// Don't start m_capNL too small, or we'll waste time growing and rehashing.
|
||||
// Don't start m_capNL too large, will waste memory.
|
||||
//
|
||||
LIns** m_listNL[LInsLast + 1];
|
||||
uint32_t m_capNL[ LInsLast + 1];
|
||||
uint32_t m_usedNL[LInsLast + 1];
|
||||
LIns** m_listNL[NLLast + 1];
|
||||
uint32_t m_capNL[ NLLast + 1];
|
||||
uint32_t m_usedNL[NLLast + 1];
|
||||
typedef uint32_t (CseFilter::*find_t)(LIns*);
|
||||
find_t m_findNL[LInsLast + 1];
|
||||
find_t m_findNL[NLLast + 1];
|
||||
|
||||
// Similarly, for loads, there is one table for each CseAcc. A CseAcc
|
||||
// is like a normal access region, but there are two extra possible
|
||||
|
@ -2021,7 +2022,8 @@ namespace nanojit
|
|||
static uint32_t hashCall(const CallInfo *call, uint32_t argc, LIns* args[]);
|
||||
|
||||
// These versions are used before an LIns has been created.
|
||||
LIns* findImmI(int32_t a, uint32_t &k);
|
||||
LIns* findImmISmall(int32_t a, uint32_t &k);
|
||||
LIns* findImmILarge(int32_t a, uint32_t &k);
|
||||
#ifdef NANOJIT_64BIT
|
||||
LIns* findImmQ(uint64_t a, uint32_t &k);
|
||||
#endif
|
||||
|
@ -2036,7 +2038,8 @@ namespace nanojit
|
|||
// These versions are used after an LIns has been created; they are
|
||||
// used for rehashing after growing. They just call onto the
|
||||
// multi-arg versions above.
|
||||
uint32_t findImmI(LIns* ins);
|
||||
uint32_t findImmISmall(LIns* ins);
|
||||
uint32_t findImmILarge(LIns* ins);
|
||||
#ifdef NANOJIT_64BIT
|
||||
uint32_t findImmQ(LIns* ins);
|
||||
#endif
|
||||
|
@ -2050,6 +2053,7 @@ namespace nanojit
|
|||
void growNL(NLKind kind);
|
||||
void growL(CseAcc cseAcc);
|
||||
|
||||
void addNLImmISmall(LIns* ins, uint32_t k);
|
||||
// 'k' is the index found by findXYZ().
|
||||
void addNL(NLKind kind, LIns* ins, uint32_t k);
|
||||
void addL(LIns* ins, uint32_t k);
|
||||
|
@ -2096,7 +2100,6 @@ namespace nanojit
|
|||
verbose_only(LInsPrinter* printer;)
|
||||
|
||||
int32_t insCount();
|
||||
size_t byteCount();
|
||||
|
||||
// stats
|
||||
struct
|
||||
|
@ -2123,7 +2126,6 @@ namespace nanojit
|
|||
Allocator& _allocator;
|
||||
uintptr_t _unused; // next unused instruction slot in the current LIR chunk
|
||||
uintptr_t _limit; // one past the last usable byte of the current LIR chunk
|
||||
size_t _bytesAllocated;
|
||||
};
|
||||
|
||||
class LirBufWriter : public LirWriter
|
||||
|
|
|
@ -115,16 +115,16 @@ namespace nanojit
|
|||
}
|
||||
inline void Assembler::SUB(Register rs1, Register rs2, Register rd) {
|
||||
IntegerOperation(rs1, rs2, rd, 0x4, "sub");
|
||||
};
|
||||
}
|
||||
inline void Assembler::SUBCC(Register rs1, Register rs2, Register rd) {
|
||||
IntegerOperation(rs1, rs2, rd, 0x14, "subcc");
|
||||
};
|
||||
}
|
||||
inline void Assembler::SUBI(Register rs1, int32_t simm13, Register rd) {
|
||||
IntegerOperationI(rs1, simm13, rd, 0x4, "sub");
|
||||
}
|
||||
inline void Assembler::XOR(Register rs1, Register rs2, Register rd) {
|
||||
IntegerOperation(rs1, rs2, rd, 0x3, "xor");
|
||||
};
|
||||
}
|
||||
|
||||
inline void Assembler::Bicc(int32_t a, int32_t dsp22, int32_t cond, const char *opcode) {
|
||||
Format_2_2(a, cond, 0x2, dsp22);
|
||||
|
@ -208,6 +208,12 @@ namespace nanojit
|
|||
inline void Assembler::FITOD(Register rs2, Register rd) {
|
||||
FloatOperation(G0, rs2, rd, 0xc8, "fitod");
|
||||
}
|
||||
inline void Assembler::FDTOS(Register rs2, Register rd) {
|
||||
FloatOperation(G0, rs2, rd, 0xc6, "fdtos");
|
||||
}
|
||||
inline void Assembler::FSTOD(Register rs2, Register rd) {
|
||||
FloatOperation(G0, rs2, rd, 0xc9, "fstod");
|
||||
}
|
||||
|
||||
inline void Assembler::JMPL(Register rs1, Register rs2, Register rd) {
|
||||
Format_3_1(2, rd, 0x38, rs1, 0, rs2);
|
||||
|
@ -238,6 +244,15 @@ namespace nanojit
|
|||
LoadOperationI(rs1, simm13, rd, 0x20, "ldf");
|
||||
}
|
||||
|
||||
inline void Assembler::LDF32(Register rs1, int32_t immI, Register rd) {
|
||||
if (isIMM13(immI)) {
|
||||
LDFI(rs1, immI, rd);
|
||||
} else {
|
||||
LDF(rs1, L0, rd);
|
||||
SET32(immI, L0);
|
||||
}
|
||||
}
|
||||
|
||||
inline void Assembler::LDDF32(Register rs1, int32_t immI, Register rd) {
|
||||
if (isIMM13(immI+4)) {
|
||||
LDFI(rs1, immI+4, rd + 1);
|
||||
|
@ -266,6 +281,22 @@ namespace nanojit
|
|||
}
|
||||
}
|
||||
|
||||
inline void Assembler::LDSB(Register rs1, Register rs2, Register rd) {
|
||||
LoadOperation(rs1, rs2, rd, 0x9, "ldsb");
|
||||
}
|
||||
inline void Assembler::LDSBI(Register rs1, int32_t simm13, Register rd) {
|
||||
LoadOperationI(rs1, simm13, rd, 0x9, "ldsb");
|
||||
}
|
||||
|
||||
inline void Assembler::LDSB32(Register rs1, int32_t immI, Register rd) {
|
||||
if (isIMM13(immI)) {
|
||||
LDSBI(rs1, immI, rd);
|
||||
} else {
|
||||
LDSB(rs1, L0, rd);
|
||||
SET32(immI, L0);
|
||||
}
|
||||
}
|
||||
|
||||
inline void Assembler::LDUH(Register rs1, Register rs2, Register rd) {
|
||||
LoadOperation(rs1, rs2, rd, 0x2, "lduh");
|
||||
}
|
||||
|
@ -282,6 +313,22 @@ namespace nanojit
|
|||
}
|
||||
}
|
||||
|
||||
inline void Assembler::LDSH(Register rs1, Register rs2, Register rd) {
|
||||
LoadOperation(rs1, rs2, rd, 0xa, "ldsh");
|
||||
}
|
||||
inline void Assembler::LDSHI(Register rs1, int32_t simm13, Register rd) {
|
||||
LoadOperationI(rs1, simm13, rd, 0xa, "ldsh");
|
||||
}
|
||||
|
||||
inline void Assembler::LDSH32(Register rs1, int32_t immI, Register rd) {
|
||||
if (isIMM13(immI)) {
|
||||
LDSHI(rs1, immI, rd);
|
||||
} else {
|
||||
LDSH(rs1, L0, rd);
|
||||
SET32(immI, L0);
|
||||
}
|
||||
}
|
||||
|
||||
inline void Assembler::LDSW(Register rs1, Register rs2, Register rd) {
|
||||
LoadOperation(rs1, rs2, rd, 0x8, "ldsw");
|
||||
}
|
||||
|
@ -475,6 +522,22 @@ namespace nanojit
|
|||
}
|
||||
}
|
||||
|
||||
inline void Assembler::STH(Register rd, Register rs1, Register rs2) {
|
||||
Store(rd, rs1, rs2, 0x6, "sth");
|
||||
}
|
||||
inline void Assembler::STHI(Register rd, int32_t simm13, Register rs1) {
|
||||
StoreI(rd, simm13, rs1, 0x6, "sth");
|
||||
}
|
||||
|
||||
inline void Assembler::STH32(Register rd, int32_t immI, Register rs1) {
|
||||
if (isIMM13(immI)) {
|
||||
STHI(rd, immI, rs1);
|
||||
} else {
|
||||
STH(rd, L0, rs1);
|
||||
SET32(immI, L0);
|
||||
}
|
||||
}
|
||||
|
||||
inline void Assembler::STB(Register rd, Register rs1, Register rs2) {
|
||||
Store(rd, rs1, rs2, 0x5, "stb");
|
||||
}
|
||||
|
@ -753,11 +816,9 @@ namespace nanojit
|
|||
switch (op) {
|
||||
case LIR_sti:
|
||||
case LIR_sti2c:
|
||||
case LIR_sti2s:
|
||||
// handled by mainline code below for now
|
||||
break;
|
||||
case LIR_sti2s:
|
||||
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
|
||||
return;
|
||||
default:
|
||||
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
|
||||
return;
|
||||
|
@ -775,6 +836,9 @@ namespace nanojit
|
|||
case LIR_sti2c:
|
||||
STB32(L2, dr, rb);
|
||||
break;
|
||||
case LIR_sti2s:
|
||||
STH32(L2, dr, rb);
|
||||
break;
|
||||
}
|
||||
SET32(c, L2);
|
||||
}
|
||||
|
@ -797,6 +861,9 @@ namespace nanojit
|
|||
case LIR_sti2c:
|
||||
STB32(ra, dr, rb);
|
||||
break;
|
||||
case LIR_sti2s:
|
||||
STH32(ra, dr, rb);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -817,65 +884,75 @@ namespace nanojit
|
|||
{
|
||||
switch (ins->opcode()) {
|
||||
case LIR_ldd:
|
||||
case LIR_ldf2d:
|
||||
// handled by mainline code below for now
|
||||
break;
|
||||
case LIR_ldf2d:
|
||||
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
|
||||
return;
|
||||
default:
|
||||
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
|
||||
return;
|
||||
}
|
||||
|
||||
underrunProtect(72);
|
||||
underrunProtect(48);
|
||||
LIns* base = ins->oprnd1();
|
||||
int db = ins->disp();
|
||||
Register rr = ins->deprecated_getReg();
|
||||
Register rb = getBaseReg(base, db, GpRegs);
|
||||
|
||||
int dr = deprecated_disp(ins);
|
||||
Register rb;
|
||||
if (base->isop(LIR_allocp)) {
|
||||
rb = FP;
|
||||
db += findMemFor(base);
|
||||
} else {
|
||||
rb = findRegFor(base, GpRegs);
|
||||
}
|
||||
ins->clearReg();
|
||||
if (ins->isInReg()) {
|
||||
Register rr = ins->getReg();
|
||||
asm_maybe_spill(ins, false);
|
||||
NanoAssert(rmask(rr) & FpRegs);
|
||||
|
||||
// don't use an fpu reg to simply load & store the value.
|
||||
if (dr)
|
||||
asm_mmq(FP, dr, rb, db);
|
||||
|
||||
deprecated_freeRsrcOf(ins);
|
||||
|
||||
if (rr != deprecated_UnknownReg)
|
||||
{
|
||||
NanoAssert(rmask(rr)&FpRegs);
|
||||
_allocator.retire(rr);
|
||||
if (ins->opcode() == LIR_ldd) {
|
||||
LDDF32(rb, db, rr);
|
||||
} else {
|
||||
FSTOD(F28, rr);
|
||||
LDF32(rb, db, F28);
|
||||
}
|
||||
} else {
|
||||
NanoAssert(ins->isInAr());
|
||||
int dr = arDisp(ins);
|
||||
|
||||
if (ins->opcode() == LIR_ldd) {
|
||||
// don't use an fpu reg to simply load & store the value.
|
||||
asm_mmq(FP, dr, rb, db);
|
||||
} else {
|
||||
STDF32(F28, dr, FP);
|
||||
FSTOD(F28, F28);
|
||||
LDF32(rb, db, F28);
|
||||
}
|
||||
}
|
||||
|
||||
freeResourcesOf(ins);
|
||||
}
|
||||
|
||||
void Assembler::asm_store64(LOpcode op, LIns* value, int dr, LIns* base)
|
||||
{
|
||||
switch (op) {
|
||||
case LIR_std:
|
||||
case LIR_std2f:
|
||||
// handled by mainline code below for now
|
||||
break;
|
||||
case LIR_std2f:
|
||||
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
|
||||
return;
|
||||
default:
|
||||
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
|
||||
return;
|
||||
}
|
||||
|
||||
underrunProtect(48);
|
||||
Register rb = getBaseReg(base, dr, GpRegs);
|
||||
if (op == LIR_std2f) {
|
||||
Register rv = ( !value->isInReg()
|
||||
? findRegFor(value, FpRegs)
|
||||
: value->getReg() );
|
||||
NanoAssert(rmask(rv) & FpRegs);
|
||||
STF32(F28, dr, rb);
|
||||
FDTOS(rv, F28);
|
||||
return;
|
||||
}
|
||||
|
||||
if (value->isImmD())
|
||||
{
|
||||
// if a constant 64-bit value just store it now rather than
|
||||
// generating a pointless store/load/store sequence
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
STW32(L2, dr+4, rb);
|
||||
SET32(value->immDlo(), L2);
|
||||
STW32(L2, dr, rb);
|
||||
|
@ -895,30 +972,15 @@ namespace nanojit
|
|||
// c) maybe its a double just being stored. oh well.
|
||||
|
||||
int da = findMemFor(value);
|
||||
Register rb;
|
||||
if (base->isop(LIR_allocp)) {
|
||||
rb = FP;
|
||||
dr += findMemFor(base);
|
||||
} else {
|
||||
rb = findRegFor(base, GpRegs);
|
||||
}
|
||||
asm_mmq(rb, dr, FP, da);
|
||||
return;
|
||||
}
|
||||
|
||||
Register rb;
|
||||
if (base->isop(LIR_allocp)) {
|
||||
rb = FP;
|
||||
dr += findMemFor(base);
|
||||
} else {
|
||||
rb = findRegFor(base, GpRegs);
|
||||
}
|
||||
|
||||
// if value already in a reg, use that, otherwise
|
||||
// try to get it into XMM regs before FPU regs.
|
||||
// get it into FPU regs.
|
||||
Register rv = ( !value->isInReg()
|
||||
? findRegFor(value, FpRegs)
|
||||
: value->deprecated_getReg() );
|
||||
: value->getReg() );
|
||||
|
||||
STDF32(rv, dr, rb);
|
||||
}
|
||||
|
@ -1244,9 +1306,11 @@ namespace nanojit
|
|||
LDSW32(ra, d, rr);
|
||||
break;
|
||||
case LIR_ldc2i:
|
||||
LDSB32(ra, d, rr);
|
||||
break;
|
||||
case LIR_lds2i:
|
||||
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
|
||||
return;
|
||||
LDSH32(ra, d, rr);
|
||||
break;
|
||||
default:
|
||||
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
|
||||
return;
|
||||
|
|
|
@ -77,7 +77,7 @@ namespace nanojit
|
|||
#define NJ_MAX_PARAMETERS 1
|
||||
|
||||
#define NJ_JTBL_SUPPORTED 0
|
||||
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
|
||||
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
|
||||
#define NJ_F2I_SUPPORTED 1
|
||||
#define NJ_SOFTFLOAT_SUPPORTED 0
|
||||
#define NJ_DIVI_SUPPORTED 0
|
||||
|
@ -296,7 +296,7 @@ namespace nanojit
|
|||
Format_3A(2, rd, op3, (cond & 0xF) << 14 | (opf_cc & 0x7) << 11 | (opf_low & 0x3F) << 5 | _reg_(rs2)); \
|
||||
} \
|
||||
void IntegerOperation(Register rs1, Register rs2, Register rd, int32_t op3, const char *opcode); \
|
||||
void Assembler::IntegerOperationI(Register rs1, int32_t simm13, Register rd, int32_t op3, const char *opcode); \
|
||||
void IntegerOperationI(Register rs1, int32_t simm13, Register rd, int32_t op3, const char *opcode); \
|
||||
void FloatOperation(Register rs1, Register rs2, Register rd, int32_t op3, const char *opcode); \
|
||||
void Bicc(int32_t a, int32_t dsp22, int32_t cond, const char *opcode); \
|
||||
void FBfcc(int32_t a, int32_t dsp22, int32_t cond, const char *opcode); \
|
||||
|
@ -308,7 +308,7 @@ namespace nanojit
|
|||
void ShiftOperation(Register rs1, Register rs2, Register rd, int32_t op3, const char* opcode); \
|
||||
void ShiftOperationI(Register rs1, int32_t shcnt32, Register rd, int32_t op3, const char* opcode); \
|
||||
void Store(Register rd, Register rs1, Register rs2, int32_t op3, const char* opcode); \
|
||||
void Assembler::StoreI(Register rd, int32_t simm13, Register rs1, int32_t op3, const char* opcode); \
|
||||
void StoreI(Register rd, int32_t simm13, Register rs1, int32_t op3, const char* opcode); \
|
||||
void ADD(Register rs1, Register rs2, Register rd); \
|
||||
void ADDCC(Register rs1, Register rs2, Register rd); \
|
||||
void AND(Register rs1, Register rs2, Register rd); \
|
||||
|
@ -355,17 +355,26 @@ namespace nanojit
|
|||
void FMOVD(Register rs2, Register rd); \
|
||||
void FNEGD(Register rs2, Register rd); \
|
||||
void FITOD(Register rs2, Register rd); \
|
||||
void FDTOS(Register rs2, Register rd); \
|
||||
void FSTOD(Register rs2, Register rd); \
|
||||
void JMPL(Register rs1, Register rs2, Register rd); \
|
||||
void JMPLI(Register rs1, int32_t simm13, Register rd); \
|
||||
void LDF(Register rs1, Register rs2, Register rd); \
|
||||
void LDFI(Register rs1, int32_t simm13, Register rd); \
|
||||
void LDF32(Register rs1, int32_t immI, Register rd); \
|
||||
void LDDF32(Register rs1, int32_t immI, Register rd); \
|
||||
void LDUB(Register rs1, Register rs2, Register rd); \
|
||||
void LDUBI(Register rs1, int32_t simm13, Register rd); \
|
||||
void LDUB32(Register rs1, int32_t immI, Register rd); \
|
||||
void LDSB(Register rs1, Register rs2, Register rd); \
|
||||
void LDSBI(Register rs1, int32_t simm13, Register rd); \
|
||||
void LDSB32(Register rs1, int32_t immI, Register rd); \
|
||||
void LDUH(Register rs1, Register rs2, Register rd); \
|
||||
void LDUHI(Register rs1, int32_t simm13, Register rd); \
|
||||
void LDUH32(Register rs1, int32_t immI, Register rd); \
|
||||
void LDSH(Register rs1, Register rs2, Register rd); \
|
||||
void LDSHI(Register rs1, int32_t simm13, Register rd); \
|
||||
void LDSH32(Register rs1, int32_t immI, Register rd); \
|
||||
void LDSW(Register rs1, Register rs2, Register rd); \
|
||||
void LDSWI(Register rs1, int32_t simm13, Register rd); \
|
||||
void LDSW32(Register rs1, int32_t immI, Register rd); \
|
||||
|
@ -428,6 +437,9 @@ namespace nanojit
|
|||
void STW(Register rd, Register rs1, Register rs2); \
|
||||
void STWI(Register rd, int32_t simm13, Register rs1); \
|
||||
void STW32(Register rd, int32_t immI, Register rs1); \
|
||||
void STH(Register rd, Register rs1, Register rs2); \
|
||||
void STHI(Register rd, int32_t simm13, Register rs1); \
|
||||
void STH32(Register rd, int32_t immI, Register rs1); \
|
||||
void STB(Register rd, Register rs1, Register rs2); \
|
||||
void STBI(Register rd, int32_t simm13, Register rs1); \
|
||||
void STB32(Register rd, int32_t immI, Register rs1); \
|
||||
|
|
Загрузка…
Ссылка в новой задаче