Nicholas Nethercote 2010-11-10 15:52:26 -08:00
Parents 2f2dfb62c3 8910109b16
Commit 39e1d582a2
9 changed files with 292 additions and 160 deletions

View file

@@ -1 +1 @@
f348fd5b02118c7151d991f51d76abe69976952e
04d7771f3f85877cf12395ffecfc4f2f6d4a0b50

View file: Assembler.cpp

@@ -70,8 +70,7 @@ namespace nanojit
* - merging paths ( build a graph? ), possibly use external rep to drive codegen
*/
Assembler::Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc, AvmCore* core, LogControl* logc, const Config& config)
: codeList(NULL)
, alloc(alloc)
: alloc(alloc)
, _codeAlloc(codeAlloc)
, _dataAlloc(dataAlloc)
, _thisfrag(NULL)
@@ -82,6 +81,7 @@ namespace nanojit
#if NJ_USES_IMMD_POOL
, _immDPool(alloc)
#endif
, codeList(NULL)
, _epilogue(NULL)
, _err(None)
#if PEDANTIC
@@ -1125,6 +1125,7 @@ namespace nanojit
_codeAlloc.free(exitStart, exitEnd);
_codeAlloc.free(codeStart, codeEnd);
codeList = NULL;
_codeAlloc.markAllExec(); // expensive but safe; we mark all code pages R-X
}
void Assembler::endAssembly(Fragment* frag)
@@ -1162,6 +1163,9 @@ namespace nanojit
verbose_only( codeBytes -= (_nIns - codeStart) * sizeof(NIns); )
#endif
// note: the code pages are no longer writable from this point onwards
_codeAlloc.markExec(codeList);
// at this point all our new code is in the d-cache and not the i-cache,
// so flush the i-cache on CPUs that need it.
CodeAlloc::flushICache(codeList);
@@ -1488,13 +1492,10 @@ namespace nanojit
{
size_t delta = (uintptr_t)priorIns - (uintptr_t)_nIns; // # bytes that have been emitted since last go-around
if (codeList) {
codeList = codeList;
}
// if there is no codeList then we know priorIns and _nIns are on the same page; otherwise make sure priorIns was not in the previous code block
if (!codeList || !codeList->isInBlock(priorIns)) {
NanoAssert(delta < VMPI_getVMPageSize()); // sanity check
nopInsertTrigger -= delta;
nopInsertTrigger -= (int32_t) delta;
if (nopInsertTrigger < 0)
{
nopInsertTrigger = noiseForNopInsertion(_noise);
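A note on the two calls above: endAssembly() flips the finished chunk list to R-X via markExec() and then flushes the i-cache, while the cleanup path uses the blanket markAllExec() (expensive but safe). As a minimal sketch of what such page-permission and cache hooks boil down to, assuming a POSIX target and GCC/Clang builtins (the real work happens behind nanojit's embedder SPI, and makeExecutable/flushInstructionCache are hypothetical names):

#include <sys/mman.h>
#include <cstddef>

// Flip a page-aligned chunk to read+execute once codegen is done; after
// this the chunk is no longer writable, matching the comment above.
static bool makeExecutable(void* chunk, size_t bytes) {
    return mprotect(chunk, bytes, PROT_READ | PROT_EXEC) == 0;
}

// Freshly generated code sits in the d-cache; publish it to the i-cache
// before jumping into the fragment (a no-op on x86, needed on ARM/SPARC).
static void flushInstructionCache(char* start, char* end) {
    __builtin___clear_cache(start, end);
}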

View file: Assembler.h

@@ -313,8 +313,6 @@ namespace nanojit
debug_only( void resourceConsistencyCheck(); )
debug_only( void registerConsistencyCheck(); )
CodeList* codeList; // finished blocks of code.
private:
void gen(LirFilter* toCompile);
NIns* genPrologue();
@@ -401,6 +399,7 @@ namespace nanojit
// temporarily swap all the code/exit variables below (using
// swapCodeChunks()). Afterwards we swap them all back and set
// _inExit to false again.
CodeList* codeList; // finished blocks of code.
bool _inExit, vpad2[3];
NIns *codeStart, *codeEnd; // current normal code chunk
NIns *exitStart, *exitEnd; // current exit code chunk

View file: CodeAlloc.cpp

@@ -128,28 +128,20 @@ namespace nanojit
}
void CodeAlloc::alloc(NIns* &start, NIns* &end) {
// Reuse a block if possible.
if (availblocks) {
markBlockWrite(availblocks);
CodeList* b = removeBlock(availblocks);
b->isFree = false;
start = b->start();
end = b->end;
if (verbose)
avmplus::AvmLog("alloc %p-%p %d\n", start, end, int(end-start));
return;
if (!availblocks) {
// no free mem, get more
addMem();
}
// no suitable block found, get more memory
void *mem = allocCodeChunk(bytesPerAlloc); // allocations never fail
totalAllocated += bytesPerAlloc;
NanoAssert(mem != NULL); // see allocCodeChunk contract in CodeAlloc.h
_nvprof("alloc page", uintptr_t(mem)>>12);
CodeList* b = addMem(mem, bytesPerAlloc);
// grab a block
markBlockWrite(availblocks);
CodeList* b = removeBlock(availblocks);
b->isFree = false;
start = b->start();
end = b->end;
if (verbose)
avmplus::AvmLog("alloc %p-%p %d\n", start, end, int(end-start));
avmplus::AvmLog("CodeAlloc(%p).alloc %p-%p %d\n", this, start, end, int(end-start));
debug_only(sanity_check();)
}
void CodeAlloc::free(NIns* start, NIns *end) {
@@ -349,11 +341,16 @@ extern "C" void sync_instruction_memory(caddr_t v, u_int len);
blocks = b;
}
CodeList* CodeAlloc::addMem(void *mem, size_t bytes) {
void CodeAlloc::addMem() {
void *mem = allocCodeChunk(bytesPerAlloc); // allocations never fail
totalAllocated += bytesPerAlloc;
NanoAssert(mem != NULL); // see allocCodeChunk contract in CodeAlloc.h
_nvprof("alloc page", uintptr_t(mem)>>12);
CodeList* b = (CodeList*)mem;
b->lower = 0;
b->end = (NIns*) (uintptr_t(mem) + bytes - sizeofMinBlock);
b->next = 0;
b->end = (NIns*) (uintptr_t(mem) + bytesPerAlloc - sizeofMinBlock);
b->isFree = true;
// create a tiny terminator block, add to fragmented list, this way
@@ -370,7 +367,8 @@ extern "C" void sync_instruction_memory(caddr_t v, u_int len);
// add terminator to heapblocks list so we can track whole blocks
terminator->next = heapblocks;
heapblocks = terminator;
return b;
addBlock(availblocks, b); // add to free list
}
CodeList* CodeAlloc::getBlock(NIns* start, NIns* end) {
@@ -509,6 +507,15 @@ extern "C" void sync_instruction_memory(caddr_t v, u_int len);
}
#endif
// Loop through a list of blocks marking the chunks executable. If we encounter
// multiple blocks in the same chunk, only the first block will cause the
// chunk to become executable; the other calls will no-op (isExec flag checked)
void CodeAlloc::markExec(CodeList* &blocks) {
for (CodeList *b = blocks; b != 0; b = b->next) {
markChunkExec(b->terminator);
}
}
// Variant of markExec(CodeList*) that walks all heapblocks (i.e. chunks) marking
// each one executable. On systems where bytesPerAlloc is low (i.e. the list
// has many elements) this can be expensive.
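The shape of the alloc()/addMem() refactor above: addMem() now both obtains a fresh chunk and pushes its single free block onto availblocks, so alloc() has exactly one block-grabbing path. A self-contained sketch of that control flow, with malloc and 4096 as stand-ins for allocCodeChunk and bytesPerAlloc (the real code also sets up terminator blocks, permissions, and coalescing metadata):

#include <cstdlib>

struct Block { Block* next; };

static Block* availblocks = nullptr;   // free list, refilled one chunk at a time

static void refillFromNewChunk() {     // plays the role of CodeAlloc::addMem()
    // stand-in for allocCodeChunk, whose contract is to never return NULL
    Block* b = static_cast<Block*>(std::malloc(4096));
    b->next = availblocks;             // the whole chunk becomes one free block
    availblocks = b;
}

static Block* allocBlock() {           // plays the role of CodeAlloc::alloc()
    if (!availblocks)
        refillFromNewChunk();          // slow path: get more memory
    Block* b = availblocks;            // common path: plain free-list pop
    availblocks = b->next;
    return b;
}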

View file: CodeAlloc.h

@@ -43,9 +43,9 @@
namespace nanojit
{
/**
* CodeList is a linked list of non-contiguous blocks of code. Clients use CodeList*
* to point to a list, and each CodeList instance tracks a single contiguous
* block of code.
* CodeList is a single block of code. The next field is used to
* form linked lists of non-contiguous blocks of code. Clients use CodeList*
* to point to the first block in a list.
*/
class CodeList
{
@@ -95,13 +95,24 @@ namespace nanojit
};
/**
* Code memory allocator.
* Long lived manager for many code blocks,
* Code memory allocator is a long lived manager for many code blocks that
* manages interaction with an underlying code memory allocator,
* setting page permissions, api's for allocating and freeing
* sets page permissions. CodeAlloc provides APIs for allocating and freeing
* individual blocks of code memory (for methods, stubs, or compiled
* traces), and also static functions for managing lists of allocated
* code.
* traces), static functions for managing lists of allocated code, and has
* a few pure virtual methods that embedders must implement to provide
* memory to the allocator.
*
* A "chunk" is a region of memory obtained from allocCodeChunk; it must
* be page aligned and be a multiple of the system page size.
*
* A "block" is a region of memory within a chunk. It can be arbitrarily
* sized and aligned, but is always contained within a single chunk.
* class CodeList represents one block; the members of CodeList track the
* extent of the block and support creating lists of blocks.
*
* The allocator coalesces free blocks when it can, in free(), but never
* coalesces chunks.
*/
class CodeAlloc
{
@@ -133,7 +144,7 @@ namespace nanojit
static CodeList* getBlock(NIns* start, NIns* end);
/** add raw memory to the free list */
CodeList* addMem(void* mem, size_t bytes);
void addMem();
/** make sure all the higher/lower pointers are correct for every block */
void sanity_check();
@@ -142,9 +153,9 @@ namespace nanojit
CodeList* firstBlock(CodeList* term);
//
// CodeAlloc's SPI. Implementations must be defined by nanojit embedder.
// allocation failures should cause an exception or longjmp; nanojit
// intentionally does not check for null.
// CodeAlloc's SPI (Service Provider Interface). Implementations must be
// defined by nanojit embedder. Allocation failures should cause an exception
// or longjmp; nanojit intentionally does not check for null.
//
/** allocate nbytes of memory to hold code. Never return null! */
@@ -203,9 +214,12 @@ namespace nanojit
/** print out stats about heap usage */
void logStats();
/** protect all code in this code alloc */
/** protect all code managed by this CodeAlloc */
void markAllExec();
/** protect all mem in the block list */
void markExec(CodeList* &blocks);
/** protect an entire chunk */
void markChunkExec(CodeList* term);
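To make the chunk/block terminology in the comment above concrete, here is a toy model of the containment invariant (fields simplified from the real CodeList, which also carries isFree/isExec bits and higher/lower neighbour links):

#include <cstdint>
#include <cstddef>

struct BlockSketch {
    BlockSketch* next;    // links non-contiguous blocks into a client list
    uintptr_t    start;   // first byte of the block
    uintptr_t    end;     // one past the last byte
};

// A block never spans chunks, so tests during free() and coalescing only
// ever compare against the bounds of a single chunk.
static bool inChunk(const BlockSketch& b, uintptr_t chunkBase, size_t chunkBytes) {
    return chunkBase <= b.start && b.end <= chunkBase + chunkBytes;
}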

View file: LIR.cpp

@@ -167,7 +167,6 @@ namespace nanojit
// clear the stats, etc
_unused = 0;
_limit = 0;
_bytesAllocated = 0;
_stats.lir = 0;
for (int i = 0; i < NumSavedRegs; ++i)
savedRegs[i] = NULL;
@@ -186,11 +185,6 @@ namespace nanojit
return _stats.lir;
}
size_t LirBuffer::byteCount()
{
return _bytesAllocated - (_limit - _unused);
}
// Allocate a new page, and write the first instruction to it -- a skip
// linking to last instruction of the previous page.
void LirBuffer::moveToNewChunk(uintptr_t addrOfLastLInsOnCurrentChunk)
@@ -2070,23 +2064,25 @@ namespace nanojit
suspended(false)
{
m_findNL[LInsImmI] = &CseFilter::findImmI;
m_findNL[LInsImmQ] = PTR_SIZE(NULL, &CseFilter::findImmQ);
m_findNL[LInsImmD] = &CseFilter::findImmD;
m_findNL[LIns1] = &CseFilter::find1;
m_findNL[LIns2] = &CseFilter::find2;
m_findNL[LIns3] = &CseFilter::find3;
m_findNL[LInsCall] = &CseFilter::findCall;
m_findNL[NLImmISmall] = &CseFilter::findImmISmall;
m_findNL[NLImmILarge] = &CseFilter::findImmILarge;
m_findNL[NLImmQ] = PTR_SIZE(NULL, &CseFilter::findImmQ);
m_findNL[NLImmD] = &CseFilter::findImmD;
m_findNL[NL1] = &CseFilter::find1;
m_findNL[NL2] = &CseFilter::find2;
m_findNL[NL3] = &CseFilter::find3;
m_findNL[NLCall] = &CseFilter::findCall;
m_capNL[LInsImmI] = 128;
m_capNL[LInsImmQ] = PTR_SIZE(0, 16);
m_capNL[LInsImmD] = 16;
m_capNL[LIns1] = 256;
m_capNL[LIns2] = 512;
m_capNL[LIns3] = 16;
m_capNL[LInsCall] = 64;
m_capNL[NLImmISmall] = 17; // covers 0..16, which is over half the cases for TraceMonkey
m_capNL[NLImmILarge] = 64;
m_capNL[NLImmQ] = PTR_SIZE(0, 16);
m_capNL[NLImmD] = 16;
m_capNL[NL1] = 256;
m_capNL[NL2] = 512;
m_capNL[NL3] = 16;
m_capNL[NLCall] = 64;
for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind)) {
for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind)) {
m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]];
m_usedNL[nlkind] = 1; // Force memset in clearAll().
}
@@ -2162,7 +2158,7 @@ namespace nanojit
}
void CseFilter::clearAll() {
for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind))
for (NLKind nlkind = NLFirst; nlkind <= NLLast; nlkind = nextNLKind(nlkind))
clearNL(nlkind);
// Note that this clears the CONST and MULTIPLE load tables as well.
@@ -2216,6 +2212,7 @@ namespace nanojit
void CseFilter::growNL(NLKind nlkind)
{
NanoAssert(nlkind != NLImmISmall);
const uint32_t oldcap = m_capNL[nlkind];
m_capNL[nlkind] <<= 1;
LIns** oldlist = m_listNL[nlkind];
@@ -2248,6 +2245,16 @@ namespace nanojit
}
}
void CseFilter::addNLImmISmall(LIns* ins, uint32_t k)
{
if (suspended) return;
NLKind nlkind = NLImmISmall;
NanoAssert(k < m_capNL[nlkind]);
NanoAssert(!m_listNL[nlkind][k]);
m_usedNL[nlkind]++;
m_listNL[nlkind][k] = ins;
}
void CseFilter::addNL(NLKind nlkind, LIns* ins, uint32_t k)
{
if (suspended) return;
@@ -2271,9 +2278,26 @@ namespace nanojit
}
}
inline LIns* CseFilter::findImmI(int32_t a, uint32_t &k)
inline LIns* CseFilter::findImmISmall(int32_t a, uint32_t &k)
{
NLKind nlkind = LInsImmI;
// This one is a direct array lookup rather than a hashtable lookup.
NLKind nlkind = NLImmISmall;
k = a;
LIns* ins = m_listNL[nlkind][k];
NanoAssert(!ins || ins->isImmI(a));
return ins;
}
uint32_t CseFilter::findImmISmall(LIns* ins)
{
uint32_t k;
findImmISmall(ins->immI(), k);
return k;
}
inline LIns* CseFilter::findImmILarge(int32_t a, uint32_t &k)
{
NLKind nlkind = NLImmILarge;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hashImmI(a) & bitmask;
uint32_t n = 1;
@@ -2296,17 +2320,17 @@ namespace nanojit
}
}
uint32_t CseFilter::findImmI(LIns* ins)
uint32_t CseFilter::findImmILarge(LIns* ins)
{
uint32_t k;
findImmI(ins->immI(), k);
findImmILarge(ins->immI(), k);
return k;
}
#ifdef NANOJIT_64BIT
inline LIns* CseFilter::findImmQ(uint64_t a, uint32_t &k)
{
NLKind nlkind = LInsImmQ;
NLKind nlkind = NLImmQ;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hashImmQorD(a) & bitmask;
uint32_t n = 1;
@ -2332,7 +2356,7 @@ namespace nanojit
inline LIns* CseFilter::findImmD(uint64_t a, uint32_t &k)
{
NLKind nlkind = LInsImmD;
NLKind nlkind = NLImmD;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hashImmQorD(a) & bitmask;
uint32_t n = 1;
@ -2357,7 +2381,7 @@ namespace nanojit
inline LIns* CseFilter::find1(LOpcode op, LIns* a, uint32_t &k)
{
NLKind nlkind = LIns1;
NLKind nlkind = NL1;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hash1(op, a) & bitmask;
uint32_t n = 1;
@ -2381,7 +2405,7 @@ namespace nanojit
inline LIns* CseFilter::find2(LOpcode op, LIns* a, LIns* b, uint32_t &k)
{
NLKind nlkind = LIns2;
NLKind nlkind = NL2;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hash2(op, a, b) & bitmask;
uint32_t n = 1;
@ -2405,7 +2429,7 @@ namespace nanojit
inline LIns* CseFilter::find3(LOpcode op, LIns* a, LIns* b, LIns* c, uint32_t &k)
{
NLKind nlkind = LIns3;
NLKind nlkind = NL3;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hash3(op, a, b, c) & bitmask;
uint32_t n = 1;
@ -2466,7 +2490,7 @@ namespace nanojit
inline LIns* CseFilter::findCall(const CallInfo *ci, uint32_t argc, LIns* args[], uint32_t &k)
{
NLKind nlkind = LInsCall;
NLKind nlkind = NLCall;
const uint32_t bitmask = m_capNL[nlkind] - 1;
k = hashCall(ci, argc, args) & bitmask;
uint32_t n = 1;
@ -2496,10 +2520,19 @@ namespace nanojit
LIns* CseFilter::insImmI(int32_t imm)
{
uint32_t k;
LIns* ins = findImmI(imm, k);
if (!ins) {
ins = out->insImmI(imm);
addNL(LInsImmI, ins, k);
LIns* ins;
if (0 <= imm && imm < int32_t(m_capNL[NLImmISmall])) {
ins = findImmISmall(imm, k);
if (!ins) {
ins = out->insImmI(imm);
addNLImmISmall(ins, k);
}
} else {
ins = findImmILarge(imm, k);
if (!ins) {
ins = out->insImmI(imm);
addNL(NLImmILarge, ins, k);
}
}
// We assume that downstream stages do not modify the instruction, so
// that we can insert 'ins' into slot 'k'. Check this.
@@ -2514,7 +2547,7 @@ namespace nanojit
LIns* ins = findImmQ(q, k);
if (!ins) {
ins = out->insImmQ(q);
addNL(LInsImmQ, ins, k);
addNL(NLImmQ, ins, k);
}
NanoAssert(ins->isop(LIR_immq) && ins->immQ() == q);
return ins;
@@ -2534,7 +2567,7 @@ namespace nanojit
LIns* ins = findImmD(u.u64, k);
if (!ins) {
ins = out->insImmD(d);
addNL(LInsImmD, ins, k);
addNL(NLImmD, ins, k);
}
NanoAssert(ins->isop(LIR_immd) && ins->immDasQ() == u.u64);
return ins;
@@ -2555,7 +2588,7 @@ namespace nanojit
ins = find1(op, a, k);
if (!ins) {
ins = out->ins1(op, a);
addNL(LIns1, ins, k);
addNL(NL1, ins, k);
}
} else {
ins = out->ins1(op, a);
@@ -2572,7 +2605,7 @@ namespace nanojit
ins = find2(op, a, b, k);
if (!ins) {
ins = out->ins2(op, a, b);
addNL(LIns2, ins, k);
addNL(NL2, ins, k);
} else if (ins->isCmp()) {
if (knownCmpValues.containsKey(ins)) {
// We've seen this comparison before, and it was previously
@@ -2594,7 +2627,7 @@ namespace nanojit
LIns* ins = find3(op, a, b, c, k);
if (!ins) {
ins = out->ins3(op, a, b, c);
addNL(LIns3, ins, k);
addNL(NL3, ins, k);
}
NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b && ins->oprnd3() == c);
return ins;
@@ -2694,7 +2727,7 @@ namespace nanojit
ins = find1(op, c, k);
if (!ins) {
ins = out->insGuard(op, c, gr);
addNL(LIns1, ins, k);
addNL(NL1, ins, k);
}
// After this guard, we know that 'c's result was true (if
// op==LIR_xf) or false (if op==LIR_xt), else we would have
@@ -2719,7 +2752,7 @@ namespace nanojit
LIns* ins = find2(op, a, b, k);
if (!ins) {
ins = out->insGuardXov(op, a, b, gr);
addNL(LIns2, ins, k);
addNL(NL2, ins, k);
}
NanoAssert(ins->isop(op) && ins->oprnd1() == a && ins->oprnd2() == b);
return ins;
@@ -2737,7 +2770,7 @@ namespace nanojit
ins = findCall(ci, argc, args, k);
if (!ins) {
ins = out->insCall(ci, args);
addNL(LInsCall, ins, k);
addNL(NLCall, ins, k);
}
} else {
// We only need to worry about aliasing if !ci->_isPure.
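The insImmI() change above is the core of the CseFilter patch: constants 0..16 are common enough in TraceMonkey to earn a direct-indexed 17-entry table, while all other immediates keep using the hash table. A sketch of that dispatch, with the table sizes mirroring the patch but the hash standing in for nanojit's hashImmI (and without the probing and growth the real findImmILarge does):

#include <cstdint>

struct Ins;                            // opaque stand-in for LIns

static Ins*     smallTab[17];          // slot k caches the constant k itself
static Ins*     largeTab[64];          // power-of-two hash table
static uint32_t hashImm(int32_t a) { return uint32_t(a) * 2654435761u; }

static Ins*& slotForImm(int32_t imm) {
    if (0 <= imm && imm < 17)
        return smallTab[imm];          // direct lookup: no hashing, no probing
    // First probe only; on collision the real code walks a probe sequence
    // and grows the table when it gets too full.
    return largeTab[hashImm(imm) & (64 - 1)];
}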

View file: LIR.h

@@ -1924,18 +1924,19 @@ namespace nanojit
// We divide instruction kinds into groups. LIns0 isn't present
// because we don't need to record any 0-ary instructions. Loads
// aren't here, they're handled separately.
LInsImmI = 0,
LInsImmQ = 1, // only occurs on 64-bit platforms
LInsImmD = 2,
LIns1 = 3,
LIns2 = 4,
LIns3 = 5,
LInsCall = 6,
NLImmISmall = 0,
NLImmILarge = 1,
NLImmQ = 2, // only occurs on 64-bit platforms
NLImmD = 3,
NL1 = 4,
NL2 = 5,
NL3 = 6,
NLCall = 7,
LInsFirst = 0,
LInsLast = 6,
NLFirst = 0,
NLLast = 7,
// Need a value after "last" to outsmart compilers that insist last+1 is impossible.
LInsInvalid = 7
NLInvalid = 8
};
#define nextNLKind(kind) NLKind(kind+1)
@@ -1948,11 +1949,11 @@ namespace nanojit
// Don't start m_capNL too small, or we'll waste time growing and rehashing.
// Don't start m_capNL too large, will waste memory.
//
LIns** m_listNL[LInsLast + 1];
uint32_t m_capNL[ LInsLast + 1];
uint32_t m_usedNL[LInsLast + 1];
LIns** m_listNL[NLLast + 1];
uint32_t m_capNL[ NLLast + 1];
uint32_t m_usedNL[NLLast + 1];
typedef uint32_t (CseFilter::*find_t)(LIns*);
find_t m_findNL[LInsLast + 1];
find_t m_findNL[NLLast + 1];
// Similarly, for loads, there is one table for each CseAcc. A CseAcc
// is like a normal access region, but there are two extra possible
@@ -2021,7 +2022,8 @@ namespace nanojit
static uint32_t hashCall(const CallInfo *call, uint32_t argc, LIns* args[]);
// These versions are used before an LIns has been created.
LIns* findImmI(int32_t a, uint32_t &k);
LIns* findImmISmall(int32_t a, uint32_t &k);
LIns* findImmILarge(int32_t a, uint32_t &k);
#ifdef NANOJIT_64BIT
LIns* findImmQ(uint64_t a, uint32_t &k);
#endif
@@ -2036,7 +2038,8 @@ namespace nanojit
// These versions are used after an LIns has been created; they are
// used for rehashing after growing. They just call onto the
// multi-arg versions above.
uint32_t findImmI(LIns* ins);
uint32_t findImmISmall(LIns* ins);
uint32_t findImmILarge(LIns* ins);
#ifdef NANOJIT_64BIT
uint32_t findImmQ(LIns* ins);
#endif
@@ -2050,6 +2053,7 @@ namespace nanojit
void growNL(NLKind kind);
void growL(CseAcc cseAcc);
void addNLImmISmall(LIns* ins, uint32_t k);
// 'k' is the index found by findXYZ().
void addNL(NLKind kind, LIns* ins, uint32_t k);
void addL(LIns* ins, uint32_t k);
@@ -2096,7 +2100,6 @@ namespace nanojit
verbose_only(LInsPrinter* printer;)
int32_t insCount();
size_t byteCount();
// stats
struct
@@ -2123,7 +2126,6 @@ namespace nanojit
Allocator& _allocator;
uintptr_t _unused; // next unused instruction slot in the current LIR chunk
uintptr_t _limit; // one past the last usable byte of the current LIR chunk
size_t _bytesAllocated;
};
class LirBufWriter : public LirWriter
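A detail worth noting about the capacities above: every hashed table starts at a power of two so the find functions can reduce a hash with a mask instead of a modulo, and growNL() doubles capacity to preserve that. NLImmISmall is the deliberate exception: its 17 entries are direct-indexed, and growNL() asserts it never grows. The masking idiom, as a sketch (indexFor is a hypothetical helper):

#include <cstdint>

// Requires cap to be a power of two, which doubling in growNL() preserves.
static uint32_t indexFor(uint32_t hash, uint32_t cap) {
    uint32_t bitmask = cap - 1;   // e.g. cap 512 -> mask 0x1FF
    return hash & bitmask;        // same result as hash % cap, but cheaper
}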

View file: NativeSparc.cpp

@@ -115,16 +115,16 @@ namespace nanojit
}
inline void Assembler::SUB(Register rs1, Register rs2, Register rd) {
IntegerOperation(rs1, rs2, rd, 0x4, "sub");
};
}
inline void Assembler::SUBCC(Register rs1, Register rs2, Register rd) {
IntegerOperation(rs1, rs2, rd, 0x14, "subcc");
};
}
inline void Assembler::SUBI(Register rs1, int32_t simm13, Register rd) {
IntegerOperationI(rs1, simm13, rd, 0x4, "sub");
}
inline void Assembler::XOR(Register rs1, Register rs2, Register rd) {
IntegerOperation(rs1, rs2, rd, 0x3, "xor");
};
}
inline void Assembler::Bicc(int32_t a, int32_t dsp22, int32_t cond, const char *opcode) {
Format_2_2(a, cond, 0x2, dsp22);
@@ -208,6 +208,12 @@ namespace nanojit
inline void Assembler::FITOD(Register rs2, Register rd) {
FloatOperation(G0, rs2, rd, 0xc8, "fitod");
}
inline void Assembler::FDTOS(Register rs2, Register rd) {
FloatOperation(G0, rs2, rd, 0xc6, "fdtos");
}
inline void Assembler::FSTOD(Register rs2, Register rd) {
FloatOperation(G0, rs2, rd, 0xc9, "fstod");
}
inline void Assembler::JMPL(Register rs1, Register rs2, Register rd) {
Format_3_1(2, rd, 0x38, rs1, 0, rs2);
@@ -238,6 +244,15 @@ namespace nanojit
LoadOperationI(rs1, simm13, rd, 0x20, "ldf");
}
inline void Assembler::LDF32(Register rs1, int32_t immI, Register rd) {
if (isIMM13(immI)) {
LDFI(rs1, immI, rd);
} else {
LDF(rs1, L0, rd);
SET32(immI, L0);
}
}
inline void Assembler::LDDF32(Register rs1, int32_t immI, Register rd) {
if (isIMM13(immI+4)) {
LDFI(rs1, immI+4, rd + 1);
@@ -266,6 +281,22 @@ namespace nanojit
}
}
inline void Assembler::LDSB(Register rs1, Register rs2, Register rd) {
LoadOperation(rs1, rs2, rd, 0x9, "ldsb");
}
inline void Assembler::LDSBI(Register rs1, int32_t simm13, Register rd) {
LoadOperationI(rs1, simm13, rd, 0x9, "ldsb");
}
inline void Assembler::LDSB32(Register rs1, int32_t immI, Register rd) {
if (isIMM13(immI)) {
LDSBI(rs1, immI, rd);
} else {
LDSB(rs1, L0, rd);
SET32(immI, L0);
}
}
inline void Assembler::LDUH(Register rs1, Register rs2, Register rd) {
LoadOperation(rs1, rs2, rd, 0x2, "lduh");
}
@@ -282,6 +313,22 @@ namespace nanojit
}
}
inline void Assembler::LDSH(Register rs1, Register rs2, Register rd) {
LoadOperation(rs1, rs2, rd, 0xa, "ldsh");
}
inline void Assembler::LDSHI(Register rs1, int32_t simm13, Register rd) {
LoadOperationI(rs1, simm13, rd, 0xa, "ldsh");
}
inline void Assembler::LDSH32(Register rs1, int32_t immI, Register rd) {
if (isIMM13(immI)) {
LDSHI(rs1, immI, rd);
} else {
LDSH(rs1, L0, rd);
SET32(immI, L0);
}
}
inline void Assembler::LDSW(Register rs1, Register rs2, Register rd) {
LoadOperation(rs1, rs2, rd, 0x8, "ldsw");
}
@@ -475,6 +522,22 @@ namespace nanojit
}
}
inline void Assembler::STH(Register rd, Register rs1, Register rs2) {
Store(rd, rs1, rs2, 0x6, "sth");
}
inline void Assembler::STHI(Register rd, int32_t simm13, Register rs1) {
StoreI(rd, simm13, rs1, 0x6, "sth");
}
inline void Assembler::STH32(Register rd, int32_t immI, Register rs1) {
if (isIMM13(immI)) {
STHI(rd, immI, rs1);
} else {
STH(rd, L0, rs1);
SET32(immI, L0);
}
}
inline void Assembler::STB(Register rd, Register rs1, Register rs2) {
Store(rd, rs1, rs2, 0x5, "stb");
}
@@ -753,11 +816,9 @@ namespace nanojit
switch (op) {
case LIR_sti:
case LIR_sti2c:
case LIR_sti2s:
// handled by mainline code below for now
break;
case LIR_sti2s:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store32 should never receive this LIR opcode");
return;
@@ -775,6 +836,9 @@ namespace nanojit
case LIR_sti2c:
STB32(L2, dr, rb);
break;
case LIR_sti2s:
STH32(L2, dr, rb);
break;
}
SET32(c, L2);
}
@@ -797,6 +861,9 @@ namespace nanojit
case LIR_sti2c:
STB32(ra, dr, rb);
break;
case LIR_sti2s:
STH32(ra, dr, rb);
break;
}
}
}
@@ -817,65 +884,75 @@ namespace nanojit
{
switch (ins->opcode()) {
case LIR_ldd:
case LIR_ldf2d:
// handled by mainline code below for now
break;
case LIR_ldf2d:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_load64 should never receive this LIR opcode");
return;
}
underrunProtect(72);
underrunProtect(48);
LIns* base = ins->oprnd1();
int db = ins->disp();
Register rr = ins->deprecated_getReg();
Register rb = getBaseReg(base, db, GpRegs);
int dr = deprecated_disp(ins);
Register rb;
if (base->isop(LIR_allocp)) {
rb = FP;
db += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
ins->clearReg();
if (ins->isInReg()) {
Register rr = ins->getReg();
asm_maybe_spill(ins, false);
NanoAssert(rmask(rr) & FpRegs);
// don't use an fpu reg to simply load & store the value.
if (dr)
asm_mmq(FP, dr, rb, db);
deprecated_freeRsrcOf(ins);
if (rr != deprecated_UnknownReg)
{
NanoAssert(rmask(rr)&FpRegs);
_allocator.retire(rr);
if (ins->opcode() == LIR_ldd) {
LDDF32(rb, db, rr);
} else {
FSTOD(F28, rr);
LDF32(rb, db, F28);
}
} else {
NanoAssert(ins->isInAr());
int dr = arDisp(ins);
if (ins->opcode() == LIR_ldd) {
// don't use an fpu reg to simply load & store the value.
asm_mmq(FP, dr, rb, db);
} else {
STDF32(F28, dr, FP);
FSTOD(F28, F28);
LDF32(rb, db, F28);
}
}
freeResourcesOf(ins);
}
void Assembler::asm_store64(LOpcode op, LIns* value, int dr, LIns* base)
{
switch (op) {
case LIR_std:
case LIR_std2f:
// handled by mainline code below for now
break;
case LIR_std2f:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
default:
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
return;
}
underrunProtect(48);
Register rb = getBaseReg(base, dr, GpRegs);
if (op == LIR_std2f) {
Register rv = ( !value->isInReg()
? findRegFor(value, FpRegs)
: value->getReg() );
NanoAssert(rmask(rv) & FpRegs);
STF32(F28, dr, rb);
FDTOS(rv, F28);
return;
}
if (value->isImmD())
{
// if a constant 64-bit value just store it now rather than
// generating a pointless store/load/store sequence
Register rb = findRegFor(base, GpRegs);
STW32(L2, dr+4, rb);
SET32(value->immDlo(), L2);
STW32(L2, dr, rb);
@@ -895,30 +972,15 @@ namespace nanojit
// c) maybe it's a double just being stored. oh well.
int da = findMemFor(value);
Register rb;
if (base->isop(LIR_allocp)) {
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
asm_mmq(rb, dr, FP, da);
return;
}
Register rb;
if (base->isop(LIR_allocp)) {
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
// if value already in a reg, use that, otherwise
// try to get it into XMM regs before FPU regs.
// get it into FPU regs.
Register rv = ( !value->isInReg()
? findRegFor(value, FpRegs)
: value->deprecated_getReg() );
: value->getReg() );
STDF32(rv, dr, rb);
}
@@ -1244,9 +1306,11 @@ namespace nanojit
LDSW32(ra, d, rr);
break;
case LIR_ldc2i:
LDSB32(ra, d, rr);
break;
case LIR_lds2i:
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
return;
LDSH32(ra, d, rr);
break;
default:
NanoAssertMsg(0, "asm_load32 should never receive this LIR opcode");
return;
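The new LDSB32/LDSH32/STH32 helpers all follow one SPARC idiom: if the displacement fits the instruction's 13-bit signed immediate field, emit the short form; otherwise build the displacement in the scratch register L0 first. Because nanojit emits code backwards, the SET32 appears after the memory op in the source yet executes before it. A stand-alone sketch of the shape (the emit* functions are hypothetical placeholders for the real encoders):

#include <cstdint>

static bool isSimm13(int32_t v) { return -4096 <= v && v <= 4095; }

static void emitMemOpImm(int32_t disp) { (void)disp; /* ld [rs1 + simm13], rd */ }
static void emitMemOpReg()             { /* ld [rs1 + L0], rd */ }
static void emitSet32(int32_t v)       { (void)v; /* sethi/or pair into L0 */ }

static void memOp32(int32_t disp) {
    if (isSimm13(disp)) {
        emitMemOpImm(disp);   // displacement encoded directly in the insn
    } else {
        emitMemOpReg();       // emitted first, executes second
        emitSet32(disp);      // emitted second, executes first
    }
}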

View file: NativeSparc.h

@@ -77,7 +77,7 @@ namespace nanojit
#define NJ_MAX_PARAMETERS 1
#define NJ_JTBL_SUPPORTED 0
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 0
#define NJ_EXPANDED_LOADSTORE_SUPPORTED 1
#define NJ_F2I_SUPPORTED 1
#define NJ_SOFTFLOAT_SUPPORTED 0
#define NJ_DIVI_SUPPORTED 0
@@ -296,7 +296,7 @@ namespace nanojit
Format_3A(2, rd, op3, (cond & 0xF) << 14 | (opf_cc & 0x7) << 11 | (opf_low & 0x3F) << 5 | _reg_(rs2)); \
} \
void IntegerOperation(Register rs1, Register rs2, Register rd, int32_t op3, const char *opcode); \
void Assembler::IntegerOperationI(Register rs1, int32_t simm13, Register rd, int32_t op3, const char *opcode); \
void IntegerOperationI(Register rs1, int32_t simm13, Register rd, int32_t op3, const char *opcode); \
void FloatOperation(Register rs1, Register rs2, Register rd, int32_t op3, const char *opcode); \
void Bicc(int32_t a, int32_t dsp22, int32_t cond, const char *opcode); \
void FBfcc(int32_t a, int32_t dsp22, int32_t cond, const char *opcode); \
@@ -308,7 +308,7 @@ namespace nanojit
void ShiftOperation(Register rs1, Register rs2, Register rd, int32_t op3, const char* opcode); \
void ShiftOperationI(Register rs1, int32_t shcnt32, Register rd, int32_t op3, const char* opcode); \
void Store(Register rd, Register rs1, Register rs2, int32_t op3, const char* opcode); \
void Assembler::StoreI(Register rd, int32_t simm13, Register rs1, int32_t op3, const char* opcode); \
void StoreI(Register rd, int32_t simm13, Register rs1, int32_t op3, const char* opcode); \
void ADD(Register rs1, Register rs2, Register rd); \
void ADDCC(Register rs1, Register rs2, Register rd); \
void AND(Register rs1, Register rs2, Register rd); \
@@ -355,17 +355,26 @@ namespace nanojit
void FMOVD(Register rs2, Register rd); \
void FNEGD(Register rs2, Register rd); \
void FITOD(Register rs2, Register rd); \
void FDTOS(Register rs2, Register rd); \
void FSTOD(Register rs2, Register rd); \
void JMPL(Register rs1, Register rs2, Register rd); \
void JMPLI(Register rs1, int32_t simm13, Register rd); \
void LDF(Register rs1, Register rs2, Register rd); \
void LDFI(Register rs1, int32_t simm13, Register rd); \
void LDF32(Register rs1, int32_t immI, Register rd); \
void LDDF32(Register rs1, int32_t immI, Register rd); \
void LDUB(Register rs1, Register rs2, Register rd); \
void LDUBI(Register rs1, int32_t simm13, Register rd); \
void LDUB32(Register rs1, int32_t immI, Register rd); \
void LDSB(Register rs1, Register rs2, Register rd); \
void LDSBI(Register rs1, int32_t simm13, Register rd); \
void LDSB32(Register rs1, int32_t immI, Register rd); \
void LDUH(Register rs1, Register rs2, Register rd); \
void LDUHI(Register rs1, int32_t simm13, Register rd); \
void LDUH32(Register rs1, int32_t immI, Register rd); \
void LDSH(Register rs1, Register rs2, Register rd); \
void LDSHI(Register rs1, int32_t simm13, Register rd); \
void LDSH32(Register rs1, int32_t immI, Register rd); \
void LDSW(Register rs1, Register rs2, Register rd); \
void LDSWI(Register rs1, int32_t simm13, Register rd); \
void LDSW32(Register rs1, int32_t immI, Register rd); \
@@ -428,6 +437,9 @@ namespace nanojit
void STW(Register rd, Register rs1, Register rs2); \
void STWI(Register rd, int32_t simm13, Register rs1); \
void STW32(Register rd, int32_t immI, Register rs1); \
void STH(Register rd, Register rs1, Register rs2); \
void STHI(Register rd, int32_t simm13, Register rs1); \
void STH32(Register rd, int32_t immI, Register rs1); \
void STB(Register rd, Register rs1, Register rs2); \
void STBI(Register rd, int32_t simm13, Register rs1); \
void STB32(Register rd, int32_t immI, Register rs1); \