зеркало из https://github.com/mozilla/gecko-dev.git
Merge tamarin-redux (nanojit2) into tracemonkey (457786, r=edwsmith,gal,danderson).
This commit is contained in:
Родитель
180d9758af
Коммит
c6c4b6404b
|
@ -145,7 +145,7 @@ struct JSTraceableNative {
|
|||
#define _JS_DEFINE_CALLINFO(name, crtype, cargtypes, argtypes, cse, fold) \
|
||||
crtype FASTCALL js_##name cargtypes; \
|
||||
const nanojit::CallInfo ci_##name = \
|
||||
{ (intptr_t) &js_##name, argtypes, cse, fold _JS_CI_NAME(name) };
|
||||
{ (intptr_t) &js_##name, argtypes, cse, fold, nanojit::ABI_FASTCALL _JS_CI_NAME(name) };
|
||||
|
||||
/*
|
||||
* Declare a C function named js_<op> and a CallInfo struct named ci_<op> so
|
||||
|
|
|
@ -344,16 +344,12 @@ math_max(JSContext *cx, uintN argc, jsval *vp)
|
|||
*vp = DOUBLE_TO_JSVAL(cx->runtime->jsNaN);
|
||||
return JS_TRUE;
|
||||
}
|
||||
if (x == 0 && x == z && fd_copysign(1.0, z) == -1)
|
||||
z = x;
|
||||
else
|
||||
/*
|
||||
* Note: it is essential that you write the ternary expression
|
||||
* here such that the false branch produces z not x, as the case
|
||||
* of x=-0, z=0, for which we wind up in this expression but
|
||||
* evaluate either > order as false, whether we do x>z *or* z>x.
|
||||
*/
|
||||
if (x == 0 && x == z) {
|
||||
if (fd_copysign(1.0, z) == -1)
|
||||
z = x;
|
||||
} else {
|
||||
z = (x > z) ? x : z;
|
||||
}
|
||||
}
|
||||
return js_NewNumberInRootedValue(cx, z, vp);
|
||||
}
|
||||
|
@ -378,9 +374,10 @@ math_min(JSContext *cx, uintN argc, jsval *vp)
|
|||
*vp = DOUBLE_TO_JSVAL(cx->runtime->jsNaN);
|
||||
return JS_TRUE;
|
||||
}
|
||||
if (x == 0 && x == z && fd_copysign(1.0,x) == -1)
|
||||
z = x;
|
||||
else
|
||||
if (x == 0 && x == z) {
|
||||
if (fd_copysign(1.0, x) == -1)
|
||||
z = x;
|
||||
} else
|
||||
z = (x < z) ? x : z;
|
||||
}
|
||||
return js_NewNumberInRootedValue(cx, z, vp);
|
||||
|
@ -623,9 +620,13 @@ js_Math_max(jsdouble d, jsdouble p)
|
|||
if (JSDOUBLE_IS_NaN(d) || JSDOUBLE_IS_NaN(p))
|
||||
return js_NaN;
|
||||
|
||||
if (p == 0 && p == d && fd_copysign(1.0, d) == -1)
|
||||
return p;
|
||||
return (d > p) ? d : p;
|
||||
if (p == 0 && p == d) {
|
||||
if (fd_copysign(1.0, d) == -1)
|
||||
return p;
|
||||
else
|
||||
return d;
|
||||
}
|
||||
return (p > d) ? p : d;
|
||||
}
|
||||
|
||||
jsdouble FASTCALL
|
||||
|
|
|
@ -1000,12 +1000,14 @@ TraceRecorder::TraceRecorder(JSContext* cx, GuardRecord* _anchor, Fragment* _fra
|
|||
lir = cse_filter = new (&gc) CseFilter(lir, &gc);
|
||||
lir = expr_filter = new (&gc) ExprFilter(lir);
|
||||
lir = func_filter = new (&gc) FuncFilter(lir, *this);
|
||||
lir->ins0(LIR_trace);
|
||||
lir->ins0(LIR_start);
|
||||
|
||||
if (!nanojit::AvmCore::config.tree_opt || fragment->root == fragment) {
|
||||
lirbuf->state = addName(lir->insParam(0), "state");
|
||||
lirbuf->param1 = addName(lir->insParam(1), "param1");
|
||||
lirbuf->state = addName(lir->insParam(0, 0), "state");
|
||||
lirbuf->param1 = addName(lir->insParam(1, 0), "param1");
|
||||
}
|
||||
loop_header_ins = addName(lir->ins0(LIR_label), "loop_header");
|
||||
|
||||
lirbuf->sp = addName(lir->insLoad(LIR_ldp, lirbuf->state, (int)offsetof(InterpState, sp)), "sp");
|
||||
lirbuf->rp = addName(lir->insLoad(LIR_ldp, lirbuf->state, offsetof(InterpState, rp)), "rp");
|
||||
cx_ins = addName(lir->insLoad(LIR_ldp, lirbuf->state, offsetof(InterpState, cx)), "cx");
|
||||
|
@ -1955,10 +1957,9 @@ TraceRecorder::closeLoop(Fragmento* fragmento)
|
|||
SideExit *exit = snapshot(LOOP_EXIT);
|
||||
exit->target = fragment->root;
|
||||
if (fragment == fragment->root) {
|
||||
fragment->lastIns = lir->insGuard(LIR_loop, lir->insImm(1), exit);
|
||||
} else {
|
||||
fragment->lastIns = lir->insGuard(LIR_x, lir->insImm(1), exit);
|
||||
fragment->lastIns = lir->insBranch(LIR_j, NULL, loop_header_ins);
|
||||
}
|
||||
fragment->lastIns = lir->insGuard(LIR_x, lir->insImm(1), exit);
|
||||
compile(fragmento);
|
||||
|
||||
debug_only_v(printf("recording completed at %s:%u@%u via closeLoop\n", cx->fp->script->filename,
|
||||
|
@ -2107,9 +2108,9 @@ TraceRecorder::fuseIf(jsbytecode* pc, bool cond, LIns* x)
|
|||
int
|
||||
nanojit::StackFilter::getTop(LInsp guard)
|
||||
{
|
||||
if (sp == frag->lirbuf->sp)
|
||||
if (sp == lirbuf->sp)
|
||||
return guard->exit()->sp_adj;
|
||||
JS_ASSERT(sp == frag->lirbuf->rp);
|
||||
JS_ASSERT(sp == lirbuf->rp);
|
||||
return guard->exit()->rp_adj;
|
||||
}
|
||||
|
||||
|
@ -2387,7 +2388,7 @@ js_RecordTree(JSContext* cx, JSTraceMonitor* tm, Fragment* f)
|
|||
while (f->code() && f->peer)
|
||||
f = f->peer;
|
||||
if (f->code())
|
||||
f = JS_TRACE_MONITOR(cx).fragmento->newLoop(f->ip);
|
||||
f = JS_TRACE_MONITOR(cx).fragmento->getAnchor(f->ip);
|
||||
|
||||
f->calldepth = 0;
|
||||
f->root = f;
|
||||
|
@ -2604,7 +2605,7 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
|
|||
OBJ_SHAPE(globalObj), tm->globalShape);)
|
||||
const void* ip = f->ip;
|
||||
js_FlushJITCache(cx);
|
||||
*treep = tm->fragmento->newLoop(ip);
|
||||
*treep = tm->fragmento->getAnchor(ip);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -2624,7 +2625,7 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
|
|||
bool didGC;
|
||||
const void* ip = f->ip;
|
||||
if (!ReplenishReservePool(cx, tm, didGC) || didGC) {
|
||||
*treep = tm->fragmento->newLoop(ip);
|
||||
*treep = tm->fragmento->getAnchor(ip);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -2663,6 +2664,7 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
|
|||
tm->onTrace = true;
|
||||
GuardRecord* lr;
|
||||
|
||||
debug_only(fflush(NULL);)
|
||||
#if defined(JS_NO_FASTCALL) && defined(NANOJIT_IA32)
|
||||
SIMULATE_FASTCALL(lr, &state, NULL, u.func);
|
||||
#else
|
||||
|
@ -2854,7 +2856,7 @@ js_MonitorLoopEdge(JSContext* cx, uintN& inlineCallCount)
|
|||
} else {
|
||||
f = tm->fragmento->getLoop(pc);
|
||||
if (!f)
|
||||
f = tm->fragmento->newLoop(pc);
|
||||
f = tm->fragmento->getAnchor(pc);
|
||||
cacheEntry->pc = pc;
|
||||
cacheEntry->fragment = f;
|
||||
}
|
||||
|
|
|
@ -220,6 +220,7 @@ class TraceRecorder : public GCObject {
|
|||
#ifdef NJ_SOFTFLOAT
|
||||
nanojit::LirWriter* float_filter;
|
||||
#endif
|
||||
nanojit::LIns* loop_header_ins;
|
||||
nanojit::LIns* cx_ins;
|
||||
nanojit::LIns* gp_ins;
|
||||
nanojit::LIns* eos_ins;
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -73,8 +73,8 @@ namespace nanojit
|
|||
struct Reservation
|
||||
{
|
||||
uint32_t arIndex:16; /* index into stack frame. displ is -4*arIndex */
|
||||
Register reg:8; /* register UnkownReg implies not in register */
|
||||
int cost:8;
|
||||
Register reg:15; /* register UnkownReg implies not in register */
|
||||
uint32_t used:1;
|
||||
};
|
||||
|
||||
struct AR
|
||||
|
@ -83,32 +83,6 @@ namespace nanojit
|
|||
uint32_t tos; /* current top of stack entry */
|
||||
uint32_t highwatermark; /* max tos hit */
|
||||
uint32_t lowwatermark; /* we pre-allocate entries from 0 upto this index-1; so dynamic entries are added above this index */
|
||||
LIns* parameter[ NJ_MAX_PARAMETERS ]; /* incoming parameters */
|
||||
};
|
||||
|
||||
enum ArgSize {
|
||||
ARGSIZE_NONE = 0,
|
||||
ARGSIZE_F = 1,
|
||||
ARGSIZE_LO = 2,
|
||||
ARGSIZE_Q = 3,
|
||||
_ARGSIZE_MASK_INT = 2,
|
||||
_ARGSIZE_MASK_ANY = 3
|
||||
};
|
||||
|
||||
struct CallInfo
|
||||
{
|
||||
intptr_t _address;
|
||||
uint16_t _argtypes; // 6 2-bit fields indicating arg type, by ARGSIZE above (including ret type): a1 a2 a3 a4 a5 ret
|
||||
uint8_t _cse; // true if no side effects
|
||||
uint8_t _fold; // true if no side effects
|
||||
verbose_only ( const char* _name; )
|
||||
|
||||
uint32_t FASTCALL _count_args(uint32_t mask) const;
|
||||
uint32_t get_sizes(ArgSize*) const;
|
||||
|
||||
inline uint32_t FASTCALL count_args() const { return _count_args(_ARGSIZE_MASK_ANY); }
|
||||
inline uint32_t FASTCALL count_iargs() const { return _count_args(_ARGSIZE_MASK_INT); }
|
||||
// fargs = args - iargs
|
||||
};
|
||||
|
||||
#ifdef AVMPLUS_WIN32
|
||||
|
@ -124,6 +98,10 @@ namespace nanojit
|
|||
counter_define(spills;)
|
||||
counter_define(native;)
|
||||
counter_define(exitnative;)
|
||||
|
||||
int32_t pages;
|
||||
NIns* codeStart;
|
||||
NIns* codeExitStart;
|
||||
|
||||
DECLARE_PLATFORM_STATS()
|
||||
#ifdef __GNUC__
|
||||
|
@ -146,10 +124,34 @@ namespace nanojit
|
|||
,MaxExit
|
||||
,MaxXJump
|
||||
,UnknownPrim
|
||||
,UnknownBranch
|
||||
};
|
||||
|
||||
typedef avmplus::List<NIns*, avmplus::LIST_NonGCObjects> NInsList;
|
||||
typedef avmplus::SortedMap<LIns*,NIns*,avmplus::LIST_NonGCObjects> InsMap;
|
||||
typedef avmplus::SortedMap<NIns*,LIns*,avmplus::LIST_NonGCObjects> NInsMap;
|
||||
|
||||
class LabelState MMGC_SUBCLASS_DECL
|
||||
{
|
||||
public:
|
||||
RegAlloc regs;
|
||||
NIns *addr;
|
||||
LabelState(NIns *a, RegAlloc &r) : regs(r), addr(a)
|
||||
{}
|
||||
};
|
||||
|
||||
class LabelStateMap
|
||||
{
|
||||
GC *gc;
|
||||
avmplus::SortedMap<LIns*, LabelState*, avmplus::LIST_GCObjects> labels;
|
||||
public:
|
||||
LabelStateMap(GC *gc) : gc(gc), labels(gc)
|
||||
{}
|
||||
|
||||
void clear() { labels.clear(); }
|
||||
void add(LIns *label, NIns *addr, RegAlloc ®s);
|
||||
LabelState *get(LIns *);
|
||||
};
|
||||
/**
|
||||
* Information about the activation record for the method is built up
|
||||
* as we generate machine code. As part of the prologue, we issue
|
||||
|
@ -190,6 +192,7 @@ namespace nanojit
|
|||
void setError(AssmError e) { _err = e; }
|
||||
void setCallTable(const CallInfo *functions);
|
||||
void pageReset();
|
||||
int32_t codeBytes();
|
||||
Page* handoverPages(bool exitPages=false);
|
||||
|
||||
debug_only ( void pageValidate(); )
|
||||
|
@ -197,30 +200,32 @@ namespace nanojit
|
|||
|
||||
// support calling out from a fragment ; used to debug the jit
|
||||
debug_only( void resourceConsistencyCheck(); )
|
||||
debug_only( void registerConsistencyCheck(LIns** resv); )
|
||||
debug_only( void registerConsistencyCheck(); )
|
||||
|
||||
Stats _stats;
|
||||
int hasLoop;
|
||||
|
||||
private:
|
||||
|
||||
void gen(LirFilter* toCompile, NInsList& loopJumps);
|
||||
NIns* genPrologue(RegisterMask);
|
||||
NIns* genEpilogue(RegisterMask);
|
||||
|
||||
bool ignoreInstruction(LInsp ins);
|
||||
NIns* genPrologue();
|
||||
NIns* genEpilogue();
|
||||
|
||||
GuardRecord* placeGuardRecord(LInsp guard);
|
||||
void initGuardRecord(LInsp guard, GuardRecord*);
|
||||
|
||||
uint32_t arReserve(LIns* l);
|
||||
uint32_t arFree(uint32_t idx);
|
||||
void arFree(uint32_t idx);
|
||||
void arReset();
|
||||
|
||||
Register registerAlloc(RegisterMask allow);
|
||||
void registerResetAll();
|
||||
void restoreCallerSaved();
|
||||
void mergeRegisterState(RegAlloc& saved);
|
||||
LInsp findVictim(RegAlloc& regs, RegisterMask allow, RegisterMask prefer);
|
||||
void evictRegs(RegisterMask regs);
|
||||
void evictScratchRegs();
|
||||
void intersectRegisterState(RegAlloc& saved);
|
||||
void unionRegisterState(RegAlloc& saved);
|
||||
void assignSaved(RegAlloc &saved, RegisterMask skip);
|
||||
LInsp findVictim(RegAlloc& regs, RegisterMask allow);
|
||||
|
||||
int findMemFor(LIns* i);
|
||||
Register findRegFor(LIns* i, RegisterMask allow);
|
||||
|
@ -234,12 +239,16 @@ namespace nanojit
|
|||
NIns* pageAlloc(bool exitPage=false);
|
||||
void pagesFree(Page*& list);
|
||||
void internalReset();
|
||||
bool canRemat(LIns*);
|
||||
|
||||
Reservation* reserveAlloc(LInsp i);
|
||||
void reserveFree(LInsp i);
|
||||
void reserveReset();
|
||||
|
||||
Reservation* getresv(LIns *x) { return x->resv() ? &_resvTable[x->resv()] : 0; }
|
||||
// Returns the reservation-table entry referenced by `x`, or 0 when the
// instruction's resv index is zero (index 0 therefore means "none").
Reservation* getresv(LIns *x) {
    const uint32_t slot = x->resv();
    if (!slot)
        return 0;
    return &_resvTable[slot];
}
|
||||
|
||||
DWB(Fragmento*) _frago;
|
||||
GC* _gc;
|
||||
|
@ -259,13 +268,18 @@ namespace nanojit
|
|||
AR _activation;
|
||||
RegAlloc _allocator;
|
||||
|
||||
LabelStateMap _labels;
|
||||
NInsMap _patches;
|
||||
Reservation _resvTable[ NJ_MAX_STACK_ENTRY ]; // table where we house stack and register information
|
||||
uint32_t _resvFree;
|
||||
bool _inExit,vpad2[3];
|
||||
bool _inExit, vpad2[3];
|
||||
avmplus::List<LIns*, avmplus::LIST_GCObjects> pending_lives;
|
||||
|
||||
void asm_cmp(LIns *cond);
|
||||
#ifndef NJ_SOFTFLOAT
|
||||
void asm_fcmp(LIns *cond);
|
||||
void asm_setcc(Register res, LIns *cond);
|
||||
NIns * asm_jmpcc(bool brOnFalse, LIns *cond, NIns *target);
|
||||
#endif
|
||||
void asm_mmq(Register rd, int dd, Register rs, int ds);
|
||||
NIns* asm_exit(LInsp guard);
|
||||
|
@ -274,7 +288,9 @@ namespace nanojit
|
|||
void asm_store32(LIns *val, int d, LIns *base);
|
||||
void asm_store64(LIns *val, int d, LIns *base);
|
||||
void asm_restore(LInsp, Reservation*, Register);
|
||||
void asm_spill(LInsp i, Reservation *resv, bool pop);
|
||||
void asm_load(int d, Register r);
|
||||
void asm_spilli(LInsp i, Reservation *resv, bool pop);
|
||||
void asm_spill(Register rr, int d, bool pop=false, bool quad=false);
|
||||
void asm_load64(LInsp i);
|
||||
void asm_pusharg(LInsp p);
|
||||
NIns* asm_adjustBranch(NIns* at, NIns* target);
|
||||
|
@ -290,6 +306,10 @@ namespace nanojit
|
|||
void asm_call(LInsp);
|
||||
void asm_arg(ArgSize, LInsp, Register);
|
||||
Register asm_binop_rhs_reg(LInsp ins);
|
||||
NIns* asm_branch(bool branchOnFalse, LInsp cond, NIns* targ);
|
||||
void assignSavedParams();
|
||||
void reserveSavedParams();
|
||||
void handleLoopCarriedExprs();
|
||||
|
||||
// platform specific implementation (see NativeXXX.cpp file)
|
||||
void nInit(uint32_t flags);
|
||||
|
@ -303,6 +323,7 @@ namespace nanojit
|
|||
|
||||
// platform specific methods
|
||||
public:
|
||||
const static Register savedRegs[NumSavedRegs];
|
||||
DECLARE_PLATFORM_ASSEMBLER()
|
||||
|
||||
private:
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "nanojit.h"
|
||||
#undef MEMORY_INFO
|
||||
|
||||
namespace nanojit
|
||||
{
|
||||
|
@ -58,16 +59,17 @@ namespace nanojit
|
|||
*/
|
||||
Fragmento::Fragmento(AvmCore* core, uint32_t cacheSizeLog2)
|
||||
: _allocList(core->GetGC()),
|
||||
_max_pages(1 << (calcSaneCacheSize(cacheSizeLog2) - NJ_LOG2_PAGE_SIZE))
|
||||
_max_pages(1 << (calcSaneCacheSize(cacheSizeLog2) - NJ_LOG2_PAGE_SIZE)),
|
||||
_pagesGrowth(1)
|
||||
{
|
||||
#ifdef MEMORY_INFO
|
||||
_allocList.set_meminfo_name("Fragmento._allocList");
|
||||
#endif
|
||||
NanoAssert(_max_pages > _pagesGrowth); // shrink growth if needed
|
||||
_core = core;
|
||||
GC *gc = core->GetGC();
|
||||
_frags = new (gc) FragmentMap(gc, 128);
|
||||
_assm = new (gc) nanojit::Assembler(this);
|
||||
_pageGrowth = 1;
|
||||
verbose_only( enterCounts = new (gc) BlockHist(gc); )
|
||||
verbose_only( mergeCounts = new (gc) BlockHist(gc); )
|
||||
}
|
||||
|
@ -109,10 +111,10 @@ namespace nanojit
|
|||
{
|
||||
NanoAssert(sizeof(Page) == NJ_PAGE_SIZE);
|
||||
if (!_pageList) {
|
||||
pagesGrow(_pageGrowth); // try to get more mem
|
||||
if ((_pageGrowth << 1) < _max_pages)
|
||||
_pageGrowth <<= 1;
|
||||
}
|
||||
pagesGrow(_pagesGrowth); // try to get more mem
|
||||
if ((_pagesGrowth << 1) < _max_pages)
|
||||
_pagesGrowth <<= 1;
|
||||
}
|
||||
Page *page = _pageList;
|
||||
if (page)
|
||||
{
|
||||
|
@ -221,7 +223,7 @@ namespace nanojit
|
|||
return _core;
|
||||
}
|
||||
|
||||
Fragment* Fragmento::newLoop(const void* ip)
|
||||
Fragment* Fragmento::getAnchor(const void* ip)
|
||||
{
|
||||
Fragment *f = newFrag(ip);
|
||||
Fragment *p = _frags->get(ip);
|
||||
|
@ -480,7 +482,7 @@ namespace nanojit
|
|||
{
|
||||
int c = hist->count(ip);
|
||||
if (_assm->_verbose)
|
||||
_assm->outputf("++ %s %d", core()->interp.labels->format(ip), c);
|
||||
_assm->outputf("++ %s %d", labels->format(ip), c);
|
||||
}
|
||||
|
||||
void Fragmento::countIL(uint32_t il, uint32_t abc)
|
||||
|
|
|
@ -54,7 +54,6 @@ namespace nanojit
|
|||
struct PageHeader
|
||||
{
|
||||
struct Page *next;
|
||||
verbose_only (int seq;) // sequence # of page
|
||||
};
|
||||
struct Page: public PageHeader
|
||||
{
|
||||
|
@ -101,8 +100,8 @@ namespace nanojit
|
|||
Page* pageAlloc();
|
||||
void pageFree(Page* page);
|
||||
|
||||
Fragment* newLoop(const void* ip);
|
||||
Fragment* getLoop(const void* ip);
|
||||
Fragment* getAnchor(const void* ip);
|
||||
void clearFrags(); // clear all fragments from the cache
|
||||
Fragment* getMerge(GuardRecord *lr, const void* ip);
|
||||
Fragment* createBranch(GuardRecord *lr, const void* ip);
|
||||
|
@ -145,13 +144,13 @@ namespace nanojit
|
|||
DWB(Assembler*) _assm;
|
||||
DWB(FragmentMap*) _frags; /* map from ip -> Fragment ptr */
|
||||
Page* _pageList;
|
||||
uint32_t _pageGrowth;
|
||||
|
||||
/* unmanaged mem */
|
||||
AllocList _allocList;
|
||||
GCHeap* _gcHeap;
|
||||
|
||||
const uint32_t _max_pages;
|
||||
uint32_t _pagesGrowth;
|
||||
};
|
||||
|
||||
enum TraceKind {
|
||||
|
@ -236,18 +235,5 @@ namespace nanojit
|
|||
int32_t _hits;
|
||||
Page* _pages; // native code pages
|
||||
};
|
||||
|
||||
#ifdef NJ_VERBOSE
|
||||
inline int nbr(LInsp x)
|
||||
{
|
||||
Page *p = x->page();
|
||||
return (p->seq * NJ_PAGE_SIZE + (intptr_t(x)-intptr_t(p))) / sizeof(LIns);
|
||||
}
|
||||
#else
|
||||
inline int nbr(LInsp x)
|
||||
{
|
||||
return (int)(intptr_t(x) & intptr_t(NJ_PAGE_SIZE-1));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif // __nanojit_Fragmento__
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -63,23 +63,34 @@ namespace nanojit
|
|||
LIR64 = 0x40, // result is double or quad
|
||||
|
||||
// special operations (must be 0..N)
|
||||
LIR_trace = 2,
|
||||
LIR_nearskip = 3, // must be LIR_skip-1 and lsb=1
|
||||
LIR_skip = 4,
|
||||
LIR_neartramp = 5, // must be LIR_tramp-1 and lsb=1
|
||||
LIR_tramp = 6,
|
||||
LIR_start = 0,
|
||||
LIR_nearskip = 1, // must be LIR_skip-1 and lsb=1
|
||||
LIR_skip = 2,
|
||||
LIR_neartramp = 3, // must be LIR_tramp-1 and lsb=1
|
||||
LIR_tramp = 4,
|
||||
|
||||
// non-pure operations
|
||||
LIR_addp = 9,
|
||||
LIR_param = 10,
|
||||
LIR_st = 11, // 32-bit store
|
||||
LIR_ld = 12, // 32-bit load
|
||||
LIR_alloc = 13, // alloca some stack space
|
||||
LIR_sti = 14,
|
||||
LIR_call = 18, // subrouting call returning a 32-bit value
|
||||
LIR_ret = 15,
|
||||
LIR_live = 16, // extend live range of reference
|
||||
LIR_calli = 17, // indirect call
|
||||
LIR_call = 18, // subroutine call returning a 32-bit value
|
||||
|
||||
// guards
|
||||
LIR_loop = 19, // loop fragment
|
||||
LIR_x = 20, // exit always
|
||||
|
||||
// branches
|
||||
LIR_j = 21, // jump always
|
||||
LIR_jt = 22, // jump true
|
||||
LIR_jf = 23, // jump false
|
||||
LIR_label = 24, // a jump target
|
||||
LIR_ji = 25, // jump indirect
|
||||
// operators
|
||||
|
||||
// LIR_feq though LIR_fge must only be used on float arguments. They
|
||||
|
@ -137,18 +148,25 @@ namespace nanojit
|
|||
LIR_ule = 62, // 0x3E 0011 1110
|
||||
LIR_uge = 63, // 0x3F 0011 1111
|
||||
|
||||
// non-64bit ops, but we're out of code space below 64
|
||||
LIR_file = 1 | LIR64,
|
||||
LIR_line = 2 | LIR64,
|
||||
|
||||
/**
|
||||
* 64bit operations
|
||||
*/
|
||||
LIR_stq = LIR_st | LIR64, // quad store
|
||||
LIR_stqi = LIR_sti | LIR64,
|
||||
LIR_fret = LIR_ret | LIR64,
|
||||
LIR_quad = LIR_int | LIR64, // quad constant value
|
||||
LIR_ldq = LIR_ld | LIR64, // quad load
|
||||
LIR_ldqc = LIR_ldc | LIR64,
|
||||
LIR_qiand = 24 | LIR64,
|
||||
LIR_qiadd = 25 | LIR64,
|
||||
LIR_qilsh = LIR_lsh | LIR64,
|
||||
|
||||
LIR_fcall = LIR_call | LIR64, // subroutine call returning quad
|
||||
LIR_fcalli = LIR_calli | LIR64,
|
||||
LIR_fneg = LIR_neg | LIR64, // floating-point numeric negation
|
||||
LIR_fadd = LIR_add | LIR64, // floating-point addition
|
||||
LIR_fsub = LIR_sub | LIR64, // floating-point subtraction
|
||||
|
@ -164,6 +182,7 @@ namespace nanojit
|
|||
|
||||
#if defined NANOJIT_64BIT
|
||||
#define LIR_ldp LIR_ldq
|
||||
#define LIR_stp LIR_stq
|
||||
#define LIR_piadd LIR_qiadd
|
||||
#define LIR_piand LIR_qiand
|
||||
#define LIR_pilsh LIR_qilsh
|
||||
|
@ -171,6 +190,7 @@ namespace nanojit
|
|||
#define LIR_pior LIR_qior
|
||||
#else
|
||||
#define LIR_ldp LIR_ld
|
||||
#define LIR_stp LIR_st
|
||||
#define LIR_piadd LIR_add
|
||||
#define LIR_piand LIR_and
|
||||
#define LIR_pilsh LIR_lsh
|
||||
|
@ -184,7 +204,71 @@ namespace nanojit
|
|||
|
||||
struct SideExit;
|
||||
struct Page;
|
||||
struct CallInfo;
|
||||
|
||||
// Calling-convention tags. The numeric values are spelled out because
// CallInfo stores an AbiKind in a 3-bit field and static CallInfo tables
// are initialized positionally.
enum AbiKind {
    ABI_FASTCALL = 0,
    ABI_THISCALL = 1,
    ABI_STDCALL  = 2,
    ABI_CDECL    = 3
};
|
||||
|
||||
// 2-bit codes describing one argument (or the return value) of a call;
// CallInfo::_argtypes packs several of these side by side.
// NOTE(review): F/LO/Q presumably mean float / 32-bit / quad -- confirm.
enum ArgSize {
    ARGSIZE_NONE      = 0x0,   // 00
    ARGSIZE_F         = 0x1,   // 01
    ARGSIZE_LO        = 0x2,   // 10
    ARGSIZE_Q         = 0x3,   // 11

    // Masks applied to a 2-bit code by the argument counters.
    _ARGSIZE_MASK_INT = 0x2,   // bit 1: set for ARGSIZE_LO and ARGSIZE_Q, clear for ARGSIZE_F
    _ARGSIZE_MASK_ANY = 0x3    // both bits: non-zero for every code except ARGSIZE_NONE
};
|
||||
|
||||
struct CallInfo
|
||||
{
|
||||
uintptr_t _address;
|
||||
uint32_t _argtypes:18; // 9 2-bit fields indicating arg type, by ARGSIZE above (including ret type): a1 a2 a3 a4 a5 ret
|
||||
uint8_t _cse:1; // true if no side effects
|
||||
uint8_t _fold:1; // true if no side effects
|
||||
AbiKind _abi:3;
|
||||
verbose_only ( const char* _name; )
|
||||
|
||||
uint32_t FASTCALL _count_args(uint32_t mask) const;
|
||||
uint32_t get_sizes(ArgSize*) const;
|
||||
|
||||
inline bool isInterface() const {
|
||||
return _address == 2 || _address == 3; /* hack! */
|
||||
}
|
||||
inline bool isIndirect() const {
|
||||
return _address < 256;
|
||||
}
|
||||
inline uint32_t FASTCALL count_args() const {
|
||||
return _count_args(_ARGSIZE_MASK_ANY) + isIndirect();
|
||||
}
|
||||
inline uint32_t FASTCALL count_iargs() const {
|
||||
return _count_args(_ARGSIZE_MASK_INT);
|
||||
}
|
||||
// fargs = args - iargs
|
||||
};
|
||||
|
||||
// True for the four guard opcodes: LIR_x, LIR_xf, LIR_xt and LIR_loop.
inline bool isGuard(LOpcode op) {
    if (op == LIR_x || op == LIR_xf)
        return true;
    return op == LIR_xt || op == LIR_loop;
}
|
||||
|
||||
// True for direct and indirect calls. The LIR64 bit is stripped first, so
// the 64-bit forms (LIR_call | LIR64, LIR_calli | LIR64) match as well.
inline bool isCall(LOpcode op) {
    const LOpcode narrow = LOpcode(op & ~LIR64);
    if (narrow == LIR_call)
        return true;
    return narrow == LIR_calli;
}
|
||||
|
||||
// True for LIR_st and LIR_sti. The LIR64 bit is stripped first, so the quad
// stores (LIR_st | LIR64, LIR_sti | LIR64) match as well.
inline bool isStore(LOpcode op) {
    const LOpcode narrow = LOpcode(op & ~LIR64);
    if (narrow == LIR_st)
        return true;
    return narrow == LIR_sti;
}
|
||||
|
||||
// True when `op`, with its lowest bit cleared, equals LIR_short -- i.e. for
// LIR_short itself and for the opcode numbered LIR_short | 1.
// NOTE(review): that neighbour is presumably LIR_int -- confirm in the enum.
inline bool isConst(LOpcode op) {
    const LOpcode lowBitCleared = LOpcode(op & ~1);
    return lowBitCleared == LIR_short;
}
|
||||
|
||||
// True for the four load opcodes: LIR_ld, LIR_ldc and their LIR64
// counterparts LIR_ldq, LIR_ldqc.
inline bool isLoad(LOpcode op) {
    const bool narrowLoad = (op == LIR_ld) || (op == LIR_ldc);
    const bool quadLoad = (op == LIR_ldq) || (op == LIR_ldqc);
    return quadLoad || narrowLoad;
}
|
||||
|
||||
// Low-level Instruction 4B
|
||||
// had to lay it out as a union with duplicate code fields since msvc couldn't figure out how to compact it otherwise.
|
||||
|
@ -290,7 +374,9 @@ namespace nanojit
|
|||
|
||||
inline LOpcode opcode() const { return u.code; }
|
||||
inline uint8_t imm8() const { return c.imm8a; }
|
||||
inline uint8_t imm8b() const { return c.imm8b; }
|
||||
inline int16_t imm16() const { return i.imm16; }
|
||||
inline int32_t imm24() const { return t.imm24; }
|
||||
inline LIns* ref() const {
|
||||
#if defined NANOJIT_64BIT
|
||||
return (t.code & 1) ? (LIns*)this+t.imm24 : *(LIns**)(this-2);
|
||||
|
@ -302,6 +388,14 @@ namespace nanojit
|
|||
inline uint8_t resv() const { return g.resv; }
|
||||
void* payload() const;
|
||||
inline Page* page() { return (Page*) alignTo(this,NJ_PAGE_SIZE); }
|
||||
inline int32_t size() const {
|
||||
NanoAssert(isop(LIR_alloc));
|
||||
return i.imm16<<2;
|
||||
}
|
||||
// Records the byte size of a LIR_alloc instruction. `bytes` must be a
// multiple of 4; it is stored in imm16 as a word count (bytes / 4), which
// must satisfy isU16.
inline void setSize(int32_t bytes) {
    const int32_t words = bytes >> 2;
    NanoAssert(isop(LIR_alloc) && (bytes&3)==0 && isU16(words));
    i.imm16 = words;
}
|
||||
|
||||
// index args in r-l order. arg(0) is rightmost arg
|
||||
inline LIns* arg(uint32_t i) {
|
||||
|
@ -375,12 +469,12 @@ namespace nanojit
|
|||
bool isQuad() const;
|
||||
bool isCond() const;
|
||||
bool isCmp() const;
|
||||
bool isCall() const;
|
||||
bool isStore() const;
|
||||
bool isLoad() const;
|
||||
bool isGuard() const;
|
||||
bool isCall() const { return nanojit::isCall(u.code); }
|
||||
bool isStore() const { return nanojit::isStore(u.code); }
|
||||
bool isLoad() const { return nanojit::isLoad(u.code); }
|
||||
bool isGuard() const { return nanojit::isGuard(u.code); }
|
||||
// True if the instruction is a 32-bit or smaller constant integer.
|
||||
bool isconst() const;
|
||||
bool isconst() const { return nanojit::isConst(u.code); }
|
||||
// True if the instruction is a 32-bit or smaller constant integer and
|
||||
// has the value val when treated as a 32-bit signed integer.
|
||||
bool isconstval(int32_t val) const;
|
||||
|
@ -391,10 +485,13 @@ namespace nanojit
|
|||
bool isTramp() {
|
||||
return isop(LIR_neartramp) || isop(LIR_tramp);
|
||||
}
|
||||
|
||||
bool isBranch() const {
|
||||
return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j);
|
||||
}
|
||||
// Set the imm16 member. Should only be used on instructions that use
|
||||
// that. If you're not sure, you shouldn't be calling it.
|
||||
void setimm16(int32_t i);
|
||||
void setimm24(int32_t x);
|
||||
// Set the resv member. Should only be used on instructions that use
|
||||
// that. If you're not sure, you shouldn't be calling it.
|
||||
void setresv(uint32_t resv);
|
||||
|
@ -405,6 +502,9 @@ namespace nanojit
|
|||
void setOprnd2(LIns*);
|
||||
void setOprnd3(LIns*);
|
||||
void setDisp(int8_t d);
|
||||
void target(LIns* t);
|
||||
LIns **targetAddr();
|
||||
LIns* getTarget();
|
||||
|
||||
SideExit *exit();
|
||||
|
||||
|
@ -424,19 +524,21 @@ namespace nanojit
|
|||
bool FASTCALL isCse(LOpcode v);
|
||||
bool FASTCALL isCmp(LOpcode v);
|
||||
bool FASTCALL isCond(LOpcode v);
|
||||
// True for LIR_ret and for its LIR64 counterpart (LIR_ret | LIR64).
inline bool isRet(LOpcode c) {
    const LOpcode narrow = LOpcode(c & ~LIR64);
    return narrow == LIR_ret;
}
|
||||
bool FASTCALL isFloat(LOpcode v);
|
||||
LIns* FASTCALL callArgN(LInsp i, uint32_t n);
|
||||
extern const uint8_t operandCount[];
|
||||
|
||||
class Fragmento; // @todo remove this ; needed for minbuild for some reason?!? Should not be compiling this code at all
|
||||
class LirFilter;
|
||||
struct CallInfo;
|
||||
|
||||
// make it a GCObject so we can explicitly delete it early
|
||||
class LirWriter : public GCObject
|
||||
{
|
||||
public:
|
||||
LirWriter *out;
|
||||
public:
|
||||
const CallInfo *_functions;
|
||||
|
||||
virtual ~LirWriter() {}
|
||||
|
@ -455,8 +557,13 @@ namespace nanojit
|
|||
// Default behaviour: pass the guard straight through to the next writer
// in the chain and hand back whatever it produced.
virtual LInsp insGuard(LOpcode v, LIns *c, SideExit *x) {
    LInsp emitted = out->insGuard(v, c, x);
    return emitted;
}
|
||||
virtual LInsp insParam(int32_t i) {
|
||||
return out->insParam(i);
|
||||
virtual LInsp insBranch(LOpcode v, LInsp condition, LInsp to) {
|
||||
return out->insBranch(v, condition, to);
|
||||
}
|
||||
// arg: 0=first, 1=second, ...
|
||||
// kind: 0=arg 1=saved-reg
|
||||
virtual LInsp insParam(int32_t arg, int32_t kind) {
|
||||
return out->insParam(arg, kind);
|
||||
}
|
||||
virtual LInsp insImm(int32_t imm) {
|
||||
return out->insImm(imm);
|
||||
|
@ -477,10 +584,14 @@ namespace nanojit
|
|||
// Default behaviour: forward the call (descriptor plus argument array)
// unchanged to the next writer in the chain.
virtual LInsp insCall(const CallInfo *call, LInsp args[]) {
    LInsp emitted = out->insCall(call, args);
    return emitted;
}
|
||||
// Default behaviour: forward the allocation request to the next writer.
// NOTE(review): `size` is presumably in bytes (LIns::setSize takes bytes) -- confirm.
virtual LInsp insAlloc(int32_t size) {
    LInsp emitted = out->insAlloc(size);
    return emitted;
}
|
||||
|
||||
// convenience
|
||||
LIns* insLoadi(LIns *base, int disp);
|
||||
LIns* insLoad(LOpcode op, LIns *base, int disp);
|
||||
LIns* store(LIns* value, LIns* base, int32_t d);
|
||||
// Inserts a conditional to execute and branches to execute if
|
||||
// the condition is true and false respectively.
|
||||
LIns* ins_choose(LIns* cond, LIns* iftrue, LIns* iffalse);
|
||||
|
@ -491,6 +602,7 @@ namespace nanojit
|
|||
LIns* ins2i(LOpcode op, LIns *oprnd1, int32_t);
|
||||
LIns* qjoin(LInsp lo, LInsp hi);
|
||||
LIns* insImmPtr(const void *ptr);
|
||||
LIns* insImmf(double f);
|
||||
};
|
||||
|
||||
#ifdef NJ_VERBOSE
|
||||
|
@ -516,8 +628,8 @@ namespace nanojit
|
|||
char buf[1000], *end;
|
||||
void formatAddr(const void *p, char *buf);
|
||||
public:
|
||||
AvmCore *core;
|
||||
LabelMap(AvmCore *, LabelMap* parent);
|
||||
avmplus::AvmCore *core;
|
||||
LabelMap(avmplus::AvmCore *, LabelMap* parent);
|
||||
~LabelMap();
|
||||
void add(const void *p, size_t size, size_t align, const char *name);
|
||||
void add(const void *p, size_t size, size_t align, avmplus::String*);
|
||||
|
@ -579,50 +691,63 @@ namespace nanojit
|
|||
class VerboseWriter : public LirWriter
|
||||
{
|
||||
avmplus::List<LInsp, avmplus::LIST_NonGCObjects> code;
|
||||
LirNameMap *names;
|
||||
DWB(LirNameMap*) names;
|
||||
public:
|
||||
VerboseWriter(GC *gc, LirWriter *out, LirNameMap* names)
|
||||
: LirWriter(out), code(gc), names(names)
|
||||
{}
|
||||
|
||||
LInsp add(LInsp i) {
|
||||
code.add(i);
|
||||
if (i)
|
||||
code.add(i);
|
||||
return i;
|
||||
}
|
||||
|
||||
LInsp add_flush(LInsp i) {
|
||||
if ((i = add(i)) != 0)
|
||||
flush();
|
||||
return i;
|
||||
}
|
||||
|
||||
void flush()
|
||||
{
|
||||
for (int j=0, n=code.size(); j < n; j++)
|
||||
printf(" %s\n",names->formatIns(code[j]));
|
||||
code.clear();
|
||||
printf("\n");
|
||||
int n = code.size();
|
||||
if (n) {
|
||||
for (int i=0; i < n; i++)
|
||||
printf(" %s\n",names->formatIns(code[i]));
|
||||
code.clear();
|
||||
if (n > 1)
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
LIns* insGuard(LOpcode op, LInsp cond, SideExit *x) {
|
||||
LInsp i = add(out->insGuard(op,cond,x));
|
||||
if (i)
|
||||
flush();
|
||||
return i;
|
||||
return add_flush(out->insGuard(op,cond,x));
|
||||
}
|
||||
|
||||
LIns* insBranch(LOpcode v, LInsp condition, LInsp to) {
|
||||
return add_flush(out->insBranch(v, condition, to));
|
||||
}
|
||||
|
||||
|
||||
LIns* ins0(LOpcode v) {
|
||||
LInsp i = add(out->ins0(v));
|
||||
if (i)
|
||||
flush();
|
||||
return i;
|
||||
if (v == LIR_label || v == LIR_start) {
|
||||
flush();
|
||||
}
|
||||
return add(out->ins0(v));
|
||||
}
|
||||
|
||||
LIns* ins1(LOpcode v, LInsp a) {
|
||||
return add(out->ins1(v, a));
|
||||
return isRet(v) ? add_flush(out->ins1(v, a)) : add(out->ins1(v, a));
|
||||
}
|
||||
LIns* ins2(LOpcode v, LInsp a, LInsp b) {
|
||||
return v == LIR_2 ? out->ins2(v,a,b) : add(out->ins2(v, a, b));
|
||||
}
|
||||
LIns* insCall(const CallInfo *call, LInsp args[]) {
|
||||
return add(out->insCall(call, args));
|
||||
return add_flush(out->insCall(call, args));
|
||||
}
|
||||
LIns* insParam(int32_t i) {
|
||||
return add(out->insParam(i));
|
||||
LIns* insParam(int32_t i, int32_t kind) {
|
||||
return add(out->insParam(i, kind));
|
||||
}
|
||||
LIns* insLoad(LOpcode v, LInsp base, LInsp disp) {
|
||||
return add(out->insLoad(v, base, disp));
|
||||
|
@ -633,6 +758,9 @@ namespace nanojit
|
|||
LIns* insStorei(LInsp v, LInsp b, int32_t d) {
|
||||
return add(out->insStorei(v, b, d));
|
||||
}
|
||||
LIns* insAlloc(int32_t size) {
|
||||
return add(out->insAlloc(size));
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -643,7 +771,8 @@ namespace nanojit
|
|||
ExprFilter(LirWriter *out) : LirWriter(out) {}
|
||||
LIns* ins1(LOpcode v, LIns* a);
|
||||
LIns* ins2(LOpcode v, LIns* a, LIns* b);
|
||||
LIns* insGuard(LOpcode v, LIns *c, SideExit *x);
|
||||
LIns* insGuard(LOpcode, LIns *cond, SideExit *);
|
||||
LIns* insBranch(LOpcode, LIns *cond, LIns *target);
|
||||
};
|
||||
|
||||
// @todo, this could be replaced by a generic HashMap or HashSet, if we had one
|
||||
|
@ -652,14 +781,14 @@ namespace nanojit
|
|||
// must be a power of 2.
|
||||
// don't start too small, or we'll waste time growing and rehashing.
|
||||
// don't start too large, will waste memory.
|
||||
static const uint32_t kInitialCap = 2048;
|
||||
static const uint32_t kInitialCap = 64;
|
||||
|
||||
InsList m_list;
|
||||
uint32_t m_used;
|
||||
LInsp *m_list; // explicit WB's are used, no DWB needed.
|
||||
uint32_t m_used, m_cap;
|
||||
GC* m_gc;
|
||||
|
||||
static uint32_t FASTCALL hashcode(LInsp i);
|
||||
uint32_t FASTCALL find(LInsp name, uint32_t hash, const InsList& list, uint32_t cap);
|
||||
uint32_t FASTCALL find(LInsp name, uint32_t hash, const LInsp *list, uint32_t cap);
|
||||
static bool FASTCALL equals(LInsp a, LInsp b);
|
||||
void FASTCALL grow();
|
||||
|
||||
|
@ -673,6 +802,7 @@ namespace nanojit
|
|||
LInsp findcall(const CallInfo *call, uint32_t argc, LInsp args[], uint32_t &i);
|
||||
LInsp add(LInsp i, uint32_t k);
|
||||
void replace(LInsp i);
|
||||
void clear();
|
||||
|
||||
static uint32_t FASTCALL hashimm(int32_t);
|
||||
static uint32_t FASTCALL hashimmq(uint64_t);
|
||||
|
@ -695,7 +825,6 @@ namespace nanojit
|
|||
LIns* insGuard(LOpcode op, LInsp cond, SideExit *x);
|
||||
};
|
||||
|
||||
struct Page;
|
||||
class LirBuffer : public GCFinalizedObject
|
||||
{
|
||||
public:
|
||||
|
@ -704,13 +833,13 @@ namespace nanojit
|
|||
virtual ~LirBuffer();
|
||||
void clear();
|
||||
LInsp next();
|
||||
LInsp commit(uint32_t count);
|
||||
bool addPage();
|
||||
bool outOmem() { return _noMem != 0; }
|
||||
debug_only (void validate() const;)
|
||||
|
||||
debug_only (void validate() const;)
|
||||
verbose_only(DWB(LirNameMap*) names;)
|
||||
verbose_only(int insCount();)
|
||||
verbose_only(int byteCount();)
|
||||
|
||||
int32_t insCount();
|
||||
int32_t byteCount();
|
||||
|
||||
// stats
|
||||
struct
|
||||
|
@ -721,14 +850,20 @@ namespace nanojit
|
|||
_stats;
|
||||
|
||||
const CallInfo* _functions;
|
||||
AbiKind abi;
|
||||
LInsp state,param1,sp,rp;
|
||||
LInsp savedParams[NumSavedRegs];
|
||||
|
||||
private:
|
||||
protected:
|
||||
friend class LirBufWriter;
|
||||
|
||||
LInsp commit(uint32_t count);
|
||||
bool addPage();
|
||||
Page* pageAlloc();
|
||||
|
||||
Page* _start; // first page
|
||||
LInsp _unused; // next unused instruction slot
|
||||
int _noMem; // set if ran out of memory when writing to buffer
|
||||
Page* _start; // first page
|
||||
LInsp _unused; // next unused instruction slot
|
||||
int _noMem; // set if ran out of memory when writing to buffer
|
||||
};
|
||||
|
||||
class LirBufWriter : public LirWriter
|
||||
|
@ -749,17 +884,24 @@ namespace nanojit
|
|||
LInsp ins0(LOpcode op);
|
||||
LInsp ins1(LOpcode op, LInsp o1);
|
||||
LInsp ins2(LOpcode op, LInsp o1, LInsp o2);
|
||||
LInsp insParam(int32_t i);
|
||||
LInsp insParam(int32_t i, int32_t kind);
|
||||
LInsp insImm(int32_t imm);
|
||||
LInsp insImmq(uint64_t imm);
|
||||
LInsp insCall(const CallInfo *call, LInsp args[]);
|
||||
LInsp insGuard(LOpcode op, LInsp cond, SideExit *x);
|
||||
LInsp insBranch(LOpcode v, LInsp condition, LInsp to);
|
||||
LInsp insAlloc(int32_t size);
|
||||
|
||||
// buffer mgmt
|
||||
LInsp skip(size_t);
|
||||
|
||||
protected:
|
||||
LInsp insFar(LOpcode op, LInsp target);
|
||||
LInsp insLink(LOpcode op, LInsp target);
|
||||
LInsp ensureReferenceable(LInsp i, int32_t addedDistance);
|
||||
bool ensureRoom(uint32_t count);
|
||||
bool can8bReach(LInsp from, LInsp to) { return isU8(from-to-1); }
|
||||
bool can24bReach(LInsp from, LInsp to){ return isS24(from-to); }
|
||||
bool canReference(LInsp from, LInsp to) {
|
||||
return isU8(from-to-1);
|
||||
}
|
||||
|
@ -795,24 +937,27 @@ namespace nanojit
|
|||
LInsp pos() {
|
||||
return _i;
|
||||
}
|
||||
void setpos(LIns *i) {
|
||||
_i = i;
|
||||
}
|
||||
};
|
||||
|
||||
class Assembler;
|
||||
|
||||
void compile(Assembler *assm, Fragment *frag);
|
||||
verbose_only( void printTracker(const char* s, avmplus::RegionTracker& trk, Assembler* assm); )
|
||||
verbose_only(void live(GC *gc, Assembler *assm, Fragment *frag);)
|
||||
verbose_only(void live(GC *gc, LirBuffer *lirbuf);)
|
||||
|
||||
class StackFilter: public LirFilter
|
||||
{
|
||||
GC *gc;
|
||||
Fragment *frag;
|
||||
LirBuffer *lirbuf;
|
||||
LInsp sp;
|
||||
avmplus::BitSet stk;
|
||||
int top;
|
||||
int getTop(LInsp guard);
|
||||
int getTop(LInsp br);
|
||||
public:
|
||||
StackFilter(LirFilter *in, GC *gc, Fragment *frag, LInsp sp);
|
||||
StackFilter(LirFilter *in, GC *gc, LirBuffer *lirbuf, LInsp sp);
|
||||
virtual ~StackFilter() {}
|
||||
LInsp read();
|
||||
};
|
||||
|
@ -825,5 +970,23 @@ namespace nanojit
|
|||
CseReader(LirFilter *in, LInsHashSet *exprs, const CallInfo*);
|
||||
LInsp read();
|
||||
};
|
||||
|
||||
// eliminate redundant loads by watching for stores & mutator calls
|
||||
class LoadFilter: public LirWriter
|
||||
{
|
||||
public:
|
||||
LInsp sp, rp;
|
||||
LInsHashSet exprs;
|
||||
void clear(LInsp p);
|
||||
public:
|
||||
LoadFilter(LirWriter *out, GC *gc)
|
||||
: LirWriter(out), exprs(gc) { }
|
||||
|
||||
LInsp ins0(LOpcode);
|
||||
LInsp insLoad(LOpcode, LInsp base, LInsp disp);
|
||||
LInsp insStore(LInsp v, LInsp b, LInsp d);
|
||||
LInsp insStorei(LInsp v, LInsp b, int32_t d);
|
||||
LInsp insCall(const CallInfo *call, LInsp args[]);
|
||||
};
|
||||
}
|
||||
#endif // __nanojit_LIR__
|
||||
|
|
|
@ -109,6 +109,7 @@ namespace nanojit
|
|||
typedef int RegisterMask;
|
||||
|
||||
/* RBX, R13-R15 */
|
||||
static const int NumSavedRegs = 3;
|
||||
static const RegisterMask SavedRegs = /*(1<<RBX) |*/ /*(1<<R12) |*/ (1<<R13) | (1<<R14) | (1<<R15);
|
||||
/* RAX, RCX, RDX, RDI, RSI, R8-R11 */
|
||||
static const RegisterMask TempRegs = (1<<RAX) | (1<<RCX) | (1<<RDX) | (1<<R8) | (1<<R9) | (1<<R10) | (1<<R11) | (1<<RDI) | (1<<RSI);
|
||||
|
|
|
@ -157,6 +157,7 @@ static const RegisterMask SavedFpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 |
|
|||
#else
|
||||
static const RegisterMask SavedFpRegs = 0;
|
||||
#endif
|
||||
static const int NumSavedRegs = 7;
|
||||
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10 | SavedFpRegs;
|
||||
static const RegisterMask FpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6; // no D7; S14-S15 are used for i2f/u2f.
|
||||
static const RegisterMask GpRegs = 0x07FF;
|
||||
|
|
|
@ -61,6 +61,12 @@ namespace nanojit
|
|||
const Register Assembler::argRegs[] = { R0, R1, R2, R3 };
|
||||
const Register Assembler::retRegs[] = { R0, R1 };
|
||||
|
||||
#ifdef NJ_THUMB_JIT
|
||||
const Register Assembler::savedRegs[] = { R4, R5, R6, R7 };
|
||||
#else
|
||||
const Register Assembler::savedRegs[] = { R4, R5, R6, R7, R8, R9, R10 };
|
||||
#endif
|
||||
|
||||
void Assembler::nInit(AvmCore*)
|
||||
{
|
||||
// Thumb mode does not have conditional move, alas
|
||||
|
@ -269,7 +275,7 @@ namespace nanojit
|
|||
else if (op == LIR_callh)
|
||||
prefer = rmask(R1);
|
||||
else if (op == LIR_param)
|
||||
prefer = rmask(imm2register(i->imm8()));
|
||||
prefer = rmask(imm2register(argRegs[i->imm8()]));
|
||||
|
||||
if (_allocator.free & allow & prefer)
|
||||
allow &= prefer;
|
||||
|
|
|
@ -101,6 +101,7 @@ namespace nanojit
|
|||
}
|
||||
FragInfo;
|
||||
|
||||
static const int NumSavedRegs = 4;
|
||||
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7;
|
||||
static const RegisterMask FpRegs = 0x0000; // FST0-FST7
|
||||
static const RegisterMask GpRegs = 0x003F;
|
||||
|
|
|
@ -71,6 +71,7 @@ namespace nanojit
|
|||
#if defined NANOJIT_IA32
|
||||
const Register Assembler::argRegs[] = { ECX, EDX };
|
||||
const Register Assembler::retRegs[] = { EAX, EDX };
|
||||
const Register Assembler::savedRegs[] = { EBX, ESI, EDI };
|
||||
#elif defined NANOJIT_AMD64
|
||||
#if defined WIN64
|
||||
const Register Assembler::argRegs[] = { R8, R9, RCX, RDX };
|
||||
|
@ -78,8 +79,17 @@ namespace nanojit
|
|||
const Register Assembler::argRegs[] = { RDI, RSI, RDX, RCX, R8, R9 };
|
||||
#endif
|
||||
const Register Assembler::retRegs[] = { RAX, RDX };
|
||||
const Register Assembler::savedRegs[] = { R13, R14, R15 };
|
||||
#endif
|
||||
|
||||
// How many leading integer arguments each calling convention passes in
// registers. Indexed by the call's ABI kind, in the order labelled below.
static const uint8_t max_abi_regs[] = {
    /* ABI_FASTCALL */ 2,
    /* ABI_THISCALL */ 1,
    /* ABI_STDCALL  */ 0,
    /* ABI_CDECL    */ 0
};
|
||||
|
||||
|
||||
void Assembler::nInit(AvmCore* core)
|
||||
{
|
||||
OSDep::getDate();
|
||||
|
@ -89,21 +99,16 @@ namespace nanojit
|
|||
#endif
|
||||
}
|
||||
|
||||
NIns* Assembler::genPrologue(RegisterMask needSaving)
|
||||
NIns* Assembler::genPrologue()
|
||||
{
|
||||
/**
|
||||
* Prologue
|
||||
*/
|
||||
uint32_t stackNeeded = STACK_GRANULARITY * _activation.highwatermark;
|
||||
uint32_t savingCount = 0;
|
||||
|
||||
for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
|
||||
if (needSaving&rmask(i))
|
||||
savingCount++;
|
||||
|
||||
// After forcing alignment, we've pushed the pre-alignment SP
|
||||
// and savingCount registers.
|
||||
uint32_t stackPushed = STACK_GRANULARITY * (1+savingCount);
|
||||
uint32_t stackPushed =
|
||||
STACK_GRANULARITY + // returnaddr
|
||||
STACK_GRANULARITY; // ebp
|
||||
uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
|
||||
uint32_t amt = aligned - stackPushed;
|
||||
|
||||
|
@ -122,37 +127,37 @@ namespace nanojit
|
|||
verbose_only( verbose_output(" patch entry:"); )
|
||||
NIns *patchEntry = _nIns;
|
||||
MR(FP, SP); // Establish our own FP.
|
||||
PUSHr(FP); // Save caller's FP.
|
||||
|
||||
// Save pre-alignment SP value here, where the FP will point,
|
||||
// to preserve the illusion of a valid frame chain for
|
||||
// functions like MMgc::GetStackTrace. The 'return address'
|
||||
// of this 'frame' will be the last-saved register, but that's
|
||||
// fine, because the next-older frame will be legit.
|
||||
PUSHr(FP);
|
||||
|
||||
for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
|
||||
if (needSaving&rmask(i))
|
||||
PUSHr(i);
|
||||
|
||||
// We'd like to be able to use SSE instructions like MOVDQA on
|
||||
// stack slots; it requires 16B alignment. Darwin requires a
|
||||
// 16B stack alignment, and Linux GCC seems to intend to
|
||||
// establish and preserve the same, but we're told that GCC
|
||||
// has not always done this right. To avoid doubt, do it on
|
||||
// all platforms. The prologue runs only when we enter
|
||||
// fragments from the interpreter, so forcing 16B alignment
|
||||
// here is cheap.
|
||||
#if defined NANOJIT_IA32
|
||||
ANDi(SP, -NJ_ALIGN_STACK);
|
||||
#elif defined NANOJIT_AMD64
|
||||
ANDQi(SP, -NJ_ALIGN_STACK);
|
||||
#endif
|
||||
MR(FP,SP);
|
||||
PUSHr(FP); // Save caller's FP.
|
||||
// align the entry point
|
||||
asm_align_code();
|
||||
|
||||
return patchEntry;
|
||||
}
|
||||
|
||||
void Assembler::asm_align_code() {
|
||||
static char nop[][9] = {
|
||||
{0x90},
|
||||
{0x66,0x90},
|
||||
{0x0f,0x1f,0x00},
|
||||
{0x0f,0x1f,0x40,0x00},
|
||||
{0x0f,0x1f,0x44,0x00,0x00},
|
||||
{0x66,0x0f,0x1f,0x44,0x00,0x00},
|
||||
{0x0f,0x1f,0x80,0x00,0x00,0x00,0x00},
|
||||
{0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00},
|
||||
{0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00},
|
||||
};
|
||||
unsigned n;
|
||||
while((n = uintptr_t(_nIns) & 15) != 0) {
|
||||
if (n > 9)
|
||||
n = 9;
|
||||
underrunProtect(n);
|
||||
_nIns -= n;
|
||||
memcpy(_nIns, nop[n-1], n);
|
||||
asm_output1("nop%d", n);
|
||||
}
|
||||
}
|
||||
|
||||
void Assembler::nFragExit(LInsp guard)
|
||||
{
|
||||
SideExit *exit = guard->exit();
|
||||
|
@ -191,7 +196,6 @@ namespace nanojit
|
|||
// first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue
|
||||
MR(SP,FP);
|
||||
|
||||
|
||||
#ifdef NJ_VERBOSE
|
||||
if (_frago->core()->config.show_stats) {
|
||||
// load EDX (arg1) with Fragment *fromFrag, target fragment
|
||||
|
@ -213,17 +217,11 @@ namespace nanojit
|
|||
#endif
|
||||
}
|
||||
|
||||
NIns *Assembler::genEpilogue(RegisterMask restore)
|
||||
NIns *Assembler::genEpilogue()
|
||||
{
|
||||
RET();
|
||||
POPr(FP); // Restore caller's FP.
|
||||
MR(SP,FP); // Undo forced alignment.
|
||||
|
||||
// Restore saved registers.
|
||||
for (Register i=UnknownReg; i >= FirstReg; i = prevreg(i))
|
||||
if (restore&rmask(i)) { POPr(i); }
|
||||
|
||||
POPr(FP); // Pop the pre-alignment SP.
|
||||
MR(SP,FP); // pop the stack frame
|
||||
return _nIns;
|
||||
}
|
||||
|
||||
|
@ -232,75 +230,92 @@ namespace nanojit
|
|||
{
|
||||
const CallInfo* call = ins->callInfo();
|
||||
// must be signed, not unsigned
|
||||
const uint32_t iargs = call->count_iargs();
|
||||
int32_t fstack = call->count_args() - iargs;
|
||||
uint32_t iargs = call->count_iargs();
|
||||
int32_t fargs = call->count_args() - iargs - call->isIndirect();
|
||||
|
||||
bool imt = call->isInterface();
|
||||
if (imt)
|
||||
iargs --;
|
||||
|
||||
uint32_t max_regs = max_abi_regs[call->_abi];
|
||||
if (max_regs > iargs)
|
||||
max_regs = iargs;
|
||||
|
||||
int32_t istack = iargs-max_regs; // first 2 4B args are in registers
|
||||
int32_t extra = 0;
|
||||
const int32_t pushsize = 4*istack + 8*fargs; // actual stack space used
|
||||
|
||||
#if defined NJ_NO_FASTCALL
|
||||
int32_t istack = iargs;
|
||||
#if _MSC_VER
|
||||
// msc is slack, and MIR doesn't do anything extra, so let's use this
|
||||
// call-site alignment to at least have code size parity with MIR.
|
||||
uint32_t align = 4;//NJ_ALIGN_STACK;
|
||||
#else
|
||||
int32_t istack = iargs-2; // first 2 4B args are in registers
|
||||
if (istack <= 0)
|
||||
{
|
||||
istack = 0;
|
||||
}
|
||||
uint32_t align = NJ_ALIGN_STACK;
|
||||
#endif
|
||||
|
||||
const int32_t size = 4*istack + 8*fstack; // actual stack space used
|
||||
if (size) {
|
||||
if (pushsize) {
|
||||
// stack re-alignment
|
||||
// only pop our adjustment amount since callee pops args in FASTCALL mode
|
||||
extra = alignUp(size, NJ_ALIGN_STACK) - (size);
|
||||
#ifndef NJ_NO_FASTCALL
|
||||
if (extra > 0)
|
||||
{
|
||||
extra = alignUp(pushsize, align) - pushsize;
|
||||
if (call->_abi == ABI_CDECL) {
|
||||
// with CDECL only, caller pops args
|
||||
ADDi(SP, extra+pushsize);
|
||||
} else if (extra > 0) {
|
||||
ADDi(SP, extra);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef NJ_NO_FASTCALL
|
||||
// In C calling conventions, callee doesn't pop args.
|
||||
ADDi(SP, 4*iargs + 8*fstack + extra);
|
||||
#endif
|
||||
|
||||
CALL(call);
|
||||
|
||||
#ifdef NJ_NO_FASTCALL
|
||||
if (iargs >= 1) {
|
||||
PUSHr(ECX);
|
||||
if (iargs >= 2) {
|
||||
PUSHr(EDX);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
bool indirect = false;
|
||||
if (ins->isop(LIR_call) || ins->isop(LIR_fcall)) {
|
||||
verbose_only(if (_verbose)
|
||||
outputf(" %p:", _nIns);
|
||||
)
|
||||
CALL(call);
|
||||
}
|
||||
else {
|
||||
// indirect call. x86 Calling conventions don't use EAX as an
|
||||
// argument, and do use EAX as a return value. We need a register
|
||||
// for the address to call, so we use EAX since it will always be
|
||||
// available
|
||||
NanoAssert(ins->isop(LIR_calli) || ins->isop(LIR_fcalli));
|
||||
CALLr(call, EAX);
|
||||
indirect = true;
|
||||
}
|
||||
|
||||
// make sure fpu stack is empty before call (restoreCallerSaved)
|
||||
NanoAssert(_allocator.isFree(FST0));
|
||||
// note: this code requires that ref arguments (ARGSIZE_Q)
|
||||
// be one of the first two arguments
|
||||
// pre-assign registers to the first 2 4B args
|
||||
const int max_regs = (iargs < 2) ? iargs : 2;
|
||||
int n = 0;
|
||||
// pre-assign registers to the first N 4B args based on the calling convention
|
||||
uint32_t n = 0;
|
||||
|
||||
ArgSize sizes[10];
|
||||
ArgSize sizes[2*MAXARGS];
|
||||
uint32_t argc = call->get_sizes(sizes);
|
||||
if (indirect) {
|
||||
argc--;
|
||||
asm_arg(ARGSIZE_LO, ins->arg(argc), EAX);
|
||||
}
|
||||
|
||||
if (imt) {
|
||||
// interface thunk calling convention: put iid in EDX
|
||||
NanoAssert(call->_abi == ABI_CDECL);
|
||||
argc--;
|
||||
asm_arg(ARGSIZE_LO, ins->arg(argc), EDX);
|
||||
}
|
||||
|
||||
for(uint32_t i=0; i < argc; i++)
|
||||
{
|
||||
uint32_t j = argc-i-1;
|
||||
ArgSize sz = sizes[j];
|
||||
Register r = UnknownReg;
|
||||
if (n < max_regs && sz != ARGSIZE_F)
|
||||
r = argRegs[n++]; // tell asm_arg what reg to use
|
||||
if (n < max_regs && sz != ARGSIZE_F) {
|
||||
r = argRegs[n++]; // tell asm_arg what reg to use
|
||||
}
|
||||
asm_arg(sz, ins->arg(j), r);
|
||||
}
|
||||
|
||||
if (extra > 0)
|
||||
{
|
||||
SUBi(SP, extra);
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined NANOJIT_AMD64
|
||||
|
@ -310,7 +325,7 @@ namespace nanojit
|
|||
Register fpu_reg = XMM0;
|
||||
const CallInfo* call = ins->callInfo();
|
||||
int n = 0;
|
||||
|
||||
|
||||
CALL(call);
|
||||
|
||||
ArgSize sizes[10];
|
||||
|
@ -417,25 +432,26 @@ namespace nanojit
|
|||
{
|
||||
uint32_t op = i->opcode();
|
||||
int prefer = allow;
|
||||
if (op == LIR_call)
|
||||
#if defined NANOJIT_IA32
|
||||
prefer &= rmask(EAX);
|
||||
#elif defined NANOJIT_AMD64
|
||||
prefer &= rmask(RAX);
|
||||
#endif
|
||||
else if (op == LIR_param)
|
||||
prefer &= rmask(Register(i->imm8()));
|
||||
#if defined NANOJIT_IA32
|
||||
else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh)
|
||||
prefer &= rmask(EDX);
|
||||
#else
|
||||
else if (op == LIR_callh)
|
||||
prefer &= rmask(RAX);
|
||||
#endif
|
||||
else if (i->isCmp())
|
||||
if (op == LIR_call || op == LIR_calli) {
|
||||
prefer &= rmask(retRegs[0]);
|
||||
}
|
||||
else if (op == LIR_fcall || op == LIR_fcalli) {
|
||||
prefer &= rmask(FST0);
|
||||
}
|
||||
else if (op == LIR_param) {
|
||||
uint32_t max_regs = max_abi_regs[_thisfrag->lirbuf->abi];
|
||||
if (i->imm8() < max_regs)
|
||||
prefer &= rmask(Register(i->imm8()));
|
||||
}
|
||||
else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh) {
|
||||
prefer &= rmask(retRegs[1]);
|
||||
}
|
||||
else if (i->isCmp()) {
|
||||
prefer &= AllowableFlagRegs;
|
||||
else if (i->isconst())
|
||||
}
|
||||
else if (i->isconst()) {
|
||||
prefer &= ScratchRegs;
|
||||
}
|
||||
return (_allocator.free & prefer) ? prefer : allow;
|
||||
}
|
||||
|
||||
|
@ -476,38 +492,49 @@ namespace nanojit
|
|||
freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
|
||||
}
|
||||
|
||||
void Assembler::asm_load(int d, Register r)
|
||||
{
|
||||
if (rmask(r) & FpRegs)
|
||||
{
|
||||
#if defined NANOJIT_IA32
|
||||
if (rmask(r) & XmmRegs) {
|
||||
#endif
|
||||
SSE_LDQ(r, d, FP);
|
||||
#if defined NANOJIT_IA32
|
||||
} else {
|
||||
FLDQ(d, FP);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#if defined NANOJIT_AMD64
|
||||
else if (i->opcode() == LIR_param)
|
||||
{
|
||||
LDQ(r, d, FP);
|
||||
}
|
||||
#endif
|
||||
else
|
||||
{
|
||||
LD(r, d, FP);
|
||||
}
|
||||
}
|
||||
|
||||
void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
|
||||
{
|
||||
if (i->isconst())
|
||||
{
|
||||
if (i->isop(LIR_alloc)) {
|
||||
LEA(r, disp(resv), FP);
|
||||
verbose_only(if (_verbose) {
|
||||
outputf(" remat %s size %d", _thisfrag->lirbuf->names->formatRef(i), i->size());
|
||||
})
|
||||
}
|
||||
else if (i->isconst()) {
|
||||
if (!resv->arIndex) {
|
||||
reserveFree(i);
|
||||
}
|
||||
LDi(r, i->constval());
|
||||
}
|
||||
else
|
||||
{
|
||||
else {
|
||||
int d = findMemFor(i);
|
||||
if (rmask(r) & FpRegs)
|
||||
{
|
||||
#if defined NANOJIT_IA32
|
||||
if (rmask(r) & XmmRegs) {
|
||||
#endif
|
||||
SSE_LDQ(r, d, FP);
|
||||
#if defined NANOJIT_IA32
|
||||
} else {
|
||||
FLDQ(d, FP);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#if defined NANOJIT_AMD64
|
||||
LDQ(r, d, FP);
|
||||
#else
|
||||
LD(r, d, FP);
|
||||
#endif
|
||||
}
|
||||
asm_load(d,r);
|
||||
verbose_only(if (_verbose) {
|
||||
outputf(" restore %s", _thisfrag->lirbuf->names->formatRef(i));
|
||||
})
|
||||
|
@ -518,7 +545,13 @@ namespace nanojit
|
|||
{
|
||||
if (value->isconst())
|
||||
{
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
Register rb;
|
||||
if (base->isop(LIR_alloc)) {
|
||||
rb = FP;
|
||||
dr += findMemFor(base);
|
||||
} else {
|
||||
rb = findRegFor(base, GpRegs);
|
||||
}
|
||||
int c = value->constval();
|
||||
STi(rb, dr, c);
|
||||
}
|
||||
|
@ -526,18 +559,28 @@ namespace nanojit
|
|||
{
|
||||
// make sure what is in a register
|
||||
Reservation *rA, *rB;
|
||||
findRegFor2(GpRegs, value, rA, base, rB);
|
||||
Register ra = rA->reg;
|
||||
Register rb = rB->reg;
|
||||
Register ra, rb;
|
||||
if (base->isop(LIR_alloc)) {
|
||||
rb = FP;
|
||||
dr += findMemFor(base);
|
||||
ra = findRegFor(value, GpRegs);
|
||||
} else if (base->isconst()) {
|
||||
// absolute address
|
||||
dr += base->constval();
|
||||
ra = findRegFor(value, GpRegs);
|
||||
rb = UnknownReg;
|
||||
} else {
|
||||
findRegFor2(GpRegs, value, rA, base, rB);
|
||||
ra = rA->reg;
|
||||
rb = rB->reg;
|
||||
}
|
||||
ST(rb, dr, ra);
|
||||
}
|
||||
}
|
||||
|
||||
void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
|
||||
void Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
|
||||
{
|
||||
(void)i;
|
||||
int d = disp(resv);
|
||||
Register rr = resv->reg;
|
||||
(void)quad;
|
||||
if (d)
|
||||
{
|
||||
// save to spill location
|
||||
|
@ -553,17 +596,16 @@ namespace nanojit
|
|||
}
|
||||
#endif
|
||||
}
|
||||
#if defined NANOJIT_AMD64
|
||||
else if (quad)
|
||||
{
|
||||
STQ(FP, d, rr);
|
||||
}
|
||||
#endif
|
||||
else
|
||||
{
|
||||
#if defined NANOJIT_AMD64
|
||||
STQ(FP, d, rr);
|
||||
#else
|
||||
ST(FP, d, rr);
|
||||
#endif
|
||||
}
|
||||
verbose_only(if (_verbose) {
|
||||
outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i));
|
||||
})
|
||||
}
|
||||
#if defined NANOJIT_IA32
|
||||
else if (pop && (rmask(rr) & x87Regs))
|
||||
|
@ -571,7 +613,21 @@ namespace nanojit
|
|||
// pop the fpu result since it isn't used
|
||||
FSTP(FST0);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
// Spill the register held by instruction i's reservation via asm_spill and,
// when the reservation has a non-zero displacement, log the spill in
// verbose builds.
//   i:    instruction whose value is being spilled
//   resv: its reservation (supplies the register and the displacement)
//   pop:  forwarded unchanged to asm_spill
void Assembler::asm_spilli(LInsp i, Reservation *resv, bool pop)
{
    const int spillDisp = disp(resv);
    const Register spillReg = resv->reg;

    // Parameters are always flagged as quad; anything else is asked directly.
    bool asQuad = true;
    if (i->opcode() != LIR_param)
        asQuad = i->isQuad();

    asm_spill(spillReg, spillDisp, pop, asQuad);

    if (spillDisp == 0)
        return;

    verbose_only(if (_verbose) {
        outputf("        spill %s",_thisfrag->lirbuf->names->formatRef(i));
    })
}
|
||||
|
||||
void Assembler::asm_load64(LInsp ins)
|
||||
|
@ -584,7 +640,13 @@ namespace nanojit
|
|||
if (rr != UnknownReg && rmask(rr) & XmmRegs)
|
||||
{
|
||||
freeRsrcOf(ins, false);
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
Register rb;
|
||||
if (base->isop(LIR_alloc)) {
|
||||
rb = FP;
|
||||
db += findMemFor(base);
|
||||
} else {
|
||||
rb = findRegFor(base, GpRegs);
|
||||
}
|
||||
SSE_LDQ(rr, db, rb);
|
||||
}
|
||||
#if defined NANOJIT_AMD64
|
||||
|
@ -614,7 +676,13 @@ namespace nanojit
|
|||
else
|
||||
{
|
||||
int dr = disp(resv);
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
Register rb;
|
||||
if (base->isop(LIR_alloc)) {
|
||||
rb = FP;
|
||||
db += findMemFor(base);
|
||||
} else {
|
||||
rb = findRegFor(base, GpRegs);
|
||||
}
|
||||
resv->reg = UnknownReg;
|
||||
|
||||
// don't use an fpu reg to simply load & store the value.
|
||||
|
@ -639,7 +707,13 @@ namespace nanojit
|
|||
{
|
||||
// if a constant 64-bit value just store it now rather than
|
||||
// generating a pointless store/load/store sequence
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
Register rb;
|
||||
if (base->isop(LIR_alloc)) {
|
||||
rb = FP;
|
||||
dr += findMemFor(base);
|
||||
} else {
|
||||
rb = findRegFor(base, GpRegs);
|
||||
}
|
||||
const int32_t* p = (const int32_t*) (value-2);
|
||||
STi(rb, dr+4, p[1]);
|
||||
STi(rb, dr, p[0]);
|
||||
|
@ -647,7 +721,7 @@ namespace nanojit
|
|||
}
|
||||
|
||||
#if defined NANOJIT_IA32
|
||||
if (value->isop(LIR_ldq) || value->isop(LIR_qjoin))
|
||||
if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin))
|
||||
{
|
||||
// value is 64bit struct or int64_t, or maybe a double.
|
||||
// it may be live in an FPU reg. Either way, don't
|
||||
|
@ -660,21 +734,47 @@ namespace nanojit
|
|||
|
||||
if (avmplus::AvmCore::use_sse2()) {
|
||||
Register rv = findRegFor(value, XmmRegs);
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
Register rb;
|
||||
if (base->isop(LIR_alloc)) {
|
||||
rb = FP;
|
||||
dr += findMemFor(base);
|
||||
} else {
|
||||
rb = findRegFor(base, GpRegs);
|
||||
}
|
||||
SSE_STQ(dr, rb, rv);
|
||||
return;
|
||||
}
|
||||
|
||||
int da = findMemFor(value);
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
Register rb;
|
||||
if (base->isop(LIR_alloc)) {
|
||||
rb = FP;
|
||||
dr += findMemFor(base);
|
||||
} else {
|
||||
rb = findRegFor(base, GpRegs);
|
||||
}
|
||||
asm_mmq(rb, dr, FP, da);
|
||||
return;
|
||||
}
|
||||
|
||||
Register rb;
|
||||
if (base->isop(LIR_alloc)) {
|
||||
rb = FP;
|
||||
dr += findMemFor(base);
|
||||
} else {
|
||||
rb = findRegFor(base, GpRegs);
|
||||
}
|
||||
|
||||
// if value already in a reg, use that, otherwise
|
||||
// try to get it into XMM regs before FPU regs.
|
||||
Reservation* rA = getresv(value);
|
||||
Register rv;
|
||||
int pop = !rA || rA->reg==UnknownReg;
|
||||
Register rv = findRegFor(value, avmplus::AvmCore::use_sse2() ? XmmRegs : FpRegs);
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
if (pop) {
|
||||
rv = findRegFor(value, avmplus::AvmCore::use_sse2() ? XmmRegs : FpRegs);
|
||||
} else {
|
||||
rv = rA->reg;
|
||||
}
|
||||
|
||||
if (rmask(rv) & XmmRegs) {
|
||||
SSE_STQ(dr, rb, rv);
|
||||
|
@ -763,8 +863,10 @@ namespace nanojit
|
|||
NanoAssert((rmask(rr) & FpRegs) != 0);
|
||||
|
||||
const double d = ins->constvalf();
|
||||
const uint64_t q = ins->constvalq();
|
||||
if (rmask(rr) & XmmRegs) {
|
||||
if (d == 0.0) {
|
||||
if (q == 0.0) {
|
||||
// test (int64)0 since -0.0 == 0.0
|
||||
SSE_XORPDr(rr, rr);
|
||||
} else if (d == 1.0) {
|
||||
// 1.0 is extremely frequent and worth special-casing!
|
||||
|
@ -776,7 +878,8 @@ namespace nanojit
|
|||
SSE_LDQ(rr, d, FP);
|
||||
}
|
||||
} else {
|
||||
if (d == 0.0) {
|
||||
if (q == 0.0) {
|
||||
// test (int64)0 since -0.0 == 0.0
|
||||
FLDZ();
|
||||
} else if (d == 1.0) {
|
||||
FLD1();
|
||||
|
@ -803,24 +906,23 @@ namespace nanojit
|
|||
|
||||
if (rR->reg != UnknownReg)
|
||||
{
|
||||
Register rr = rR->reg;
|
||||
freeRsrcOf(ins, false);
|
||||
if (rmask(rr) & GpRegs)
|
||||
if (rmask(rR->reg) & GpRegs)
|
||||
{
|
||||
LDQi(rr, val);
|
||||
LDQi(rR->reg, val);
|
||||
}
|
||||
else if (rmask(rr) & XmmRegs)
|
||||
else if (rmask(rR->reg) & XmmRegs)
|
||||
{
|
||||
if (ins->constvalf() == 0.0)
|
||||
{
|
||||
SSE_XORPDr(rr, rr);
|
||||
SSE_XORPDr(rR->reg, rR->reg);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Get a short-lived register, not associated with instruction */
|
||||
Register rd = rR->reg;
|
||||
Register rs = registerAlloc(GpRegs);
|
||||
|
||||
SSE_MOVD(rr, rs);
|
||||
|
||||
SSE_MOVD(rd, rs);
|
||||
LDQi(rs, val);
|
||||
|
||||
_allocator.addFree(rs);
|
||||
|
@ -831,10 +933,11 @@ namespace nanojit
|
|||
{
|
||||
const int32_t* p = (const int32_t*) (ins-2);
|
||||
int dr = disp(rR);
|
||||
freeRsrcOf(ins, false);
|
||||
STi(FP, dr+4, p[1]);
|
||||
STi(FP, dr, p[0]);
|
||||
}
|
||||
|
||||
freeRsrcOf(ins, false);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -925,22 +1028,75 @@ namespace nanojit
|
|||
#endif
|
||||
}
|
||||
|
||||
// Emit the code that places one call argument.
//   sz: argument kind (ARGSIZE_Q reference, ARGSIZE_LO 32-bit, ARGSIZE_F float)
//   p:  instruction producing the argument value
//   r:  register the argument must be in, or UnknownReg to pass it on the stack
void Assembler::asm_arg(ArgSize sz, LInsp p, Register r)
{
    const bool wantsReg = (r != UnknownReg);

    if (sz != ARGSIZE_Q && sz != ARGSIZE_LO) {
        // the only remaining kind is a floating-point argument
        NanoAssert(sz == ARGSIZE_F);
        asm_farg(p);
        return;
    }

    if (sz == ARGSIZE_Q) {
        // ref arg - pass the address of p's memory slot
        if (!wantsReg) {
            NanoAssert(0); // not supported
            return;
        }
        // keep the displacement in a plain local before handing it to LEA
        int argSlot = findMemFor(p);
        LEA(r, argSlot, FP);
        return;
    }

    // ARGSIZE_LO from here on
    if (!wantsReg) {
        // arg goes on the stack
        asm_pusharg(p);
        return;
    }

    if (p->isconst()) {
        // constant: materialize it directly in the argument register
        LDi(r, p->constval());
        return;
    }

    Reservation* argResv = getresv(p);
    if (!argResv) {
        // this is the last use, so fine to assign it
        // to the scratch reg, it's dead after this point.
        findSpecificRegFor(p, r);
        return;
    }

    if (argResv->reg != UnknownReg) {
        // it must be in a saved reg
        MR(r, argResv->reg);
        return;
    }

    // value currently lives only in memory: load it into the arg reg
    int argSlot = findMemFor(p);
    if (p->isop(LIR_alloc)) {
        // for LIR_alloc the argument is the slot's address, not its contents
        LEA(r, argSlot, FP);
    } else {
        LD(r, argSlot, FP);
    }
}
|
||||
|
||||
void Assembler::asm_pusharg(LInsp p)
|
||||
{
|
||||
// arg goes on stack
|
||||
Reservation* rA = getresv(p);
|
||||
if (rA == 0)
|
||||
if (rA == 0 && p->isconst())
|
||||
{
|
||||
if (p->isconst())
|
||||
{
|
||||
// small const we push directly
|
||||
PUSHi(p->constval());
|
||||
}
|
||||
else
|
||||
{
|
||||
Register ra = findRegFor(p, GpRegs);
|
||||
PUSHr(ra);
|
||||
}
|
||||
// small const we push directly
|
||||
PUSHi(p->constval());
|
||||
}
|
||||
else if (rA == 0 || p->isop(LIR_alloc))
|
||||
{
|
||||
Register ra = findRegFor(p, GpRegs);
|
||||
PUSHr(ra);
|
||||
}
|
||||
else if (rA->reg == UnknownReg)
|
||||
{
|
||||
|
@ -955,14 +1111,16 @@ namespace nanojit
|
|||
void Assembler::asm_farg(LInsp p)
|
||||
{
|
||||
#if defined NANOJIT_IA32
|
||||
NanoAssert(p->isQuad());
|
||||
Register r = findRegFor(p, FpRegs);
|
||||
if (rmask(r) & XmmRegs) {
|
||||
SSE_STQ(0, SP, r);
|
||||
} else {
|
||||
FSTPQ(0, SP);
|
||||
}
|
||||
PUSHr(ECX); // 2*pushr is smaller than sub
|
||||
PUSHr(ECX);
|
||||
SUBi(ESP,8);
|
||||
//PUSHr(ECX); // 2*pushr is smaller than sub
|
||||
//PUSHr(ECX);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -997,7 +1155,10 @@ namespace nanojit
|
|||
*/
|
||||
ra = findRegFor(lhs, XmmRegs);
|
||||
}
|
||||
// else, rA already has a register assigned.
|
||||
else {
|
||||
// rA already has a register assigned but maybe not from the allow set
|
||||
ra = findRegFor(lhs, allow);
|
||||
}
|
||||
|
||||
if (lhs == rhs)
|
||||
rb = ra;
|
||||
|
@ -1190,6 +1351,75 @@ namespace nanojit
|
|||
}
|
||||
}
|
||||
|
||||
// Emit a conditional branch to targ for the floating-point comparison cond.
//   branchOnFalse: when true the branch is taken if cond does not hold
//   returns:       the value of _nIns recorded right after the branch was
//                  emitted and before the compare was emitted
NIns * Assembler::asm_jmpcc(bool branchOnFalse, LIns *cond, NIns *targ)
{
    const LOpcode op = cond->opcode();

    if (!avmplus::AvmCore::use_sse2() || op == LIR_feq) {
        // No SSE2, or an equality test: branch on the parity flag and let
        // asm_fcmp emit the comparison itself.
        if (branchOnFalse) { JP(targ); } else { JNP(targ); }
        NIns *branchAt = _nIns;
        asm_fcmp(cond);
        return branchAt;
    }

    // SSE2 path. "<" and "<=" are handled as ">" and ">=" with the operands
    // exchanged, so only the above / above-or-equal branches are needed.
    const bool exchange  = (op == LIR_flt || op == LIR_fle);
    const bool strictCmp = (op == LIR_flt || op == LIR_fgt);

    LIns *first  = cond->oprnd1();
    LIns *second = cond->oprnd2();
    LIns *cmpLhs = exchange ? second : first;
    LIns *cmpRhs = exchange ? first  : second;

    if (strictCmp) {
        if (branchOnFalse) { JNA(targ); } else { JA(targ); }
    } else {
        // anything that is not a strict comparison is treated as ">="
        if (branchOnFalse) { JNAE(targ); } else { JAE(targ); }
    }

    NIns *branchAt = _nIns;
    Reservation *lhsResv, *rhsResv;
    findRegFor2(XmmRegs, cmpLhs, lhsResv, cmpRhs, rhsResv);
    SSE_UCOMISD(lhsResv->reg, rhsResv->reg);
    return branchAt;
}
|
||||
|
||||
// Emit code that materializes the result of the floating-point comparison
// cond in register r.
void Assembler::asm_setcc(Register r, LIns *cond)
{
    const LOpcode op = cond->opcode();

    if (!avmplus::AvmCore::use_sse2() || op == LIR_feq) {
        // No SSE2, or an equality test: asm_fcmp emits the comparison and
        // the result is taken from the parity flag.
        // SETcc only sets low 8 bits, so extend
        MOVZX8(r,r);
        SETNP(r);
        asm_fcmp(cond);
        return;
    }

    // SSE2 path. SETcc only writes the low 8 bits, so r is zero-extended.
    MOVZX8(r,r);

    // "<" and "<=" are handled as ">" and ">=" with the operands exchanged.
    const bool exchange  = (op == LIR_flt || op == LIR_fle);
    const bool strictCmp = (op == LIR_flt || op == LIR_fgt);

    LIns *first  = cond->oprnd1();
    LIns *second = cond->oprnd2();
    LIns *cmpLhs = exchange ? second : first;
    LIns *cmpRhs = exchange ? first  : second;

    if (strictCmp) {
        SETA(r);
    } else {
        // anything that is not a strict comparison is treated as ">="
        SETAE(r);
    }

    Reservation *lhsResv, *rhsResv;
    findRegFor2(XmmRegs, cmpLhs, lhsResv, cmpRhs, rhsResv);
    SSE_UCOMISD(lhsResv->reg, rhsResv->reg);
}
|
||||
|
||||
void Assembler::asm_fcmp(LIns *cond)
|
||||
{
|
||||
LOpcode condop = cond->opcode();
|
||||
|
@ -1206,10 +1436,12 @@ namespace nanojit
|
|||
mask = 0x05;
|
||||
else if (condop == LIR_fge) {
|
||||
// swap, use le
|
||||
condop = LIR_fle;
|
||||
LIns* t = lhs; lhs = rhs; rhs = t;
|
||||
mask = 0x41;
|
||||
} else { // if (condop == LIR_fgt)
|
||||
// swap, use lt
|
||||
condop = LIR_flt;
|
||||
LIns* t = lhs; lhs = rhs; rhs = t;
|
||||
mask = 0x05;
|
||||
}
|
||||
|
@ -1227,7 +1459,8 @@ namespace nanojit
|
|||
// nan check
|
||||
Register r = findRegFor(lhs, XmmRegs);
|
||||
SSE_UCOMISD(r, r);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
#if defined NANOJIT_IA32
|
||||
evict(EAX);
|
||||
TEST_AH(mask);
|
||||
|
@ -1384,5 +1617,19 @@ namespace nanojit
|
|||
if (!_nIns) _nIns = pageAlloc();
|
||||
if (!_nExitIns) _nExitIns = pageAlloc(true);
|
||||
}
|
||||
|
||||
// enough room for n bytes
|
||||
void Assembler::underrunProtect(int n)
|
||||
{
|
||||
NIns *eip = this->_nIns;
|
||||
Page *p = (Page*)pageTop(eip-1);
|
||||
NIns *top = (NIns*) &p->code[0];
|
||||
if (eip - n < top) {
|
||||
_nIns = pageAlloc(_inExit);
|
||||
JMP(eip);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif /* FEATURE_NANOJIT */
|
||||
}
|
||||
|
|
|
@ -101,6 +101,7 @@ namespace nanojit
|
|||
|
||||
typedef int RegisterMask;
|
||||
|
||||
static const int NumSavedRegs = 3;
|
||||
static const RegisterMask SavedRegs = 1<<EBX | 1<<EDI | 1<<ESI;
|
||||
static const RegisterMask GpRegs = SavedRegs | 1<<EAX | 1<<ECX | 1<<EDX;
|
||||
static const RegisterMask XmmRegs = 1<<XMM0|1<<XMM1|1<<XMM2|1<<XMM3|1<<XMM4|1<<XMM5|1<<XMM6|1<<XMM7;
|
||||
|
@ -132,23 +133,12 @@ namespace nanojit
|
|||
bool pad[3];\
|
||||
void nativePageReset();\
|
||||
void nativePageSetup();\
|
||||
void asm_farg(LInsp);
|
||||
void underrunProtect(int);\
|
||||
void asm_farg(LInsp);\
|
||||
void asm_align_code();
|
||||
|
||||
#define swapptrs() { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; }
|
||||
|
||||
// enough room for n bytes
|
||||
#define underrunProtect(n) \
|
||||
{ \
|
||||
intptr_t u = n + sizeof(PageHeader)/sizeof(NIns) + 5; \
|
||||
if ( !samepage(_nIns-u,_nIns-1) ) \
|
||||
{ \
|
||||
NIns *tt = _nIns; \
|
||||
_nIns = pageAlloc(_inExit); \
|
||||
int d = tt-_nIns; \
|
||||
JMP_long_nochk_offset(d); \
|
||||
} \
|
||||
} \
|
||||
|
||||
#define IMM32(i) \
|
||||
_nIns -= 4; \
|
||||
*((int32_t*)_nIns) = (int32_t)(i)
|
||||
|
@ -171,8 +161,11 @@ namespace nanojit
|
|||
}
|
||||
|
||||
#define MODRMm(r,d,b) \
|
||||
NanoAssert(unsigned(r)<8 && unsigned(b)<8); \
|
||||
if ((b) == ESP) { \
|
||||
NanoAssert(unsigned(r)<8 && ((b)==UnknownReg || unsigned(b)<8)); \
|
||||
if ((b) == UnknownReg) {\
|
||||
IMM32(d);\
|
||||
*(--_nIns) = (uint8_t) (0<<6 | (r)<<3 | 5);\
|
||||
} else if ((b) == ESP) { \
|
||||
MODRMs(r, d, b, 0, (Register)4); \
|
||||
} \
|
||||
else if ( (d) == 0 && (b) != EBP) { \
|
||||
|
@ -344,7 +337,7 @@ namespace nanojit
|
|||
|
||||
#define ST(base,disp,reg) do { \
|
||||
ALUm(0x89,reg,disp,base); \
|
||||
asm_output3("mov %d(%s),%s",disp,gpn(base),gpn(reg)); } while(0)
|
||||
asm_output3("mov %d(%s),%s",disp,base==UnknownReg?"0":gpn(base),gpn(reg)); } while(0)
|
||||
|
||||
#define STi(base,disp,imm) do { \
|
||||
underrunProtect(12); \
|
||||
|
@ -497,7 +490,7 @@ namespace nanojit
|
|||
*(--_nIns) = 0x10;\
|
||||
*(--_nIns) = 0x0f;\
|
||||
*(--_nIns) = 0xf2;\
|
||||
asm_output3("movsd %s,%p // =%f",gpn(r),daddr,*daddr); \
|
||||
asm_output3("movsd %s,(#%p) // =%f",gpn(r),(void*)daddr,*daddr); \
|
||||
} while(0)
|
||||
|
||||
#define STSD(d,b,r)do { \
|
||||
|
@ -539,61 +532,70 @@ namespace nanojit
|
|||
} while(0)
|
||||
|
||||
#define SSE_MOVSD(rd,rs) do{ \
|
||||
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
|
||||
SSE(0xf20f10, (rd)&7, (rs)&7); \
|
||||
asm_output2("movsd %s,%s",gpn(rd),gpn(rs)); \
|
||||
} while(0)
|
||||
|
||||
#define SSE_MOVDm(d,b,xrs) do {\
|
||||
NanoAssert(_is_xmm_reg_(xrs) && _is_gp_reg_(b));\
|
||||
SSEm(0x660f7e, (xrs)&7, d, b);\
|
||||
asm_output3("movd %d(%s),%s", d, gpn(b), gpn(xrs));\
|
||||
} while(0)
|
||||
|
||||
#define SSE_ADDSD(rd,rs) do{ \
|
||||
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
|
||||
SSE(0xf20f58, (rd)&7, (rs)&7); \
|
||||
asm_output2("addsd %s,%s",gpn(rd),gpn(rs)); \
|
||||
} while(0)
|
||||
|
||||
#define SSE_ADDSDm(r,addr)do { \
|
||||
underrunProtect(8); \
|
||||
NanoAssert(_is_xmm_reg_(r));\
|
||||
const double* daddr = addr; \
|
||||
IMM32(int32_t(daddr));\
|
||||
*(--_nIns) = uint8_t(((r)&7)<<3|5); \
|
||||
*(--_nIns) = 0x58;\
|
||||
*(--_nIns) = 0x0f;\
|
||||
*(--_nIns) = 0xf2;\
|
||||
asm_output3("addsd %s,%p // =%f",gpn(r),daddr,*daddr); \
|
||||
asm_output3("addsd %s,%p // =%f",gpn(r),(void*)daddr,*daddr); \
|
||||
} while(0)
|
||||
|
||||
#define SSE_SUBSD(rd,rs) do{ \
|
||||
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
|
||||
SSE(0xf20f5c, (rd)&7, (rs)&7); \
|
||||
asm_output2("subsd %s,%s",gpn(rd),gpn(rs)); \
|
||||
} while(0)
|
||||
#define SSE_MULSD(rd,rs) do{ \
|
||||
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
|
||||
SSE(0xf20f59, (rd)&7, (rs)&7); \
|
||||
asm_output2("mulsd %s,%s",gpn(rd),gpn(rs)); \
|
||||
} while(0)
|
||||
#define SSE_DIVSD(rd,rs) do{ \
|
||||
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
|
||||
SSE(0xf20f5e, (rd)&7, (rs)&7); \
|
||||
asm_output2("divsd %s,%s",gpn(rd),gpn(rs)); \
|
||||
} while(0)
|
||||
#define SSE_UCOMISD(rl,rr) do{ \
|
||||
NanoAssert(_is_xmm_reg_(rl) && _is_xmm_reg_(rr));\
|
||||
SSE(0x660f2e, (rl)&7, (rr)&7); \
|
||||
asm_output2("ucomisd %s,%s",gpn(rl),gpn(rr)); \
|
||||
} while(0)
|
||||
|
||||
#define CVTSI2SDm(xr,d,b) do{ \
|
||||
NanoAssert(_is_xmm_reg_(xr) && _is_gp_reg_(b));\
|
||||
SSEm(0xf20f2a, (xr)&7, (d), (b)); \
|
||||
asm_output3("cvtsi2sd %s,%d(%s)",gpn(xr),(d),gpn(b)); \
|
||||
} while(0)
|
||||
|
||||
#define SSE_XORPD(r, maskaddr) do {\
|
||||
underrunProtect(8); \
|
||||
underrunProtect(8); \
|
||||
IMM32(maskaddr);\
|
||||
*(--_nIns) = uint8_t(((r)&7)<<3|5); \
|
||||
*(--_nIns) = 0x57;\
|
||||
*(--_nIns) = 0x0f;\
|
||||
*(--_nIns) = 0x66;\
|
||||
asm_output2("xorpd %s,[0x%p]",gpn(r),(maskaddr));\
|
||||
asm_output2("xorpd %s,[0x%p]",gpn(r),(void*)(maskaddr));\
|
||||
} while(0)
|
||||
|
||||
#define SSE_XORPDr(rd,rs) do{ \
|
||||
|
@ -657,6 +659,7 @@ namespace nanojit
|
|||
#define FLDr(r) do { FPU(0xd9c0,r); asm_output1("fld %s",fpn(r)); fpu_push(); } while(0)
|
||||
#define EMMS() do { FPUc(0x0f77); asm_output("emms"); } while (0)
|
||||
|
||||
// standard direct call
|
||||
#define CALL(c) do { \
|
||||
underrunProtect(5); \
|
||||
int offset = (c->_address) - ((int)_nIns); \
|
||||
|
@ -666,5 +669,14 @@ namespace nanojit
|
|||
debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\
|
||||
} while (0)
|
||||
|
||||
// indirect call thru register
|
||||
#define CALLr(c,r) do { \
|
||||
underrunProtect(2);\
|
||||
ALU(0xff, 2, (r));\
|
||||
verbose_only(asm_output1("call %s",gpn(r));) \
|
||||
debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\
|
||||
} while (0)
|
||||
|
||||
|
||||
}
|
||||
#endif // __nanojit_Nativei386__
|
||||
|
|
|
@ -72,11 +72,18 @@ namespace nanojit
|
|||
|
||||
/*
 * Record instruction `v` as the occupant of register `r` and refresh the
 * register's use priority. The register must be valid and currently
 * unoccupied.
 */
void RegAlloc::addActive(Register r, LIns* v)
{
    NanoAssert(r != UnknownReg && v && active[r] == NULL);
    active[r] = v;
    useActive(r);
}
|
||||
|
||||
/*
 * Stamp occupied register `r` with the current value of the running
 * priority counter, then advance the counter.
 */
void RegAlloc::useActive(Register r)
{
    NanoAssert(active[r] != NULL || r == UnknownReg ? false : true);
    usepri[r] = priority;
    ++priority;
}
|
||||
|
||||
void RegAlloc::removeActive(Register r)
|
||||
{
|
||||
//registerReleaseCount++;
|
||||
|
@ -87,12 +94,6 @@ namespace nanojit
|
|||
active[r] = NULL;
|
||||
}
|
||||
|
||||
LIns* RegAlloc::getActive(Register r)
|
||||
{
|
||||
NanoAssert(r != UnknownReg);
|
||||
return active[r];
|
||||
}
|
||||
|
||||
void RegAlloc::retire(Register r)
|
||||
{
|
||||
NanoAssert(r != UnknownReg);
|
||||
|
@ -101,30 +102,26 @@ namespace nanojit
|
|||
free |= rmask(r);
|
||||
}
|
||||
|
||||
// scan table for instruction with longest span
|
||||
LIns* Assembler::findVictim(RegAlloc ®s, RegisterMask allow, RegisterMask prefer)
|
||||
// scan table for instruction with the lowest priority, meaning it is used
|
||||
// furthest in the future.
|
||||
LIns* Assembler::findVictim(RegAlloc ®s, RegisterMask allow)
|
||||
{
|
||||
NanoAssert(allow != 0 && (allow&prefer)==prefer);
|
||||
LIns *i, *a=0, *p = 0;
|
||||
int acost=10, pcost=10;
|
||||
NanoAssert(allow != 0);
|
||||
LIns *i, *a=0;
|
||||
int allow_pri = 0x7fffffff;
|
||||
for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
|
||||
{
|
||||
if ((allow & rmask(r)) && (i = regs.getActive(r)) != 0)
|
||||
{
|
||||
int cost = getresv(i)->cost;
|
||||
if (!a || cost < acost || cost == acost && nbr(i) < nbr(a)) {
|
||||
int pri = canRemat(i) ? 0 : regs.getPriority(r);
|
||||
if (!a || pri < allow_pri) {
|
||||
a = i;
|
||||
acost = cost;
|
||||
}
|
||||
if (prefer & rmask(r)) {
|
||||
if (!p || cost < pcost || cost == pcost && nbr(i) < nbr(p)) {
|
||||
p = i;
|
||||
pcost = cost;
|
||||
}
|
||||
allow_pri = pri;
|
||||
}
|
||||
}
|
||||
}
|
||||
return acost < pcost ? a : p;
|
||||
NanoAssert(a != 0);
|
||||
return a;
|
||||
}
|
||||
|
||||
#ifdef NJ_VERBOSE
|
||||
|
|
|
@ -51,15 +51,28 @@ namespace nanojit
|
|||
class RegAlloc MMGC_SUBCLASS_DECL
|
||||
{
|
||||
public:
|
||||
RegAlloc() {}
|
||||
RegAlloc() : free(0), used(0), priority(0) {}
|
||||
void clear();
|
||||
bool isFree(Register r);
|
||||
void addFree(Register r);
|
||||
void removeFree(Register r);
|
||||
void addActive(Register r, LIns* ins);
|
||||
void useActive(Register r);
|
||||
void removeActive(Register r);
|
||||
LIns* getActive(Register r);
|
||||
void retire(Register r);
|
||||
bool isValid() {
|
||||
return (free|used) != 0;
|
||||
}
|
||||
|
||||
int32_t getPriority(Register r) {
|
||||
NanoAssert(r != UnknownReg && active[r]);
|
||||
return usepri[r];
|
||||
}
|
||||
|
||||
LIns* getActive(Register r) {
|
||||
NanoAssert(r != UnknownReg);
|
||||
return active[r];
|
||||
}
|
||||
|
||||
debug_only( uint32_t countFree(); )
|
||||
debug_only( uint32_t countActive(); )
|
||||
|
@ -68,11 +81,11 @@ namespace nanojit
|
|||
debug_only( uint32_t count; )
|
||||
debug_only( RegisterMask managed; ) // bitfield of 0..NJ_MAX_REGISTERS denoting which are under our management
|
||||
|
||||
// RegisterMask is a 32-bit value, so we can never have more than 32 active.
|
||||
// hardcode 32 here in case we have non-contiguous register numbers
|
||||
LIns* active[32]; // active[r] = OP that defines r
|
||||
LIns* active[LastReg + 1]; // active[r] = OP that defines r
|
||||
int32_t usepri[LastReg + 1]; // used priority. lower = more likely to spill.
|
||||
RegisterMask free;
|
||||
RegisterMask used;
|
||||
int32_t priority;
|
||||
|
||||
verbose_only( static void formatRegisters(RegAlloc& regs, char* s, Fragment*); )
|
||||
|
||||
|
|
|
@ -326,6 +326,8 @@ public:
|
|||
|
||||
#define DWB(x) x
|
||||
#define DRCWB(x) x
|
||||
#define WB(gc, container, addr, value) do { *(addr) = (value); } while(0)
|
||||
#define WBRC(gc, container, addr, value) do { *(addr) = (value); } while(0)
|
||||
|
||||
#define MMGC_MEM_TYPE(x)
|
||||
|
||||
|
|
|
@ -42,6 +42,8 @@
|
|||
#include <stddef.h>
|
||||
#include "avmplus.h"
|
||||
|
||||
#ifdef FEATURE_NANOJIT
|
||||
|
||||
#ifdef AVMPLUS_IA32
|
||||
#define NANOJIT_IA32
|
||||
#elif AVMPLUS_ARM
|
||||
|
@ -73,6 +75,8 @@ namespace nanojit
|
|||
typedef avmplus::List<LIns*,avmplus::LIST_NonGCObjects> InsList;
|
||||
typedef avmplus::List<char*, avmplus::LIST_GCObjects> StringList;
|
||||
|
||||
const uint32_t MAXARGS = 8;
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1400
|
||||
static void NanoAssertMsgf(bool a,const char *f,...) {}
|
||||
static void NanoAssertMsg(bool a,const char *m) {}
|
||||
|
@ -113,12 +117,12 @@ namespace nanojit
|
|||
#define verbose_output if (verbose_enabled()) Assembler::output
|
||||
#define verbose_outputf if (verbose_enabled()) Assembler::outputf
|
||||
#define verbose_enabled() (_verbose)
|
||||
#define verbose_only(x) x
|
||||
#define verbose_only(...) __VA_ARGS__
|
||||
#else
|
||||
#define verbose_output
|
||||
#define verbose_outputf
|
||||
#define verbose_enabled()
|
||||
#define verbose_only(x)
|
||||
#define verbose_only(...)
|
||||
#endif /*NJ_VERBOSE*/
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
@ -172,4 +176,5 @@ namespace nanojit
|
|||
#include "Assembler.h"
|
||||
#include "TraceTreeDrawer.h"
|
||||
|
||||
#endif // FEATURE_NANOJIT
|
||||
#endif // __nanojit_h__
|
||||
|
|
|
@ -1,224 +0,0 @@
|
|||
/*
|
||||
* A JavaScript implementation of the Secure Hash Algorithm, SHA-1, as defined
|
||||
* in FIPS PUB 180-1
|
||||
* Version 2.1a Copyright Paul Johnston 2000 - 2002.
|
||||
* Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet
|
||||
* Distributed under the BSD License
|
||||
* See http://pajhome.org.uk/crypt/md5 for details.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Configurable variables. You may need to tweak these to be compatible with
|
||||
* the server-side, but the defaults work in most cases.
|
||||
*/
|
||||
/* hex output format. 0 - lowercase; 1 - uppercase */
var hexcase = 0;

/* base-64 pad character. "=" for strict RFC compliance */
var b64pad = "";

/* bits per input character. 8 - ASCII; 16 - Unicode */
var chrsz = 8;
|
||||
|
||||
/*
|
||||
* These are the functions you'll usually want to call
|
||||
* They take string arguments and return either hex or base-64 encoded strings
|
||||
*/
|
||||
/* SHA-1 of string s, as a hex string. */
function hex_sha1(s)
{
  var digest = core_sha1(str2binb(s), s.length * chrsz);
  return binb2hex(digest);
}

/* SHA-1 of string s, as a base-64 string. */
function b64_sha1(s)
{
  var digest = core_sha1(str2binb(s), s.length * chrsz);
  return binb2b64(digest);
}

/* SHA-1 of string s, as a raw string. */
function str_sha1(s)
{
  var digest = core_sha1(str2binb(s), s.length * chrsz);
  return binb2str(digest);
}

/* HMAC-SHA1 of data under key, as a hex string. */
function hex_hmac_sha1(key, data)
{
  var mac = core_hmac_sha1(key, data);
  return binb2hex(mac);
}

/* HMAC-SHA1 of data under key, as a base-64 string. */
function b64_hmac_sha1(key, data)
{
  var mac = core_hmac_sha1(key, data);
  return binb2b64(mac);
}

/* HMAC-SHA1 of data under key, as a raw string. */
function str_hmac_sha1(key, data)
{
  var mac = core_hmac_sha1(key, data);
  return binb2str(mac);
}
|
||||
|
||||
/*
|
||||
* Perform a simple self-test to see if the VM is working
|
||||
*/
|
||||
function sha1_vm_test()
{
  /* Known digest of the three-character message "abc". */
  var expected = "a9993e364706816aba3e25717850c26c9cd0d89d";
  var actual = hex_sha1("abc");
  return actual == expected;
}
|
||||
|
||||
/*
|
||||
* Calculate the SHA-1 of an array of big-endian words, and a bit length
|
||||
*/
|
||||
function core_sha1(x, len)
{
  /* append padding (note: this writes into the caller's array x) */
  x[len >> 5] |= 0x80 << (24 - len % 32);
  x[((len + 64 >> 9) << 4) + 15] = len;

  var schedule = Array(80);

  /* running hash state, seeded with the initial constants */
  var h0 =  1732584193;
  var h1 = -271733879;
  var h2 = -1732584194;
  var h3 =  271733878;
  var h4 = -1009589776;

  /* one pass per 16-word block */
  for(var block = 0; block < x.length; block += 16)
  {
    /* working copies for this block */
    var a = h0;
    var b = h1;
    var c = h2;
    var d = h3;
    var e = h4;

    for(var round = 0; round < 80; round++)
    {
      if(round < 16)
      {
        schedule[round] = x[block + round];
      }
      else
      {
        schedule[round] = rol(schedule[round-3] ^ schedule[round-8] ^
                              schedule[round-14] ^ schedule[round-16], 1);
      }

      var left  = safe_add(rol(a, 5), sha1_ft(round, b, c, d));
      var right = safe_add(safe_add(e, schedule[round]), sha1_kt(round));
      var next  = safe_add(left, right);

      e = d;
      d = c;
      c = rol(b, 30);
      b = a;
      a = next;
    }

    /* fold the block result back into the running state */
    h0 = safe_add(a, h0);
    h1 = safe_add(b, h1);
    h2 = safe_add(c, h2);
    h3 = safe_add(d, h3);
    h4 = safe_add(e, h4);
  }

  return Array(h0, h1, h2, h3, h4);
}
|
||||
|
||||
/*
|
||||
* Perform the appropriate triplet combination function for the current
|
||||
* iteration
|
||||
*/
|
||||
function sha1_ft(t, b, c, d)
{
  /* t < 20: choose; t < 40: parity; t < 60: majority; otherwise parity */
  return (t < 20) ? ((b & c) | ((~b) & d)) :
         (t < 40) ? (b ^ c ^ d) :
         (t < 60) ? ((b & c) | (b & d) | (c & d)) :
                    (b ^ c ^ d);
}
|
||||
|
||||
/*
|
||||
* Determine the appropriate additive constant for the current iteration
|
||||
*/
|
||||
function sha1_kt(t)
{
  /* one constant per band of 20 iterations */
  if (t < 20) return 1518500249;
  if (t < 40) return 1859775393;
  if (t < 60) return -1894007588;
  return -899497514;
}
|
||||
|
||||
/*
|
||||
* Calculate the HMAC-SHA1 of a key and some data
|
||||
*/
|
||||
function core_hmac_sha1(key, data)
{
  /* keys longer than one 16-word block are replaced by their SHA-1 */
  var keyWords = str2binb(key);
  if(keyWords.length > 16)
  {
    keyWords = core_sha1(keyWords, key.length * chrsz);
  }

  /* key XORed with the inner and outer pad constants */
  var innerPad = Array(16);
  var outerPad = Array(16);
  for(var idx = 0; idx < 16; idx++)
  {
    var word = keyWords[idx];
    innerPad[idx] = word ^ 0x36363636;
    outerPad[idx] = word ^ 0x5C5C5C5C;
  }

  var innerMessage = innerPad.concat(str2binb(data));
  var innerHash = core_sha1(innerMessage, 512 + data.length * chrsz);
  var outerMessage = outerPad.concat(innerHash);
  return core_sha1(outerMessage, 512 + 160);
}
|
||||
|
||||
/*
|
||||
* Add integers, wrapping at 2^32. This uses 16-bit operations internally
|
||||
* to work around bugs in some JS interpreters.
|
||||
*/
|
||||
function safe_add(x, y)
{
  /* add the low halves first, then carry into the high halves */
  var lowSum  = (x & 0xFFFF) + (y & 0xFFFF);
  var carry   = lowSum >> 16;
  var highSum = (x >> 16) + (y >> 16) + carry;
  return (highSum << 16) | (lowSum & 0xFFFF);
}
|
||||
|
||||
/*
|
||||
* Bitwise rotate a 32-bit number to the left.
|
||||
*/
|
||||
function rol(num, cnt)
{
  var shiftedUp = num << cnt;             /* bits that stay, moved left */
  var wrapped   = num >>> (32 - cnt);     /* bits pushed out, re-entering at the bottom */
  return shiftedUp | wrapped;
}
|
||||
|
||||
/*
|
||||
* Convert an 8-bit or 16-bit string to an array of big-endian words
|
||||
* In 8-bit function, characters >255 have their hi-byte silently ignored.
|
||||
*/
|
||||
function str2binb(str)
{
  var words = Array();
  var charMask = (1 << chrsz) - 1;
  var totalBits = str.length * chrsz;

  /* bitPos is the bit offset of the current character in the output */
  for(var bitPos = 0; bitPos < totalBits; bitPos += chrsz)
  {
    var code = str.charCodeAt(bitPos / chrsz) & charMask;
    var shift = 32 - chrsz - bitPos % 32;
    words[bitPos >> 5] |= code << shift;
  }

  return words;
}
|
||||
|
||||
/*
|
||||
* Convert an array of big-endian words to a string
|
||||
*/
|
||||
function binb2str(bin)
{
  var text = "";
  var charMask = (1 << chrsz) - 1;

  /* bitPos walks the word array chrsz bits at a time */
  for(var bitPos = 0; bitPos < bin.length * 32; bitPos += chrsz)
  {
    var shift = 32 - chrsz - bitPos % 32;
    var code = (bin[bitPos >> 5] >>> shift) & charMask;
    text += String.fromCharCode(code);
  }

  return text;
}
|
||||
|
||||
/*
|
||||
* Convert an array of big-endian words to a hex string.
|
||||
*/
|
||||
function binb2hex(binarray)
{
  var digits = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
  var out = "";

  /* one iteration per byte: high nibble first, then low nibble */
  for(var byteIdx = 0; byteIdx < binarray.length * 4; byteIdx++)
  {
    var word = binarray[byteIdx >> 2];
    var shift = (3 - byteIdx % 4) * 8;
    out += digits.charAt((word >> (shift + 4)) & 0xF);
    out += digits.charAt((word >> shift) & 0xF);
  }

  return out;
}
|
||||
|
||||
/*
|
||||
* Convert an array of big-endian words to a base-64 string
|
||||
*/
|
||||
function binb2b64(binarray)
{
  var alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  var out = "";

  /* consume three bytes at a time, emit four output characters */
  for(var byteIdx = 0; byteIdx < binarray.length * 4; byteIdx += 3)
  {
    var first  = (binarray[byteIdx   >> 2] >> 8 * (3 -  byteIdx   %4)) & 0xFF;
    var second = (binarray[byteIdx+1 >> 2] >> 8 * (3 - (byteIdx+1)%4)) & 0xFF;
    var third  = (binarray[byteIdx+2 >> 2] >> 8 * (3 - (byteIdx+2)%4)) & 0xFF;
    var triplet = (first << 16) | (second << 8) | third;

    for(var k = 0; k < 4; k++)
    {
      /* positions past the end of the input get the pad character */
      var pastEnd = byteIdx * 8 + k * 6 > binarray.length * 32;
      out += pastEnd ? b64pad
                     : alphabet.charAt((triplet >> 6*(3-k)) & 0x3F);
    }
  }

  return out;
}
|
||||
|
||||
|
||||
/* Benchmark input: a fixed passage, doubled four times (16 copies). */
var plainText = "Two households, both alike in dignity,\n\
In fair Verona, where we lay our scene,\n\
From ancient grudge break to new mutiny,\n\
Where civil blood makes civil hands unclean.\n\
From forth the fatal loins of these two foes\n\
A pair of star-cross'd lovers take their life;\n\
Whole misadventured piteous overthrows\n\
Do with their death bury their parents' strife.\n\
The fearful passage of their death-mark'd love,\n\
And the continuance of their parents' rage,\n\
Which, but their children's end, nought could remove,\n\
Is now the two hours' traffic of our stage;\n\
The which if you with patient ears attend,\n\
What here shall miss, our toil shall strive to mend.";

var i = 0;
while (i < 4) {
  plainText = plainText + plainText;
  i++;
}

/* Hash the expanded text. */
var sha1Output = hex_sha1(plainText);
|
Загрузка…
Ссылка в новой задаче