Merge tamarin-redux (nanojit2) into tracemonkey (457786, r=edwsmith,gal,danderson).

This commit is contained in:
Graydon Hoare 2008-10-13 13:29:18 -07:00
Parent 180d9758af
Commit c6c4b6404b
21 changed files with 2074 additions and 1097 deletions

View file

@ -145,7 +145,7 @@ struct JSTraceableNative {
#define _JS_DEFINE_CALLINFO(name, crtype, cargtypes, argtypes, cse, fold) \
crtype FASTCALL js_##name cargtypes; \
const nanojit::CallInfo ci_##name = \
{ (intptr_t) &js_##name, argtypes, cse, fold _JS_CI_NAME(name) };
{ (intptr_t) &js_##name, argtypes, cse, fold, nanojit::ABI_FASTCALL _JS_CI_NAME(name) };
/*
* Declare a C function named js_<op> and a CallInfo struct named ci_<op> so
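The one functional change here is the added nanojit::ABI_FASTCALL initializer: the merged nanojit's CallInfo (its new definition appears in the LIR.h hunk below) records which calling convention each builtin uses. A minimal stand-alone sketch of what that extra field lines up with, using stand-in types and a made-up builtin rather than the real headers:

#include <cstdint>

enum AbiKind { ABI_FASTCALL, ABI_THISCALL, ABI_STDCALL, ABI_CDECL };

struct CallInfoSketch {
    intptr_t _address;
    uint32_t _argtypes;
    uint8_t  _cse, _fold;
    AbiKind  _abi;              // the field this merge adds
};

static int32_t fake_builtin(int32_t v) { return v + 1; }

const CallInfoSketch ci_fake_builtin = {
    (intptr_t) &fake_builtin,   // _address
    0,                          // _argtypes (encoding omitted in this sketch)
    1, 1,                       // _cse, _fold
    ABI_FASTCALL                // new: which calling convention the builtin uses
};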

View file

@ -344,16 +344,12 @@ math_max(JSContext *cx, uintN argc, jsval *vp)
*vp = DOUBLE_TO_JSVAL(cx->runtime->jsNaN);
return JS_TRUE;
}
if (x == 0 && x == z && fd_copysign(1.0, z) == -1)
z = x;
else
/*
* Note: it is essential that you write the ternary expression
* here such that the false branch produces z not x, as the case
* of x=-0, z=0, for which we wind up in this expression but
* evaluate either > order as false, whether we do x>z *or* z>x.
*/
if (x == 0 && x == z) {
if (fd_copysign(1.0, z) == -1)
z = x;
} else {
z = (x > z) ? x : z;
}
}
return js_NewNumberInRootedValue(cx, z, vp);
}
@ -378,9 +374,10 @@ math_min(JSContext *cx, uintN argc, jsval *vp)
*vp = DOUBLE_TO_JSVAL(cx->runtime->jsNaN);
return JS_TRUE;
}
if (x == 0 && x == z && fd_copysign(1.0,x) == -1)
z = x;
else
if (x == 0 && x == z) {
if (fd_copysign(1.0, x) == -1)
z = x;
} else
z = (x < z) ? x : z;
}
return js_NewNumberInRootedValue(cx, z, vp);
@ -623,9 +620,13 @@ js_Math_max(jsdouble d, jsdouble p)
if (JSDOUBLE_IS_NaN(d) || JSDOUBLE_IS_NaN(p))
return js_NaN;
if (p == 0 && p == d && fd_copysign(1.0, d) == -1)
return p;
return (d > p) ? d : p;
if (p == 0 && p == d) {
if (fd_copysign(1.0, d) == -1)
return p;
else
return d;
}
return (p > d) ? p : d;
}
jsdouble FASTCALL
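The rewritten max/min paths preserve ECMAScript's signed-zero rules: Math.max(-0, +0) must return +0 and Math.min(-0, +0) must return -0, which an ordinary comparison cannot decide because -0.0 == +0.0. A stand-alone sketch of the same logic, with std::copysign standing in for fd_copysign:

#include <cmath>
#include <cstdio>

// Sketch of js_Math_max above; std::copysign stands in for fd_copysign.
static double max_sketch(double d, double p) {
    if (std::isnan(d) || std::isnan(p))
        return NAN;
    if (p == 0 && p == d)                        // both operands are zeros
        return std::copysign(1.0, d) == -1 ? p   // d is -0, so the other zero wins
                                           : d;  // d is +0 (or both are +0)
    return (p > d) ? p : d;
}

int main() {
    printf("%g\n", std::copysign(1.0, max_sketch(-0.0, 0.0)));  // prints 1: result is +0
}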

View file

@ -1000,12 +1000,14 @@ TraceRecorder::TraceRecorder(JSContext* cx, GuardRecord* _anchor, Fragment* _fra
lir = cse_filter = new (&gc) CseFilter(lir, &gc);
lir = expr_filter = new (&gc) ExprFilter(lir);
lir = func_filter = new (&gc) FuncFilter(lir, *this);
lir->ins0(LIR_trace);
lir->ins0(LIR_start);
if (!nanojit::AvmCore::config.tree_opt || fragment->root == fragment) {
lirbuf->state = addName(lir->insParam(0), "state");
lirbuf->param1 = addName(lir->insParam(1), "param1");
lirbuf->state = addName(lir->insParam(0, 0), "state");
lirbuf->param1 = addName(lir->insParam(1, 0), "param1");
}
loop_header_ins = addName(lir->ins0(LIR_label), "loop_header");
lirbuf->sp = addName(lir->insLoad(LIR_ldp, lirbuf->state, (int)offsetof(InterpState, sp)), "sp");
lirbuf->rp = addName(lir->insLoad(LIR_ldp, lirbuf->state, offsetof(InterpState, rp)), "rp");
cx_ins = addName(lir->insLoad(LIR_ldp, lirbuf->state, offsetof(InterpState, cx)), "cx");
@ -1955,10 +1957,9 @@ TraceRecorder::closeLoop(Fragmento* fragmento)
SideExit *exit = snapshot(LOOP_EXIT);
exit->target = fragment->root;
if (fragment == fragment->root) {
fragment->lastIns = lir->insGuard(LIR_loop, lir->insImm(1), exit);
} else {
fragment->lastIns = lir->insGuard(LIR_x, lir->insImm(1), exit);
fragment->lastIns = lir->insBranch(LIR_j, NULL, loop_header_ins);
}
fragment->lastIns = lir->insGuard(LIR_x, lir->insImm(1), exit);
compile(fragmento);
debug_only_v(printf("recording completed at %s:%u@%u via closeLoop\n", cx->fp->script->filename,
@ -2107,9 +2108,9 @@ TraceRecorder::fuseIf(jsbytecode* pc, bool cond, LIns* x)
int
nanojit::StackFilter::getTop(LInsp guard)
{
if (sp == frag->lirbuf->sp)
if (sp == lirbuf->sp)
return guard->exit()->sp_adj;
JS_ASSERT(sp == frag->lirbuf->rp);
JS_ASSERT(sp == lirbuf->rp);
return guard->exit()->rp_adj;
}
@ -2387,7 +2388,7 @@ js_RecordTree(JSContext* cx, JSTraceMonitor* tm, Fragment* f)
while (f->code() && f->peer)
f = f->peer;
if (f->code())
f = JS_TRACE_MONITOR(cx).fragmento->newLoop(f->ip);
f = JS_TRACE_MONITOR(cx).fragmento->getAnchor(f->ip);
f->calldepth = 0;
f->root = f;
@ -2604,7 +2605,7 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
OBJ_SHAPE(globalObj), tm->globalShape);)
const void* ip = f->ip;
js_FlushJITCache(cx);
*treep = tm->fragmento->newLoop(ip);
*treep = tm->fragmento->getAnchor(ip);
return NULL;
}
@ -2624,7 +2625,7 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
bool didGC;
const void* ip = f->ip;
if (!ReplenishReservePool(cx, tm, didGC) || didGC) {
*treep = tm->fragmento->newLoop(ip);
*treep = tm->fragmento->getAnchor(ip);
return NULL;
}
}
@ -2663,6 +2664,7 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
tm->onTrace = true;
GuardRecord* lr;
debug_only(fflush(NULL);)
#if defined(JS_NO_FASTCALL) && defined(NANOJIT_IA32)
SIMULATE_FASTCALL(lr, &state, NULL, u.func);
#else
@ -2854,7 +2856,7 @@ js_MonitorLoopEdge(JSContext* cx, uintN& inlineCallCount)
} else {
f = tm->fragmento->getLoop(pc);
if (!f)
f = tm->fragmento->newLoop(pc);
f = tm->fragmento->getAnchor(pc);
cacheEntry->pc = pc;
cacheEntry->fragment = f;
}

View file

@ -220,6 +220,7 @@ class TraceRecorder : public GCObject {
#ifdef NJ_SOFTFLOAT
nanojit::LirWriter* float_filter;
#endif
nanojit::LIns* loop_header_ins;
nanojit::LIns* cx_ins;
nanojit::LIns* gp_ins;
nanojit::LIns* eos_ins;

File diff not shown because it is too large.

View file

@ -73,8 +73,8 @@ namespace nanojit
struct Reservation
{
uint32_t arIndex:16; /* index into stack frame. displ is -4*arIndex */
Register reg:8; /* register UnknownReg implies not in register */
int cost:8;
Register reg:15; /* register UnknownReg implies not in register */
uint32_t used:1;
};
struct AR
@ -83,32 +83,6 @@ namespace nanojit
uint32_t tos; /* current top of stack entry */
uint32_t highwatermark; /* max tos hit */
uint32_t lowwatermark; /* we pre-allocate entries from 0 upto this index-1; so dynamic entries are added above this index */
LIns* parameter[ NJ_MAX_PARAMETERS ]; /* incoming parameters */
};
enum ArgSize {
ARGSIZE_NONE = 0,
ARGSIZE_F = 1,
ARGSIZE_LO = 2,
ARGSIZE_Q = 3,
_ARGSIZE_MASK_INT = 2,
_ARGSIZE_MASK_ANY = 3
};
struct CallInfo
{
intptr_t _address;
uint16_t _argtypes; // 6 2-bit fields indicating arg type, by ARGSIZE above (including ret type): a1 a2 a3 a4 a5 ret
uint8_t _cse; // true if no side effects
uint8_t _fold; // true if no side effects
verbose_only ( const char* _name; )
uint32_t FASTCALL _count_args(uint32_t mask) const;
uint32_t get_sizes(ArgSize*) const;
inline uint32_t FASTCALL count_args() const { return _count_args(_ARGSIZE_MASK_ANY); }
inline uint32_t FASTCALL count_iargs() const { return _count_args(_ARGSIZE_MASK_INT); }
// fargs = args - iargs
};
#ifdef AVMPLUS_WIN32
@ -124,6 +98,10 @@ namespace nanojit
counter_define(spills;)
counter_define(native;)
counter_define(exitnative;)
int32_t pages;
NIns* codeStart;
NIns* codeExitStart;
DECLARE_PLATFORM_STATS()
#ifdef __GNUC__
@ -146,10 +124,34 @@ namespace nanojit
,MaxExit
,MaxXJump
,UnknownPrim
,UnknownBranch
};
typedef avmplus::List<NIns*, avmplus::LIST_NonGCObjects> NInsList;
typedef avmplus::SortedMap<LIns*,NIns*,avmplus::LIST_NonGCObjects> InsMap;
typedef avmplus::SortedMap<NIns*,LIns*,avmplus::LIST_NonGCObjects> NInsMap;
class LabelState MMGC_SUBCLASS_DECL
{
public:
RegAlloc regs;
NIns *addr;
LabelState(NIns *a, RegAlloc &r) : regs(r), addr(a)
{}
};
class LabelStateMap
{
GC *gc;
avmplus::SortedMap<LIns*, LabelState*, avmplus::LIST_GCObjects> labels;
public:
LabelStateMap(GC *gc) : gc(gc), labels(gc)
{}
void clear() { labels.clear(); }
void add(LIns *label, NIns *addr, RegAlloc &regs);
LabelState *get(LIns *);
};
/**
* Information about the activation record for the method is built up
* as we generate machine code. As part of the prologue, we issue
@ -190,6 +192,7 @@ namespace nanojit
void setError(AssmError e) { _err = e; }
void setCallTable(const CallInfo *functions);
void pageReset();
int32_t codeBytes();
Page* handoverPages(bool exitPages=false);
debug_only ( void pageValidate(); )
@ -197,30 +200,32 @@ namespace nanojit
// support calling out from a fragment ; used to debug the jit
debug_only( void resourceConsistencyCheck(); )
debug_only( void registerConsistencyCheck(LIns** resv); )
debug_only( void registerConsistencyCheck(); )
Stats _stats;
int hasLoop;
private:
void gen(LirFilter* toCompile, NInsList& loopJumps);
NIns* genPrologue(RegisterMask);
NIns* genEpilogue(RegisterMask);
bool ignoreInstruction(LInsp ins);
NIns* genPrologue();
NIns* genEpilogue();
GuardRecord* placeGuardRecord(LInsp guard);
void initGuardRecord(LInsp guard, GuardRecord*);
uint32_t arReserve(LIns* l);
uint32_t arFree(uint32_t idx);
void arFree(uint32_t idx);
void arReset();
Register registerAlloc(RegisterMask allow);
void registerResetAll();
void restoreCallerSaved();
void mergeRegisterState(RegAlloc& saved);
LInsp findVictim(RegAlloc& regs, RegisterMask allow, RegisterMask prefer);
void evictRegs(RegisterMask regs);
void evictScratchRegs();
void intersectRegisterState(RegAlloc& saved);
void unionRegisterState(RegAlloc& saved);
void assignSaved(RegAlloc &saved, RegisterMask skip);
LInsp findVictim(RegAlloc& regs, RegisterMask allow);
int findMemFor(LIns* i);
Register findRegFor(LIns* i, RegisterMask allow);
@ -234,12 +239,16 @@ namespace nanojit
NIns* pageAlloc(bool exitPage=false);
void pagesFree(Page*& list);
void internalReset();
bool canRemat(LIns*);
Reservation* reserveAlloc(LInsp i);
void reserveFree(LInsp i);
void reserveReset();
Reservation* getresv(LIns *x) { return x->resv() ? &_resvTable[x->resv()] : 0; }
Reservation* getresv(LIns *x) {
uint32_t resv_index = x->resv();
return resv_index ? &_resvTable[resv_index] : 0;
}
DWB(Fragmento*) _frago;
GC* _gc;
@ -259,13 +268,18 @@ namespace nanojit
AR _activation;
RegAlloc _allocator;
LabelStateMap _labels;
NInsMap _patches;
Reservation _resvTable[ NJ_MAX_STACK_ENTRY ]; // table where we house stack and register information
uint32_t _resvFree;
bool _inExit,vpad2[3];
bool _inExit, vpad2[3];
avmplus::List<LIns*, avmplus::LIST_GCObjects> pending_lives;
void asm_cmp(LIns *cond);
#ifndef NJ_SOFTFLOAT
void asm_fcmp(LIns *cond);
void asm_setcc(Register res, LIns *cond);
NIns * asm_jmpcc(bool brOnFalse, LIns *cond, NIns *target);
#endif
void asm_mmq(Register rd, int dd, Register rs, int ds);
NIns* asm_exit(LInsp guard);
@ -274,7 +288,9 @@ namespace nanojit
void asm_store32(LIns *val, int d, LIns *base);
void asm_store64(LIns *val, int d, LIns *base);
void asm_restore(LInsp, Reservation*, Register);
void asm_spill(LInsp i, Reservation *resv, bool pop);
void asm_load(int d, Register r);
void asm_spilli(LInsp i, Reservation *resv, bool pop);
void asm_spill(Register rr, int d, bool pop=false, bool quad=false);
void asm_load64(LInsp i);
void asm_pusharg(LInsp p);
NIns* asm_adjustBranch(NIns* at, NIns* target);
@ -290,6 +306,10 @@ namespace nanojit
void asm_call(LInsp);
void asm_arg(ArgSize, LInsp, Register);
Register asm_binop_rhs_reg(LInsp ins);
NIns* asm_branch(bool branchOnFalse, LInsp cond, NIns* targ);
void assignSavedParams();
void reserveSavedParams();
void handleLoopCarriedExprs();
// platform specific implementation (see NativeXXX.cpp file)
void nInit(uint32_t flags);
@ -303,6 +323,7 @@ namespace nanojit
// platform specific methods
public:
const static Register savedRegs[NumSavedRegs];
DECLARE_PLATFORM_ASSEMBLER()
private:
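The new LabelStateMap and _patches members exist because nanojit assembles LIR from back to front: when a branch is reached, its target label has either already been assembled (so its native address and register state can be looked up) or not yet (so the branch site must be recorded and patched when the label is finally emitted). A toy model of that protocol, with std::map standing in for the real containers:

#include <cstdint>
#include <map>
#include <vector>

typedef uint8_t NIns;        // stand-in for a native instruction byte
typedef int     LabelId;     // stand-in for the LIns* of a LIR_label

std::map<LabelId, NIns*> labels;    // label -> native address   (cf. LabelStateMap)
std::map<NIns*, LabelId> patches;   // branch site -> label      (cf. _patches)

// Called when a LIR_j/jt/jf is assembled. Code is emitted backwards, so a
// loop back-edge's label has not been seen yet, while a forward branch's has.
NIns* emitBranch(NIns* pc, LabelId target) {
    *--pc = 0xEB;                          // placeholder jump byte
    if (labels.find(target) == labels.end())
        patches[pc] = target;              // target unknown: leave it for patching
    // otherwise the real code would encode labels[target] directly
    return pc;
}

// Called when the LIR_label itself is assembled.
NIns* emitLabel(NIns* pc, LabelId label) {
    labels[label] = pc;
    for (std::map<NIns*, LabelId>::iterator it = patches.begin(); it != patches.end(); ++it)
        if (it->second == label) {
            // here the real assembler rewrites the branch via asm_adjustBranch()
        }
    return pc;
}

int main() {
    std::vector<NIns> buf(64);
    NIns* pc = &buf[0] + buf.size();
    pc = emitBranch(pc, /*label*/ 1);      // backward branch: label 1 not seen yet
    pc = emitLabel(pc, 1);                 // label reached: pending branch gets patched
    return 0;
}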

View file

@ -39,6 +39,7 @@
* ***** END LICENSE BLOCK ***** */
#include "nanojit.h"
#undef MEMORY_INFO
namespace nanojit
{
@ -58,16 +59,17 @@ namespace nanojit
*/
Fragmento::Fragmento(AvmCore* core, uint32_t cacheSizeLog2)
: _allocList(core->GetGC()),
_max_pages(1 << (calcSaneCacheSize(cacheSizeLog2) - NJ_LOG2_PAGE_SIZE))
_max_pages(1 << (calcSaneCacheSize(cacheSizeLog2) - NJ_LOG2_PAGE_SIZE)),
_pagesGrowth(1)
{
#ifdef MEMORY_INFO
_allocList.set_meminfo_name("Fragmento._allocList");
#endif
NanoAssert(_max_pages > _pagesGrowth); // shrink growth if needed
_core = core;
GC *gc = core->GetGC();
_frags = new (gc) FragmentMap(gc, 128);
_assm = new (gc) nanojit::Assembler(this);
_pageGrowth = 1;
verbose_only( enterCounts = new (gc) BlockHist(gc); )
verbose_only( mergeCounts = new (gc) BlockHist(gc); )
}
@ -109,10 +111,10 @@ namespace nanojit
{
NanoAssert(sizeof(Page) == NJ_PAGE_SIZE);
if (!_pageList) {
pagesGrow(_pageGrowth); // try to get more mem
if ((_pageGrowth << 1) < _max_pages)
_pageGrowth <<= 1;
}
pagesGrow(_pagesGrowth); // try to get more mem
if ((_pagesGrowth << 1) < _max_pages)
_pagesGrowth <<= 1;
}
Page *page = _pageList;
if (page)
{
@ -221,7 +223,7 @@ namespace nanojit
return _core;
}
Fragment* Fragmento::newLoop(const void* ip)
Fragment* Fragmento::getAnchor(const void* ip)
{
Fragment *f = newFrag(ip);
Fragment *p = _frags->get(ip);
@ -480,7 +482,7 @@ namespace nanojit
{
int c = hist->count(ip);
if (_assm->_verbose)
_assm->outputf("++ %s %d", core()->interp.labels->format(ip), c);
_assm->outputf("++ %s %d", labels->format(ip), c);
}
void Fragmento::countIL(uint32_t il, uint32_t abc)

View file

@ -54,7 +54,6 @@ namespace nanojit
struct PageHeader
{
struct Page *next;
verbose_only (int seq;) // sequence # of page
};
struct Page: public PageHeader
{
@ -101,8 +100,8 @@ namespace nanojit
Page* pageAlloc();
void pageFree(Page* page);
Fragment* newLoop(const void* ip);
Fragment* getLoop(const void* ip);
Fragment* getAnchor(const void* ip);
void clearFrags(); // clear all fragments from the cache
Fragment* getMerge(GuardRecord *lr, const void* ip);
Fragment* createBranch(GuardRecord *lr, const void* ip);
@ -145,13 +144,13 @@ namespace nanojit
DWB(Assembler*) _assm;
DWB(FragmentMap*) _frags; /* map from ip -> Fragment ptr */
Page* _pageList;
uint32_t _pageGrowth;
/* unmanaged mem */
AllocList _allocList;
GCHeap* _gcHeap;
const uint32_t _max_pages;
uint32_t _pagesGrowth;
};
enum TraceKind {
@ -236,18 +235,5 @@ namespace nanojit
int32_t _hits;
Page* _pages; // native code pages
};
#ifdef NJ_VERBOSE
inline int nbr(LInsp x)
{
Page *p = x->page();
return (p->seq * NJ_PAGE_SIZE + (intptr_t(x)-intptr_t(p))) / sizeof(LIns);
}
#else
inline int nbr(LInsp x)
{
return (int)(intptr_t(x) & intptr_t(NJ_PAGE_SIZE-1));
}
#endif
}
#endif // __nanojit_Fragmento__

File diff not shown because it is too large.

View file

@ -63,23 +63,34 @@ namespace nanojit
LIR64 = 0x40, // result is double or quad
// special operations (must be 0..N)
LIR_trace = 2,
LIR_nearskip = 3, // must be LIR_skip-1 and lsb=1
LIR_skip = 4,
LIR_neartramp = 5, // must be LIR_tramp-1 and lsb=1
LIR_tramp = 6,
LIR_start = 0,
LIR_nearskip = 1, // must be LIR_skip-1 and lsb=1
LIR_skip = 2,
LIR_neartramp = 3, // must be LIR_tramp-1 and lsb=1
LIR_tramp = 4,
// non-pure operations
LIR_addp = 9,
LIR_param = 10,
LIR_st = 11, // 32-bit store
LIR_ld = 12, // 32-bit load
LIR_alloc = 13, // alloca some stack space
LIR_sti = 14,
LIR_call = 18, // subrouting call returning a 32-bit value
LIR_ret = 15,
LIR_live = 16, // extend live range of reference
LIR_calli = 17, // indirect call
LIR_call = 18, // subroutine call returning a 32-bit value
// guards
LIR_loop = 19, // loop fragment
LIR_x = 20, // exit always
// branches
LIR_j = 21, // jump always
LIR_jt = 22, // jump true
LIR_jf = 23, // jump false
LIR_label = 24, // a jump target
LIR_ji = 25, // jump indirect
// operators
// LIR_feq though LIR_fge must only be used on float arguments. They
@ -137,18 +148,25 @@ namespace nanojit
LIR_ule = 62, // 0x3E 0011 1110
LIR_uge = 63, // 0x3F 0011 1111
// non-64bit ops, but we're out of code space below 64
LIR_file = 1 | LIR64,
LIR_line = 2 | LIR64,
/**
* 64bit operations
*/
LIR_stq = LIR_st | LIR64, // quad store
LIR_stqi = LIR_sti | LIR64,
LIR_fret = LIR_ret | LIR64,
LIR_quad = LIR_int | LIR64, // quad constant value
LIR_ldq = LIR_ld | LIR64, // quad load
LIR_ldqc = LIR_ldc | LIR64,
LIR_qiand = 24 | LIR64,
LIR_qiadd = 25 | LIR64,
LIR_qilsh = LIR_lsh | LIR64,
LIR_fcall = LIR_call | LIR64, // subroutine call returning quad
LIR_fcalli = LIR_calli | LIR64,
LIR_fneg = LIR_neg | LIR64, // floating-point numeric negation
LIR_fadd = LIR_add | LIR64, // floating-point addition
LIR_fsub = LIR_sub | LIR64, // floating-point subtraction
@ -164,6 +182,7 @@ namespace nanojit
#if defined NANOJIT_64BIT
#define LIR_ldp LIR_ldq
#define LIR_stp LIR_stq
#define LIR_piadd LIR_qiadd
#define LIR_piand LIR_qiand
#define LIR_pilsh LIR_qilsh
@ -171,6 +190,7 @@ namespace nanojit
#define LIR_pior LIR_qior
#else
#define LIR_ldp LIR_ld
#define LIR_stp LIR_st
#define LIR_piadd LIR_add
#define LIR_piand LIR_and
#define LIR_pilsh LIR_lsh
@ -184,7 +204,71 @@ namespace nanojit
struct SideExit;
struct Page;
struct CallInfo;
enum AbiKind {
ABI_FASTCALL,
ABI_THISCALL,
ABI_STDCALL,
ABI_CDECL
};
enum ArgSize {
ARGSIZE_NONE = 0,
ARGSIZE_F = 1,
ARGSIZE_LO = 2,
ARGSIZE_Q = 3,
_ARGSIZE_MASK_INT = 2,
_ARGSIZE_MASK_ANY = 3
};
struct CallInfo
{
uintptr_t _address;
uint32_t _argtypes:18; // 9 2-bit fields indicating arg type, by ARGSIZE above (including ret type): a1 a2 a3 a4 a5 ret
uint8_t _cse:1; // true if no side effects
uint8_t _fold:1; // true if no side effects
AbiKind _abi:3;
verbose_only ( const char* _name; )
uint32_t FASTCALL _count_args(uint32_t mask) const;
uint32_t get_sizes(ArgSize*) const;
inline bool isInterface() const {
return _address == 2 || _address == 3; /* hack! */
}
inline bool isIndirect() const {
return _address < 256;
}
inline uint32_t FASTCALL count_args() const {
return _count_args(_ARGSIZE_MASK_ANY) + isIndirect();
}
inline uint32_t FASTCALL count_iargs() const {
return _count_args(_ARGSIZE_MASK_INT);
}
// fargs = args - iargs
};
inline bool isGuard(LOpcode op) {
return op==LIR_x || op==LIR_xf || op==LIR_xt || op==LIR_loop;
}
inline bool isCall(LOpcode op) {
op = LOpcode(op & ~LIR64);
return op == LIR_call || op == LIR_calli;
}
inline bool isStore(LOpcode op) {
op = LOpcode(op & ~LIR64);
return op == LIR_st || op == LIR_sti;
}
inline bool isConst(LOpcode op) {
return (op & ~1) == LIR_short;
}
inline bool isLoad(LOpcode op) {
return op == LIR_ldq || op == LIR_ld || op == LIR_ldc || op == LIR_ldqc;
}
// Low-level Instruction 4B
// had to lay it out as a union with duplicate code fields since msvc couldn't figure out how to compact it otherwise.
@ -290,7 +374,9 @@ namespace nanojit
inline LOpcode opcode() const { return u.code; }
inline uint8_t imm8() const { return c.imm8a; }
inline uint8_t imm8b() const { return c.imm8b; }
inline int16_t imm16() const { return i.imm16; }
inline int32_t imm24() const { return t.imm24; }
inline LIns* ref() const {
#if defined NANOJIT_64BIT
return (t.code & 1) ? (LIns*)this+t.imm24 : *(LIns**)(this-2);
@ -302,6 +388,14 @@ namespace nanojit
inline uint8_t resv() const { return g.resv; }
void* payload() const;
inline Page* page() { return (Page*) alignTo(this,NJ_PAGE_SIZE); }
inline int32_t size() const {
NanoAssert(isop(LIR_alloc));
return i.imm16<<2;
}
inline void setSize(int32_t bytes) {
NanoAssert(isop(LIR_alloc) && (bytes&3)==0 && isU16(bytes>>2));
i.imm16 = bytes>>2;
}
// index args in r-l order. arg(0) is rightmost arg
inline LIns* arg(uint32_t i) {
@ -375,12 +469,12 @@ namespace nanojit
bool isQuad() const;
bool isCond() const;
bool isCmp() const;
bool isCall() const;
bool isStore() const;
bool isLoad() const;
bool isGuard() const;
bool isCall() const { return nanojit::isCall(u.code); }
bool isStore() const { return nanojit::isStore(u.code); }
bool isLoad() const { return nanojit::isLoad(u.code); }
bool isGuard() const { return nanojit::isGuard(u.code); }
// True if the instruction is a 32-bit or smaller constant integer.
bool isconst() const;
bool isconst() const { return nanojit::isConst(u.code); }
// True if the instruction is a 32-bit or smaller constant integer and
// has the value val when treated as a 32-bit signed integer.
bool isconstval(int32_t val) const;
@ -391,10 +485,13 @@ namespace nanojit
bool isTramp() {
return isop(LIR_neartramp) || isop(LIR_tramp);
}
bool isBranch() const {
return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j);
}
// Set the imm16 member. Should only be used on instructions that use
// that. If you're not sure, you shouldn't be calling it.
void setimm16(int32_t i);
void setimm24(int32_t x);
// Set the resv member. Should only be used on instructions that use
// that. If you're not sure, you shouldn't be calling it.
void setresv(uint32_t resv);
@ -405,6 +502,9 @@ namespace nanojit
void setOprnd2(LIns*);
void setOprnd3(LIns*);
void setDisp(int8_t d);
void target(LIns* t);
LIns **targetAddr();
LIns* getTarget();
SideExit *exit();
@ -424,19 +524,21 @@ namespace nanojit
bool FASTCALL isCse(LOpcode v);
bool FASTCALL isCmp(LOpcode v);
bool FASTCALL isCond(LOpcode v);
inline bool isRet(LOpcode c) {
return (c & ~LIR64) == LIR_ret;
}
bool FASTCALL isFloat(LOpcode v);
LIns* FASTCALL callArgN(LInsp i, uint32_t n);
extern const uint8_t operandCount[];
class Fragmento; // @todo remove this ; needed for minbuild for some reason?!? Should not be compiling this code at all
class LirFilter;
struct CallInfo;
// make it a GCObject so we can explicitly delete it early
class LirWriter : public GCObject
{
public:
LirWriter *out;
public:
const CallInfo *_functions;
virtual ~LirWriter() {}
@ -455,8 +557,13 @@ namespace nanojit
virtual LInsp insGuard(LOpcode v, LIns *c, SideExit *x) {
return out->insGuard(v, c, x);
}
virtual LInsp insParam(int32_t i) {
return out->insParam(i);
virtual LInsp insBranch(LOpcode v, LInsp condition, LInsp to) {
return out->insBranch(v, condition, to);
}
// arg: 0=first, 1=second, ...
// kind: 0=arg 1=saved-reg
virtual LInsp insParam(int32_t arg, int32_t kind) {
return out->insParam(arg, kind);
}
virtual LInsp insImm(int32_t imm) {
return out->insImm(imm);
@ -477,10 +584,14 @@ namespace nanojit
virtual LInsp insCall(const CallInfo *call, LInsp args[]) {
return out->insCall(call, args);
}
virtual LInsp insAlloc(int32_t size) {
return out->insAlloc(size);
}
// convenience
LIns* insLoadi(LIns *base, int disp);
LIns* insLoad(LOpcode op, LIns *base, int disp);
LIns* store(LIns* value, LIns* base, int32_t d);
// Inserts a conditional to execute and branches to execute if
// the condition is true and false respectively.
LIns* ins_choose(LIns* cond, LIns* iftrue, LIns* iffalse);
@ -491,6 +602,7 @@ namespace nanojit
LIns* ins2i(LOpcode op, LIns *oprnd1, int32_t);
LIns* qjoin(LInsp lo, LInsp hi);
LIns* insImmPtr(const void *ptr);
LIns* insImmf(double f);
};
#ifdef NJ_VERBOSE
@ -516,8 +628,8 @@ namespace nanojit
char buf[1000], *end;
void formatAddr(const void *p, char *buf);
public:
AvmCore *core;
LabelMap(AvmCore *, LabelMap* parent);
avmplus::AvmCore *core;
LabelMap(avmplus::AvmCore *, LabelMap* parent);
~LabelMap();
void add(const void *p, size_t size, size_t align, const char *name);
void add(const void *p, size_t size, size_t align, avmplus::String*);
@ -579,50 +691,63 @@ namespace nanojit
class VerboseWriter : public LirWriter
{
avmplus::List<LInsp, avmplus::LIST_NonGCObjects> code;
LirNameMap *names;
DWB(LirNameMap*) names;
public:
VerboseWriter(GC *gc, LirWriter *out, LirNameMap* names)
: LirWriter(out), code(gc), names(names)
{}
LInsp add(LInsp i) {
code.add(i);
if (i)
code.add(i);
return i;
}
LInsp add_flush(LInsp i) {
if ((i = add(i)) != 0)
flush();
return i;
}
void flush()
{
for (int j=0, n=code.size(); j < n; j++)
printf(" %s\n",names->formatIns(code[j]));
code.clear();
printf("\n");
int n = code.size();
if (n) {
for (int i=0; i < n; i++)
printf(" %s\n",names->formatIns(code[i]));
code.clear();
if (n > 1)
printf("\n");
}
}
LIns* insGuard(LOpcode op, LInsp cond, SideExit *x) {
LInsp i = add(out->insGuard(op,cond,x));
if (i)
flush();
return i;
return add_flush(out->insGuard(op,cond,x));
}
LIns* insBranch(LOpcode v, LInsp condition, LInsp to) {
return add_flush(out->insBranch(v, condition, to));
}
LIns* ins0(LOpcode v) {
LInsp i = add(out->ins0(v));
if (i)
flush();
return i;
if (v == LIR_label || v == LIR_start) {
flush();
}
return add(out->ins0(v));
}
LIns* ins1(LOpcode v, LInsp a) {
return add(out->ins1(v, a));
return isRet(v) ? add_flush(out->ins1(v, a)) : add(out->ins1(v, a));
}
LIns* ins2(LOpcode v, LInsp a, LInsp b) {
return v == LIR_2 ? out->ins2(v,a,b) : add(out->ins2(v, a, b));
}
LIns* insCall(const CallInfo *call, LInsp args[]) {
return add(out->insCall(call, args));
return add_flush(out->insCall(call, args));
}
LIns* insParam(int32_t i) {
return add(out->insParam(i));
LIns* insParam(int32_t i, int32_t kind) {
return add(out->insParam(i, kind));
}
LIns* insLoad(LOpcode v, LInsp base, LInsp disp) {
return add(out->insLoad(v, base, disp));
@ -633,6 +758,9 @@ namespace nanojit
LIns* insStorei(LInsp v, LInsp b, int32_t d) {
return add(out->insStorei(v, b, d));
}
LIns* insAlloc(int32_t size) {
return add(out->insAlloc(size));
}
};
#endif
@ -643,7 +771,8 @@ namespace nanojit
ExprFilter(LirWriter *out) : LirWriter(out) {}
LIns* ins1(LOpcode v, LIns* a);
LIns* ins2(LOpcode v, LIns* a, LIns* b);
LIns* insGuard(LOpcode v, LIns *c, SideExit *x);
LIns* insGuard(LOpcode, LIns *cond, SideExit *);
LIns* insBranch(LOpcode, LIns *cond, LIns *target);
};
// @todo, this could be replaced by a generic HashMap or HashSet, if we had one
@ -652,14 +781,14 @@ namespace nanojit
// must be a power of 2.
// don't start too small, or we'll waste time growing and rehashing.
// don't start too large, will waste memory.
static const uint32_t kInitialCap = 2048;
static const uint32_t kInitialCap = 64;
InsList m_list;
uint32_t m_used;
LInsp *m_list; // explicit WB's are used, no DWB needed.
uint32_t m_used, m_cap;
GC* m_gc;
static uint32_t FASTCALL hashcode(LInsp i);
uint32_t FASTCALL find(LInsp name, uint32_t hash, const InsList& list, uint32_t cap);
uint32_t FASTCALL find(LInsp name, uint32_t hash, const LInsp *list, uint32_t cap);
static bool FASTCALL equals(LInsp a, LInsp b);
void FASTCALL grow();
@ -673,6 +802,7 @@ namespace nanojit
LInsp findcall(const CallInfo *call, uint32_t argc, LInsp args[], uint32_t &i);
LInsp add(LInsp i, uint32_t k);
void replace(LInsp i);
void clear();
static uint32_t FASTCALL hashimm(int32_t);
static uint32_t FASTCALL hashimmq(uint64_t);
@ -695,7 +825,6 @@ namespace nanojit
LIns* insGuard(LOpcode op, LInsp cond, SideExit *x);
};
struct Page;
class LirBuffer : public GCFinalizedObject
{
public:
@ -704,13 +833,13 @@ namespace nanojit
virtual ~LirBuffer();
void clear();
LInsp next();
LInsp commit(uint32_t count);
bool addPage();
bool outOmem() { return _noMem != 0; }
debug_only (void validate() const;)
debug_only (void validate() const;)
verbose_only(DWB(LirNameMap*) names;)
verbose_only(int insCount();)
verbose_only(int byteCount();)
int32_t insCount();
int32_t byteCount();
// stats
struct
@ -721,14 +850,20 @@ namespace nanojit
_stats;
const CallInfo* _functions;
AbiKind abi;
LInsp state,param1,sp,rp;
LInsp savedParams[NumSavedRegs];
private:
protected:
friend class LirBufWriter;
LInsp commit(uint32_t count);
bool addPage();
Page* pageAlloc();
Page* _start; // first page
LInsp _unused; // next unused instruction slot
int _noMem; // set if ran out of memory when writing to buffer
Page* _start; // first page
LInsp _unused; // next unused instruction slot
int _noMem; // set if ran out of memory when writing to buffer
};
class LirBufWriter : public LirWriter
@ -749,17 +884,24 @@ namespace nanojit
LInsp ins0(LOpcode op);
LInsp ins1(LOpcode op, LInsp o1);
LInsp ins2(LOpcode op, LInsp o1, LInsp o2);
LInsp insParam(int32_t i);
LInsp insParam(int32_t i, int32_t kind);
LInsp insImm(int32_t imm);
LInsp insImmq(uint64_t imm);
LInsp insCall(const CallInfo *call, LInsp args[]);
LInsp insGuard(LOpcode op, LInsp cond, SideExit *x);
LInsp insBranch(LOpcode v, LInsp condition, LInsp to);
LInsp insAlloc(int32_t size);
// buffer mgmt
LInsp skip(size_t);
protected:
LInsp insFar(LOpcode op, LInsp target);
LInsp insLink(LOpcode op, LInsp target);
LInsp ensureReferenceable(LInsp i, int32_t addedDistance);
bool ensureRoom(uint32_t count);
bool can8bReach(LInsp from, LInsp to) { return isU8(from-to-1); }
bool can24bReach(LInsp from, LInsp to){ return isS24(from-to); }
bool canReference(LInsp from, LInsp to) {
return isU8(from-to-1);
}
@ -795,24 +937,27 @@ namespace nanojit
LInsp pos() {
return _i;
}
void setpos(LIns *i) {
_i = i;
}
};
class Assembler;
void compile(Assembler *assm, Fragment *frag);
verbose_only( void printTracker(const char* s, avmplus::RegionTracker& trk, Assembler* assm); )
verbose_only(void live(GC *gc, Assembler *assm, Fragment *frag);)
verbose_only(void live(GC *gc, LirBuffer *lirbuf);)
class StackFilter: public LirFilter
{
GC *gc;
Fragment *frag;
LirBuffer *lirbuf;
LInsp sp;
avmplus::BitSet stk;
int top;
int getTop(LInsp guard);
int getTop(LInsp br);
public:
StackFilter(LirFilter *in, GC *gc, Fragment *frag, LInsp sp);
StackFilter(LirFilter *in, GC *gc, LirBuffer *lirbuf, LInsp sp);
virtual ~StackFilter() {}
LInsp read();
};
@ -825,5 +970,23 @@ namespace nanojit
CseReader(LirFilter *in, LInsHashSet *exprs, const CallInfo*);
LInsp read();
};
// eliminate redundant loads by watching for stores & mutator calls
class LoadFilter: public LirWriter
{
public:
LInsp sp, rp;
LInsHashSet exprs;
void clear(LInsp p);
public:
LoadFilter(LirWriter *out, GC *gc)
: LirWriter(out), exprs(gc) { }
LInsp ins0(LOpcode);
LInsp insLoad(LOpcode, LInsp base, LInsp disp);
LInsp insStore(LInsp v, LInsp b, LInsp d);
LInsp insStorei(LInsp v, LInsp b, int32_t d);
LInsp insCall(const CallInfo *call, LInsp args[]);
};
}
#endif // __nanojit_LIR__
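Of the new filters declared above, LoadFilter is the easiest to picture: it remembers the value last loaded from each address and reuses it until a store or a non-cse call could have clobbered memory. A simplified stand-alone model of the idea (the real filter keys on LIR instructions and exempts stack traffic through sp and rp):

#include <cstdint>
#include <map>
#include <utility>

typedef std::pair<int, int32_t> Addr;   // (base value id, displacement)
typedef int ValueId;                    // stand-in for an LIns*

std::map<Addr, ValueId> known_loads;

ValueId insLoad(int base, int32_t disp, ValueId fresh) {
    Addr key(base, disp);
    std::map<Addr, ValueId>::iterator it = known_loads.find(key);
    if (it != known_loads.end())
        return it->second;              // same address already loaded: reuse it
    known_loads[key] = fresh;           // otherwise record the new load
    return fresh;
}

void insStore()        { known_loads.clear(); }                // any store may alias: forget everything
void insCall(bool cse) { if (!cse) known_loads.clear(); }      // impure calls likewise

int main() {
    ValueId a = insLoad(/*base*/ 7, /*disp*/ 8, /*fresh*/ 100);
    ValueId b = insLoad(7, 8, 101);     // redundant: returns 100 again
    insStore();
    ValueId c = insLoad(7, 8, 102);     // after a store the load must be redone
    return (a == b && c == 102) ? 0 : 1;
}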

View file

@ -109,6 +109,7 @@ namespace nanojit
typedef int RegisterMask;
/* RBX, R13-R15 */
static const int NumSavedRegs = 3;
static const RegisterMask SavedRegs = /*(1<<RBX) |*/ /*(1<<R12) |*/ (1<<R13) | (1<<R14) | (1<<R15);
/* RAX, RCX, RDX, RDI, RSI, R8-R11 */
static const RegisterMask TempRegs = (1<<RAX) | (1<<RCX) | (1<<RDX) | (1<<R8) | (1<<R9) | (1<<R10) | (1<<R11) | (1<<RDI) | (1<<RSI);

View file

@ -157,6 +157,7 @@ static const RegisterMask SavedFpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 |
#else
static const RegisterMask SavedFpRegs = 0;
#endif
static const int NumSavedRegs = 7;
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10 | SavedFpRegs;
static const RegisterMask FpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6; // no D7; S14-S15 are used for i2f/u2f.
static const RegisterMask GpRegs = 0x07FF;

View file

@ -61,6 +61,12 @@ namespace nanojit
const Register Assembler::argRegs[] = { R0, R1, R2, R3 };
const Register Assembler::retRegs[] = { R0, R1 };
#ifdef NJ_THUMB_JIT
const Register Assembler::savedRegs[] = { R4, R5, R6, R7 };
#else
const Register Assembler::savedRegs[] = { R4, R5, R6, R7, R8, R9, R10 };
#endif
void Assembler::nInit(AvmCore*)
{
// Thumb mode does not have conditional move, alas
@ -269,7 +275,7 @@ namespace nanojit
else if (op == LIR_callh)
prefer = rmask(R1);
else if (op == LIR_param)
prefer = rmask(imm2register(i->imm8()));
prefer = rmask(imm2register(argRegs[i->imm8()]));
if (_allocator.free & allow & prefer)
allow &= prefer;

View file

@ -101,6 +101,7 @@ namespace nanojit
}
FragInfo;
static const int NumSavedRegs = 4;
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7;
static const RegisterMask FpRegs = 0x0000; // FST0-FST7
static const RegisterMask GpRegs = 0x003F;

View file

@ -71,6 +71,7 @@ namespace nanojit
#if defined NANOJIT_IA32
const Register Assembler::argRegs[] = { ECX, EDX };
const Register Assembler::retRegs[] = { EAX, EDX };
const Register Assembler::savedRegs[] = { EBX, ESI, EDI };
#elif defined NANOJIT_AMD64
#if defined WIN64
const Register Assembler::argRegs[] = { R8, R9, RCX, RDX };
@ -78,8 +79,17 @@ namespace nanojit
const Register Assembler::argRegs[] = { RDI, RSI, RDX, RCX, R8, R9 };
#endif
const Register Assembler::retRegs[] = { RAX, RDX };
const Register Assembler::savedRegs[] = { R13, R14, R15 };
#endif
const static uint8_t max_abi_regs[] = {
2, /* ABI_FASTCALL */
1, /* ABI_THISCALL */
0, /* ABI_STDCALL */
0 /* ABI_CDECL */
};
void Assembler::nInit(AvmCore* core)
{
OSDep::getDate();
@ -89,21 +99,16 @@ namespace nanojit
#endif
}
NIns* Assembler::genPrologue(RegisterMask needSaving)
NIns* Assembler::genPrologue()
{
/**
* Prologue
*/
uint32_t stackNeeded = STACK_GRANULARITY * _activation.highwatermark;
uint32_t savingCount = 0;
for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
if (needSaving&rmask(i))
savingCount++;
// After forcing alignment, we've pushed the pre-alignment SP
// and savingCount registers.
uint32_t stackPushed = STACK_GRANULARITY * (1+savingCount);
uint32_t stackPushed =
STACK_GRANULARITY + // returnaddr
STACK_GRANULARITY; // ebp
uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
uint32_t amt = aligned - stackPushed;
@ -122,37 +127,37 @@ namespace nanojit
verbose_only( verbose_output(" patch entry:"); )
NIns *patchEntry = _nIns;
MR(FP, SP); // Establish our own FP.
PUSHr(FP); // Save caller's FP.
// Save pre-alignment SP value here, where the FP will point,
// to preserve the illusion of a valid frame chain for
// functions like MMgc::GetStackTrace. The 'return address'
// of this 'frame' will be the last-saved register, but that's
// fine, because the next-older frame will be legit.
PUSHr(FP);
for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
if (needSaving&rmask(i))
PUSHr(i);
// We'd like to be able to use SSE instructions like MOVDQA on
// stack slots; it requires 16B alignment. Darwin requires a
// 16B stack alignment, and Linux GCC seems to intend to
// establish and preserve the same, but we're told that GCC
// has not always done this right. To avoid doubt, do it on
// all platforms. The prologue runs only when we enter
// fragments from the interpreter, so forcing 16B alignment
// here is cheap.
#if defined NANOJIT_IA32
ANDi(SP, -NJ_ALIGN_STACK);
#elif defined NANOJIT_AMD64
ANDQi(SP, -NJ_ALIGN_STACK);
#endif
MR(FP,SP);
PUSHr(FP); // Save caller's FP.
// align the entry point
asm_align_code();
return patchEntry;
}
void Assembler::asm_align_code() {
static char nop[][9] = {
{0x90},
{0x66,0x90},
{0x0f,0x1f,0x00},
{0x0f,0x1f,0x40,0x00},
{0x0f,0x1f,0x44,0x00,0x00},
{0x66,0x0f,0x1f,0x44,0x00,0x00},
{0x0f,0x1f,0x80,0x00,0x00,0x00,0x00},
{0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00},
{0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00},
};
unsigned n;
while((n = uintptr_t(_nIns) & 15) != 0) {
if (n > 9)
n = 9;
underrunProtect(n);
_nIns -= n;
memcpy(_nIns, nop[n-1], n);
asm_output1("nop%d", n);
}
}
void Assembler::nFragExit(LInsp guard)
{
SideExit *exit = guard->exit();
@ -191,7 +196,6 @@ namespace nanojit
// first restore ESP from EBP, undoing SUBi(SP,amt) from genPrologue
MR(SP,FP);
#ifdef NJ_VERBOSE
if (_frago->core()->config.show_stats) {
// load EDX (arg1) with Fragment *fromFrag, target fragment
@ -213,17 +217,11 @@ namespace nanojit
#endif
}
NIns *Assembler::genEpilogue(RegisterMask restore)
NIns *Assembler::genEpilogue()
{
RET();
POPr(FP); // Restore caller's FP.
MR(SP,FP); // Undo forced alignment.
// Restore saved registers.
for (Register i=UnknownReg; i >= FirstReg; i = prevreg(i))
if (restore&rmask(i)) { POPr(i); }
POPr(FP); // Pop the pre-alignment SP.
MR(SP,FP); // pop the stack frame
return _nIns;
}
@ -232,75 +230,92 @@ namespace nanojit
{
const CallInfo* call = ins->callInfo();
// must be signed, not unsigned
const uint32_t iargs = call->count_iargs();
int32_t fstack = call->count_args() - iargs;
uint32_t iargs = call->count_iargs();
int32_t fargs = call->count_args() - iargs - call->isIndirect();
bool imt = call->isInterface();
if (imt)
iargs --;
uint32_t max_regs = max_abi_regs[call->_abi];
if (max_regs > iargs)
max_regs = iargs;
int32_t istack = iargs-max_regs; // first 2 4B args are in registers
int32_t extra = 0;
const int32_t pushsize = 4*istack + 8*fargs; // actual stack space used
#if defined NJ_NO_FASTCALL
int32_t istack = iargs;
#if _MSC_VER
// msc is slack, and MIR doesn't do anything extra, so lets use this
// call-site alignment to at least have code size parity with MIR.
uint32_t align = 4;//NJ_ALIGN_STACK;
#else
int32_t istack = iargs-2; // first 2 4B args are in registers
if (istack <= 0)
{
istack = 0;
}
uint32_t align = NJ_ALIGN_STACK;
#endif
const int32_t size = 4*istack + 8*fstack; // actual stack space used
if (size) {
if (pushsize) {
// stack re-alignment
// only pop our adjustment amount since callee pops args in FASTCALL mode
extra = alignUp(size, NJ_ALIGN_STACK) - (size);
#ifndef NJ_NO_FASTCALL
if (extra > 0)
{
extra = alignUp(pushsize, align) - pushsize;
if (call->_abi == ABI_CDECL) {
// with CDECL only, caller pops args
ADDi(SP, extra+pushsize);
} else if (extra > 0) {
ADDi(SP, extra);
}
#endif
}
#ifdef NJ_NO_FASTCALL
// In C calling conventions, callee doesn't pop args.
ADDi(SP, 4*iargs + 8*fstack + extra);
#endif
CALL(call);
#ifdef NJ_NO_FASTCALL
if (iargs >= 1) {
PUSHr(ECX);
if (iargs >= 2) {
PUSHr(EDX);
}
}
#endif
bool indirect = false;
if (ins->isop(LIR_call) || ins->isop(LIR_fcall)) {
verbose_only(if (_verbose)
outputf(" %p:", _nIns);
)
CALL(call);
}
else {
// indirect call. x86 Calling conventions don't use EAX as an
// argument, and do use EAX as a return value. We need a register
// for the address to call, so we use EAX since it will always be
// available
NanoAssert(ins->isop(LIR_calli) || ins->isop(LIR_fcalli));
CALLr(call, EAX);
indirect = true;
}
// make sure fpu stack is empty before call (restoreCallerSaved)
NanoAssert(_allocator.isFree(FST0));
// note: this code requires that ref arguments (ARGSIZE_Q)
// be one of the first two arguments
// pre-assign registers to the first 2 4B args
const int max_regs = (iargs < 2) ? iargs : 2;
int n = 0;
// pre-assign registers to the first N 4B args based on the calling convention
uint32_t n = 0;
ArgSize sizes[10];
ArgSize sizes[2*MAXARGS];
uint32_t argc = call->get_sizes(sizes);
if (indirect) {
argc--;
asm_arg(ARGSIZE_LO, ins->arg(argc), EAX);
}
if (imt) {
// interface thunk calling convention: put iid in EDX
NanoAssert(call->_abi == ABI_CDECL);
argc--;
asm_arg(ARGSIZE_LO, ins->arg(argc), EDX);
}
for(uint32_t i=0; i < argc; i++)
{
uint32_t j = argc-i-1;
ArgSize sz = sizes[j];
Register r = UnknownReg;
if (n < max_regs && sz != ARGSIZE_F)
r = argRegs[n++]; // tell asm_arg what reg to use
if (n < max_regs && sz != ARGSIZE_F) {
r = argRegs[n++]; // tell asm_arg what reg to use
}
asm_arg(sz, ins->arg(j), r);
}
if (extra > 0)
{
SUBi(SP, extra);
}
}
#elif defined NANOJIT_AMD64
@ -310,7 +325,7 @@ namespace nanojit
Register fpu_reg = XMM0;
const CallInfo* call = ins->callInfo();
int n = 0;
CALL(call);
ArgSize sizes[10];
@ -417,25 +432,26 @@ namespace nanojit
{
uint32_t op = i->opcode();
int prefer = allow;
if (op == LIR_call)
#if defined NANOJIT_IA32
prefer &= rmask(EAX);
#elif defined NANOJIT_AMD64
prefer &= rmask(RAX);
#endif
else if (op == LIR_param)
prefer &= rmask(Register(i->imm8()));
#if defined NANOJIT_IA32
else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh)
prefer &= rmask(EDX);
#else
else if (op == LIR_callh)
prefer &= rmask(RAX);
#endif
else if (i->isCmp())
if (op == LIR_call || op == LIR_calli) {
prefer &= rmask(retRegs[0]);
}
else if (op == LIR_fcall || op == LIR_fcalli) {
prefer &= rmask(FST0);
}
else if (op == LIR_param) {
uint32_t max_regs = max_abi_regs[_thisfrag->lirbuf->abi];
if (i->imm8() < max_regs)
prefer &= rmask(Register(i->imm8()));
}
else if (op == LIR_callh || op == LIR_rsh && i->oprnd1()->opcode()==LIR_callh) {
prefer &= rmask(retRegs[1]);
}
else if (i->isCmp()) {
prefer &= AllowableFlagRegs;
else if (i->isconst())
}
else if (i->isconst()) {
prefer &= ScratchRegs;
}
return (_allocator.free & prefer) ? prefer : allow;
}
@ -476,38 +492,49 @@ namespace nanojit
freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
}
void Assembler::asm_load(int d, Register r)
{
if (rmask(r) & FpRegs)
{
#if defined NANOJIT_IA32
if (rmask(r) & XmmRegs) {
#endif
SSE_LDQ(r, d, FP);
#if defined NANOJIT_IA32
} else {
FLDQ(d, FP);
}
#endif
}
#if defined NANOJIT_AMD64
else if (i->opcode() == LIR_param)
{
LDQ(r, d, FP);
}
#endif
else
{
LD(r, d, FP);
}
}
void Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
{
if (i->isconst())
{
if (i->isop(LIR_alloc)) {
LEA(r, disp(resv), FP);
verbose_only(if (_verbose) {
outputf(" remat %s size %d", _thisfrag->lirbuf->names->formatRef(i), i->size());
})
}
else if (i->isconst()) {
if (!resv->arIndex) {
reserveFree(i);
}
LDi(r, i->constval());
}
else
{
else {
int d = findMemFor(i);
if (rmask(r) & FpRegs)
{
#if defined NANOJIT_IA32
if (rmask(r) & XmmRegs) {
#endif
SSE_LDQ(r, d, FP);
#if defined NANOJIT_IA32
} else {
FLDQ(d, FP);
}
#endif
}
else
{
#if defined NANOJIT_AMD64
LDQ(r, d, FP);
#else
LD(r, d, FP);
#endif
}
asm_load(d,r);
verbose_only(if (_verbose) {
outputf(" restore %s", _thisfrag->lirbuf->names->formatRef(i));
})
@ -518,7 +545,13 @@ namespace nanojit
{
if (value->isconst())
{
Register rb = findRegFor(base, GpRegs);
Register rb;
if (base->isop(LIR_alloc)) {
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
int c = value->constval();
STi(rb, dr, c);
}
@ -526,18 +559,28 @@ namespace nanojit
{
// make sure what is in a register
Reservation *rA, *rB;
findRegFor2(GpRegs, value, rA, base, rB);
Register ra = rA->reg;
Register rb = rB->reg;
Register ra, rb;
if (base->isop(LIR_alloc)) {
rb = FP;
dr += findMemFor(base);
ra = findRegFor(value, GpRegs);
} else if (base->isconst()) {
// absolute address
dr += base->constval();
ra = findRegFor(value, GpRegs);
rb = UnknownReg;
} else {
findRegFor2(GpRegs, value, rA, base, rB);
ra = rA->reg;
rb = rB->reg;
}
ST(rb, dr, ra);
}
}
void Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
void Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
{
(void)i;
int d = disp(resv);
Register rr = resv->reg;
(void)quad;
if (d)
{
// save to spill location
@ -553,17 +596,16 @@ namespace nanojit
}
#endif
}
#if defined NANOJIT_AMD64
else if (quad)
{
STQ(FP, d, rr);
}
#endif
else
{
#if defined NANOJIT_AMD64
STQ(FP, d, rr);
#else
ST(FP, d, rr);
#endif
}
verbose_only(if (_verbose) {
outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i));
})
}
#if defined NANOJIT_IA32
else if (pop && (rmask(rr) & x87Regs))
@ -571,7 +613,21 @@ namespace nanojit
// pop the fpu result since it isn't used
FSTP(FST0);
}
#endif
#endif
}
void Assembler::asm_spilli(LInsp i, Reservation *resv, bool pop)
{
int d = disp(resv);
Register rr = resv->reg;
bool quad = i->opcode() == LIR_param || i->isQuad();
asm_spill(rr, d, pop, quad);
if (d)
{
verbose_only(if (_verbose) {
outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i));
})
}
}
void Assembler::asm_load64(LInsp ins)
@ -584,7 +640,13 @@ namespace nanojit
if (rr != UnknownReg && rmask(rr) & XmmRegs)
{
freeRsrcOf(ins, false);
Register rb = findRegFor(base, GpRegs);
Register rb;
if (base->isop(LIR_alloc)) {
rb = FP;
db += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
SSE_LDQ(rr, db, rb);
}
#if defined NANOJIT_AMD64
@ -614,7 +676,13 @@ namespace nanojit
else
{
int dr = disp(resv);
Register rb = findRegFor(base, GpRegs);
Register rb;
if (base->isop(LIR_alloc)) {
rb = FP;
db += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
resv->reg = UnknownReg;
// don't use an fpu reg to simply load & store the value.
@ -639,7 +707,13 @@ namespace nanojit
{
// if a constant 64-bit value just store it now rather than
// generating a pointless store/load/store sequence
Register rb = findRegFor(base, GpRegs);
Register rb;
if (base->isop(LIR_alloc)) {
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
const int32_t* p = (const int32_t*) (value-2);
STi(rb, dr+4, p[1]);
STi(rb, dr, p[0]);
@ -647,7 +721,7 @@ namespace nanojit
}
#if defined NANOJIT_IA32
if (value->isop(LIR_ldq) || value->isop(LIR_qjoin))
if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin))
{
// value is 64bit struct or int64_t, or maybe a double.
// it may be live in an FPU reg. Either way, don't
@ -660,21 +734,47 @@ namespace nanojit
if (avmplus::AvmCore::use_sse2()) {
Register rv = findRegFor(value, XmmRegs);
Register rb = findRegFor(base, GpRegs);
Register rb;
if (base->isop(LIR_alloc)) {
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
SSE_STQ(dr, rb, rv);
return;
}
int da = findMemFor(value);
Register rb = findRegFor(base, GpRegs);
Register rb;
if (base->isop(LIR_alloc)) {
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
asm_mmq(rb, dr, FP, da);
return;
}
Register rb;
if (base->isop(LIR_alloc)) {
rb = FP;
dr += findMemFor(base);
} else {
rb = findRegFor(base, GpRegs);
}
// if value already in a reg, use that, otherwise
// try to get it into XMM regs before FPU regs.
Reservation* rA = getresv(value);
Register rv;
int pop = !rA || rA->reg==UnknownReg;
Register rv = findRegFor(value, avmplus::AvmCore::use_sse2() ? XmmRegs : FpRegs);
Register rb = findRegFor(base, GpRegs);
if (pop) {
rv = findRegFor(value, avmplus::AvmCore::use_sse2() ? XmmRegs : FpRegs);
} else {
rv = rA->reg;
}
if (rmask(rv) & XmmRegs) {
SSE_STQ(dr, rb, rv);
@ -763,8 +863,10 @@ namespace nanojit
NanoAssert((rmask(rr) & FpRegs) != 0);
const double d = ins->constvalf();
const uint64_t q = ins->constvalq();
if (rmask(rr) & XmmRegs) {
if (d == 0.0) {
if (q == 0.0) {
// test (int64)0 since -0.0 == 0.0
SSE_XORPDr(rr, rr);
} else if (d == 1.0) {
// 1.0 is extremely frequent and worth special-casing!
@ -776,7 +878,8 @@ namespace nanojit
SSE_LDQ(rr, d, FP);
}
} else {
if (d == 0.0) {
if (q == 0.0) {
// test (int64)0 since -0.0 == 0.0
FLDZ();
} else if (d == 1.0) {
FLD1();
@ -803,24 +906,23 @@ namespace nanojit
if (rR->reg != UnknownReg)
{
Register rr = rR->reg;
freeRsrcOf(ins, false);
if (rmask(rr) & GpRegs)
if (rmask(rR->reg) & GpRegs)
{
LDQi(rr, val);
LDQi(rR->reg, val);
}
else if (rmask(rr) & XmmRegs)
else if (rmask(rR->reg) & XmmRegs)
{
if (ins->constvalf() == 0.0)
{
SSE_XORPDr(rr, rr);
SSE_XORPDr(rR->reg, rR->reg);
}
else
{
/* Get a short-lived register, not associated with instruction */
Register rd = rR->reg;
Register rs = registerAlloc(GpRegs);
SSE_MOVD(rr, rs);
SSE_MOVD(rd, rs);
LDQi(rs, val);
_allocator.addFree(rs);
@ -831,10 +933,11 @@ namespace nanojit
{
const int32_t* p = (const int32_t*) (ins-2);
int dr = disp(rR);
freeRsrcOf(ins, false);
STi(FP, dr+4, p[1]);
STi(FP, dr, p[0]);
}
freeRsrcOf(ins, false);
#endif
}
@ -925,22 +1028,75 @@ namespace nanojit
#endif
}
void Assembler::asm_arg(ArgSize sz, LInsp p, Register r)
{
if (sz == ARGSIZE_Q)
{
// ref arg - use lea
if (r != UnknownReg)
{
// arg in specific reg
int da = findMemFor(p);
LEA(r, da, FP);
}
else
{
NanoAssert(0); // not supported
}
}
else if (sz == ARGSIZE_LO)
{
if (r != UnknownReg) {
// arg goes in specific register
if (p->isconst()) {
LDi(r, p->constval());
} else {
Reservation* rA = getresv(p);
if (rA) {
if (rA->reg == UnknownReg) {
// load it into the arg reg
int d = findMemFor(p);
if (p->isop(LIR_alloc)) {
LEA(r, d, FP);
} else {
LD(r, d, FP);
}
} else {
// it must be in a saved reg
MR(r, rA->reg);
}
}
else {
// this is the last use, so fine to assign it
// to the scratch reg, it's dead after this point.
findSpecificRegFor(p, r);
}
}
}
else {
asm_pusharg(p);
}
}
else
{
NanoAssert(sz == ARGSIZE_F);
asm_farg(p);
}
}
void Assembler::asm_pusharg(LInsp p)
{
// arg goes on stack
Reservation* rA = getresv(p);
if (rA == 0)
if (rA == 0 && p->isconst())
{
if (p->isconst())
{
// small const we push directly
PUSHi(p->constval());
}
else
{
Register ra = findRegFor(p, GpRegs);
PUSHr(ra);
}
// small const we push directly
PUSHi(p->constval());
}
else if (rA == 0 || p->isop(LIR_alloc))
{
Register ra = findRegFor(p, GpRegs);
PUSHr(ra);
}
else if (rA->reg == UnknownReg)
{
@ -955,14 +1111,16 @@ namespace nanojit
void Assembler::asm_farg(LInsp p)
{
#if defined NANOJIT_IA32
NanoAssert(p->isQuad());
Register r = findRegFor(p, FpRegs);
if (rmask(r) & XmmRegs) {
SSE_STQ(0, SP, r);
} else {
FSTPQ(0, SP);
}
PUSHr(ECX); // 2*pushr is smaller than sub
PUSHr(ECX);
SUBi(ESP,8);
//PUSHr(ECX); // 2*pushr is smaller than sub
//PUSHr(ECX);
#endif
}
@ -997,7 +1155,10 @@ namespace nanojit
*/
ra = findRegFor(lhs, XmmRegs);
}
// else, rA already has a register assigned.
else {
// rA already has a register assigned but maybe not from the allow set
ra = findRegFor(lhs, allow);
}
if (lhs == rhs)
rb = ra;
@ -1190,6 +1351,75 @@ namespace nanojit
}
}
NIns * Assembler::asm_jmpcc(bool branchOnFalse, LIns *cond, NIns *targ)
{
LOpcode c = cond->opcode();
if (avmplus::AvmCore::use_sse2() && c != LIR_feq) {
LIns *lhs = cond->oprnd1();
LIns *rhs = cond->oprnd2();
if (c == LIR_flt) {
LIns *t = lhs; lhs = rhs; rhs = t;
c = LIR_fgt;
}
else if (c == LIR_fle) {
LIns *t = lhs; lhs = rhs; rhs = t;
c = LIR_fge;
}
if (c == LIR_fgt) {
if (branchOnFalse) { JNA(targ); } else { JA(targ); }
}
else { // if (c == LIR_fge)
if (branchOnFalse) { JNAE(targ); } else { JAE(targ); }
}
NIns *at = _nIns;
Reservation *rA, *rB;
findRegFor2(XmmRegs, lhs, rA, rhs, rB);
SSE_UCOMISD(rA->reg, rB->reg);
return at;
}
if (branchOnFalse)
JP(targ);
else
JNP(targ);
NIns *at = _nIns;
asm_fcmp(cond);
return at;
}
void Assembler::asm_setcc(Register r, LIns *cond)
{
LOpcode c = cond->opcode();
if (avmplus::AvmCore::use_sse2() && c != LIR_feq) {
MOVZX8(r,r);
LIns *lhs = cond->oprnd1();
LIns *rhs = cond->oprnd2();
if (c == LIR_flt) {
LIns *t = lhs; lhs = rhs; rhs = t;
SETA(r);
}
else if (c == LIR_fle) {
LIns *t = lhs; lhs = rhs; rhs = t;
SETAE(r);
}
else if (c == LIR_fgt) {
SETA(r);
}
else { // if (c == LIR_fge)
SETAE(r);
}
Reservation *rA, *rB;
findRegFor2(XmmRegs, lhs, rA, rhs, rB);
SSE_UCOMISD(rA->reg, rB->reg);
return;
}
// SETcc only sets low 8 bits, so extend
MOVZX8(r,r);
SETNP(r);
asm_fcmp(cond);
}
void Assembler::asm_fcmp(LIns *cond)
{
LOpcode condop = cond->opcode();
@ -1206,10 +1436,12 @@ namespace nanojit
mask = 0x05;
else if (condop == LIR_fge) {
// swap, use le
condop = LIR_fle;
LIns* t = lhs; lhs = rhs; rhs = t;
mask = 0x41;
} else { // if (condop == LIR_fgt)
// swap, use lt
condop = LIR_flt;
LIns* t = lhs; lhs = rhs; rhs = t;
mask = 0x05;
}
@ -1227,7 +1459,8 @@ namespace nanojit
// nan check
Register r = findRegFor(lhs, XmmRegs);
SSE_UCOMISD(r, r);
} else {
}
else {
#if defined NANOJIT_IA32
evict(EAX);
TEST_AH(mask);
@ -1384,5 +1617,19 @@ namespace nanojit
if (!_nIns) _nIns = pageAlloc();
if (!_nExitIns) _nExitIns = pageAlloc(true);
}
// enough room for n bytes
void Assembler::underrunProtect(int n)
{
NIns *eip = this->_nIns;
Page *p = (Page*)pageTop(eip-1);
NIns *top = (NIns*) &p->code[0];
if (eip - n < top) {
_nIns = pageAlloc(_inExit);
JMP(eip);
}
}
#endif /* FEATURE_NANOJIT */
}
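The reworked prologue accounts for just the return address and the saved EBP, forces SP down to a 16-byte boundary so SSE loads and stores to stack slots are safe, and then reserves the activation area. A worked example of the size arithmetic, with assumed values STACK_GRANULARITY = 4, NJ_ALIGN_STACK = 16 and 21 spill slots:

#include <cstdint>
#include <cstdio>

static uint32_t alignUp(uint32_t v, uint32_t align) {   // round up to a multiple of align
    return (v + align - 1) & ~(align - 1);
}

int main() {
    const uint32_t STACK_GRANULARITY = 4, NJ_ALIGN_STACK = 16;
    uint32_t highwatermark = 21;                              // assumed activation size
    uint32_t stackNeeded = STACK_GRANULARITY * highwatermark; // 84 bytes of spills
    uint32_t stackPushed = STACK_GRANULARITY                  // return address
                         + STACK_GRANULARITY;                 // saved EBP
    uint32_t aligned = alignUp(stackNeeded + stackPushed, NJ_ALIGN_STACK);
    printf("SUBi(SP, %u)\n", aligned - stackPushed);          // 96 - 8 = 88
    return 0;
}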

View file

@ -101,6 +101,7 @@ namespace nanojit
typedef int RegisterMask;
static const int NumSavedRegs = 3;
static const RegisterMask SavedRegs = 1<<EBX | 1<<EDI | 1<<ESI;
static const RegisterMask GpRegs = SavedRegs | 1<<EAX | 1<<ECX | 1<<EDX;
static const RegisterMask XmmRegs = 1<<XMM0|1<<XMM1|1<<XMM2|1<<XMM3|1<<XMM4|1<<XMM5|1<<XMM6|1<<XMM7;
@ -132,23 +133,12 @@ namespace nanojit
bool pad[3];\
void nativePageReset();\
void nativePageSetup();\
void asm_farg(LInsp);
void underrunProtect(int);\
void asm_farg(LInsp);\
void asm_align_code();
#define swapptrs() { NIns* _tins = _nIns; _nIns=_nExitIns; _nExitIns=_tins; }
// enough room for n bytes
#define underrunProtect(n) \
{ \
intptr_t u = n + sizeof(PageHeader)/sizeof(NIns) + 5; \
if ( !samepage(_nIns-u,_nIns-1) ) \
{ \
NIns *tt = _nIns; \
_nIns = pageAlloc(_inExit); \
int d = tt-_nIns; \
JMP_long_nochk_offset(d); \
} \
} \
#define IMM32(i) \
_nIns -= 4; \
*((int32_t*)_nIns) = (int32_t)(i)
@ -171,8 +161,11 @@ namespace nanojit
}
#define MODRMm(r,d,b) \
NanoAssert(unsigned(r)<8 && unsigned(b)<8); \
if ((b) == ESP) { \
NanoAssert(unsigned(r)<8 && ((b)==UnknownReg || unsigned(b)<8)); \
if ((b) == UnknownReg) {\
IMM32(d);\
*(--_nIns) = (uint8_t) (0<<6 | (r)<<3 | 5);\
} else if ((b) == ESP) { \
MODRMs(r, d, b, 0, (Register)4); \
} \
else if ( (d) == 0 && (b) != EBP) { \
@ -344,7 +337,7 @@ namespace nanojit
#define ST(base,disp,reg) do { \
ALUm(0x89,reg,disp,base); \
asm_output3("mov %d(%s),%s",disp,gpn(base),gpn(reg)); } while(0)
asm_output3("mov %d(%s),%s",disp,base==UnknownReg?"0":gpn(base),gpn(reg)); } while(0)
#define STi(base,disp,imm) do { \
underrunProtect(12); \
@ -497,7 +490,7 @@ namespace nanojit
*(--_nIns) = 0x10;\
*(--_nIns) = 0x0f;\
*(--_nIns) = 0xf2;\
asm_output3("movsd %s,%p // =%f",gpn(r),daddr,*daddr); \
asm_output3("movsd %s,(#%p) // =%f",gpn(r),(void*)daddr,*daddr); \
} while(0)
#define STSD(d,b,r)do { \
@ -539,61 +532,70 @@ namespace nanojit
} while(0)
#define SSE_MOVSD(rd,rs) do{ \
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
SSE(0xf20f10, (rd)&7, (rs)&7); \
asm_output2("movsd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
#define SSE_MOVDm(d,b,xrs) do {\
NanoAssert(_is_xmm_reg_(xrs) && _is_gp_reg_(b));\
SSEm(0x660f7e, (xrs)&7, d, b);\
asm_output3("movd %d(%s),%s", d, gpn(b), gpn(xrs));\
} while(0)
#define SSE_ADDSD(rd,rs) do{ \
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
SSE(0xf20f58, (rd)&7, (rs)&7); \
asm_output2("addsd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
#define SSE_ADDSDm(r,addr)do { \
underrunProtect(8); \
NanoAssert(_is_xmm_reg_(r));\
const double* daddr = addr; \
IMM32(int32_t(daddr));\
*(--_nIns) = uint8_t(((r)&7)<<3|5); \
*(--_nIns) = 0x58;\
*(--_nIns) = 0x0f;\
*(--_nIns) = 0xf2;\
asm_output3("addsd %s,%p // =%f",gpn(r),daddr,*daddr); \
asm_output3("addsd %s,%p // =%f",gpn(r),(void*)daddr,*daddr); \
} while(0)
#define SSE_SUBSD(rd,rs) do{ \
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
SSE(0xf20f5c, (rd)&7, (rs)&7); \
asm_output2("subsd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
#define SSE_MULSD(rd,rs) do{ \
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
SSE(0xf20f59, (rd)&7, (rs)&7); \
asm_output2("mulsd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
#define SSE_DIVSD(rd,rs) do{ \
NanoAssert(_is_xmm_reg_(rd) && _is_xmm_reg_(rs));\
SSE(0xf20f5e, (rd)&7, (rs)&7); \
asm_output2("divsd %s,%s",gpn(rd),gpn(rs)); \
} while(0)
#define SSE_UCOMISD(rl,rr) do{ \
NanoAssert(_is_xmm_reg_(rl) && _is_xmm_reg_(rr));\
SSE(0x660f2e, (rl)&7, (rr)&7); \
asm_output2("ucomisd %s,%s",gpn(rl),gpn(rr)); \
} while(0)
#define CVTSI2SDm(xr,d,b) do{ \
NanoAssert(_is_xmm_reg_(xr) && _is_gp_reg_(b));\
SSEm(0xf20f2a, (xr)&7, (d), (b)); \
asm_output3("cvtsi2sd %s,%d(%s)",gpn(xr),(d),gpn(b)); \
} while(0)
#define SSE_XORPD(r, maskaddr) do {\
underrunProtect(8); \
underrunProtect(8); \
IMM32(maskaddr);\
*(--_nIns) = uint8_t(((r)&7)<<3|5); \
*(--_nIns) = 0x57;\
*(--_nIns) = 0x0f;\
*(--_nIns) = 0x66;\
asm_output2("xorpd %s,[0x%p]",gpn(r),(maskaddr));\
asm_output2("xorpd %s,[0x%p]",gpn(r),(void*)(maskaddr));\
} while(0)
#define SSE_XORPDr(rd,rs) do{ \
@ -657,6 +659,7 @@ namespace nanojit
#define FLDr(r) do { FPU(0xd9c0,r); asm_output1("fld %s",fpn(r)); fpu_push(); } while(0)
#define EMMS() do { FPUc(0x0f77); asm_output("emms"); } while (0)
// standard direct call
#define CALL(c) do { \
underrunProtect(5); \
int offset = (c->_address) - ((int)_nIns); \
@ -666,5 +669,14 @@ namespace nanojit
debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\
} while (0)
// indirect call thru register
#define CALLr(c,r) do { \
underrunProtect(2);\
ALU(0xff, 2, (r));\
verbose_only(asm_output1("call %s",gpn(r));) \
debug_only(if ((c->_argtypes&3)==ARGSIZE_F) fpu_push();)\
} while (0)
}
#endif // __nanojit_Nativei386__
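The MODRMm change adds an UnknownReg case so a store or load can address a constant absolute location: in 32-bit x86, mod = 00 with r/m = 101 encodes a bare [disp32] operand. The byte arithmetic used in that branch:

#include <cstdint>
#include <cstdio>

int main() {
    unsigned r = 2;                                   // e.g. EDX in the reg field
    uint8_t modrm = uint8_t(0 << 6 | r << 3 | 5);     // mod=00, reg=r, r/m=101 -> [disp32]
    printf("modrm byte: 0x%02x\n", modrm);            // 0x15; a 4-byte displacement follows
    return 0;
}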

View file

@ -72,11 +72,18 @@ namespace nanojit
void RegAlloc::addActive(Register r, LIns* v)
{
//addActiveCount++;
// Count++;
NanoAssert(v && r != UnknownReg && active[r] == NULL );
active[r] = v;
useActive(r);
}
void RegAlloc::useActive(Register r)
{
NanoAssert(r != UnknownReg && active[r] != NULL);
usepri[r] = priority++;
}
void RegAlloc::removeActive(Register r)
{
//registerReleaseCount++;
@ -87,12 +94,6 @@ namespace nanojit
active[r] = NULL;
}
LIns* RegAlloc::getActive(Register r)
{
NanoAssert(r != UnknownReg);
return active[r];
}
void RegAlloc::retire(Register r)
{
NanoAssert(r != UnknownReg);
@ -101,30 +102,26 @@ namespace nanojit
free |= rmask(r);
}
// scan table for instruction with longest span
LIns* Assembler::findVictim(RegAlloc &regs, RegisterMask allow, RegisterMask prefer)
// scan table for instruction with the lowest priority, meaning it is used
// furthest in the future.
LIns* Assembler::findVictim(RegAlloc &regs, RegisterMask allow)
{
NanoAssert(allow != 0 && (allow&prefer)==prefer);
LIns *i, *a=0, *p = 0;
int acost=10, pcost=10;
NanoAssert(allow != 0);
LIns *i, *a=0;
int allow_pri = 0x7fffffff;
for (Register r=FirstReg; r <= LastReg; r = nextreg(r))
{
if ((allow & rmask(r)) && (i = regs.getActive(r)) != 0)
{
int cost = getresv(i)->cost;
if (!a || cost < acost || cost == acost && nbr(i) < nbr(a)) {
int pri = canRemat(i) ? 0 : regs.getPriority(r);
if (!a || pri < allow_pri) {
a = i;
acost = cost;
}
if (prefer & rmask(r)) {
if (!p || cost < pcost || cost == pcost && nbr(i) < nbr(p)) {
p = i;
pcost = cost;
}
allow_pri = pri;
}
}
}
return acost < pcost ? a : p;
NanoAssert(a != 0);
return a;
}
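findVictim no longer consults the per-reservation cost byte (which is gone from Reservation); instead each use of a register bumps a monotonically increasing priority, and the active register with the lowest priority is chosen, i.e. the one whose value is next needed furthest away in execution order, since code is generated back to front (values that canRemat() can rebuild for free get priority 0). A toy model of the selection:

#include <cstdint>
#include <cstdio>

int main() {
    const int NREGS = 4;
    int32_t usepri[NREGS] = {0, 0, 0, 0};   // cf. RegAlloc::usepri
    int32_t priority = 1;                   // cf. RegAlloc::priority

    // Simulated uses while assembling backwards (last executed instruction first).
    int uses[] = {3, 0, 2, 1, 2};
    for (int i = 0; i < 5; i++)
        usepri[uses[i]] = priority++;

    int victim = 0;
    for (int r = 1; r < NREGS; r++)         // cf. Assembler::findVictim
        if (usepri[r] < usepri[victim])
            victim = r;

    // r3 was touched earliest during backward assembly, so its next use in
    // execution order is the furthest away: the cheapest value to evict.
    printf("spill r%d\n", victim);
    return 0;
}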
#ifdef NJ_VERBOSE

View file

@ -51,15 +51,28 @@ namespace nanojit
class RegAlloc MMGC_SUBCLASS_DECL
{
public:
RegAlloc() {}
RegAlloc() : free(0), used(0), priority(0) {}
void clear();
bool isFree(Register r);
void addFree(Register r);
void removeFree(Register r);
void addActive(Register r, LIns* ins);
void useActive(Register r);
void removeActive(Register r);
LIns* getActive(Register r);
void retire(Register r);
bool isValid() {
return (free|used) != 0;
}
int32_t getPriority(Register r) {
NanoAssert(r != UnknownReg && active[r]);
return usepri[r];
}
LIns* getActive(Register r) {
NanoAssert(r != UnknownReg);
return active[r];
}
debug_only( uint32_t countFree(); )
debug_only( uint32_t countActive(); )
@ -68,11 +81,11 @@ namespace nanojit
debug_only( uint32_t count; )
debug_only( RegisterMask managed; ) // bitfield of 0..NJ_MAX_REGISTERS denoting which are under our management
// RegisterMask is a 32-bit value, so we can never have more than 32 active.
// hardcode 32 here in case we have non-contiguous register numbers
LIns* active[32]; // active[r] = OP that defines r
LIns* active[LastReg + 1]; // active[r] = OP that defines r
int32_t usepri[LastReg + 1]; // used priority. lower = more likely to spill.
RegisterMask free;
RegisterMask used;
int32_t priority;
verbose_only( static void formatRegisters(RegAlloc& regs, char* s, Fragment*); )

View file

@ -326,6 +326,8 @@ public:
#define DWB(x) x
#define DRCWB(x) x
#define WB(gc, container, addr, value) do { *(addr) = (value); } while(0)
#define WBRC(gc, container, addr, value) do { *(addr) = (value); } while(0)
#define MMGC_MEM_TYPE(x)

View file

@ -42,6 +42,8 @@
#include <stddef.h>
#include "avmplus.h"
#ifdef FEATURE_NANOJIT
#ifdef AVMPLUS_IA32
#define NANOJIT_IA32
#elif AVMPLUS_ARM
@ -73,6 +75,8 @@ namespace nanojit
typedef avmplus::List<LIns*,avmplus::LIST_NonGCObjects> InsList;
typedef avmplus::List<char*, avmplus::LIST_GCObjects> StringList;
const uint32_t MAXARGS = 8;
#if defined(_MSC_VER) && _MSC_VER < 1400
static void NanoAssertMsgf(bool a,const char *f,...) {}
static void NanoAssertMsg(bool a,const char *m) {}
@ -113,12 +117,12 @@ namespace nanojit
#define verbose_output if (verbose_enabled()) Assembler::output
#define verbose_outputf if (verbose_enabled()) Assembler::outputf
#define verbose_enabled() (_verbose)
#define verbose_only(x) x
#define verbose_only(...) __VA_ARGS__
#else
#define verbose_output
#define verbose_outputf
#define verbose_enabled()
#define verbose_only(x)
#define verbose_only(...)
#endif /*NJ_VERBOSE*/
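One plausible reason for switching verbose_only from a single parameter to __VA_ARGS__ (an assumption; the diff does not state the motivation) is that an argument containing a top-level comma is split into several macro arguments by a one-parameter macro, while a variadic macro forwards it intact. A minimal illustration, with my_verbose_only and trace_hits as made-up names:

    // new style: forwards commas intact
    #define my_verbose_only(...) __VA_ARGS__

    int main()
    {
        my_verbose_only(int trace_hits[2] = {0, 0};)   // expands to the declaration
        // With the old "#define my_verbose_only(x) x", the line above fails to
        // preprocess: the comma inside {0, 0} makes the call look like two
        // arguments, because braces (unlike parentheses) do not group commas.
        return trace_hits[0];
    }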
#ifdef _DEBUG
@ -172,4 +176,5 @@ namespace nanojit
#include "Assembler.h"
#include "TraceTreeDrawer.h"
#endif // FEATURE_NANOJIT
#endif // __nanojit_h__

View file

@ -1,224 +0,0 @@
/*
* A JavaScript implementation of the Secure Hash Algorithm, SHA-1, as defined
* in FIPS PUB 180-1
* Version 2.1a Copyright Paul Johnston 2000 - 2002.
* Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet
* Distributed under the BSD License
* See http://pajhome.org.uk/crypt/md5 for details.
*/
/*
* Configurable variables. You may need to tweak these to be compatible with
* the server-side, but the defaults work in most cases.
*/
var hexcase = 0; /* hex output format. 0 - lowercase; 1 - uppercase */
var b64pad = ""; /* base-64 pad character. "=" for strict RFC compliance */
var chrsz = 8; /* bits per input character. 8 - ASCII; 16 - Unicode */
/*
* These are the functions you'll usually want to call
* They take string arguments and return either hex or base-64 encoded strings
*/
function hex_sha1(s){return binb2hex(core_sha1(str2binb(s),s.length * chrsz));}
function b64_sha1(s){return binb2b64(core_sha1(str2binb(s),s.length * chrsz));}
function str_sha1(s){return binb2str(core_sha1(str2binb(s),s.length * chrsz));}
function hex_hmac_sha1(key, data){ return binb2hex(core_hmac_sha1(key, data));}
function b64_hmac_sha1(key, data){ return binb2b64(core_hmac_sha1(key, data));}
function str_hmac_sha1(key, data){ return binb2str(core_hmac_sha1(key, data));}
/*
* Perform a simple self-test to see if the VM is working
*/
function sha1_vm_test()
{
return hex_sha1("abc") == "a9993e364706816aba3e25717850c26c9cd0d89d";
}
/*
* Calculate the SHA-1 of an array of big-endian words, and a bit length
*/
function core_sha1(x, len)
{
/* append padding */
x[len >> 5] |= 0x80 << (24 - len % 32);
x[((len + 64 >> 9) << 4) + 15] = len;
var w = Array(80);
var a = 1732584193;
var b = -271733879;
var c = -1732584194;
var d = 271733878;
var e = -1009589776;
for(var i = 0; i < x.length; i += 16)
{
var olda = a;
var oldb = b;
var oldc = c;
var oldd = d;
var olde = e;
for(var j = 0; j < 80; j++)
{
if(j < 16) w[j] = x[i + j];
else w[j] = rol(w[j-3] ^ w[j-8] ^ w[j-14] ^ w[j-16], 1);
var t = safe_add(safe_add(rol(a, 5), sha1_ft(j, b, c, d)),
safe_add(safe_add(e, w[j]), sha1_kt(j)));
e = d;
d = c;
c = rol(b, 30);
b = a;
a = t;
}
a = safe_add(a, olda);
b = safe_add(b, oldb);
c = safe_add(c, oldc);
d = safe_add(d, oldd);
e = safe_add(e, olde);
}
return Array(a, b, c, d, e);
}
/*
* Perform the appropriate triplet combination function for the current
* iteration
*/
function sha1_ft(t, b, c, d)
{
if(t < 20) return (b & c) | ((~b) & d);
if(t < 40) return b ^ c ^ d;
if(t < 60) return (b & c) | (b & d) | (c & d);
return b ^ c ^ d;
}
/*
* Determine the appropriate additive constant for the current iteration
*/
function sha1_kt(t)
{
return (t < 20) ? 1518500249 : (t < 40) ? 1859775393 :
(t < 60) ? -1894007588 : -899497514;
}
/*
* Calculate the HMAC-SHA1 of a key and some data
*/
function core_hmac_sha1(key, data)
{
var bkey = str2binb(key);
if(bkey.length > 16) bkey = core_sha1(bkey, key.length * chrsz);
var ipad = Array(16), opad = Array(16);
for(var i = 0; i < 16; i++)
{
ipad[i] = bkey[i] ^ 0x36363636;
opad[i] = bkey[i] ^ 0x5C5C5C5C;
}
var hash = core_sha1(ipad.concat(str2binb(data)), 512 + data.length * chrsz);
return core_sha1(opad.concat(hash), 512 + 160);
}
/*
* Add integers, wrapping at 2^32. This uses 16-bit operations internally
* to work around bugs in some JS interpreters.
*/
function safe_add(x, y)
{
var lsw = (x & 0xFFFF) + (y & 0xFFFF);
var msw = (x >> 16) + (y >> 16) + (lsw >> 16);
return (msw << 16) | (lsw & 0xFFFF);
}
/*
* Bitwise rotate a 32-bit number to the left.
*/
function rol(num, cnt)
{
return (num << cnt) | (num >>> (32 - cnt));
}
/*
* Convert an 8-bit or 16-bit string to an array of big-endian words
* In 8-bit function, characters >255 have their hi-byte silently ignored.
*/
function str2binb(str)
{
var bin = Array();
var mask = (1 << chrsz) - 1;
for(var i = 0; i < str.length * chrsz; i += chrsz)
bin[i>>5] |= (str.charCodeAt(i / chrsz) & mask) << (32 - chrsz - i%32);
return bin;
}
/*
* Convert an array of big-endian words to a string
*/
function binb2str(bin)
{
var str = "";
var mask = (1 << chrsz) - 1;
for(var i = 0; i < bin.length * 32; i += chrsz)
str += String.fromCharCode((bin[i>>5] >>> (32 - chrsz - i%32)) & mask);
return str;
}
/*
* Convert an array of big-endian words to a hex string.
*/
function binb2hex(binarray)
{
var hex_tab = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
var str = "";
for(var i = 0; i < binarray.length * 4; i++)
{
str += hex_tab.charAt((binarray[i>>2] >> ((3 - i%4)*8+4)) & 0xF) +
hex_tab.charAt((binarray[i>>2] >> ((3 - i%4)*8 )) & 0xF);
}
return str;
}
/*
* Convert an array of big-endian words to a base-64 string
*/
function binb2b64(binarray)
{
var tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
var str = "";
for(var i = 0; i < binarray.length * 4; i += 3)
{
var triplet = (((binarray[i >> 2] >> 8 * (3 - i %4)) & 0xFF) << 16)
| (((binarray[i+1 >> 2] >> 8 * (3 - (i+1)%4)) & 0xFF) << 8 )
| ((binarray[i+2 >> 2] >> 8 * (3 - (i+2)%4)) & 0xFF);
for(var j = 0; j < 4; j++)
{
if(i * 8 + j * 6 > binarray.length * 32) str += b64pad;
else str += tab.charAt((triplet >> 6*(3-j)) & 0x3F);
}
}
return str;
}
var plainText = "Two households, both alike in dignity,\n\
In fair Verona, where we lay our scene,\n\
From ancient grudge break to new mutiny,\n\
Where civil blood makes civil hands unclean.\n\
From forth the fatal loins of these two foes\n\
A pair of star-cross'd lovers take their life;\n\
Whole misadventured piteous overthrows\n\
Do with their death bury their parents' strife.\n\
The fearful passage of their death-mark'd love,\n\
And the continuance of their parents' rage,\n\
Which, but their children's end, nought could remove,\n\
Is now the two hours' traffic of our stage;\n\
The which if you with patient ears attend,\n\
What here shall miss, our toil shall strive to mend.";
for (var i = 0; i <4; i++) {
plainText += plainText;
}
var sha1Output = hex_sha1(plainText);