зеркало из https://github.com/mozilla/pjs.git
Add VFP for floating point ops to nanojit ARM backend.
This commit is contained in:
Родитель
0fe0d78272
Коммит
05c3cd68da
|
@ -119,7 +119,7 @@ static bool nesting_enabled = true;
|
|||
static bool oracle_enabled = true;
|
||||
static bool did_we_check_sse2 = false;
|
||||
|
||||
#ifdef DEBUG
|
||||
#if defined(DEBUG) || defined(INCLUDE_VERBOSE_OUTPUT)
|
||||
static bool verbose_debug = getenv("TRACEMONKEY") && strstr(getenv("TRACEMONKEY"), "verbose");
|
||||
#define debug_only_v(x) if (verbose_debug) { x; }
|
||||
#else
|
||||
|
@ -282,7 +282,7 @@ static bool isi2f(LInsp i)
|
|||
if (i->isop(LIR_i2f))
|
||||
return true;
|
||||
|
||||
#ifdef NANOJIT_ARM
|
||||
#if defined(NANOJIT_ARM) && defined(NJ_SOFTFLOAT)
|
||||
if (i->isop(LIR_qjoin) &&
|
||||
i->oprnd1()->isop(LIR_call) &&
|
||||
i->oprnd2()->isop(LIR_callh))
|
||||
|
@ -300,7 +300,7 @@ static bool isu2f(LInsp i)
|
|||
if (i->isop(LIR_u2f))
|
||||
return true;
|
||||
|
||||
#ifdef NANOJIT_ARM
|
||||
#if defined(NANOJIT_ARM) && defined(NJ_SOFTFLOAT)
|
||||
if (i->isop(LIR_qjoin) &&
|
||||
i->oprnd1()->isop(LIR_call) &&
|
||||
i->oprnd2()->isop(LIR_callh))
|
||||
|
@ -315,7 +315,7 @@ static bool isu2f(LInsp i)
|
|||
|
||||
static LInsp iu2fArg(LInsp i)
|
||||
{
|
||||
#ifdef NANOJIT_ARM
|
||||
#if defined(NANOJIT_ARM) && defined(NJ_SOFTFLOAT)
|
||||
if (i->isop(LIR_qjoin))
|
||||
return i->oprnd1()->arg(0);
|
||||
#endif
|
||||
|
@ -371,7 +371,7 @@ static bool overflowSafe(LIns* i)
|
|||
((c->constval() > 0)));
|
||||
}
|
||||
|
||||
#ifdef NANOJIT_ARM
|
||||
#if defined(NJ_SOFTFLOAT)
|
||||
|
||||
class SoftFloatFilter: public LirWriter
|
||||
{
|
||||
|
@ -428,19 +428,6 @@ public:
|
|||
return out->ins2(LIR_eq, bv, out->insImm(1));
|
||||
}
|
||||
|
||||
// not really a softfloat filter, but needed on ARM --
|
||||
// arm doesn't mask shifts to 31 like x86 does
|
||||
if (v == LIR_lsh ||
|
||||
v == LIR_rsh ||
|
||||
v == LIR_ush)
|
||||
{
|
||||
if (s1->isconst())
|
||||
s1->setimm16(s1->constval() & 31);
|
||||
else
|
||||
s1 = out->ins2(LIR_and, s1, out->insImm(31));
|
||||
return out->ins2(v, s0, s1);
|
||||
}
|
||||
|
||||
return out->ins2(v, s0, s1);
|
||||
}
|
||||
|
||||
|
@ -455,7 +442,7 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif // NJ_SOFTFLOAT
|
||||
|
||||
class FuncFilter: public LirWriter
|
||||
{
|
||||
|
@ -550,6 +537,20 @@ public:
|
|||
return out->ins2(LIR_add, x, y);
|
||||
}
|
||||
}
|
||||
#ifdef NANOJIT_ARM
|
||||
else if (v == LIR_lsh ||
|
||||
v == LIR_rsh ||
|
||||
v == LIR_ush)
|
||||
{
|
||||
// needed on ARM -- arm doesn't mask shifts to 31 like x86 does
|
||||
if (s1->isconst())
|
||||
s1->setimm16(s1->constval() & 31);
|
||||
else
|
||||
s1 = out->ins2(LIR_and, s1, out->insImm(31));
|
||||
return out->ins2(v, s0, s1);
|
||||
}
|
||||
#endif
|
||||
|
||||
return out->ins2(v, s0, s1);
|
||||
}
|
||||
|
||||
|
@ -604,7 +605,7 @@ public:
|
|||
|
||||
/* In debug mode vpname contains a textual description of the type of the
|
||||
slot during the forall iteration over all slots. */
|
||||
#ifdef DEBUG
|
||||
#if defined(DEBUG) || defined(INCLUDE_VERBOSE_OUTPUT)
|
||||
#define DEF_VPNAME const char* vpname; unsigned vpnum
|
||||
#define SET_VPNAME(name) do { vpname = name; vpnum = 0; } while(0)
|
||||
#define INC_VPNUM() do { ++vpnum; } while(0)
|
||||
|
@ -821,7 +822,7 @@ TraceRecorder::TraceRecorder(JSContext* cx, GuardRecord* _anchor, Fragment* _fra
|
|||
if (verbose_debug)
|
||||
lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);
|
||||
#endif
|
||||
#ifdef NANOJIT_ARM
|
||||
#ifdef NJ_SOFTFLOAT
|
||||
lir = float_filter = new (&gc) SoftFloatFilter(lir);
|
||||
#endif
|
||||
lir = cse_filter = new (&gc) CseFilter(lir, &gc);
|
||||
|
@ -867,7 +868,7 @@ TraceRecorder::~TraceRecorder()
|
|||
delete cse_filter;
|
||||
delete expr_filter;
|
||||
delete func_filter;
|
||||
#ifdef NANOJIT_ARM
|
||||
#ifdef NJ_SOFTFLOAT
|
||||
delete float_filter;
|
||||
#endif
|
||||
delete lir_buf_writer;
|
||||
|
@ -2277,8 +2278,10 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
|
|||
union { NIns *code; GuardRecord* (FASTCALL *func)(InterpState*, Fragment*); } u;
|
||||
u.code = f->code();
|
||||
|
||||
#if defined(DEBUG) && defined(NANOJIT_IA32)
|
||||
#ifdef DEBUG
|
||||
#if defined(NANOJIT_IA32)
|
||||
uint64 start = rdtsc();
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -2362,19 +2365,18 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
|
|||
js_ReconstructStackDepth(cx, fp->script, fp->regs->pc) == fp->regs->sp);
|
||||
|
||||
#if defined(DEBUG) && defined(NANOJIT_IA32)
|
||||
if (verbose_debug) {
|
||||
printf("leaving trace at %s:%u@%u, op=%s, lr=%p, exitType=%d, sp=%d, ip=%p, "
|
||||
"cycles=%llu\n",
|
||||
fp->script->filename, js_PCToLineNumber(cx, fp->script, fp->regs->pc),
|
||||
fp->regs->pc - fp->script->code,
|
||||
js_CodeName[*fp->regs->pc],
|
||||
lr,
|
||||
lr->exit->exitType,
|
||||
fp->regs->sp - StackBase(fp), lr->jmp,
|
||||
(rdtsc() - start));
|
||||
}
|
||||
uint64 cycles = rdtsc() - start;
|
||||
#else
|
||||
uint64 cycles = 0;
|
||||
#endif
|
||||
|
||||
debug_only_v(printf("leaving trace at %s:%u@%u, exitType=%d, sp=%d, ip=%p, cycles=%llu\n",
|
||||
fp->script->filename, js_PCToLineNumber(cx, fp->script, fp->regs->pc),
|
||||
fp->regs->pc - fp->script->code,
|
||||
lr->exit->exitType,
|
||||
fp->regs->sp - StackBase(fp), lr->jmp,
|
||||
cycles));
|
||||
|
||||
/* If this trace is part of a tree, later branches might have added additional globals for
|
||||
which we don't have any type information available in the side exit. We merge in this
|
||||
information from the entry type-map. See also comment in the constructor of TraceRecorder
|
||||
|
|
|
@ -221,7 +221,7 @@ class TraceRecorder {
|
|||
nanojit::LirWriter* cse_filter;
|
||||
nanojit::LirWriter* expr_filter;
|
||||
nanojit::LirWriter* func_filter;
|
||||
#ifdef NANOJIT_ARM
|
||||
#ifdef NJ_SOFTFLOAT
|
||||
nanojit::LirWriter* float_filter;
|
||||
#endif
|
||||
nanojit::LIns* cx_ins;
|
||||
|
|
|
@ -44,6 +44,7 @@
|
|||
|
||||
#if defined(AVMPLUS_LINUX) && defined(AVMPLUS_ARM)
|
||||
#include <asm/unistd.h>
|
||||
extern "C" void __clear_cache(char *BEG, char *END);
|
||||
#endif
|
||||
|
||||
namespace nanojit
|
||||
|
@ -178,6 +179,8 @@ namespace nanojit
|
|||
// nothing free, steal one
|
||||
// LSRA says pick the one with the furthest use
|
||||
LIns* vic = findVictim(regs,allow,prefer);
|
||||
NanoAssert(vic != NULL);
|
||||
|
||||
Reservation* resv = getresv(vic);
|
||||
|
||||
// restore vic
|
||||
|
@ -446,25 +449,37 @@ namespace nanojit
|
|||
Reservation* resv = getresv(i);
|
||||
Register r;
|
||||
|
||||
// if we have an existing reservation and it has a non-unknown
|
||||
// register allocated, and that register is in our allowed mask,
|
||||
// return it.
|
||||
if (resv && (r=resv->reg) != UnknownReg && (rmask(r) & allow)) {
|
||||
return r;
|
||||
}
|
||||
|
||||
// figure out what registers are preferred for this instruction
|
||||
RegisterMask prefer = hint(i, allow);
|
||||
|
||||
// if we didn't have a reservation, allocate one now
|
||||
if (!resv)
|
||||
resv = reserveAlloc(i);
|
||||
|
||||
// if the reservation doesn't have a register assigned to it...
|
||||
if ((r=resv->reg) == UnknownReg)
|
||||
{
|
||||
// .. if the cost is 2 and the allowed mask includes
|
||||
// the saved regs, then prefer just those.
|
||||
if (resv->cost == 2 && (allow&SavedRegs))
|
||||
prefer = allow&SavedRegs;
|
||||
// grab one.
|
||||
r = resv->reg = registerAlloc(prefer);
|
||||
_allocator.addActive(r, i);
|
||||
return r;
|
||||
}
|
||||
else
|
||||
{
|
||||
// r not allowed
|
||||
// the already-allocated register isn't in the allowed mask;
|
||||
// we need to grab a new one and then copy over the old
|
||||
// contents to the new.
|
||||
resv->reg = UnknownReg;
|
||||
_allocator.retire(r);
|
||||
if (resv->cost == 2 && (allow&SavedRegs))
|
||||
|
@ -795,12 +810,15 @@ namespace nanojit
|
|||
# if defined(UNDER_CE)
|
||||
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
|
||||
# elif defined(AVMPLUS_LINUX)
|
||||
// XXX fixme flush adjacent pages together
|
||||
for (int i = 0; i < 2; i++) {
|
||||
Page *p = (i == 0) ? _nativePages : _nativeExitPages;
|
||||
|
||||
Page *first = p;
|
||||
while (p) {
|
||||
flushCache((NIns*)p, (NIns*)((intptr_t)(p) + NJ_PAGE_SIZE));
|
||||
if (!p->next || p->next != p+1) {
|
||||
__clear_cache((char*)first, (char*)(p+1));
|
||||
first = p->next;
|
||||
}
|
||||
p = p->next;
|
||||
}
|
||||
}
|
||||
|
@ -852,7 +870,7 @@ namespace nanojit
|
|||
switch(op)
|
||||
{
|
||||
default:
|
||||
NanoAssertMsgf(false, ("unsupported LIR instruction: %d (~0x40: %d)\n",op, op&~LIR64));
|
||||
NanoAssertMsgf(false, "unsupported LIR instruction: %d (~0x40: %d)\n", op, op&~LIR64);
|
||||
break;
|
||||
|
||||
case LIR_short:
|
||||
|
@ -1208,13 +1226,20 @@ namespace nanojit
|
|||
LIns* cond = ins->oprnd1();
|
||||
LOpcode condop = cond->opcode();
|
||||
NanoAssert(cond->isCond());
|
||||
#ifndef NJ_SOFTFLOAT
|
||||
#if !defined(NJ_SOFTFLOAT)
|
||||
if (condop >= LIR_feq && condop <= LIR_fge)
|
||||
{
|
||||
#if defined(NJ_ARM_VFP)
|
||||
if (op == LIR_xf)
|
||||
JNE(exit);
|
||||
else
|
||||
JE(exit);
|
||||
#else
|
||||
if (op == LIR_xf)
|
||||
JP(exit);
|
||||
else
|
||||
JNP(exit);
|
||||
#endif
|
||||
asm_fcmp(cond);
|
||||
break;
|
||||
}
|
||||
|
@ -1313,9 +1338,13 @@ namespace nanojit
|
|||
{
|
||||
// only want certain regs
|
||||
Register r = prepResultReg(ins, AllowableFlagRegs);
|
||||
#ifdef NJ_ARM_VFP
|
||||
SETE(r);
|
||||
#else
|
||||
// SETcc only sets low 8 bits, so extend
|
||||
MOVZX8(r,r);
|
||||
SETNP(r);
|
||||
#endif
|
||||
asm_fcmp(ins);
|
||||
break;
|
||||
}
|
||||
|
@ -1437,8 +1466,13 @@ namespace nanojit
|
|||
|
||||
uint32_t Assembler::arFree(uint32_t idx)
|
||||
{
|
||||
// nothing to free
|
||||
if (idx == 0)
|
||||
return 0;
|
||||
|
||||
if (idx > 0 && _activation.entry[idx] == _activation.entry[idx+stack_direction(1)])
|
||||
_activation.entry[idx+stack_direction(1)] = 0; // clear 2 slots for doubles
|
||||
|
||||
_activation.entry[idx] = 0;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -376,8 +376,6 @@ namespace nanojit
|
|||
return l;
|
||||
}
|
||||
|
||||
#define isS24(x) (((int32_t(x)<<8)>>8) == (x))
|
||||
|
||||
LInsp LirBufWriter::insFar(LOpcode op, LInsp target)
|
||||
{
|
||||
NanoAssert(op == LIR_skip || op == LIR_tramp);
|
||||
|
|
|
@ -49,14 +49,17 @@
|
|||
|
||||
#if defined(AVMPLUS_LINUX)
|
||||
#include <asm/unistd.h>
|
||||
extern "C" void __clear_cache(char *BEG, char *END);
|
||||
#endif
|
||||
|
||||
#ifdef FEATURE_NANOJIT
|
||||
|
||||
namespace nanojit
|
||||
{
|
||||
#ifdef FEATURE_NANOJIT
|
||||
|
||||
#ifdef NJ_VERBOSE
|
||||
const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","r11","IP","SP","LR","PC"};
|
||||
const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","FP","IP","SP","LR","PC",
|
||||
"d0","d1","d2","d3","d4","d5","d6","d7","s14"};
|
||||
#endif
|
||||
|
||||
const Register Assembler::argRegs[] = { R0, R1, R2, R3 };
|
||||
|
@ -122,6 +125,7 @@ Assembler::nFragExit(LInsp guard)
|
|||
// for us; always force a far jump here.
|
||||
BL_far(_epilogue);
|
||||
|
||||
// stick the jmp pointer to the start of the sequence
|
||||
lr->jmp = _nIns;
|
||||
}
|
||||
|
||||
|
@ -155,18 +159,26 @@ void
|
|||
Assembler::asm_call(LInsp ins)
|
||||
{
|
||||
const CallInfo* call = callInfoFor(ins->fid());
|
||||
Reservation *callRes = getresv(ins);
|
||||
|
||||
uint32_t atypes = call->_argtypes;
|
||||
uint32_t roffset = 0;
|
||||
|
||||
// skip return type
|
||||
#ifdef NJ_ARM_VFP
|
||||
ArgSize rsize = (ArgSize)(atypes & 3);
|
||||
#endif
|
||||
atypes >>= 2;
|
||||
|
||||
// we need to detect if we have arg0 as LO followed by arg1 as F;
|
||||
// in that case, we need to skip using r1 -- the F needs to be
|
||||
// loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's
|
||||
// generated code.
|
||||
bool arg0IsInt32FollowedByFloat = false;
|
||||
while ((atypes & 3) != ARGSIZE_NONE) {
|
||||
if (((atypes >> 4) & 3) == ARGSIZE_LO &&
|
||||
((atypes >> 2) & 3) == ARGSIZE_F &&
|
||||
((atypes >> 6) & 3) == ARGSIZE_NONE)
|
||||
if (((atypes >> 2) & 3) == ARGSIZE_LO &&
|
||||
((atypes >> 0) & 3) == ARGSIZE_F &&
|
||||
((atypes >> 4) & 3) == ARGSIZE_NONE)
|
||||
{
|
||||
arg0IsInt32FollowedByFloat = true;
|
||||
break;
|
||||
|
@ -174,17 +186,68 @@ Assembler::asm_call(LInsp ins)
|
|||
atypes >>= 2;
|
||||
}
|
||||
|
||||
#ifdef NJ_ARM_VFP
|
||||
if (rsize == ARGSIZE_F) {
|
||||
NanoAssert(ins->opcode() == LIR_fcall);
|
||||
NanoAssert(callRes);
|
||||
|
||||
//fprintf (stderr, "call ins: %p callRes: %p reg: %d ar: %d\n", ins, callRes, callRes->reg, callRes->arIndex);
|
||||
|
||||
Register rr = callRes->reg;
|
||||
int d = disp(callRes);
|
||||
freeRsrcOf(ins, rr != UnknownReg);
|
||||
|
||||
if (rr != UnknownReg) {
|
||||
NanoAssert(IsFpReg(rr));
|
||||
FMDRR(rr,R0,R1);
|
||||
} else {
|
||||
NanoAssert(d);
|
||||
//fprintf (stderr, "call ins d: %d\n", d);
|
||||
STMIA(Scratch, 1<<R0 | 1<<R1);
|
||||
arm_ADDi(Scratch, FP, d);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
CALL(call);
|
||||
|
||||
ArgSize sizes[10];
|
||||
uint32_t argc = call->get_sizes(sizes);
|
||||
for(uint32_t i=0; i < argc; i++) {
|
||||
for(uint32_t i = 0; i < argc; i++) {
|
||||
uint32_t j = argc - i - 1;
|
||||
ArgSize sz = sizes[j];
|
||||
NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
|
||||
LInsp arg = ins->arg(j);
|
||||
// pre-assign registers R0-R3 for arguments (if they fit)
|
||||
Register r = (i+roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
|
||||
asm_arg(sz, ins->arg(j), r);
|
||||
|
||||
Register r = (i + roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
|
||||
#ifdef NJ_ARM_VFP
|
||||
if (sz == ARGSIZE_F) {
|
||||
if (r == R0 || r == R2) {
|
||||
roffset++;
|
||||
} else if (r == R1) {
|
||||
r = R2;
|
||||
roffset++;
|
||||
} else {
|
||||
r = UnknownReg;
|
||||
}
|
||||
|
||||
// XXX move this into asm_farg
|
||||
Register sr = findRegFor(arg, FpRegs);
|
||||
|
||||
if (r != UnknownReg) {
|
||||
// stick it into our scratch fp reg, and then copy into the base reg
|
||||
//fprintf (stderr, "FMRRD: %d %d <- %d\n", r, nextreg(r), sr);
|
||||
FMRRD(r, nextreg(r), sr);
|
||||
} else {
|
||||
asm_pusharg(arg);
|
||||
}
|
||||
} else {
|
||||
asm_arg(sz, arg, r);
|
||||
}
|
||||
#else
|
||||
NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
|
||||
asm_arg(sz, arg, r);
|
||||
#endif
|
||||
|
||||
if (i == 0 && arg0IsInt32FollowedByFloat)
|
||||
roffset = 1;
|
||||
|
@ -238,7 +301,7 @@ Assembler::nRegisterResetAll(RegAlloc& a)
|
|||
// add scratch registers to our free list for the allocator
|
||||
a.clear();
|
||||
a.used = 0;
|
||||
a.free = rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) | rmask(R5);
|
||||
a.free = rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) | rmask(R5) | FpRegs;
|
||||
debug_only(a.managed = a.free);
|
||||
}
|
||||
|
||||
|
@ -251,16 +314,15 @@ Assembler::nPatchBranch(NIns* branch, NIns* target)
|
|||
// Which is really 2 instructions, so we need to modify both
|
||||
// XXX -- this is B, not BL, at least on non-Thumb..
|
||||
|
||||
// branch+2 because PC is always 2 instructions ahead on ARM/Thumb
|
||||
int32_t offset = int(target) - int(branch+2);
|
||||
int32_t offset = PC_OFFSET_FROM(target, branch);
|
||||
|
||||
//printf("---patching branch at 0x%08x to location 0x%08x (%d-0x%08x)\n", branch, target, offset, offset);
|
||||
|
||||
// We have 2 words to work with here -- if offset is in range of a 24-bit
|
||||
// relative jump, emit that; otherwise, we do a pc-relative load into pc.
|
||||
if (-(1<<24) <= offset & offset < (1<<24)) {
|
||||
if (isS24(offset)) {
|
||||
// ARM goodness, using unconditional B
|
||||
*branch = (NIns)( COND_AL | (0xA<<24) | ((offset >>2) & 0xFFFFFF) );
|
||||
*branch = (NIns)( COND_AL | (0xA<<24) | ((offset>>2) & 0xFFFFFF) );
|
||||
} else {
|
||||
// LDR pc,[pc]
|
||||
*branch++ = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | ( 0x004 ) );
|
||||
|
@ -295,11 +357,11 @@ Assembler::asm_qjoin(LIns *ins)
|
|||
LIns* hi = ins->oprnd2();
|
||||
|
||||
Register r = findRegFor(hi, GpRegs);
|
||||
ST(FP, d+4, r);
|
||||
STR(r, FP, d+4);
|
||||
|
||||
// okay if r gets recycled.
|
||||
r = findRegFor(lo, GpRegs);
|
||||
ST(FP, d, r);
|
||||
STR(r, FP, d);
|
||||
freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
|
||||
}
|
||||
|
||||
|
@ -311,7 +373,7 @@ Assembler::asm_store32(LIns *value, int dr, LIns *base)
|
|||
findRegFor2(GpRegs, value, rA, base, rB);
|
||||
Register ra = rA->reg;
|
||||
Register rb = rB->reg;
|
||||
ST(rb, dr, ra);
|
||||
STR(ra, rb, dr);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -319,7 +381,17 @@ Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
|
|||
{
|
||||
(void)resv;
|
||||
int d = findMemFor(i);
|
||||
LD(r, d, FP);
|
||||
|
||||
if (IsFpReg(r)) {
|
||||
if (isS8(d >> 2)) {
|
||||
FLDD(r, FP, d);
|
||||
} else {
|
||||
FLDD(r, Scratch, 0);
|
||||
arm_ADDi(Scratch, FP, d);
|
||||
}
|
||||
} else {
|
||||
LDR(r, FP, d);
|
||||
}
|
||||
|
||||
verbose_only(
|
||||
if (_verbose)
|
||||
|
@ -332,12 +404,21 @@ Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
|
|||
{
|
||||
(void)i;
|
||||
(void)pop;
|
||||
|
||||
//fprintf (stderr, "resv->arIndex: %d\n", resv->arIndex);
|
||||
if (resv->arIndex) {
|
||||
int d = disp(resv);
|
||||
// save to spill location
|
||||
Register rr = resv->reg;
|
||||
ST(FP, d, rr);
|
||||
if (IsFpReg(rr)) {
|
||||
if (isS8(d >> 2)) {
|
||||
FSTD(rr, FP, d);
|
||||
} else {
|
||||
FSTD(rr, Scratch, 0);
|
||||
arm_ADDi(Scratch, FP, d);
|
||||
}
|
||||
} else {
|
||||
STR(rr, FP, d);
|
||||
}
|
||||
|
||||
verbose_only(if (_verbose){
|
||||
outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i));
|
||||
|
@ -349,38 +430,164 @@ Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
|
|||
void
|
||||
Assembler::asm_load64(LInsp ins)
|
||||
{
|
||||
LIns* base = ins->oprnd1();
|
||||
int db = ins->oprnd2()->constval();
|
||||
Reservation *resv = getresv(ins);
|
||||
int dr = disp(resv);
|
||||
NanoAssert(resv->reg == UnknownReg && dr != 0);
|
||||
///asm_output("<<< load64");
|
||||
|
||||
LIns* base = ins->oprnd1();
|
||||
int offset = ins->oprnd2()->constval();
|
||||
|
||||
Reservation *resv = getresv(ins);
|
||||
Register rr = resv->reg;
|
||||
int d = disp(resv);
|
||||
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
resv->reg = UnknownReg;
|
||||
asm_mmq(FP, dr, rb, db);
|
||||
freeRsrcOf(ins, false);
|
||||
|
||||
#ifdef NJ_ARM_VFP
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
|
||||
NanoAssert(rb != UnknownReg);
|
||||
NanoAssert(rr == UnknownReg || IsFpReg(rr));
|
||||
|
||||
if (rr != UnknownReg) {
|
||||
if (!isS8(offset >> 2) || (offset&3) != 0) {
|
||||
underrunProtect(LD32_size + 8);
|
||||
FLDD(rr,Scratch,0);
|
||||
ADD(Scratch, rb);
|
||||
LD32_nochk(Scratch, offset);
|
||||
} else {
|
||||
FLDD(rr,rb,offset);
|
||||
}
|
||||
} else {
|
||||
asm_mmq(FP, d, rb, offset);
|
||||
}
|
||||
|
||||
// *(FP+dr) <- *(rb+db)
|
||||
#else
|
||||
NanoAssert(resv->reg == UnknownReg && d != 0);
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
asm_mmq(FP, d, rb, offset);
|
||||
#endif
|
||||
|
||||
//asm_output(">>> load64");
|
||||
}
|
||||
|
||||
void
|
||||
Assembler::asm_store64(LInsp value, int dr, LInsp base)
|
||||
{
|
||||
//asm_output1("<<< store64 (dr: %d)", dr);
|
||||
|
||||
#ifdef NJ_ARM_VFP
|
||||
Reservation *valResv = getresv(value);
|
||||
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
Register rv = findRegFor(value, FpRegs);
|
||||
|
||||
NanoAssert(rb != UnknownReg);
|
||||
NanoAssert(rv != UnknownReg);
|
||||
|
||||
Register baseReg = rb;
|
||||
intptr_t baseOffset = dr;
|
||||
|
||||
if (!isS8(dr)) {
|
||||
baseReg = Scratch;
|
||||
baseOffset = 0;
|
||||
}
|
||||
|
||||
FSTD(rv, baseReg, baseOffset);
|
||||
|
||||
if (!isS8(dr)) {
|
||||
underrunProtect(4 + LD32_size);
|
||||
ADD(Scratch, rb);
|
||||
LD32_nochk(Scratch, dr);
|
||||
}
|
||||
|
||||
// if it's a constant, make sure our baseReg/baseOffset location
|
||||
// has the right value
|
||||
if (value->isconstq()) {
|
||||
const int32_t* p = (const int32_t*) (value-2);
|
||||
|
||||
underrunProtect(12 + LD32_size);
|
||||
|
||||
asm_quad_nochk(rv, p);
|
||||
}
|
||||
#else
|
||||
int da = findMemFor(value);
|
||||
Register rb = findRegFor(base, GpRegs);
|
||||
asm_mmq(rb, dr, FP, da);
|
||||
#endif
|
||||
//asm_output(">>> store64");
|
||||
}
|
||||
|
||||
// stick a quad into register rr, where p points to the two
|
||||
// 32-bit parts of the quad, optinally also storing at FP+d
|
||||
void
|
||||
Assembler::asm_quad_nochk(Register rr, const int32_t* p)
|
||||
{
|
||||
*(++_nSlot) = p[0];
|
||||
*(++_nSlot) = p[1];
|
||||
|
||||
intptr_t constAddr = (intptr_t) (_nSlot-1);
|
||||
intptr_t realOffset = PC_OFFSET_FROM(constAddr, _nIns-1);
|
||||
intptr_t offset = realOffset;
|
||||
Register baseReg = PC;
|
||||
|
||||
//int32_t *q = (int32_t*) constAddr;
|
||||
//fprintf (stderr, "asm_quad_nochk: rr = %d cAddr: 0x%x quad: %08x:%08x q: %f @0x%08x\n", rr, constAddr, p[0], p[1], *(double*)q, _nIns);
|
||||
|
||||
// for FLDD, we only get a left-shifted 8-bit offset
|
||||
if (!isS8(realOffset >> 2)) {
|
||||
offset = 0;
|
||||
baseReg = Scratch;
|
||||
}
|
||||
|
||||
FLDD(rr, baseReg, offset);
|
||||
|
||||
if (!isS8(realOffset >> 2))
|
||||
LD32_nochk(Scratch, constAddr);
|
||||
}
|
||||
|
||||
void
|
||||
Assembler::asm_quad(LInsp ins)
|
||||
{
|
||||
Reservation *rR = getresv(ins);
|
||||
int d = disp(rR);
|
||||
//asm_output(">>> asm_quad");
|
||||
|
||||
Reservation *res = getresv(ins);
|
||||
int d = disp(res);
|
||||
Register rr = res->reg;
|
||||
|
||||
NanoAssert(d || rr != UnknownReg);
|
||||
|
||||
const int32_t* p = (const int32_t*) (ins-2);
|
||||
|
||||
#ifdef NJ_ARM_VFP
|
||||
freeRsrcOf(ins, false);
|
||||
|
||||
// XXX We probably want nochk versions of FLDD/FSTD
|
||||
underrunProtect(16 + LD32_size);
|
||||
|
||||
// grab a register to do the load into if we don't have one already;
|
||||
// XXX -- maybe do a mmq in this case? We're going to use our
|
||||
// D7 register that's never allocated (since it's the one we use
|
||||
// for int-to-double conversions), so we don't have to worry about
|
||||
// spilling something in a fp reg.
|
||||
if (rr == UnknownReg)
|
||||
rr = D7;
|
||||
|
||||
if (d)
|
||||
FSTD(rr, FP, d);
|
||||
|
||||
asm_quad_nochk(rr, p);
|
||||
#else
|
||||
freeRsrcOf(ins, false);
|
||||
if (d) {
|
||||
const int32_t* p = (const int32_t*) (ins-2);
|
||||
STi(FP,d+4,p[1]);
|
||||
STi(FP,d,p[0]);
|
||||
underrunProtect(LD32_size * 2 + 8);
|
||||
STR(Scratch, FP, d+4);
|
||||
LD32_nochk(Scratch, p[1]);
|
||||
STR(Scratch, FP, d);
|
||||
LD32_nochk(Scratch, p[0]);
|
||||
}
|
||||
#endif
|
||||
|
||||
//asm_output("<<< asm_quad");
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -393,9 +600,17 @@ Assembler::asm_qlo(LInsp ins, LInsp q)
|
|||
void
|
||||
Assembler::asm_nongp_copy(Register r, Register s)
|
||||
{
|
||||
// we will need this for VFP support
|
||||
(void)r; (void)s;
|
||||
NanoAssert(false);
|
||||
if ((rmask(r) & FpRegs) && (rmask(s) & FpRegs)) {
|
||||
// fp->fp
|
||||
FCPYD(r, s);
|
||||
} else if ((rmask(r) & GpRegs) && (rmask(s) & FpRegs)) {
|
||||
// fp->gp
|
||||
// who's doing this and why?
|
||||
NanoAssert(0);
|
||||
// FMRS(r, loSingleVfp(s));
|
||||
} else {
|
||||
NanoAssert(0);
|
||||
}
|
||||
}
|
||||
|
||||
Register
|
||||
|
@ -416,31 +631,41 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
|
|||
// get a scratch reg
|
||||
Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
|
||||
_allocator.addFree(t);
|
||||
ST(rd, dd+4, t);
|
||||
LD(t, ds+4, rs);
|
||||
ST(rd, dd, t);
|
||||
LD(t, ds, rs);
|
||||
// XXX use LDM,STM
|
||||
STR(t, rd, dd+4);
|
||||
LDR(t, rs, ds+4);
|
||||
STR(t, rd, dd);
|
||||
LDR(t, rs, ds);
|
||||
}
|
||||
|
||||
void
|
||||
Assembler::asm_pusharg(LInsp p)
|
||||
Assembler::asm_pusharg(LInsp arg)
|
||||
{
|
||||
// arg goes on stack
|
||||
Reservation* rA = getresv(p);
|
||||
if (rA == 0)
|
||||
{
|
||||
Register ra = findRegFor(p, GpRegs);
|
||||
ST(SP,0,ra);
|
||||
}
|
||||
else if (rA->reg == UnknownReg)
|
||||
{
|
||||
ST(SP,0,Scratch);
|
||||
LD(Scratch,disp(rA),FP);
|
||||
}
|
||||
Reservation* argRes = getresv(arg);
|
||||
bool quad = arg->isQuad();
|
||||
intptr_t stack_growth = quad ? 8 : 4;
|
||||
|
||||
Register ra;
|
||||
|
||||
if (argRes)
|
||||
ra = argRes->reg;
|
||||
else
|
||||
{
|
||||
ST(SP,0,rA->reg);
|
||||
ra = findRegFor(arg, quad ? FpRegs : GpRegs);
|
||||
|
||||
if (ra == UnknownReg) {
|
||||
STR(Scratch, SP, 0);
|
||||
LDR(Scratch, FP, disp(argRes));
|
||||
} else {
|
||||
if (!quad) {
|
||||
Register ra = findRegFor(arg, GpRegs);
|
||||
STR(ra, SP, 0);
|
||||
} else {
|
||||
Register ra = findRegFor(arg, FpRegs);
|
||||
FSTD(ra, SP, 0);
|
||||
}
|
||||
}
|
||||
|
||||
SUBi(SP, stack_growth);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -470,22 +695,6 @@ Assembler::nativePageSetup()
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
Assembler::flushCache(NIns* n1, NIns* n2) {
|
||||
#if defined(UNDER_CE)
|
||||
// we changed the code, so we need to do this (sadly)
|
||||
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
|
||||
#elif defined(AVMPLUS_LINUX)
|
||||
// Just need to clear this one page (not even the whole page really)
|
||||
//Page *page = (Page*)pageTop(_nIns);
|
||||
register unsigned long _beg __asm("a1") = (unsigned long)(n1);
|
||||
register unsigned long _end __asm("a2") = (unsigned long)(n2);
|
||||
register unsigned long _flg __asm("a3") = 0;
|
||||
register unsigned long _swi __asm("r7") = 0xF0002;
|
||||
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
|
||||
#endif
|
||||
}
|
||||
|
||||
NIns*
|
||||
Assembler::asm_adjustBranch(NIns* at, NIns* target)
|
||||
{
|
||||
|
@ -497,9 +706,16 @@ Assembler::asm_adjustBranch(NIns* at, NIns* target)
|
|||
|
||||
NIns* was = (NIns*) at[3];
|
||||
|
||||
//fprintf (stderr, "Adjusting branch @ 0x%8x: 0x%x -> 0x%x\n", at+3, at[3], target);
|
||||
|
||||
at[3] = (NIns)target;
|
||||
|
||||
flushCache(at, at+4);
|
||||
#if defined(UNDER_CE)
|
||||
// we changed the code, so we need to do this (sadly)
|
||||
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
|
||||
#elif defined(AVMPLUS_LINUX)
|
||||
__clear_cache((char*)at, (char*)(at+4));
|
||||
#endif
|
||||
|
||||
#ifdef AVMPLUS_PORTING_API
|
||||
NanoJIT_PortAPI_FlushInstructionCache(at, at+4);
|
||||
|
@ -550,6 +766,9 @@ Assembler::BL_far(NIns* addr)
|
|||
// point to the right spot before branching
|
||||
underrunProtect(16);
|
||||
|
||||
// TODO use a slot in const pool for address, but emit single insn
|
||||
// for branch if offset fits
|
||||
|
||||
// the address
|
||||
*(--_nIns) = (NIns)((addr));
|
||||
// bx ip // branch to the address we loaded earlier
|
||||
|
@ -558,17 +777,29 @@ Assembler::BL_far(NIns* addr)
|
|||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) );
|
||||
// ldr ip, [pc + #4] // load the address into ip, reading it from [pc+4]
|
||||
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4));
|
||||
|
||||
//fprintf (stderr, "BL_far sequence @ 0x%08x\n", _nIns);
|
||||
|
||||
asm_output1("bl %p (32-bit)", addr);
|
||||
}
|
||||
|
||||
void
|
||||
Assembler::BL(NIns* addr)
|
||||
{
|
||||
intptr_t offs = PC_OFFSET_FROM(addr,(intptr_t)_nIns-4);
|
||||
if (JMP_S24_OFFSET_OK(offs)) {
|
||||
// we can do this with a single BL call
|
||||
intptr_t offs = PC_OFFSET_FROM(addr,_nIns-1);
|
||||
|
||||
//fprintf (stderr, "BL: 0x%x (offs: %d [%x]) @ 0x%08x\n", addr, offs, offs, (intptr_t)(_nIns-1));
|
||||
|
||||
if (isS24(offs)) {
|
||||
// try to do this with a single S24 call;
|
||||
// recompute offset in case underrunProtect had to allocate a new page
|
||||
underrunProtect(4);
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((offs)>>2) & 0xFFFFFF) ); \
|
||||
offs = PC_OFFSET_FROM(addr,_nIns-1);
|
||||
}
|
||||
|
||||
if (isS24(offs)) {
|
||||
// already did underrunProtect above
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((offs)>>2) & 0xFFFFFF) );
|
||||
asm_output1("bl %p", addr);
|
||||
} else {
|
||||
BL_far(addr);
|
||||
|
@ -579,6 +810,7 @@ void
|
|||
Assembler::CALL(const CallInfo *ci)
|
||||
{
|
||||
intptr_t addr = ci->_address;
|
||||
|
||||
BL((NIns*)addr);
|
||||
asm_output1(" (call %s)", ci->_name);
|
||||
}
|
||||
|
@ -586,21 +818,226 @@ Assembler::CALL(const CallInfo *ci)
|
|||
void
|
||||
Assembler::LD32_nochk(Register r, int32_t imm)
|
||||
{
|
||||
// We can always reach the const pool, since it's on the same page (<4096)
|
||||
underrunProtect(8);
|
||||
// We should always reach the const pool, since it's on the same page (<4096);
|
||||
// if we can't, someone didn't underrunProtect enough.
|
||||
|
||||
*(++_nSlot) = (int)imm;
|
||||
|
||||
//fprintf (stderr, "wrote slot(2) %p with %08x, jmp @ %p\n", _nSlot, (intptr_t)imm, _nIns-1);
|
||||
|
||||
int offset = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4);
|
||||
int offset = PC_OFFSET_FROM(_nSlot,_nIns-1);
|
||||
|
||||
NanoAssert(JMP_S24_OFFSET_OK(offset) && (offset < 0));
|
||||
NanoAssert(isS12(offset) && (offset < 0));
|
||||
|
||||
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | ((r)<<12) | ((-offset) & 0xFFFFFF) );
|
||||
asm_output2("ld %s,%d",gpn(r),imm);
|
||||
asm_output2(" (%d(PC) = 0x%x)", offset, imm);
|
||||
|
||||
LDR_nochk(r,PC,offset);
|
||||
}
|
||||
|
||||
|
||||
// Branch to target address _t with condition _c, doing underrun
|
||||
// checks (_chk == 1) or skipping them (_chk == 0).
|
||||
//
|
||||
// If the jump fits in a relative jump (+/-32MB), emit that.
|
||||
// If the jump is unconditional, emit the dest address inline in
|
||||
// the instruction stream and load it into pc.
|
||||
// If the jump has a condition, but noone's mucked with _nIns and our _nSlot
|
||||
// pointer is valid, stick the constant in the slot and emit a conditional
|
||||
// load into pc.
|
||||
// Otherwise, emit the conditional load into pc from a nearby constant,
|
||||
// and emit a jump to jump over it it in case the condition fails.
|
||||
//
|
||||
// NB: JMP_nochk depends on this not calling samepage() when _c == AL
|
||||
void
|
||||
Assembler::B_cond_chk(ConditionCode _c, NIns* _t, bool _chk)
|
||||
{
|
||||
int32 offs = PC_OFFSET_FROM(_t,_nIns-1);
|
||||
//fprintf(stderr, "B_cond_chk target: 0x%08x offset: %d @0x%08x\n", _t, offs, _nIns-1);
|
||||
if (isS24(offs)) {
|
||||
if (_chk) underrunProtect(4);
|
||||
offs = PC_OFFSET_FROM(_t,_nIns-1);
|
||||
}
|
||||
|
||||
if (isS24(offs)) {
|
||||
*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) );
|
||||
} else if (_c == AL) {
|
||||
if(_chk) underrunProtect(8);
|
||||
*(--_nIns) = (NIns)(_t);
|
||||
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 );
|
||||
} else if (samepage(_nIns,_nSlot)) {
|
||||
if(_chk) underrunProtect(8);
|
||||
*(++_nSlot) = (NIns)(_t);
|
||||
offs = PC_OFFSET_FROM(_nSlot,_nIns-1);
|
||||
NanoAssert(offs < 0);
|
||||
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFFFFF) );
|
||||
} else {
|
||||
if(_chk) underrunProtect(12);
|
||||
*(--_nIns) = (NIns)(_t);
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((-4)>>2) & 0xFFFFFF );
|
||||
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 );
|
||||
}
|
||||
|
||||
asm_output2("%s %p", _c == AL ? "jmp" : "b(cnd)", (void*)(_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* VFP
|
||||
*/
|
||||
|
||||
#ifdef NJ_ARM_VFP
|
||||
|
||||
// Generate code for an int -> double conversion (uses FSITOD).
// The result register is taken from FpRegs and the operand is placed in a
// register from GpRegs; the value travels through FpSingleScratch.
void
|
||||
Assembler::asm_i2f(LInsp ins)
|
||||
{
|
||||
Register rr = prepResultReg(ins, FpRegs);
|
||||
Register srcr = findRegFor(ins->oprnd1(), GpRegs);
|
||||
|
||||
// todo: support int value in memory, as per x86
|
||||
NanoAssert(srcr != UnknownReg);
|
||||
|
||||
// Both macros store with *(--_nIns), so the FMSR issued second lands at the
// lower address: FMSR (GP reg -> FpSingleScratch) precedes FSITOD in memory.
FSITOD(rr, FpSingleScratch);
|
||||
FMSR(FpSingleScratch, srcr);
|
||||
}
|
||||
|
||||
// Generate code for an unsigned int -> double conversion (uses FUITOD).
// Same shape as asm_i2f: result register from FpRegs, operand in a register
// from GpRegs, value moved through FpSingleScratch.
void
|
||||
Assembler::asm_u2f(LInsp ins)
|
||||
{
|
||||
Register rr = prepResultReg(ins, FpRegs);
|
||||
Register sr = findRegFor(ins->oprnd1(), GpRegs);
|
||||
|
||||
// todo: support int value in memory, as per x86
|
||||
NanoAssert(sr != UnknownReg);
|
||||
|
||||
// Both macros store with *(--_nIns), so the FMSR issued second lands at the
// lower address: FMSR (GP reg -> FpSingleScratch) precedes FUITOD in memory.
FUITOD(rr, FpSingleScratch);
|
||||
FMSR(FpSingleScratch, sr);
|
||||
}
|
||||
|
||||
// Generate code for a double negate: emits FNEGD rr, sr where rr is the
// result register (from FpRegs) and sr holds the operand.
void
|
||||
Assembler::asm_fneg(LInsp ins)
|
||||
{
|
||||
LInsp lhs = ins->oprnd1();
|
||||
Register rr = prepResultReg(ins, FpRegs);
|
||||
|
||||
Reservation* rA = getresv(lhs);
|
||||
Register sr;
|
||||
|
||||
// Reuse the operand's current register when its reservation already names
// one; otherwise ask findRegFor() for a register from FpRegs.
if (!rA || rA->reg == UnknownReg)
|
||||
sr = findRegFor(lhs, FpRegs);
|
||||
else
|
||||
sr = rA->reg;
|
||||
|
||||
FNEGD(rr, sr);
|
||||
}
|
||||
|
||||
// Generate code for a binary double operation. The opcode is asserted to lie
// in [LIR_fadd, LIR_fdiv] and selects one of FADDD / FSUBD / FMULD / FDIVD.
void
|
||||
Assembler::asm_fop(LInsp ins)
|
||||
{
|
||||
LInsp lhs = ins->oprnd1();
|
||||
LInsp rhs = ins->oprnd2();
|
||||
LOpcode op = ins->opcode();
|
||||
|
||||
NanoAssert(op >= LIR_fadd && op <= LIR_fdiv);
|
||||
|
||||
// rr = ra OP rb
|
||||
|
||||
Register rr = prepResultReg(ins, FpRegs);
|
||||
|
||||
Register ra = findRegFor(lhs, FpRegs);
|
||||
// When both operands are the same LIR instruction, use ra for both instead
// of calling findRegFor() a second time.
Register rb = (rhs == lhs) ? ra : findRegFor(rhs, FpRegs);
|
||||
|
||||
// XXX special-case 1.0 and 0.0
|
||||
|
||||
if (op == LIR_fadd)
|
||||
FADDD(rr,ra,rb);
|
||||
else if (op == LIR_fsub)
|
||||
FSUBD(rr,ra,rb);
|
||||
else if (op == LIR_fmul)
|
||||
FMULD(rr,ra,rb);
|
||||
else //if (op == LIR_fdiv)
|
||||
FDIVD(rr,ra,rb);
|
||||
}
|
||||
|
||||
// Generate code for a double comparison (opcode asserted to lie in
// [LIR_feq, LIR_fge]). The emitted sequence ends with a CMPi of the status
// bits against a per-opcode mask; the original comments state the assumption
// that a BEQ/BNE follows.
void
|
||||
Assembler::asm_fcmp(LInsp ins)
|
||||
{
|
||||
LInsp lhs = ins->oprnd1();
|
||||
LInsp rhs = ins->oprnd2();
|
||||
LOpcode op = ins->opcode();
|
||||
|
||||
NanoAssert(op >= LIR_feq && op <= LIR_fge);
|
||||
|
||||
Register ra = findRegFor(lhs, FpRegs);
|
||||
Register rb = findRegFor(rhs, FpRegs);
|
||||
|
||||
// We can't uniquely identify fge/fle via a single bit
|
||||
// pattern (since equality and lt/gt are separate bits);
|
||||
// so convert to the single-bit variant.
|
||||
// NOTE(review): fge becomes flt with the operands swapped, i.e. (rhs < lhs),
// and fle becomes fgt with the operands swapped, i.e. (rhs > lhs). For equal
// operands both rewritten tests are false while fge/fle are true -- confirm
// whether the equality case is handled somewhere else.
if (op == LIR_fge) {
|
||||
Register temp = ra;
|
||||
ra = rb;
|
||||
rb = temp;
|
||||
op = LIR_flt;
|
||||
} else if (op == LIR_fle) {
|
||||
Register temp = ra;
|
||||
ra = rb;
|
||||
rb = temp;
|
||||
op = LIR_fgt;
|
||||
}
|
||||
|
||||
// There is no way to test for an unordered result using
|
||||
// the conditional form of an instruction; the encoding (C=1 V=1)
|
||||
// ends up having overlaps with a few other tests. So, test for
|
||||
// the explicit mask.
|
||||
uint8_t mask = 0x0;
|
||||
|
||||
// NZCV
|
||||
// for a valid ordered result, V is always 0 from VFP
|
||||
if (op == LIR_feq)
|
||||
// ZC // cond EQ (both equal and "not less than")
|
||||
mask = 0x6;
|
||||
else if (op == LIR_flt)
|
||||
// N // cond MI
|
||||
mask = 0x8;
|
||||
else if (op == LIR_fgt)
|
||||
// C // cond CS
|
||||
mask = 0x2;
|
||||
else
|
||||
NanoAssert(0);
|
||||
/*
|
||||
// these were converted into gt and lt above.
|
||||
if (op == LIR_fle)
|
||||
// NZ // cond LE
|
||||
mask = 0xC;
|
||||
else if (op == LIR_fge)
|
||||
// ZC // cond fail?
|
||||
mask = 0x6;
|
||||
*/
|
||||
|
||||
// TODO XXX could do this as fcmpd; fmstat; tstvs rX, #0 the tstvs
|
||||
// would reset the status bits if V (NaN flag) is set, but that
|
||||
// doesn't work for NE. For NE could teqvs rX, #1. rX needs to
|
||||
// be any register that has lsb == 0, such as sp/fp/pc.
|
||||
|
||||
// The macro calls below are issued in the reverse of the intended execution
// order (FCMPD, FMSTAT, MRS, SHRi, CMPi); the VFP macros and MRS visibly
// store with *(--_nIns), CMPi/SHRi are presumably the same.
// Test explicitly with the full mask; if V is set, test will fail.
|
||||
// Assumption is that this will be followed up by a BEQ/BNE
|
||||
CMPi(Scratch, mask);
|
||||
// grab just the condition fields
|
||||
SHRi(Scratch, 28);
|
||||
MRS(Scratch);
|
||||
|
||||
// do the comparison and get results loaded in ARM status register
|
||||
FMSTAT();
|
||||
FCMPD(ra, rb);
|
||||
}
|
||||
|
||||
// Hook for preparing a floating-point call. In this VFP build it does no
// work and always returns UnknownReg; neither parameter is used.
Register
|
||||
Assembler::asm_prep_fcall(Reservation* rR, LInsp ins)
|
||||
{
|
||||
// We have nothing to do here; we do it all in asm_call.
|
||||
return UnknownReg;
|
||||
}
|
||||
|
||||
#endif /* NJ_ARM_VFP */
|
||||
|
||||
}
|
||||
#endif /* FEATURE_NANOJIT */
|
||||
|
||||
}
|
||||
|
|
|
@ -47,14 +47,28 @@ namespace nanojit
|
|||
|
||||
const int NJ_LOG2_PAGE_SIZE = 12; // 4K
|
||||
|
||||
#define NJ_MAX_REGISTERS 11
|
||||
// If NJ_ARM_VFP is defined, then VFP is assumed to
|
||||
// be present. If it's not defined, then softfloat
|
||||
// is used, and NJ_SOFTFLOAT is defined.
|
||||
#define NJ_ARM_VFP
|
||||
|
||||
#ifdef NJ_ARM_VFP
|
||||
|
||||
// only d0-d7; we'll use d7 as s14-s15 for i2f/u2f/etc.
|
||||
#define NJ_VFP_MAX_REGISTERS 8
|
||||
|
||||
#else
|
||||
|
||||
#define NJ_VFP_MAX_REGISTERS 0
|
||||
#define NJ_SOFTFLOAT
|
||||
|
||||
#endif
|
||||
|
||||
#define NJ_MAX_REGISTERS (11 + NJ_VFP_MAX_REGISTERS)
|
||||
#define NJ_MAX_STACK_ENTRY 256
|
||||
#define NJ_MAX_PARAMETERS 16
|
||||
#define NJ_ALIGN_STACK 8
|
||||
#define NJ_STACK_OFFSET 8
|
||||
|
||||
#define NJ_SOFTFLOAT
|
||||
#define NJ_STACK_GROWTH_UP
|
||||
#define NJ_STACK_OFFSET 0
|
||||
|
||||
#define NJ_CONSTANT_POOLS
|
||||
const int NJ_MAX_CPOOL_OFFSET = 4096;
|
||||
|
@ -75,25 +89,40 @@ typedef enum {
|
|||
R8 = 8,
|
||||
R9 = 9,
|
||||
R10 = 10,
|
||||
//FP =11,
|
||||
FP = 11,
|
||||
IP = 12,
|
||||
SP = 13,
|
||||
LR = 14,
|
||||
PC = 15,
|
||||
|
||||
FP = 13,
|
||||
// FP regs
|
||||
D0 = 16,
|
||||
D1 = 17,
|
||||
D2 = 18,
|
||||
D3 = 19,
|
||||
D4 = 20,
|
||||
D5 = 21,
|
||||
D6 = 22,
|
||||
D7 = 23,
|
||||
|
||||
// Pseudo-register for floating point
|
||||
F0 = 0,
|
||||
FirstFloatReg = 16,
|
||||
LastFloatReg = 22,
|
||||
|
||||
// helpers
|
||||
FRAME_PTR = 11,
|
||||
ESP = 13,
|
||||
ESP = SP,
|
||||
|
||||
FirstReg = 0,
|
||||
#ifdef NJ_ARM_VFP
|
||||
LastReg = 23,
|
||||
#else
|
||||
LastReg = 10,
|
||||
Scratch = 12,
|
||||
UnknownReg = 11
|
||||
#endif
|
||||
Scratch = IP,
|
||||
UnknownReg = 31,
|
||||
|
||||
// special value referring to S14
|
||||
FpSingleScratch = 24
|
||||
} Register;
|
||||
|
||||
/* ARM condition codes */
|
||||
|
@ -123,13 +152,30 @@ typedef struct _FragInfo {
|
|||
NIns* epilogue;
|
||||
} FragInfo;
|
||||
|
||||
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;
|
||||
static const RegisterMask FpRegs = 0x0000; // FST0-FST7
|
||||
#ifdef ARM_VFP
|
||||
static const RegisterMask SavedFpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6 | 1<<D7;
|
||||
#else
|
||||
static const RegisterMask SavedFpRegs = 0;
|
||||
#endif
|
||||
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10 | SavedFpRegs;
|
||||
static const RegisterMask FpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6; // no D7; S14-S15 are used for i2f/u2f.
|
||||
static const RegisterMask GpRegs = 0x07FF;
|
||||
static const RegisterMask AllowableFlagRegs = 1<<R0 | 1<<R1 | 1<<R2 | 1<<R3 | 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;
|
||||
|
||||
#define IsFpReg(_r) ((rmask(_r) & (FpRegs | (1<<D7))) != 0)
|
||||
#define IsGpReg(_r) ((rmask(_r) & (GpRegs | (1<<Scratch))) != 0)
|
||||
#define FpRegNum(_fpr) ((_fpr) - FirstFloatReg)
|
||||
|
||||
#define firstreg() R0
|
||||
#define nextreg(r) (Register)((int)r+1)
|
||||
#define nextreg(r) ((Register)((int)(r)+1))
|
||||
#if 0
|
||||
static Register nextreg(Register r) {
|
||||
if (r == R10)
|
||||
return D0;
|
||||
return (Register)(r+1);
|
||||
}
|
||||
#endif
|
||||
// only good for normal regs
|
||||
#define imm2register(c) (Register)(c-1)
|
||||
|
||||
verbose_only( extern const char* regNames[]; )
|
||||
|
@ -148,11 +194,12 @@ verbose_only( extern const char* regNames[]; )
|
|||
void BL(NIns*); \
|
||||
void BL_far(NIns*); \
|
||||
void CALL(const CallInfo*); \
|
||||
void B_cond_chk(ConditionCode, NIns*, bool); \
|
||||
void underrunProtect(int bytes); \
|
||||
bool has_cmov; \
|
||||
void nativePageReset(); \
|
||||
void nativePageSetup(); \
|
||||
void flushCache(NIns*,NIns*); \
|
||||
void asm_quad_nochk(Register, const int32_t*); \
|
||||
int* _nSlot; \
|
||||
int* _nExitSlot;
|
||||
|
||||
|
@ -174,6 +221,7 @@ verbose_only( extern const char* regNames[]; )
|
|||
#define FUNCADDR(addr) ( ((int)addr) )
|
||||
|
||||
#define OP_IMM (1<<25)
|
||||
#define OP_STAT (1<<20)
|
||||
|
||||
#define COND_AL (0xE<<28)
|
||||
|
||||
|
@ -189,7 +237,7 @@ typedef enum {
|
|||
ROR_reg = 7 // Rotate Right
|
||||
} ShiftOperator;
|
||||
|
||||
#define LD32_size 4
|
||||
#define LD32_size 8
|
||||
|
||||
#define BEGIN_NATIVE_CODE(x) \
|
||||
{ DWORD* _nIns = (uint8_t*)x
|
||||
|
@ -251,45 +299,58 @@ typedef enum {
|
|||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<21) | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) ); \
|
||||
asm_output2("eor %s,%d",gpn(_r),(_imm)); } while(0)
|
||||
|
||||
// _l = _l + _r
|
||||
#define ADD(_l,_r) do { \
|
||||
// _d = _n + _m
|
||||
#define arm_ADD(_d,_n,_m) do { \
|
||||
underrunProtect(4); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (1<<23) | ((_r)<<16) | ((_l)<<12) | (_l)); \
|
||||
asm_output2("add %s,%s",gpn(_l),gpn(_r)); } while(0)
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | (_m)); \
|
||||
asm_output3("add %s,%s+%s",gpn(_d),gpn(_n),gpn(_m)); } while(0)
|
||||
|
||||
// _r = _r + _imm
|
||||
#define ADDi(_r,_imm) do { \
|
||||
if ((_imm)>-256 && (_imm)<256) { \
|
||||
// _l = _l + _r
|
||||
#define ADD(_l,_r) arm_ADD(_l,_l,_r)
|
||||
|
||||
// TODO: we can do better here, since we can rotate the 8-bit immediate left by
|
||||
// an even number of bits; should count zeros at the end.
|
||||
|
||||
// Note that this sometimes converts negative immediate values to a sub.
|
||||
// _d = _r + _imm
|
||||
#define arm_ADDi(_d,_n,_imm) do { \
|
||||
if ((_imm) > -256 && (_imm) < 256) { \
|
||||
underrunProtect(4); \
|
||||
if ((_imm)>=0) \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) ); \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | ((_imm)&0xFF) ); \
|
||||
else \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | ((-(_imm))&0xFF) ); \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | OP_STAT | (1<<22) | ((_n)<<16) | ((_d)<<12) | ((-(_imm))&0xFF) ); \
|
||||
} else { \
|
||||
if ((_imm)>=0) { \
|
||||
if ((_imm)<=1020 && (((_imm)&3)==0) ) { \
|
||||
underrunProtect(4); \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | (15<<8)| ((_imm)>>2) ); \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | (15<<8)| ((_imm)>>2) ); \
|
||||
} else { \
|
||||
underrunProtect(4+LD32_size); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (1<<23) | ((_r)<<16) | ((_r)<<12) | (Scratch)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | (Scratch)); \
|
||||
LD32_nochk(Scratch, _imm); \
|
||||
} \
|
||||
} else { \
|
||||
underrunProtect(4+LD32_size); \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_STAT | (1<<22) | ((_n)<<16) | ((_d)<<12) | (Scratch)); \
|
||||
LD32_nochk(Scratch, -(_imm)); \
|
||||
} \
|
||||
} \
|
||||
asm_output3("add %s,%s,%d",gpn(_d),gpn(_n),(_imm)); \
|
||||
} while(0)
|
||||
|
||||
/*
|
||||
* There used to be a :
|
||||
if ((_imm)>=-510) { \
|
||||
underrunProtect(8); \
|
||||
int rem = -(_imm) - 255; \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | ((rem)&0xFF) ); \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | (0xFF) ); \
|
||||
} else { \
|
||||
underrunProtect(4+LD32_size); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (1<<22) | ((_r)<<16) | ((_r)<<12) | (Scratch)); \
|
||||
LD32_nochk(Scratch, -(_imm)); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
asm_output2("addi %s,%d",gpn(_r),(_imm)); \
|
||||
} while(0)
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_n)<<16) | ((_d)<<12) | ((rem)&0xFF) ); \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_n)<<16) | ((_d)<<12) | (0xFF) ); \
|
||||
} else {
|
||||
* above, but if we do that we can't really update the status registers. So don't do that.
|
||||
*/
|
||||
|
||||
#define ADDi(_r,_imm) arm_ADDi(_r,_r,_imm)
|
||||
|
||||
// _l = _l - _r
|
||||
#define SUB(_l,_r) do { \
|
||||
|
@ -402,6 +463,13 @@ typedef enum {
|
|||
*(--_nIns) = (NIns)( COND_AL | (0x11<<20) | ((_d)<<16) | (_s) ); \
|
||||
asm_output2("test %s,%s",gpn(_d),gpn(_s)); } while(0)
|
||||
|
||||
// TST _d, #_imm: emit a test of register _d against an 8-bit immediate.
// _imm is asserted to fit in 8 bits. The macro deliberately ends in
// "while (0)" with no trailing semicolon so that "TSTi(r, 1);" is a single
// statement and stays valid inside an unbraced if/else.
#define TSTi(_d,_imm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(((_imm) & 0xff) == (_imm)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (0x11<<20) | ((_d) << 16) | (0xF<<12) | ((_imm) & 0xff) ); \
|
||||
asm_output2("tst %s,#0x%x", gpn(_d), (_imm)); \
|
||||
} while (0)
|
||||
|
||||
// CMP
|
||||
#define CMP(_l,_r) do { \
|
||||
underrunProtect(4); \
|
||||
|
@ -429,7 +497,7 @@ typedef enum {
|
|||
LD32_nochk(Scratch, (_imm)); \
|
||||
} \
|
||||
} \
|
||||
asm_output2("cmp %s,%X",gpn(_r),(_imm)); \
|
||||
asm_output2("cmp %s,0x%x",gpn(_r),(_imm)); \
|
||||
} while(0)
|
||||
|
||||
// MOV
|
||||
|
@ -457,25 +525,33 @@ typedef enum {
|
|||
#define MRNO(dr,sr) MR_cond(dr, sr, VC, "movvc") // overflow clear
|
||||
#define MRNC(dr,sr) MR_cond(dr, sr, CC, "movcc") // carry clear
|
||||
|
||||
#define LD(_d,_off,_b) do { \
|
||||
if ((_off)<0) { \
|
||||
underrunProtect(4); \
|
||||
#define LDR_chk(_d,_b,_off,_chk) do { \
|
||||
if (IsFpReg(_d)) { \
|
||||
FLDD_chk(_d,_b,_off,_chk); \
|
||||
} else if ((_off)<0) { \
|
||||
if (_chk) underrunProtect(4); \
|
||||
NanoAssert((_off)>-4096); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | ((_b)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
|
||||
} else { \
|
||||
if (isS16(_off) || isU16(_off)) { \
|
||||
underrunProtect(4); \
|
||||
if (_chk) underrunProtect(4); \
|
||||
NanoAssert((_off)<4096); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | ((_b)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
|
||||
} else { \
|
||||
underrunProtect(4+LD32_size); \
|
||||
if (_chk) underrunProtect(4+LD32_size); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0x79<<20) | ((_b)<<16) | ((_d)<<12) | Scratch ); \
|
||||
LD32_nochk(Scratch, _off); \
|
||||
} \
|
||||
} \
|
||||
asm_output3("ld %s,%d(%s)",gpn((_d)),(_off),gpn((_b))); \
|
||||
asm_output3("ldr %s,%d(%s)",gpn((_d)),(_off),gpn((_b))); \
|
||||
} while(0)
|
||||
|
||||
// LDR reserves its own space (LDR_chk with _chk = 1, which makes LDR_chk call
// underrunProtect); LDR_nochk passes _chk = 0 and leaves the reservation to
// the caller. The two flags were previously the other way round, which
// contradicted both macro names.
#define LDR(_d,_b,_off) LDR_chk(_d,_b,_off,1)
|
||||
#define LDR_nochk(_d,_b,_off) LDR_chk(_d,_b,_off,0)
|
||||
|
||||
// i386 compat, for Assembler.cpp
|
||||
#define LD(reg,offset,base) LDR_chk(reg,base,offset,1)
|
||||
#define ST(base,offset,reg) STR(reg,base,offset)
|
||||
|
||||
#define LDi(_d,_imm) do { \
|
||||
if (isS8((_imm)) || isU8((_imm))) { \
|
||||
|
@ -486,7 +562,7 @@ typedef enum {
|
|||
underrunProtect(LD32_size); \
|
||||
LD32_nochk(_d, (_imm)); \
|
||||
} \
|
||||
asm_output2("ld %s,%d",gpn((_d)),(_imm)); \
|
||||
asm_output2("ld %s,0x%x",gpn((_d)),(_imm)); \
|
||||
} while(0)
|
||||
|
||||
|
||||
|
@ -501,29 +577,13 @@ typedef enum {
|
|||
asm_output3("ldrb %s,%d(%s)", gpn(_d),(_off),gpn(_b)); \
|
||||
} while(0)
|
||||
|
||||
#define ST(_b,_off,_r) do { \
|
||||
#define STR(_d,_n,_off) do { \
|
||||
NanoAssert(!IsFpReg(_d) && isS12(_off)); \
|
||||
underrunProtect(4); \
|
||||
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_b)<<16) | ((_r)<<12) | ((-(_off))&0xFFF) ); \
|
||||
else *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((_r)<<12) | ((_off)&0xFFF) ); \
|
||||
asm_output3("str %s, %d(%s)",gpn(_r), (_off),gpn(_b)); } while(0)
|
||||
|
||||
|
||||
#define STi(_b,_off,_imm) do { \
|
||||
NanoAssert((_off)>0); \
|
||||
if (isS8((_imm)) || isU8((_imm))) { \
|
||||
underrunProtect(8); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((Scratch)<<12) | ((_off)&0xFFF) ); \
|
||||
asm_output3("str %s, %d(%s)",gpn(Scratch), (_off),gpn(_b)); \
|
||||
if ((_imm)<0) *(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | (Scratch<<12) | (((_imm)^0xFFFFFFFF)&0xFF) ); \
|
||||
else *(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | (Scratch<<12) | ((_imm)&0xFF) ); \
|
||||
asm_output2("ld %s,%d",gpn((Scratch)),(_imm)); \
|
||||
} else { \
|
||||
underrunProtect(4+LD32_size); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((Scratch)<<12) | ((_off)&0xFFF) ); \
|
||||
asm_output3("str %s, %d(%s)",gpn(Scratch), (_off),gpn(_b)); \
|
||||
LD32_nochk(Scratch, (_imm)); \
|
||||
} \
|
||||
} while(0);
|
||||
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
|
||||
else *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
|
||||
asm_output3("str %s, %d(%s)",gpn(_d), (_off), gpn(_n)); \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define LEA(_r,_d,_b) do { \
|
||||
|
@ -548,7 +608,7 @@ typedef enum {
|
|||
//#define RET() INT3()
|
||||
|
||||
#define BKPT_nochk() do { \
|
||||
*(--_nIns) = (NIns)( (0xE<<24) | (0x12<<20) | (0x7<<4) ); } while (0);
|
||||
*(--_nIns) = (NIns)( (0xE<<24) | (0x12<<20) | (0x7<<4) ); } while (0)
|
||||
|
||||
// this is pushing a reg
|
||||
#define PUSHr(_r) do { \
|
||||
|
@ -581,47 +641,10 @@ typedef enum {
|
|||
*(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) ); \
|
||||
asm_output1("pop %x", (_mask));} while (0)
|
||||
|
||||
// PC always points to current instruction + 8, so when calculating pc-relative
|
||||
// offsets, use PC+8.
|
||||
#define PC_OFFSET_FROM(target,frompc) ((intptr_t)(target) - ((intptr_t)(frompc) + 8))
|
||||
#define JMP_S24_OFFSET_OK(offs) ((-(1<<24)) <= (offs) && (offs) < (1<<24))
|
||||
|
||||
// (XXX This ought to be a function instead of a macro)
|
||||
//
|
||||
// Branch to target address _t with condition _c, doing underrun
|
||||
// checks (_chk == 1) or skipping them (_chk == 0).
|
||||
//
|
||||
// If the jump fits in a relative jump (+/-32MB), emit that.
|
||||
// If the jump is unconditional, emit the dest address inline in
|
||||
// the instruction stream and load it into pc.
|
||||
// If the jump has a condition, but no one's mucked with _nIns and our _nSlot
|
||||
// pointer is valid, stick the constant in the slot and emit a conditional
|
||||
// load into pc.
|
||||
// Otherwise, emit the conditional load into pc from a nearby constant,
|
||||
// and emit a jump to jump over it in case the condition fails.
|
||||
//
|
||||
// NB: JMP_nochk depends on this not calling samepage() when _c == AL
|
||||
#define B_cond_chk(_c,_t,_chk) do { \
|
||||
int32 offs = PC_OFFSET_FROM(_t,(intptr_t)(_nIns)-4); \
|
||||
if (JMP_S24_OFFSET_OK(offs)) { \
|
||||
if(_chk) underrunProtect(4); \
|
||||
*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) ); \
|
||||
} else if (_c == AL) { \
|
||||
if(_chk) underrunProtect(8); \
|
||||
*(--_nIns) = (NIns)(_t); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 ); \
|
||||
} else if (samepage(_nIns,_nSlot)) { \
|
||||
if(_chk) underrunProtect(8); \
|
||||
*(++_nSlot) = (NIns)(_t); \
|
||||
offs = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4); \
|
||||
NanoAssert(offs < 0); \
|
||||
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFFFFF) ); \
|
||||
} else { \
|
||||
if(_chk) underrunProtect(24); \
|
||||
*(--_nIns) = (NIns)(_t); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((-4)>>2) & 0xFFFFFF ); \
|
||||
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 ); \
|
||||
} \
|
||||
asm_output2("%s %p\n", _c == AL ? "jmp" : "b(cnd)", (void*)(_t)); \
|
||||
} while(0)
|
||||
#define isS12(offs) ((-(1<<12)) <= (offs) && (offs) < (1<<12))
|
||||
|
||||
#define B_cond(_c,_t) \
|
||||
B_cond_chk(_c,_t,1)
|
||||
|
@ -665,35 +688,12 @@ typedef enum {
|
|||
#define JO(t) do {B_cond(VS,t); asm_output1("bvs 0x%08x",(unsigned int)t); } while(0)
|
||||
#define JNO(t) do {B_cond(VC,t); asm_output1("bvc 0x%08x",(unsigned int)t); } while(0)
|
||||
|
||||
// used for testing result of an FP compare
|
||||
// used for testing result of an FP compare on x86; not used on arm.
|
||||
// JP = comparison false
|
||||
#define JP(t) do {B_cond(EQ,NE,t); asm_output1("jp 0x%08x",t); } while(0)
|
||||
#define JP(t) do {NanoAssert(0); B_cond(NE,t); asm_output1("jp 0x%08x",t); } while(0)
|
||||
|
||||
// JNP = comparison true
|
||||
#define JNP(t) do {B_cond(NE,EQ,t); asm_output1("jnp 0x%08x",t); } while(0)
|
||||
|
||||
|
||||
// floating point
|
||||
#define FNSTSW_AX() do {NanoAssert(0); asm_output("fnstsw_ax"); } while(0)
|
||||
#define FFREE(r) do {NanoAssert(0); asm_output1("ffree %s",gpn(b)); } while(0)
|
||||
#define FSTQ(p,d,b) do {NanoAssert(0); asm_output2("fstq %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FSTPQ(d,b) FSTQ(1,d,b)
|
||||
//#define FSTPQ(d,b) do {NanoAssert(0); asm_output2("fstpq %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FCOM(p,d,b) do {NanoAssert(0); asm_output2("fcom %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FCOMP(d,b) do {NanoAssert(0); asm_output2("fcomp %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FLDQ(d,b) do {NanoAssert(0); asm_output2("fldq %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FILDQ(d,b) do {NanoAssert(0); asm_output2("fildq %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FILD(d,b) do {NanoAssert(0); asm_output2("fild %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FADD(d,b) do {NanoAssert(0); asm_output2("faddq %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FSUB(d,b) do {NanoAssert(0); asm_output2("fsubq %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FSUBR(d,b) do {NanoAssert(0); asm_output2("fsubr %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FMUL(d,b) do {NanoAssert(0); asm_output2("fmulq %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FDIV(d,b) do {NanoAssert(0); asm_output2("fdivq %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FDIVR(d,b) do {NanoAssert(0); asm_output2("fdivr %d(%s)",d,gpn(b)); } while(0)
|
||||
#define FSTP(r) do {NanoAssert(0); asm_output1("fst st(%d)",r); } while(0)
|
||||
#define FLD1() do {NanoAssert(0); asm_output("fld1"); } while(0)
|
||||
#define FLDZ() do {NanoAssert(0); asm_output("fldz"); } while(0)
|
||||
|
||||
#define JNP(t) do {NanoAssert(0); B_cond(EQ,t); asm_output1("jnp 0x%08x",t); } while(0)
|
||||
|
||||
|
||||
// MOV(EQ) _r, #1
|
||||
|
@ -758,17 +758,147 @@ typedef enum {
|
|||
} while(0)
|
||||
|
||||
#define STMIA(_b, _mask) do { \
|
||||
underrunProtect(2); \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask)); \
|
||||
*(--_nIns) = (NIns)(COND_AL | (0x8A<<20) | ((_b)<<16) | (_mask)&0xFF); \
|
||||
asm_output2("stmia %s!,{%x}", gpn(_b), _mask); \
|
||||
asm_output2("stmia %s!,{0x%x}", gpn(_b), _mask); \
|
||||
} while (0)
|
||||
|
||||
#define LDMIA(_b, _mask) do { \
|
||||
underrunProtect(2); \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask)); \
|
||||
*(--_nIns) = (NIns)(COND_AL | (0x8B<<20) | ((_b)<<16) | (_mask)&0xFF); \
|
||||
asm_output2("ldmia %s!,{%x}", gpn(_b), (_mask)); \
|
||||
asm_output2("ldmia %s!,{0x%x}", gpn(_b), (_mask)); \
|
||||
} while (0)
|
||||
|
||||
// MRS _d: emit the status-register read used by asm_fcmp to get the flags
// into general-purpose register _d. Reserves 4 bytes first. The trace text
// uses the mnemonic "mrs" to match the macro name (it previously printed
// "msr").
#define MRS(_d) do { \
|
||||
underrunProtect(4); \
|
||||
*(--_nIns) = (NIns)(COND_AL | (0x10<<20) | (0xF<<16) | ((_d)<<12)); \
|
||||
asm_output1("mrs %s", gpn(_d)); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* VFP
|
||||
*/
|
||||
|
||||
// FMDRR _Dm,_Rd,_Rn: emit one instruction word (traced as "fmdrr").
// Asserts _Dm is an FP register and _Rd/_Rn are GP registers; reserves
// 4 bytes via underrunProtect before writing.
#define FMDRR(_Dm,_Rd,_Rn) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsFpReg(_Dm) && IsGpReg(_Rd) && IsGpReg(_Rn)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xC4<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \
|
||||
asm_output3("fmdrr %s,%s,%s", gpn(_Dm), gpn(_Rd), gpn(_Rn)); \
|
||||
} while (0)
|
||||
|
||||
// FMRRD _Rd,_Rn,_Dm: emit one instruction word (traced as "fmrrd").
// Asserts _Rd/_Rn are GP registers and _Dm is an FP register; reserves
// 4 bytes via underrunProtect before writing.
#define FMRRD(_Rd,_Rn,_Dm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsGpReg(_Rd) && IsGpReg(_Rn) && IsFpReg(_Dm)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xC5<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \
|
||||
asm_output3("fmrrd %s,%s,%s", gpn(_Rd), gpn(_Rn), gpn(_Dm)); \
|
||||
} while (0)
|
||||
|
||||
// FSTD _Dd,_Rn,_offs: emit a double store of FP register _Dd at byte offset
// _offs from non-FP register _Rn (traced as "fstd").
// _offs must be a multiple of 4 and (_offs >> 2) must fit in a signed 8-bit
// value. The instruction carries the magnitude of the offset in words plus
// bit 23, which is set for a non-negative offset and clear for a negative one.
// The macro-local names carry an fstd_ prefix so that a caller passing a
// variable called `offs` or `negflag` is not shadowed by the macro's own
// declarations; _offs is parenthesized wherever it is used.
#define FSTD(_Dd,_Rn,_offs) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2)); \
|
||||
NanoAssert(IsFpReg(_Dd) && !IsFpReg(_Rn)); \
|
||||
int fstd_addbit_ = 1<<23; \
|
||||
intptr_t fstd_offs_ = (_offs); \
|
||||
if ((_offs) < 0) { \
|
||||
fstd_addbit_ = 0<<23; \
|
||||
fstd_offs_ = -(fstd_offs_); \
|
||||
} \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xD0<<20) | ((_Rn)<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | fstd_addbit_ | ((fstd_offs_>>2)&0xff) ); \
|
||||
asm_output3("fstd %s,%s(%d)", gpn(_Dd), gpn(_Rn), (_offs)); \
|
||||
} while (0)
|
||||
|
||||
// FLDD_chk _Dd,_Rn,_offs,_chk: emit a double load into FP register _Dd from
// byte offset _offs off non-FP register _Rn (traced as "fldd").
// underrunProtect(4) is called only when _chk is non-zero.
// _offs must be a multiple of 4 and (_offs >> 2) must fit in a signed 8-bit
// value. The instruction carries the magnitude of the offset in words plus
// bit 23, which is set for a non-negative offset and clear for a negative one.
// The macro-local names carry an fldd_ prefix so that a caller passing a
// variable called `offs` or `negflag` is not shadowed by the macro's own
// declarations; _offs is parenthesized wherever it is used.
#define FLDD_chk(_Dd,_Rn,_offs,_chk) do { \
|
||||
if(_chk) underrunProtect(4); \
|
||||
NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2)); \
|
||||
NanoAssert(IsFpReg(_Dd) && !IsFpReg(_Rn)); \
|
||||
int fldd_addbit_ = 1<<23; \
|
||||
intptr_t fldd_offs_ = (_offs); \
|
||||
if ((_offs) < 0) { \
|
||||
fldd_addbit_ = 0<<23; \
|
||||
fldd_offs_ = -(fldd_offs_); \
|
||||
} \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xD1<<20) | ((_Rn)<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | fldd_addbit_ | ((fldd_offs_>>2)&0xff) ); \
|
||||
asm_output3("fldd %s,%s(%d)", gpn(_Dd), gpn(_Rn), (_offs)); \
|
||||
} while (0)
|
||||
// FLDD: FLDD_chk with the underrun check enabled (_chk = 1).
#define FLDD(_Dd,_Rn,_offs) FLDD_chk(_Dd,_Rn,_offs,1)
|
||||
|
||||
// FSITOD _Dd,_Sm: emit one instruction word (traced as "fsitod") with _Dd as
// the destination. _Sm is asserted to be FpSingleScratch and is not encoded
// from the argument: the source field is the constant (0<<5) | 0x7.
#define FSITOD(_Dd,_Sm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsFpReg(_Dd) && ((_Sm) == FpSingleScratch)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2F<<6) | (0<<5) | (0x7) ); \
|
||||
asm_output2("fsitod %s,%s", gpn(_Dd), gpn(_Sm)); \
|
||||
} while (0)
|
||||
|
||||
|
||||
// FUITOD _Dd,_Sm: emit one instruction word (traced as "fuitod") with _Dd as
// the destination. _Sm is asserted to be FpSingleScratch and is not encoded
// from the argument: the source field is the constant (0<<5) | 0x7.
#define FUITOD(_Dd,_Sm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsFpReg(_Dd) && ((_Sm) == FpSingleScratch)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2D<<6) | (0<<5) | (0x7) ); \
|
||||
asm_output2("fuitod %s,%s", gpn(_Dd), gpn(_Sm)); \
|
||||
} while (0)
|
||||
|
||||
// FMSR _Sn,_Rd: emit one instruction word (traced as "fmsr") taking GP
// register _Rd as its operand. _Sn is asserted to be FpSingleScratch and is
// not encoded from the argument: the FP register field is the constant
// (0x7<<16) | (0<<7).
#define FMSR(_Sn,_Rd) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(((_Sn) == FpSingleScratch) && IsGpReg(_Rd)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
|
||||
asm_output2("fmsr %s,%s", gpn(_Sn), gpn(_Rd)); \
|
||||
} while (0)
|
||||
|
||||
// FNEGD _Dd,_Dm: emit one instruction word (traced as "fnegd"); both
// operands are asserted to be FP registers. Reserves 4 bytes first.
#define FNEGD(_Dd,_Dm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xEB1<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
|
||||
asm_output2("fnegd %s,%s", gpn(_Dd), gpn(_Dm)); \
|
||||
} while (0)
|
||||
|
||||
// FADDD _Dd,_Dn,_Dm: emit one instruction word (traced as "faddd"); all
// three operands are asserted to be FP registers. Reserves 4 bytes first.
#define FADDD(_Dd,_Dn,_Dm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
|
||||
asm_output3("faddd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
|
||||
} while (0)
|
||||
|
||||
// FSUBD _Dd,_Dn,_Dm: emit one instruction word (traced as "fsubd"); all
// three operands are asserted to be FP registers. Differs from FADDD only in
// the (0xB4<<4) field. Reserves 4 bytes first.
#define FSUBD(_Dd,_Dn,_Dm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
|
||||
asm_output3("fsubd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
|
||||
} while (0)
|
||||
|
||||
// FMULD _Dd,_Dn,_Dm: emit one instruction word (traced as "fmuld"); all
// three operands are asserted to be FP registers. Reserves 4 bytes first.
#define FMULD(_Dd,_Dn,_Dm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xE2<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
|
||||
asm_output3("fmuld %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
|
||||
} while (0)
|
||||
|
||||
// FDIVD _Dd,_Dn,_Dm: emit one instruction word for the double divide used by
// asm_fop for LIR_fdiv; all three operands are asserted to be FP registers.
// Reserves 4 bytes first. The trace text prints "fdivd" (it previously
// printed "fmuld", which made the verbose output indistinguishable from FMULD).
#define FDIVD(_Dd,_Dn,_Dm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xE8<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
|
||||
asm_output3("fdivd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
|
||||
} while (0)
|
||||
|
||||
// FMSTAT: emit the fixed instruction word COND_AL | 0x0EF1FA10 (traced as
// "fmstat"). asm_fcmp places it after FCMPD to get the comparison result
// into the ARM status register. Reserves 4 bytes first.
#define FMSTAT() do { \
|
||||
underrunProtect(4); \
|
||||
*(--_nIns) = (NIns)( COND_AL | 0x0EF1FA10); \
|
||||
asm_output("fmstat"); \
|
||||
} while (0)
|
||||
|
||||
// FCMPD _Dd,_Dm: emit one instruction word (traced as "fcmpd"); both
// operands are asserted to be FP registers. Reserves 4 bytes first.
#define FCMPD(_Dd,_Dm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xEB4<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
|
||||
asm_output2("fcmpd %s,%s", gpn(_Dd), gpn(_Dm)); \
|
||||
} while (0)
|
||||
|
||||
// FCPYD _Dd,_Dm: emit one instruction word (traced as "fcpyd"); both
// operands are asserted to be FP registers. Reserves 4 bytes first.
#define FCPYD(_Dd,_Dm) do { \
|
||||
underrunProtect(4); \
|
||||
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \
|
||||
*(--_nIns) = (NIns)( COND_AL | (0xEB0<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
|
||||
asm_output2("fcpyd %s,%s", gpn(_Dd), gpn(_Dm)); \
|
||||
} while (0)
|
||||
}
|
||||
#endif // __nanojit_NativeThumb__
|
||||
|
|
|
@ -68,7 +68,9 @@ namespace nanojit
|
|||
debug_only( uint32_t count; )
|
||||
debug_only( RegisterMask managed; ) // bitfield of 0..NJ_MAX_REGISTERS denoting which are under our management
|
||||
|
||||
LIns* active[NJ_MAX_REGISTERS]; // active[r] = OP that defines r
|
||||
// RegisterMask is a 32-bit value, so we can never have more than 32 active.
|
||||
// hardcode 32 here in case we have non-contiguous register numbers
|
||||
LIns* active[32]; // active[r] = OP that defines r
|
||||
RegisterMask free;
|
||||
RegisterMask used;
|
||||
|
||||
|
|
|
@ -151,6 +151,7 @@ namespace nanojit
|
|||
#define isU8(i) ( int32_t(i) == uint8_t(i) )
|
||||
#define isS16(i) ( int32_t(i) == int16_t(i) )
|
||||
#define isU16(i) ( int32_t(i) == uint16_t(i) )
|
||||
#define isS24(i) ( ((int32_t(i)<<8)>>8) == (i) )
|
||||
|
||||
#define alignTo(x,s) ((((uintptr_t)(x)))&~(((uintptr_t)s)-1))
|
||||
#define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1))
|
||||
|
|
Загрузка…
Ссылка в новой задаче