Add VFP for floating point ops to nanojit ARM backend.

Vladimir Vukicevic 2008-09-02 22:29:23 -07:00
Parent 0fe0d78272
Commit 05c3cd68da
8 changed files: 872 additions and 268 deletions

View file

@ -119,7 +119,7 @@ static bool nesting_enabled = true;
static bool oracle_enabled = true;
static bool did_we_check_sse2 = false;
#ifdef DEBUG
#if defined(DEBUG) || defined(INCLUDE_VERBOSE_OUTPUT)
static bool verbose_debug = getenv("TRACEMONKEY") && strstr(getenv("TRACEMONKEY"), "verbose");
#define debug_only_v(x) if (verbose_debug) { x; }
#else
@ -282,7 +282,7 @@ static bool isi2f(LInsp i)
if (i->isop(LIR_i2f))
return true;
#ifdef NANOJIT_ARM
#if defined(NANOJIT_ARM) && defined(NJ_SOFTFLOAT)
if (i->isop(LIR_qjoin) &&
i->oprnd1()->isop(LIR_call) &&
i->oprnd2()->isop(LIR_callh))
@ -300,7 +300,7 @@ static bool isu2f(LInsp i)
if (i->isop(LIR_u2f))
return true;
#ifdef NANOJIT_ARM
#if defined(NANOJIT_ARM) && defined(NJ_SOFTFLOAT)
if (i->isop(LIR_qjoin) &&
i->oprnd1()->isop(LIR_call) &&
i->oprnd2()->isop(LIR_callh))
@ -315,7 +315,7 @@ static bool isu2f(LInsp i)
static LInsp iu2fArg(LInsp i)
{
#ifdef NANOJIT_ARM
#if defined(NANOJIT_ARM) && defined(NJ_SOFTFLOAT)
if (i->isop(LIR_qjoin))
return i->oprnd1()->arg(0);
#endif
@ -371,7 +371,7 @@ static bool overflowSafe(LIns* i)
((c->constval() > 0)));
}
#ifdef NANOJIT_ARM
#if defined(NJ_SOFTFLOAT)
class SoftFloatFilter: public LirWriter
{
@ -428,19 +428,6 @@ public:
return out->ins2(LIR_eq, bv, out->insImm(1));
}
// not really a softfloat filter, but needed on ARM --
// arm doesn't mask shifts to 31 like x86 does
if (v == LIR_lsh ||
v == LIR_rsh ||
v == LIR_ush)
{
if (s1->isconst())
s1->setimm16(s1->constval() & 31);
else
s1 = out->ins2(LIR_and, s1, out->insImm(31));
return out->ins2(v, s0, s1);
}
return out->ins2(v, s0, s1);
}
@ -455,7 +442,7 @@ public:
}
};
#endif
#endif // NJ_SOFTFLOAT
class FuncFilter: public LirWriter
{
@ -550,6 +537,20 @@ public:
return out->ins2(LIR_add, x, y);
}
}
#ifdef NANOJIT_ARM
else if (v == LIR_lsh ||
v == LIR_rsh ||
v == LIR_ush)
{
// needed on ARM -- arm doesn't mask shifts to 31 like x86 does
if (s1->isconst())
s1->setimm16(s1->constval() & 31);
else
s1 = out->ins2(LIR_and, s1, out->insImm(31));
return out->ins2(v, s0, s1);
}
#endif
return out->ins2(v, s0, s1);
}
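For context, a minimal standalone sketch (not part of the commit) of the semantic difference this mask papers over: x86 shift instructions truncate the count to the low five bits in hardware, while ARM uses the bottom byte of the count register, so a count of 32 clears the value on ARM but leaves it unchanged on x86.

#include <cstdint>
#include <cstdio>

// Hypothetical illustration: the inserted LIR_and makes ARM compute
// what x86 hardware already computes for shift counts >= 32.
static uint32_t lsh_x86_semantics(uint32_t x, uint32_t n)
{
    return x << (n & 31);   // x86 SHL masks the count; ARM LSL does not
}

int main()
{
    // with n == 32, the masked (x86-style) result is x itself; an
    // unmasked ARM LSL by 32 would shift every bit out and yield 0
    printf("%u\n", lsh_x86_semantics(1u, 32));   // prints 1
}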
@ -604,7 +605,7 @@ public:
/* In debug mode vpname contains a textual description of the type of the
slot during the forall iteration over all slots. */
#ifdef DEBUG
#if defined(DEBUG) || defined(INCLUDE_VERBOSE_OUTPUT)
#define DEF_VPNAME const char* vpname; unsigned vpnum
#define SET_VPNAME(name) do { vpname = name; vpnum = 0; } while(0)
#define INC_VPNUM() do { ++vpnum; } while(0)
@ -821,7 +822,7 @@ TraceRecorder::TraceRecorder(JSContext* cx, GuardRecord* _anchor, Fragment* _fra
if (verbose_debug)
lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);
#endif
#ifdef NANOJIT_ARM
#ifdef NJ_SOFTFLOAT
lir = float_filter = new (&gc) SoftFloatFilter(lir);
#endif
lir = cse_filter = new (&gc) CseFilter(lir, &gc);
@ -867,7 +868,7 @@ TraceRecorder::~TraceRecorder()
delete cse_filter;
delete expr_filter;
delete func_filter;
#ifdef NANOJIT_ARM
#ifdef NJ_SOFTFLOAT
delete float_filter;
#endif
delete lir_buf_writer;
@ -2277,8 +2278,10 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
union { NIns *code; GuardRecord* (FASTCALL *func)(InterpState*, Fragment*); } u;
u.code = f->code();
#if defined(DEBUG) && defined(NANOJIT_IA32)
#ifdef DEBUG
#if defined(NANOJIT_IA32)
uint64 start = rdtsc();
#endif
#endif
/*
@ -2362,19 +2365,18 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount,
js_ReconstructStackDepth(cx, fp->script, fp->regs->pc) == fp->regs->sp);
#if defined(DEBUG) && defined(NANOJIT_IA32)
if (verbose_debug) {
printf("leaving trace at %s:%u@%u, op=%s, lr=%p, exitType=%d, sp=%d, ip=%p, "
"cycles=%llu\n",
fp->script->filename, js_PCToLineNumber(cx, fp->script, fp->regs->pc),
fp->regs->pc - fp->script->code,
js_CodeName[*fp->regs->pc],
lr,
lr->exit->exitType,
fp->regs->sp - StackBase(fp), lr->jmp,
(rdtsc() - start));
}
uint64 cycles = rdtsc() - start;
#else
uint64 cycles = 0;
#endif
debug_only_v(printf("leaving trace at %s:%u@%u, exitType=%d, sp=%d, ip=%p, cycles=%llu\n",
fp->script->filename, js_PCToLineNumber(cx, fp->script, fp->regs->pc),
fp->regs->pc - fp->script->code,
lr->exit->exitType,
fp->regs->sp - StackBase(fp), lr->jmp,
cycles));
/* If this trace is part of a tree, later branches might have added additional globals for
which we don't have any type information available in the side exit. We merge in this
information from the entry type-map. See also comment in the constructor of TraceRecorder

View file

@ -221,7 +221,7 @@ class TraceRecorder {
nanojit::LirWriter* cse_filter;
nanojit::LirWriter* expr_filter;
nanojit::LirWriter* func_filter;
#ifdef NANOJIT_ARM
#ifdef NJ_SOFTFLOAT
nanojit::LirWriter* float_filter;
#endif
nanojit::LIns* cx_ins;

View file

@ -44,6 +44,7 @@
#if defined(AVMPLUS_LINUX) && defined(AVMPLUS_ARM)
#include <asm/unistd.h>
extern "C" void __clear_cache(char *BEG, char *END);
#endif
namespace nanojit
@ -178,6 +179,8 @@ namespace nanojit
// nothing free, steal one
// LSRA says pick the one with the furthest use
LIns* vic = findVictim(regs,allow,prefer);
NanoAssert(vic != NULL);
Reservation* resv = getresv(vic);
// restore vic
@ -446,25 +449,37 @@ namespace nanojit
Reservation* resv = getresv(i);
Register r;
// if we have an existing reservation and it has a non-unknown
// register allocated, and that register is in our allowed mask,
// return it.
if (resv && (r=resv->reg) != UnknownReg && (rmask(r) & allow)) {
return r;
}
// figure out what registers are preferred for this instruction
RegisterMask prefer = hint(i, allow);
// if we didn't have a reservation, allocate one now
if (!resv)
resv = reserveAlloc(i);
// if the reservation doesn't have a register assigned to it...
if ((r=resv->reg) == UnknownReg)
{
// .. if the cost is 2 and the allowed mask includes
// the saved regs, then prefer just those.
if (resv->cost == 2 && (allow&SavedRegs))
prefer = allow&SavedRegs;
// grab one.
r = resv->reg = registerAlloc(prefer);
_allocator.addActive(r, i);
return r;
}
else
{
// r not allowed
// the already-allocated register isn't in the allowed mask;
// we need to grab a new one and then copy over the old
// contents to the new.
resv->reg = UnknownReg;
_allocator.retire(r);
if (resv->cost == 2 && (allow&SavedRegs))
@ -795,12 +810,15 @@ namespace nanojit
# if defined(UNDER_CE)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
# elif defined(AVMPLUS_LINUX)
// XXX fixme flush adjacent pages together
for (int i = 0; i < 2; i++) {
Page *p = (i == 0) ? _nativePages : _nativeExitPages;
Page *first = p;
while (p) {
flushCache((NIns*)p, (NIns*)((intptr_t)(p) + NJ_PAGE_SIZE));
if (!p->next || p->next != p+1) {
__clear_cache((char*)first, (char*)(p+1));
first = p->next;
}
p = p->next;
}
}
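The loop above coalesces physically adjacent pages so each contiguous run costs one __clear_cache call instead of one per page. The same logic in isolation (Page layout assumed from the diff; adjacency means p->next == p + 1):

extern "C" void __clear_cache(char *BEG, char *END);

struct Page { Page* next; };   // payload omitted; shape assumed

static void flushPageList(Page* p)
{
    Page* first = p;
    while (p) {
        // a contiguous run ends when the next page isn't adjacent;
        // flush [first, p+1) with a single call
        if (!p->next || p->next != p + 1) {
            __clear_cache((char*)first, (char*)(p + 1));
            first = p->next;
        }
        p = p->next;
    }
}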
@ -852,7 +870,7 @@ namespace nanojit
switch(op)
{
default:
NanoAssertMsgf(false, ("unsupported LIR instruction: %d (~0x40: %d)\n",op, op&~LIR64));
NanoAssertMsgf(false, "unsupported LIR instruction: %d (~0x40: %d)\n", op, op&~LIR64);
break;
case LIR_short:
@ -1208,13 +1226,20 @@ namespace nanojit
LIns* cond = ins->oprnd1();
LOpcode condop = cond->opcode();
NanoAssert(cond->isCond());
#ifndef NJ_SOFTFLOAT
#if !defined(NJ_SOFTFLOAT)
if (condop >= LIR_feq && condop <= LIR_fge)
{
#if defined(NJ_ARM_VFP)
if (op == LIR_xf)
JNE(exit);
else
JE(exit);
#else
if (op == LIR_xf)
JP(exit);
else
JNP(exit);
#endif
asm_fcmp(cond);
break;
}
@ -1313,9 +1338,13 @@ namespace nanojit
{
// only want certain regs
Register r = prepResultReg(ins, AllowableFlagRegs);
#ifdef NJ_ARM_VFP
SETE(r);
#else
// SETcc only sets low 8 bits, so extend
MOVZX8(r,r);
SETNP(r);
#endif
asm_fcmp(ins);
break;
}
@ -1437,8 +1466,13 @@ namespace nanojit
uint32_t Assembler::arFree(uint32_t idx)
{
// nothing to free
if (idx == 0)
return 0;
if (idx > 0 && _activation.entry[idx] == _activation.entry[idx+stack_direction(1)])
_activation.entry[idx+stack_direction(1)] = 0; // clear 2 slots for doubles
_activation.entry[idx] = 0;
return 0;
}
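A double occupies two adjacent activation slots that both record the same instruction, so freeing one slot must also clear its pair. A simplified restatement of arFree under that convention (types and stack_direction assumed):

#include <cstdint>

struct LIns;

struct Activation { LIns* entry[256]; };          // shape assumed
static int stack_direction(int n) { return n; }   // platform-defined

static void arFreeSketch(Activation& ar, uint32_t idx)
{
    if (idx == 0)
        return;                                   // nothing to free
    // doubles are recorded in two adjacent slots with the same LIns
    if (ar.entry[idx] == ar.entry[idx + stack_direction(1)])
        ar.entry[idx + stack_direction(1)] = 0;   // clear the pair
    ar.entry[idx] = 0;
}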

View file

@ -376,8 +376,6 @@ namespace nanojit
return l;
}
#define isS24(x) (((int32_t(x)<<8)>>8) == (x))
LInsp LirBufWriter::insFar(LOpcode op, LInsp target)
{
NanoAssert(op == LIR_skip || op == LIR_tramp);

View file

@ -49,14 +49,17 @@
#if defined(AVMPLUS_LINUX)
#include <asm/unistd.h>
extern "C" void __clear_cache(char *BEG, char *END);
#endif
#ifdef FEATURE_NANOJIT
namespace nanojit
{
#ifdef FEATURE_NANOJIT
#ifdef NJ_VERBOSE
const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","r11","IP","SP","LR","PC"};
const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","FP","IP","SP","LR","PC",
"d0","d1","d2","d3","d4","d5","d6","d7","s14"};
#endif
const Register Assembler::argRegs[] = { R0, R1, R2, R3 };
@ -122,6 +125,7 @@ Assembler::nFragExit(LInsp guard)
// for us; always force a far jump here.
BL_far(_epilogue);
// stick the jmp pointer to the start of the sequence
lr->jmp = _nIns;
}
@ -155,18 +159,26 @@ void
Assembler::asm_call(LInsp ins)
{
const CallInfo* call = callInfoFor(ins->fid());
Reservation *callRes = getresv(ins);
uint32_t atypes = call->_argtypes;
uint32_t roffset = 0;
// skip return type
#ifdef NJ_ARM_VFP
ArgSize rsize = (ArgSize)(atypes & 3);
#endif
atypes >>= 2;
// we need to detect if we have arg0 as LO followed by arg1 as F;
// in that case, we need to skip using r1 -- the F needs to be
// loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's
// generated code.
bool arg0IsInt32FollowedByFloat = false;
while ((atypes & 3) != ARGSIZE_NONE) {
if (((atypes >> 4) & 3) == ARGSIZE_LO &&
((atypes >> 2) & 3) == ARGSIZE_F &&
((atypes >> 6) & 3) == ARGSIZE_NONE)
if (((atypes >> 2) & 3) == ARGSIZE_LO &&
((atypes >> 0) & 3) == ARGSIZE_F &&
((atypes >> 4) & 3) == ARGSIZE_NONE)
{
arg0IsInt32FollowedByFloat = true;
break;
@ -174,17 +186,68 @@ Assembler::asm_call(LInsp ins)
atypes >>= 2;
}
#ifdef NJ_ARM_VFP
if (rsize == ARGSIZE_F) {
NanoAssert(ins->opcode() == LIR_fcall);
NanoAssert(callRes);
//fprintf (stderr, "call ins: %p callRes: %p reg: %d ar: %d\n", ins, callRes, callRes->reg, callRes->arIndex);
Register rr = callRes->reg;
int d = disp(callRes);
freeRsrcOf(ins, rr != UnknownReg);
if (rr != UnknownReg) {
NanoAssert(IsFpReg(rr));
FMDRR(rr,R0,R1);
} else {
NanoAssert(d);
//fprintf (stderr, "call ins d: %d\n", d);
STMIA(Scratch, 1<<R0 | 1<<R1);
arm_ADDi(Scratch, FP, d);
}
}
#endif
CALL(call);
ArgSize sizes[10];
uint32_t argc = call->get_sizes(sizes);
for(uint32_t i=0; i < argc; i++) {
for(uint32_t i = 0; i < argc; i++) {
uint32_t j = argc - i - 1;
ArgSize sz = sizes[j];
NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
LInsp arg = ins->arg(j);
// pre-assign registers R0-R3 for arguments (if they fit)
Register r = (i+roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
asm_arg(sz, ins->arg(j), r);
Register r = (i + roffset) < 4 ? argRegs[i+roffset] : UnknownReg;
#ifdef NJ_ARM_VFP
if (sz == ARGSIZE_F) {
if (r == R0 || r == R2) {
roffset++;
} else if (r == R1) {
r = R2;
roffset++;
} else {
r = UnknownReg;
}
// XXX move this into asm_farg
Register sr = findRegFor(arg, FpRegs);
if (r != UnknownReg) {
// stick it into our scratch fp reg, and then copy into the base reg
//fprintf (stderr, "FMRRD: %d %d <- %d\n", r, nextreg(r), sr);
FMRRD(r, nextreg(r), sr);
} else {
asm_pusharg(arg);
}
} else {
asm_arg(sz, arg, r);
}
#else
NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q);
asm_arg(sz, arg, r);
#endif
if (i == 0 && arg0IsInt32FollowedByFloat)
roffset = 1;
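The arg0IsInt32FollowedByFloat check above exists because the EABI requires 64-bit arguments to start in an even core register: f(int32, double) puts the int in r0 and the double in r2/r3, leaving r1 unused. A toy model of that assignment rule (hypothetical helper, not backend code):

#include <cstdio>

// 'i' = 32-bit arg, 'd' = double; prints the core registers the ABI
// would assign, assuming every argument fits in r0-r3
static void assignRegs(const char* sig)
{
    int r = 0;
    for (const char* p = sig; *p; ++p) {
        if (*p == 'i') {
            printf("'%c' -> r%d\n", *p, r);
            r += 1;
        } else {
            if (r & 1) r++;        // doubles start at an even register
            printf("'%c' -> r%d:r%d\n", *p, r, r + 1);
            r += 2;
        }
    }
}

// assignRegs("id") prints r0 for the int and r2:r3 for the double;
// r1 is skipped, which is what the roffset bump compensates for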
@ -238,7 +301,7 @@ Assembler::nRegisterResetAll(RegAlloc& a)
// add scratch registers to our free list for the allocator
a.clear();
a.used = 0;
a.free = rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) | rmask(R5);
a.free = rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) | rmask(R5) | FpRegs;
debug_only(a.managed = a.free);
}
@ -251,16 +314,15 @@ Assembler::nPatchBranch(NIns* branch, NIns* target)
// Which is really 2 instructions, so we need to modify both
// XXX -- this is B, not BL, at least on non-Thumb..
// branch+2 because PC is always 2 instructions ahead on ARM/Thumb
int32_t offset = int(target) - int(branch+2);
int32_t offset = PC_OFFSET_FROM(target, branch);
//printf("---patching branch at 0x%08x to location 0x%08x (%d-0x%08x)\n", branch, target, offset, offset);
// We have 2 words to work with here -- if offset is in range of a 24-bit
// relative jump, emit that; otherwise, we do a pc-relative load into pc.
if (-(1<<24) <= offset & offset < (1<<24)) {
if (isS24(offset)) {
// ARM goodness, using unconditional B
*branch = (NIns)( COND_AL | (0xA<<24) | ((offset >>2) & 0xFFFFFF) );
*branch = (NIns)( COND_AL | (0xA<<24) | ((offset>>2) & 0xFFFFFF) );
} else {
// LDR pc,[pc]
*branch++ = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | ( 0x004 ) );
@ -295,11 +357,11 @@ Assembler::asm_qjoin(LIns *ins)
LIns* hi = ins->oprnd2();
Register r = findRegFor(hi, GpRegs);
ST(FP, d+4, r);
STR(r, FP, d+4);
// okay if r gets recycled.
r = findRegFor(lo, GpRegs);
ST(FP, d, r);
STR(r, FP, d);
freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem
}
@ -311,7 +373,7 @@ Assembler::asm_store32(LIns *value, int dr, LIns *base)
findRegFor2(GpRegs, value, rA, base, rB);
Register ra = rA->reg;
Register rb = rB->reg;
ST(rb, dr, ra);
STR(ra, rb, dr);
}
void
@ -319,7 +381,17 @@ Assembler::asm_restore(LInsp i, Reservation *resv, Register r)
{
(void)resv;
int d = findMemFor(i);
LD(r, d, FP);
if (IsFpReg(r)) {
if (isS8(d >> 2)) {
FLDD(r, FP, d);
} else {
FLDD(r, Scratch, 0);
arm_ADDi(Scratch, FP, d);
}
} else {
LDR(r, FP, d);
}
verbose_only(
if (_verbose)
@ -332,12 +404,21 @@ Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
{
(void)i;
(void)pop;
//fprintf (stderr, "resv->arIndex: %d\n", resv->arIndex);
if (resv->arIndex) {
int d = disp(resv);
// save to spill location
Register rr = resv->reg;
ST(FP, d, rr);
if (IsFpReg(rr)) {
if (isS8(d >> 2)) {
FSTD(rr, FP, d);
} else {
FSTD(rr, Scratch, 0);
arm_ADDi(Scratch, FP, d);
}
} else {
STR(rr, FP, d);
}
verbose_only(if (_verbose){
outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i));
@ -349,38 +430,164 @@ Assembler::asm_spill(LInsp i, Reservation *resv, bool pop)
void
Assembler::asm_load64(LInsp ins)
{
LIns* base = ins->oprnd1();
int db = ins->oprnd2()->constval();
Reservation *resv = getresv(ins);
int dr = disp(resv);
NanoAssert(resv->reg == UnknownReg && dr != 0);
///asm_output("<<< load64");
LIns* base = ins->oprnd1();
int offset = ins->oprnd2()->constval();
Reservation *resv = getresv(ins);
Register rr = resv->reg;
int d = disp(resv);
Register rb = findRegFor(base, GpRegs);
resv->reg = UnknownReg;
asm_mmq(FP, dr, rb, db);
freeRsrcOf(ins, false);
#ifdef NJ_ARM_VFP
Register rb = findRegFor(base, GpRegs);
NanoAssert(rb != UnknownReg);
NanoAssert(rr == UnknownReg || IsFpReg(rr));
if (rr != UnknownReg) {
if (!isS8(offset >> 2) || (offset&3) != 0) {
underrunProtect(LD32_size + 8);
FLDD(rr,Scratch,0);
ADD(Scratch, rb);
LD32_nochk(Scratch, offset);
} else {
FLDD(rr,rb,offset);
}
} else {
asm_mmq(FP, d, rb, offset);
}
// *(FP+dr) <- *(rb+db)
#else
NanoAssert(resv->reg == UnknownReg && d != 0);
Register rb = findRegFor(base, GpRegs);
asm_mmq(FP, d, rb, offset);
#endif
//asm_output(">>> load64");
}
void
Assembler::asm_store64(LInsp value, int dr, LInsp base)
{
//asm_output1("<<< store64 (dr: %d)", dr);
#ifdef NJ_ARM_VFP
Reservation *valResv = getresv(value);
Register rb = findRegFor(base, GpRegs);
Register rv = findRegFor(value, FpRegs);
NanoAssert(rb != UnknownReg);
NanoAssert(rv != UnknownReg);
Register baseReg = rb;
intptr_t baseOffset = dr;
if (!isS8(dr)) {
baseReg = Scratch;
baseOffset = 0;
}
FSTD(rv, baseReg, baseOffset);
if (!isS8(dr)) {
underrunProtect(4 + LD32_size);
ADD(Scratch, rb);
LD32_nochk(Scratch, dr);
}
// if it's a constant, make sure our baseReg/baseOffset location
// has the right value
if (value->isconstq()) {
const int32_t* p = (const int32_t*) (value-2);
underrunProtect(12 + LD32_size);
asm_quad_nochk(rv, p);
}
#else
int da = findMemFor(value);
Register rb = findRegFor(base, GpRegs);
asm_mmq(rb, dr, FP, da);
#endif
//asm_output(">>> store64");
}
// stick a quad into register rr, where p points to the two
// 32-bit parts of the quad, optionally also storing at FP+d
void
Assembler::asm_quad_nochk(Register rr, const int32_t* p)
{
*(++_nSlot) = p[0];
*(++_nSlot) = p[1];
intptr_t constAddr = (intptr_t) (_nSlot-1);
intptr_t realOffset = PC_OFFSET_FROM(constAddr, _nIns-1);
intptr_t offset = realOffset;
Register baseReg = PC;
//int32_t *q = (int32_t*) constAddr;
//fprintf (stderr, "asm_quad_nochk: rr = %d cAddr: 0x%x quad: %08x:%08x q: %f @0x%08x\n", rr, constAddr, p[0], p[1], *(double*)q, _nIns);
// for FLDD, we only get a left-shifted 8-bit offset
if (!isS8(realOffset >> 2)) {
offset = 0;
baseReg = Scratch;
}
FLDD(rr, baseReg, offset);
if (!isS8(realOffset >> 2))
LD32_nochk(Scratch, constAddr);
}
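FLDD can only encode a word-scaled 8-bit offset, which is why both this function and the load/store paths fall back to materializing an address in Scratch. The range guard, as a standalone predicate matching the isS8(offset >> 2) checks used throughout (a sketch):

#include <cstdint>

static bool fitsFlddOffset(intptr_t byteOffset)
{
    if (byteOffset & 3)
        return false;                  // FLDD offsets are word-scaled
    intptr_t words = byteOffset >> 2;
    return words >= -128 && words <= 127;   // the isS8(d >> 2) test
}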
void
Assembler::asm_quad(LInsp ins)
{
Reservation *rR = getresv(ins);
int d = disp(rR);
//asm_output(">>> asm_quad");
Reservation *res = getresv(ins);
int d = disp(res);
Register rr = res->reg;
NanoAssert(d || rr != UnknownReg);
const int32_t* p = (const int32_t*) (ins-2);
#ifdef NJ_ARM_VFP
freeRsrcOf(ins, false);
// XXX We probably want nochk versions of FLDD/FSTD
underrunProtect(16 + LD32_size);
// grab a register to do the load into if we don't have one already;
// XXX -- maybe do a mmq in this case? We're going to use our
// D7 register that's never allocated (since it's the one we use
// for int-to-double conversions), so we don't have to worry about
// spilling something in a fp reg.
if (rr == UnknownReg)
rr = D7;
if (d)
FSTD(rr, FP, d);
asm_quad_nochk(rr, p);
#else
freeRsrcOf(ins, false);
if (d) {
const int32_t* p = (const int32_t*) (ins-2);
STi(FP,d+4,p[1]);
STi(FP,d,p[0]);
underrunProtect(LD32_size * 2 + 8);
STR(Scratch, FP, d+4);
LD32_nochk(Scratch, p[1]);
STR(Scratch, FP, d);
LD32_nochk(Scratch, p[0]);
}
#endif
//asm_output("<<< asm_quad");
}
bool
@ -393,9 +600,17 @@ Assembler::asm_qlo(LInsp ins, LInsp q)
void
Assembler::asm_nongp_copy(Register r, Register s)
{
// we will need this for VFP support
(void)r; (void)s;
NanoAssert(false);
if ((rmask(r) & FpRegs) && (rmask(s) & FpRegs)) {
// fp->fp
FCPYD(r, s);
} else if ((rmask(r) & GpRegs) && (rmask(s) & FpRegs)) {
// fp->gp
// who's doing this and why?
NanoAssert(0);
// FMRS(r, loSingleVfp(s));
} else {
NanoAssert(0);
}
}
Register
@ -416,31 +631,41 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
// get a scratch reg
Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs)));
_allocator.addFree(t);
ST(rd, dd+4, t);
LD(t, ds+4, rs);
ST(rd, dd, t);
LD(t, ds, rs);
// XXX use LDM,STM
STR(t, rd, dd+4);
LDR(t, rs, ds+4);
STR(t, rd, dd);
LDR(t, rs, ds);
}
void
Assembler::asm_pusharg(LInsp p)
Assembler::asm_pusharg(LInsp arg)
{
// arg goes on stack
Reservation* rA = getresv(p);
if (rA == 0)
{
Register ra = findRegFor(p, GpRegs);
ST(SP,0,ra);
}
else if (rA->reg == UnknownReg)
{
ST(SP,0,Scratch);
LD(Scratch,disp(rA),FP);
}
Reservation* argRes = getresv(arg);
bool quad = arg->isQuad();
intptr_t stack_growth = quad ? 8 : 4;
Register ra;
if (argRes)
ra = argRes->reg;
else
{
ST(SP,0,rA->reg);
ra = findRegFor(arg, quad ? FpRegs : GpRegs);
if (ra == UnknownReg) {
STR(Scratch, SP, 0);
LDR(Scratch, FP, disp(argRes));
} else {
if (!quad) {
Register ra = findRegFor(arg, GpRegs);
STR(ra, SP, 0);
} else {
Register ra = findRegFor(arg, FpRegs);
FSTD(ra, SP, 0);
}
}
SUBi(SP, stack_growth);
}
void
@ -470,22 +695,6 @@ Assembler::nativePageSetup()
}
}
void
Assembler::flushCache(NIns* n1, NIns* n2) {
#if defined(UNDER_CE)
// we changed the code, so we need to do this (sadly)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX)
// Just need to clear this one page (not even the whole page really)
//Page *page = (Page*)pageTop(_nIns);
register unsigned long _beg __asm("a1") = (unsigned long)(n1);
register unsigned long _end __asm("a2") = (unsigned long)(n2);
register unsigned long _flg __asm("a3") = 0;
register unsigned long _swi __asm("r7") = 0xF0002;
__asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi));
#endif
}
NIns*
Assembler::asm_adjustBranch(NIns* at, NIns* target)
{
@ -497,9 +706,16 @@ Assembler::asm_adjustBranch(NIns* at, NIns* target)
NIns* was = (NIns*) at[3];
//fprintf (stderr, "Adjusting branch @ 0x%8x: 0x%x -> 0x%x\n", at+3, at[3], target);
at[3] = (NIns)target;
flushCache(at, at+4);
#if defined(UNDER_CE)
// we changed the code, so we need to do this (sadly)
FlushInstructionCache(GetCurrentProcess(), NULL, NULL);
#elif defined(AVMPLUS_LINUX)
__clear_cache((char*)at, (char*)(at+4));
#endif
#ifdef AVMPLUS_PORTING_API
NanoJIT_PortAPI_FlushInstructionCache(at, at+4);
@ -550,6 +766,9 @@ Assembler::BL_far(NIns* addr)
// point to the right spot before branching
underrunProtect(16);
// TODO use a slot in const pool for address, but emit single insn
// for branch if offset fits
// the address
*(--_nIns) = (NIns)((addr));
// bx ip // branch to the address we loaded earlier
@ -558,17 +777,29 @@ Assembler::BL_far(NIns* addr)
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) );
// ldr ip, [pc + #4] // load the address into ip, reading it from [pc+4]
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4));
//fprintf (stderr, "BL_far sequence @ 0x%08x\n", _nIns);
asm_output1("bl %p (32-bit)", addr);
}
void
Assembler::BL(NIns* addr)
{
intptr_t offs = PC_OFFSET_FROM(addr,(intptr_t)_nIns-4);
if (JMP_S24_OFFSET_OK(offs)) {
// we can do this with a single BL call
intptr_t offs = PC_OFFSET_FROM(addr,_nIns-1);
//fprintf (stderr, "BL: 0x%x (offs: %d [%x]) @ 0x%08x\n", addr, offs, offs, (intptr_t)(_nIns-1));
if (isS24(offs)) {
// try to do this with a single S24 call;
// recompute offset in case underrunProtect had to allocate a new page
underrunProtect(4);
*(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((offs)>>2) & 0xFFFFFF) ); \
offs = PC_OFFSET_FROM(addr,_nIns-1);
}
if (isS24(offs)) {
// already did underrunProtect above
*(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((offs)>>2) & 0xFFFFFF) );
asm_output1("bl %p", addr);
} else {
BL_far(addr);
@ -579,6 +810,7 @@ void
Assembler::CALL(const CallInfo *ci)
{
intptr_t addr = ci->_address;
BL((NIns*)addr);
asm_output1(" (call %s)", ci->_name);
}
@ -586,21 +818,226 @@ Assembler::CALL(const CallInfo *ci)
void
Assembler::LD32_nochk(Register r, int32_t imm)
{
// We can always reach the const pool, since it's on the same page (<4096)
underrunProtect(8);
// We should always reach the const pool, since it's on the same page (<4096);
// if we can't, someone didn't underrunProtect enough.
*(++_nSlot) = (int)imm;
//fprintf (stderr, "wrote slot(2) %p with %08x, jmp @ %p\n", _nSlot, (intptr_t)imm, _nIns-1);
int offset = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4);
int offset = PC_OFFSET_FROM(_nSlot,_nIns-1);
NanoAssert(JMP_S24_OFFSET_OK(offset) && (offset < 0));
NanoAssert(isS12(offset) && (offset < 0));
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | ((r)<<12) | ((-offset) & 0xFFFFFF) );
asm_output2("ld %s,%d",gpn(r),imm);
asm_output2(" (%d(PC) = 0x%x)", offset, imm);
LDR_nochk(r,PC,offset);
}
// Branch to target address _t with condition _c, doing underrun
// checks (_chk == 1) or skipping them (_chk == 0).
//
// If the jump fits in a relative jump (+/-32MB), emit that.
// If the jump is unconditional, emit the dest address inline in
// the instruction stream and load it into pc.
// If the jump has a condition, but no one's mucked with _nIns and our _nSlot
// pointer is valid, stick the constant in the slot and emit a conditional
// load into pc.
// Otherwise, emit the conditional load into pc from a nearby constant,
// and emit a jump to jump over it in case the condition fails.
//
// NB: JMP_nochk depends on this not calling samepage() when _c == AL
void
Assembler::B_cond_chk(ConditionCode _c, NIns* _t, bool _chk)
{
int32 offs = PC_OFFSET_FROM(_t,_nIns-1);
//fprintf(stderr, "B_cond_chk target: 0x%08x offset: %d @0x%08x\n", _t, offs, _nIns-1);
if (isS24(offs)) {
if (_chk) underrunProtect(4);
offs = PC_OFFSET_FROM(_t,_nIns-1);
}
if (isS24(offs)) {
*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) );
} else if (_c == AL) {
if(_chk) underrunProtect(8);
*(--_nIns) = (NIns)(_t);
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 );
} else if (samepage(_nIns,_nSlot)) {
if(_chk) underrunProtect(8);
*(++_nSlot) = (NIns)(_t);
offs = PC_OFFSET_FROM(_nSlot,_nIns-1);
NanoAssert(offs < 0);
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFFFFF) );
} else {
if(_chk) underrunProtect(12);
*(--_nIns) = (NIns)(_t);
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((-4)>>2) & 0xFFFFFF );
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 );
}
asm_output2("%s %p", _c == AL ? "jmp" : "b(cnd)", (void*)(_t));
}
/*
* VFP
*/
#ifdef NJ_ARM_VFP
void
Assembler::asm_i2f(LInsp ins)
{
Register rr = prepResultReg(ins, FpRegs);
Register srcr = findRegFor(ins->oprnd1(), GpRegs);
// todo: support int value in memory, as per x86
NanoAssert(srcr != UnknownReg);
FSITOD(rr, FpSingleScratch);
FMSR(FpSingleScratch, srcr);
}
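Note the order: nanojit emits instructions backwards, so FSITOD appears before FMSR in the source but executes after it. The runtime effect of the pair, as a sketch:

#include <cstdint>

// FMSR moves the int into S14, then FSITOD converts S14 into the
// destination double register -- i.e. a plain signed int-to-double
static double i2f_semantics(int32_t v)
{
    return static_cast<double>(v);
}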
void
Assembler::asm_u2f(LInsp ins)
{
Register rr = prepResultReg(ins, FpRegs);
Register sr = findRegFor(ins->oprnd1(), GpRegs);
// todo: support int value in memory, as per x86
NanoAssert(sr != UnknownReg);
FUITOD(rr, FpSingleScratch);
FMSR(FpSingleScratch, sr);
}
void
Assembler::asm_fneg(LInsp ins)
{
LInsp lhs = ins->oprnd1();
Register rr = prepResultReg(ins, FpRegs);
Reservation* rA = getresv(lhs);
Register sr;
if (!rA || rA->reg == UnknownReg)
sr = findRegFor(lhs, FpRegs);
else
sr = rA->reg;
FNEGD(rr, sr);
}
void
Assembler::asm_fop(LInsp ins)
{
LInsp lhs = ins->oprnd1();
LInsp rhs = ins->oprnd2();
LOpcode op = ins->opcode();
NanoAssert(op >= LIR_fadd && op <= LIR_fdiv);
// rr = ra OP rb
Register rr = prepResultReg(ins, FpRegs);
Register ra = findRegFor(lhs, FpRegs);
Register rb = (rhs == lhs) ? ra : findRegFor(rhs, FpRegs);
// XXX special-case 1.0 and 0.0
if (op == LIR_fadd)
FADDD(rr,ra,rb);
else if (op == LIR_fsub)
FSUBD(rr,ra,rb);
else if (op == LIR_fmul)
FMULD(rr,ra,rb);
else //if (op == LIR_fdiv)
FDIVD(rr,ra,rb);
}
void
Assembler::asm_fcmp(LInsp ins)
{
LInsp lhs = ins->oprnd1();
LInsp rhs = ins->oprnd2();
LOpcode op = ins->opcode();
NanoAssert(op >= LIR_feq && op <= LIR_fge);
Register ra = findRegFor(lhs, FpRegs);
Register rb = findRegFor(rhs, FpRegs);
// We can't uniquely identify fge/fle via a single bit
// pattern (since equality and lt/gt are separate bits);
// so convert to the single-bit variant.
if (op == LIR_fge) {
Register temp = ra;
ra = rb;
rb = temp;
op = LIR_flt;
} else if (op == LIR_fle) {
Register temp = ra;
ra = rb;
rb = temp;
op = LIR_fgt;
}
// There is no way to test for an unordered result using
// the conditional form of an instruction; the encoding (C=1 V=1)
// ends up having overlaps with a few other tests. So, test for
// the explicit mask.
uint8_t mask = 0x0;
// NZCV
// for a valid ordered result, V is always 0 from VFP
if (op == LIR_feq)
// ZC // cond EQ (both equal and "not less than")
mask = 0x6;
else if (op == LIR_flt)
// N // cond MI
mask = 0x8;
else if (op == LIR_fgt)
// C // cond CS
mask = 0x2;
else
NanoAssert(0);
/*
// these were converted into gt and lt above.
if (op == LIR_fle)
// NZ // cond LE
mask = 0xC;
else if (op == LIR_fge)
// ZC // cond fail?
mask = 0x6;
*/
// TODO XXX could do this as fcmpd; fmstat; tstvs rX, #0 the tstvs
// would reset the status bits if V (NaN flag) is set, but that
// doesn't work for NE. For NE could teqvs rX, #1. rX needs to
// be any register that has lsb == 0, such as sp/fp/pc.
// Test explicitly with the full mask; if V is set, the test will fail.
// Assumption is that this will be followed up by a BEQ/BNE
CMPi(Scratch, mask);
// grab just the condition fields
SHRi(Scratch, 28);
MRS(Scratch);
// do the comparison and get results loaded in ARM status register
FMSTAT();
FCMPD(ra, rb);
}
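Read bottom-up for execution order: FCMPD/FMSTAT set the flags, MRS copies CPSR into Scratch, the shift isolates the NZCV nibble, and CMPi compares it against the one pattern that means "ordered and condition true". In C terms (a sketch):

#include <cstdint>

// after FMSTAT the top CPSR bits are N=8, Z=4, C=2, V=1; a NaN sets
// V, so requiring the whole nibble to equal the mask rejects
// unordered results without a separate test
static bool fcmpResult(uint32_t cpsr, uint8_t mask)
{
    uint32_t nzcv = cpsr >> 28;    // the SHRi(Scratch, 28)
    return nzcv == mask;           // the CMPi plus the following BEQ
}

// feq -> mask 0x6 (Z,C set), flt -> 0x8 (N set), fgt -> 0x2 (C set)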
Register
Assembler::asm_prep_fcall(Reservation* rR, LInsp ins)
{
// We have nothing to do here; we do it all in asm_call.
return UnknownReg;
}
#endif /* NJ_ARM_VFP */
}
#endif /* FEATURE_NANOJIT */
}

View file

@ -47,14 +47,28 @@ namespace nanojit
const int NJ_LOG2_PAGE_SIZE = 12; // 4K
#define NJ_MAX_REGISTERS 11
// If NJ_ARM_VFP is defined, then VFP is assumed to
// be present. If it's not defined, then softfloat
// is used, and NJ_SOFTFLOAT is defined.
#define NJ_ARM_VFP
#ifdef NJ_ARM_VFP
// only d0-d7; we'll use d7 as s14-s15 for i2f/u2f/etc.
#define NJ_VFP_MAX_REGISTERS 8
#else
#define NJ_VFP_MAX_REGISTERS 0
#define NJ_SOFTFLOAT
#endif
#define NJ_MAX_REGISTERS (11 + NJ_VFP_MAX_REGISTERS)
#define NJ_MAX_STACK_ENTRY 256
#define NJ_MAX_PARAMETERS 16
#define NJ_ALIGN_STACK 8
#define NJ_STACK_OFFSET 8
#define NJ_SOFTFLOAT
#define NJ_STACK_GROWTH_UP
#define NJ_STACK_OFFSET 0
#define NJ_CONSTANT_POOLS
const int NJ_MAX_CPOOL_OFFSET = 4096;
@ -75,25 +89,40 @@ typedef enum {
R8 = 8,
R9 = 9,
R10 = 10,
//FP =11,
FP = 11,
IP = 12,
SP = 13,
LR = 14,
PC = 15,
FP = 13,
// FP regs
D0 = 16,
D1 = 17,
D2 = 18,
D3 = 19,
D4 = 20,
D5 = 21,
D6 = 22,
D7 = 23,
// Pseudo-register for floating point
F0 = 0,
FirstFloatReg = 16,
LastFloatReg = 22,
// helpers
FRAME_PTR = 11,
ESP = 13,
ESP = SP,
FirstReg = 0,
#ifdef NJ_ARM_VFP
LastReg = 23,
#else
LastReg = 10,
Scratch = 12,
UnknownReg = 11
#endif
Scratch = IP,
UnknownReg = 31,
// special value referring to S14
FpSingleScratch = 24
} Register;
/* ARM condition codes */
@ -123,13 +152,30 @@ typedef struct _FragInfo {
NIns* epilogue;
} FragInfo;
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;
static const RegisterMask FpRegs = 0x0000; // FST0-FST7
#ifdef NJ_ARM_VFP
static const RegisterMask SavedFpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6 | 1<<D7;
#else
static const RegisterMask SavedFpRegs = 0;
#endif
static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10 | SavedFpRegs;
static const RegisterMask FpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6; // no D7; S14-S15 are used for i2f/u2f.
static const RegisterMask GpRegs = 0x07FF;
static const RegisterMask AllowableFlagRegs = 1<<R0 | 1<<R1 | 1<<R2 | 1<<R3 | 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10;
#define IsFpReg(_r) ((rmask(_r) & (FpRegs | (1<<D7))) != 0)
#define IsGpReg(_r) ((rmask(_r) & (GpRegs | (1<<Scratch))) != 0)
#define FpRegNum(_fpr) ((_fpr) - FirstFloatReg)
#define firstreg() R0
#define nextreg(r) (Register)((int)r+1)
#define nextreg(r) ((Register)((int)(r)+1))
#if 0
static Register nextreg(Register r) {
if (r == R10)
return D0;
return (Register)(r+1);
}
#endif
// only good for normal regs
#define imm2register(c) (Register)(c-1)
verbose_only( extern const char* regNames[]; )
@ -148,11 +194,12 @@ verbose_only( extern const char* regNames[]; )
void BL(NIns*); \
void BL_far(NIns*); \
void CALL(const CallInfo*); \
void B_cond_chk(ConditionCode, NIns*, bool); \
void underrunProtect(int bytes); \
bool has_cmov; \
void nativePageReset(); \
void nativePageSetup(); \
void flushCache(NIns*,NIns*); \
void asm_quad_nochk(Register, const int32_t*); \
int* _nSlot; \
int* _nExitSlot;
@ -174,6 +221,7 @@ verbose_only( extern const char* regNames[]; )
#define FUNCADDR(addr) ( ((int)addr) )
#define OP_IMM (1<<25)
#define OP_STAT (1<<20)
#define COND_AL (0xE<<28)
@ -189,7 +237,7 @@ typedef enum {
ROR_reg = 7 // Rotate Right
} ShiftOperator;
#define LD32_size 4
#define LD32_size 8
#define BEGIN_NATIVE_CODE(x) \
{ DWORD* _nIns = (uint8_t*)x
@ -251,45 +299,58 @@ typedef enum {
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<21) | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) ); \
asm_output2("eor %s,%d",gpn(_r),(_imm)); } while(0)
// _l = _l + _r
#define ADD(_l,_r) do { \
// _d = _n + _m
#define arm_ADD(_d,_n,_m) do { \
underrunProtect(4); \
*(--_nIns) = (NIns)( COND_AL | (1<<23) | ((_r)<<16) | ((_l)<<12) | (_l)); \
asm_output2("add %s,%s",gpn(_l),gpn(_r)); } while(0)
*(--_nIns) = (NIns)( COND_AL | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | (_m)); \
asm_output3("add %s,%s+%s",gpn(_d),gpn(_n),gpn(_m)); } while(0)
// _r = _r + _imm
#define ADDi(_r,_imm) do { \
if ((_imm)>-256 && (_imm)<256) { \
// _l = _l + _r
#define ADD(_l,_r) arm_ADD(_l,_l,_r)
// TODO: we can do better here, since we can rotate the 8-bit immediate left by
// an even number of bits; should count zeros at the end.
// Note that this sometimes converts negative immediate values to a sub.
// _d = _n + _imm
#define arm_ADDi(_d,_n,_imm) do { \
if ((_imm) > -256 && (_imm) < 256) { \
underrunProtect(4); \
if ((_imm)>=0) \
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) ); \
*(--_nIns) = (NIns)( COND_AL | OP_IMM | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | ((_imm)&0xFF) ); \
else \
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | ((-(_imm))&0xFF) ); \
*(--_nIns) = (NIns)( COND_AL | OP_IMM | OP_STAT | (1<<22) | ((_n)<<16) | ((_d)<<12) | ((-(_imm))&0xFF) ); \
} else { \
if ((_imm)>=0) { \
if ((_imm)<=1020 && (((_imm)&3)==0) ) { \
underrunProtect(4); \
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | (15<<8)| ((_imm)>>2) ); \
*(--_nIns) = (NIns)( COND_AL | OP_IMM | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | (15<<8)| ((_imm)>>2) ); \
} else { \
underrunProtect(4+LD32_size); \
*(--_nIns) = (NIns)( COND_AL | (1<<23) | ((_r)<<16) | ((_r)<<12) | (Scratch)); \
*(--_nIns) = (NIns)( COND_AL | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | (Scratch)); \
LD32_nochk(Scratch, _imm); \
} \
} else { \
underrunProtect(4+LD32_size); \
*(--_nIns) = (NIns)( COND_AL | OP_STAT | (1<<22) | ((_n)<<16) | ((_d)<<12) | (Scratch)); \
LD32_nochk(Scratch, -(_imm)); \
} \
} \
asm_output3("add %s,%s,%d",gpn(_d),gpn(_n),(_imm)); \
} while(0)
/*
* There used to be a:
if ((_imm)>=-510) { \
underrunProtect(8); \
int rem = -(_imm) - 255; \
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | ((rem)&0xFF) ); \
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | (0xFF) ); \
} else { \
underrunProtect(4+LD32_size); \
*(--_nIns) = (NIns)( COND_AL | (1<<22) | ((_r)<<16) | ((_r)<<12) | (Scratch)); \
LD32_nochk(Scratch, -(_imm)); \
} \
} \
} \
asm_output2("addi %s,%d",gpn(_r),(_imm)); \
} while(0)
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_n)<<16) | ((_d)<<12) | ((rem)&0xFF) ); \
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_n)<<16) | ((_d)<<12) | (0xFF) ); \
} else {
* above, but if we do that we can't really update the status registers. So don't do that.
*/
#define ADDi(_r,_imm) arm_ADDi(_r,_r,_imm)
// _l = _l - _r
#define SUB(_l,_r) do { \
@ -402,6 +463,13 @@ typedef enum {
*(--_nIns) = (NIns)( COND_AL | (0x11<<20) | ((_d)<<16) | (_s) ); \
asm_output2("test %s,%s",gpn(_d),gpn(_s)); } while(0)
#define TSTi(_d,_imm) do { \
underrunProtect(4); \
NanoAssert(((_imm) & 0xff) == (_imm)); \
*(--_nIns) = (NIns)( COND_AL | OP_IMM | (0x11<<20) | ((_d) << 16) | (0xF<<12) | ((_imm) & 0xff) ); \
asm_output2("tst %s,#0x%x", gpn(_d), _imm); \
} while (0);
// CMP
#define CMP(_l,_r) do { \
underrunProtect(4); \
@ -429,7 +497,7 @@ typedef enum {
LD32_nochk(Scratch, (_imm)); \
} \
} \
asm_output2("cmp %s,%X",gpn(_r),(_imm)); \
asm_output2("cmp %s,0x%x",gpn(_r),(_imm)); \
} while(0)
// MOV
@ -457,25 +525,33 @@ typedef enum {
#define MRNO(dr,sr) MR_cond(dr, sr, VC, "movvc") // overflow clear
#define MRNC(dr,sr) MR_cond(dr, sr, CC, "movcc") // carry clear
#define LD(_d,_off,_b) do { \
if ((_off)<0) { \
underrunProtect(4); \
#define LDR_chk(_d,_b,_off,_chk) do { \
if (IsFpReg(_d)) { \
FLDD_chk(_d,_b,_off,_chk); \
} else if ((_off)<0) { \
if (_chk) underrunProtect(4); \
NanoAssert((_off)>-4096); \
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | ((_b)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
} else { \
if (isS16(_off) || isU16(_off)) { \
underrunProtect(4); \
if (_chk) underrunProtect(4); \
NanoAssert((_off)<4096); \
*(--_nIns) = (NIns)( COND_AL | (0x59<<20) | ((_b)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
} else { \
underrunProtect(4+LD32_size); \
if (_chk) underrunProtect(4+LD32_size); \
*(--_nIns) = (NIns)( COND_AL | (0x79<<20) | ((_b)<<16) | ((_d)<<12) | Scratch ); \
LD32_nochk(Scratch, _off); \
} \
} \
asm_output3("ld %s,%d(%s)",gpn((_d)),(_off),gpn((_b))); \
asm_output3("ldr %s,%d(%s)",gpn((_d)),(_off),gpn((_b))); \
} while(0)
#define LDR(_d,_b,_off) LDR_chk(_d,_b,_off,0)
#define LDR_nochk(_d,_b,_off) LDR_chk(_d,_b,_off,1)
// i386 compat, for Assembler.cpp
#define LD(reg,offset,base) LDR_chk(reg,base,offset,1)
#define ST(base,offset,reg) STR(reg,base,offset)
#define LDi(_d,_imm) do { \
if (isS8((_imm)) || isU8((_imm))) { \
@ -486,7 +562,7 @@ typedef enum {
underrunProtect(LD32_size); \
LD32_nochk(_d, (_imm)); \
} \
asm_output2("ld %s,%d",gpn((_d)),(_imm)); \
asm_output2("ld %s,0x%x",gpn((_d)),(_imm)); \
} while(0)
@ -501,29 +577,13 @@ typedef enum {
asm_output3("ldrb %s,%d(%s)", gpn(_d),(_off),gpn(_b)); \
} while(0)
#define ST(_b,_off,_r) do { \
#define STR(_d,_n,_off) do { \
NanoAssert(!IsFpReg(_d) && isS12(_off)); \
underrunProtect(4); \
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_b)<<16) | ((_r)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((_r)<<12) | ((_off)&0xFFF) ); \
asm_output3("str %s, %d(%s)",gpn(_r), (_off),gpn(_b)); } while(0)
#define STi(_b,_off,_imm) do { \
NanoAssert((_off)>0); \
if (isS8((_imm)) || isU8((_imm))) { \
underrunProtect(8); \
*(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((Scratch)<<12) | ((_off)&0xFFF) ); \
asm_output3("str %s, %d(%s)",gpn(Scratch), (_off),gpn(_b)); \
if ((_imm)<0) *(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | (Scratch<<12) | (((_imm)^0xFFFFFFFF)&0xFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | (Scratch<<12) | ((_imm)&0xFF) ); \
asm_output2("ld %s,%d",gpn((Scratch)),(_imm)); \
} else { \
underrunProtect(4+LD32_size); \
*(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((Scratch)<<12) | ((_off)&0xFFF) ); \
asm_output3("str %s, %d(%s)",gpn(Scratch), (_off),gpn(_b)); \
LD32_nochk(Scratch, (_imm)); \
} \
} while(0);
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
asm_output3("str %s, %d(%s)",gpn(_d), (_off), gpn(_n)); \
} while(0)
#define LEA(_r,_d,_b) do { \
@ -548,7 +608,7 @@ typedef enum {
//#define RET() INT3()
#define BKPT_nochk() do { \
*(--_nIns) = (NIns)( (0xE<<24) | (0x12<<20) | (0x7<<4) ); } while (0);
*(--_nIns) = (NIns)( (0xE<<24) | (0x12<<20) | (0x7<<4) ); } while (0)
// this is pushing a reg
#define PUSHr(_r) do { \
@ -581,47 +641,10 @@ typedef enum {
*(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) ); \
asm_output1("pop %x", (_mask));} while (0)
// PC always points to current instruction + 8, so when calculating pc-relative
// offsets, use PC+8.
#define PC_OFFSET_FROM(target,frompc) ((intptr_t)(target) - ((intptr_t)(frompc) + 8))
#define JMP_S24_OFFSET_OK(offs) ((-(1<<24)) <= (offs) && (offs) < (1<<24))
// (XXX This ought to be a function instead of a macro)
//
// Branch to target address _t with condition _c, doing underrun
// checks (_chk == 1) or skipping them (_chk == 0).
//
// If the jump fits in a relative jump (+/-32MB), emit that.
// If the jump is unconditional, emit the dest address inline in
// the instruction stream and load it into pc.
// If the jump has a condition, but no one's mucked with _nIns and our _nSlot
// pointer is valid, stick the constant in the slot and emit a conditional
// load into pc.
// Otherwise, emit the conditional load into pc from a nearby constant,
// and emit a jump to jump over it in case the condition fails.
//
// NB: JMP_nochk depends on this not calling samepage() when _c == AL
#define B_cond_chk(_c,_t,_chk) do { \
int32 offs = PC_OFFSET_FROM(_t,(intptr_t)(_nIns)-4); \
if (JMP_S24_OFFSET_OK(offs)) { \
if(_chk) underrunProtect(4); \
*(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) ); \
} else if (_c == AL) { \
if(_chk) underrunProtect(8); \
*(--_nIns) = (NIns)(_t); \
*(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 ); \
} else if (samepage(_nIns,_nSlot)) { \
if(_chk) underrunProtect(8); \
*(++_nSlot) = (NIns)(_t); \
offs = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4); \
NanoAssert(offs < 0); \
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFFFFF) ); \
} else { \
if(_chk) underrunProtect(24); \
*(--_nIns) = (NIns)(_t); \
*(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((-4)>>2) & 0xFFFFFF ); \
*(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 ); \
} \
asm_output2("%s %p\n", _c == AL ? "jmp" : "b(cnd)", (void*)(_t)); \
} while(0)
#define isS12(offs) ((-(1<<12)) <= (offs) && (offs) < (1<<12))
#define B_cond(_c,_t) \
B_cond_chk(_c,_t,1)
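A worked example of the pipeline adjustment (addresses assumed): for a branch at 0x1000 targeting 0x2000, PC reads as 0x1008 when the branch executes, so the encoded field is (0x2000 - 0x1008) >> 2 = 0x3FE.

#include <cstdint>

// PC_OFFSET_FROM as a function, plus the 24-bit field extraction;
// callers must verify isS24(offs) first, as B_cond_chk does above
static int32_t branchField(intptr_t target, intptr_t branchAddr)
{
    intptr_t offs = target - (branchAddr + 8);   // PC reads insn + 8
    return (int32_t)((offs >> 2) & 0xFFFFFF);
}

// branchField(0x2000, 0x1000) == 0x3FE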
@ -665,35 +688,12 @@ typedef enum {
#define JO(t) do {B_cond(VS,t); asm_output1("bvs 0x%08x",(unsigned int)t); } while(0)
#define JNO(t) do {B_cond(VC,t); asm_output1("bvc 0x%08x",(unsigned int)t); } while(0)
// used for testing result of an FP compare
// used for testing result of an FP compare on x86; not used on arm.
// JP = comparison false
#define JP(t) do {B_cond(EQ,NE,t); asm_output1("jp 0x%08x",t); } while(0)
#define JP(t) do {NanoAssert(0); B_cond(NE,t); asm_output1("jp 0x%08x",t); } while(0)
// JNP = comparison true
#define JNP(t) do {B_cond(NE,EQ,t); asm_output1("jnp 0x%08x",t); } while(0)
// floating point
#define FNSTSW_AX() do {NanoAssert(0); asm_output("fnstsw_ax"); } while(0)
#define FFREE(r) do {NanoAssert(0); asm_output1("ffree %s",gpn(b)); } while(0)
#define FSTQ(p,d,b) do {NanoAssert(0); asm_output2("fstq %d(%s)",d,gpn(b)); } while(0)
#define FSTPQ(d,b) FSTQ(1,d,b)
//#define FSTPQ(d,b) do {NanoAssert(0); asm_output2("fstpq %d(%s)",d,gpn(b)); } while(0)
#define FCOM(p,d,b) do {NanoAssert(0); asm_output2("fcom %d(%s)",d,gpn(b)); } while(0)
#define FCOMP(d,b) do {NanoAssert(0); asm_output2("fcomp %d(%s)",d,gpn(b)); } while(0)
#define FLDQ(d,b) do {NanoAssert(0); asm_output2("fldq %d(%s)",d,gpn(b)); } while(0)
#define FILDQ(d,b) do {NanoAssert(0); asm_output2("fildq %d(%s)",d,gpn(b)); } while(0)
#define FILD(d,b) do {NanoAssert(0); asm_output2("fild %d(%s)",d,gpn(b)); } while(0)
#define FADD(d,b) do {NanoAssert(0); asm_output2("faddq %d(%s)",d,gpn(b)); } while(0)
#define FSUB(d,b) do {NanoAssert(0); asm_output2("fsubq %d(%s)",d,gpn(b)); } while(0)
#define FSUBR(d,b) do {NanoAssert(0); asm_output2("fsubr %d(%s)",d,gpn(b)); } while(0)
#define FMUL(d,b) do {NanoAssert(0); asm_output2("fmulq %d(%s)",d,gpn(b)); } while(0)
#define FDIV(d,b) do {NanoAssert(0); asm_output2("fdivq %d(%s)",d,gpn(b)); } while(0)
#define FDIVR(d,b) do {NanoAssert(0); asm_output2("fdivr %d(%s)",d,gpn(b)); } while(0)
#define FSTP(r) do {NanoAssert(0); asm_output1("fst st(%d)",r); } while(0)
#define FLD1() do {NanoAssert(0); asm_output("fld1"); } while(0)
#define FLDZ() do {NanoAssert(0); asm_output("fldz"); } while(0)
#define JNP(t) do {NanoAssert(0); B_cond(EQ,t); asm_output1("jnp 0x%08x",t); } while(0)
// MOV(EQ) _r, #1
@ -758,17 +758,147 @@ typedef enum {
} while(0)
#define STMIA(_b, _mask) do { \
underrunProtect(2); \
underrunProtect(4); \
NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask)); \
*(--_nIns) = (NIns)(COND_AL | (0x8A<<20) | ((_b)<<16) | (_mask)&0xFF); \
asm_output2("stmia %s!,{%x}", gpn(_b), _mask); \
asm_output2("stmia %s!,{0x%x}", gpn(_b), _mask); \
} while (0)
#define LDMIA(_b, _mask) do { \
underrunProtect(2); \
underrunProtect(4); \
NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask)); \
*(--_nIns) = (NIns)(COND_AL | (0x8B<<20) | ((_b)<<16) | (_mask)&0xFF); \
asm_output2("ldmia %s!,{%x}", gpn(_b), (_mask)); \
asm_output2("ldmia %s!,{0x%x}", gpn(_b), (_mask)); \
} while (0)
#define MRS(_d) do { \
underrunProtect(4); \
*(--_nIns) = (NIns)(COND_AL | (0x10<<20) | (0xF<<16) | ((_d)<<12)); \
asm_output1("msr %s", gpn(_d)); \
} while (0)
/*
* VFP
*/
#define FMDRR(_Dm,_Rd,_Rn) do { \
underrunProtect(4); \
NanoAssert(IsFpReg(_Dm) && IsGpReg(_Rd) && IsGpReg(_Rn)); \
*(--_nIns) = (NIns)( COND_AL | (0xC4<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \
asm_output3("fmdrr %s,%s,%s", gpn(_Dm), gpn(_Rd), gpn(_Rn)); \
} while (0)
#define FMRRD(_Rd,_Rn,_Dm) do { \
underrunProtect(4); \
NanoAssert(IsGpReg(_Rd) && IsGpReg(_Rn) && IsFpReg(_Dm)); \
*(--_nIns) = (NIns)( COND_AL | (0xC5<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \
asm_output3("fmrrd %s,%s,%s", gpn(_Rd), gpn(_Rn), gpn(_Dm)); \
} while (0)
#define FSTD(_Dd,_Rn,_offs) do { \
underrunProtect(4); \
NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2)); \
NanoAssert(IsFpReg(_Dd) && !IsFpReg(_Rn)); \
int negflag = 1<<23; \
intptr_t offs = (_offs); \
if (_offs < 0) { \
negflag = 0<<23; \
offs = -(offs); \
} \
*(--_nIns) = (NIns)( COND_AL | (0xD0<<20) | ((_Rn)<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | negflag | ((offs>>2)&0xff) ); \
asm_output3("fstd %s,%s(%d)", gpn(_Dd), gpn(_Rn), _offs); \
} while (0)
#define FLDD_chk(_Dd,_Rn,_offs,_chk) do { \
if(_chk) underrunProtect(4); \
NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2)); \
NanoAssert(IsFpReg(_Dd) && !IsFpReg(_Rn)); \
int negflag = 1<<23; \
intptr_t offs = (_offs); \
if (_offs < 0) { \
negflag = 0<<23; \
offs = -(offs); \
} \
*(--_nIns) = (NIns)( COND_AL | (0xD1<<20) | ((_Rn)<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | negflag | ((offs>>2)&0xff) ); \
asm_output3("fldd %s,%s(%d)", gpn(_Dd), gpn(_Rn), _offs); \
} while (0)
#define FLDD(_Dd,_Rn,_offs) FLDD_chk(_Dd,_Rn,_offs,1)
#define FSITOD(_Dd,_Sm) do { \
underrunProtect(4); \
NanoAssert(IsFpReg(_Dd) && ((_Sm) == FpSingleScratch)); \
*(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2F<<6) | (0<<5) | (0x7) ); \
asm_output2("fsitod %s,%s", gpn(_Dd), gpn(_Sm)); \
} while (0)
#define FUITOD(_Dd,_Sm) do { \
underrunProtect(4); \
NanoAssert(IsFpReg(_Dd) && ((_Sm) == FpSingleScratch)); \
*(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2D<<6) | (0<<5) | (0x7) ); \
asm_output2("fuitod %s,%s", gpn(_Dd), gpn(_Sm)); \
} while (0)
#define FMSR(_Sn,_Rd) do { \
underrunProtect(4); \
NanoAssert(((_Sn) == FpSingleScratch) && IsGpReg(_Rd)); \
*(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
asm_output2("fmsr %s,%s", gpn(_Sn), gpn(_Rd)); \
} while (0)
#define FNEGD(_Dd,_Dm) do { \
underrunProtect(4); \
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \
*(--_nIns) = (NIns)( COND_AL | (0xEB1<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
asm_output2("fnegd %s,%s", gpn(_Dd), gpn(_Dm)); \
} while (0)
#define FADDD(_Dd,_Dn,_Dm) do { \
underrunProtect(4); \
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
*(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
asm_output3("faddd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
} while (0)
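A quick sanity check of the encoding above (a sketch; the expected word is the standard VADD.F64 encoding): FADDD(D0, D1, D2) should assemble to the same word as faddd d0,d1,d2.

#include <cstdint>
#include <cstdio>

int main()
{
    const uint32_t COND_AL = 0xEu << 28;
    uint32_t Dd = 0, Dn = 1, Dm = 2;    // FpRegNum(D0), (D1), (D2)
    uint32_t ins = COND_AL | (0xE3u << 20) | (Dn << 16) | (Dd << 12)
                 | (0xB0u << 4) | Dm;
    printf("%08X\n", ins);              // EE310B02 = faddd d0,d1,d2
}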
#define FSUBD(_Dd,_Dn,_Dm) do { \
underrunProtect(4); \
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
*(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
asm_output3("fsubd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
} while (0)
#define FMULD(_Dd,_Dn,_Dm) do { \
underrunProtect(4); \
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
*(--_nIns) = (NIns)( COND_AL | (0xE2<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
asm_output3("fmuld %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
} while (0)
#define FDIVD(_Dd,_Dn,_Dm) do { \
underrunProtect(4); \
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \
*(--_nIns) = (NIns)( COND_AL | (0xE8<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \
asm_output3("fmuld %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \
} while (0)
#define FMSTAT() do { \
underrunProtect(4); \
*(--_nIns) = (NIns)( COND_AL | 0x0EF1FA10); \
asm_output("fmstat"); \
} while (0)
#define FCMPD(_Dd,_Dm) do { \
underrunProtect(4); \
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \
*(--_nIns) = (NIns)( COND_AL | (0xEB4<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
asm_output2("fcmpd %s,%s", gpn(_Dd), gpn(_Dm)); \
} while (0)
#define FCPYD(_Dd,_Dm) do { \
underrunProtect(4); \
NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \
*(--_nIns) = (NIns)( COND_AL | (0xEB0<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \
asm_output2("fcpyd %s,%s", gpn(_Dd), gpn(_Dm)); \
} while (0)
}
#endif // __nanojit_NativeThumb__

View file

@ -68,7 +68,9 @@ namespace nanojit
debug_only( uint32_t count; )
debug_only( RegisterMask managed; ) // bitfield of 0..NJ_MAX_REGISTERS denoting which are under our management
LIns* active[NJ_MAX_REGISTERS]; // active[r] = OP that defines r
// RegisterMask is a 32-bit value, so we can never have more than 32 active.
// hardcode 32 here in case we have non-contiguous register numbers
LIns* active[32]; // active[r] = OP that defines r
RegisterMask free;
RegisterMask used;

View file

@ -151,6 +151,7 @@ namespace nanojit
#define isU8(i) ( int32_t(i) == uint8_t(i) )
#define isS16(i) ( int32_t(i) == int16_t(i) )
#define isU16(i) ( int32_t(i) == uint16_t(i) )
#define isS24(i) ( ((int32_t(i)<<8)>>8) == (i) )
#define alignTo(x,s) ((((uintptr_t)(x)))&~(((uintptr_t)s)-1))
#define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1))
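isS24 round-trips the value through an 8-bit shift pair: shifting left then arithmetically right sign-extends bit 23, so equality holds exactly when the value fits in a signed 24-bit field. As a function (sketch; relies on arithmetic right shift of negatives, as the macro does):

#include <cstdint>

static bool isS24_fn(int32_t i)
{
    return ((i << 8) >> 8) == i;   // survives iff bits 31..23 agree
}

// isS24_fn(0x007FFFFF) == true    ( 2^23 - 1, largest S24 value)
// isS24_fn(0x00800000) == false   ( 2^23 needs 25 signed bits)
// isS24_fn(-8388608)   == true    (-2^23, smallest S24 value)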