diff --git a/js/src/jstracer.cpp b/js/src/jstracer.cpp index 88572c844dc..e5bf42dc69f 100644 --- a/js/src/jstracer.cpp +++ b/js/src/jstracer.cpp @@ -119,7 +119,7 @@ static bool nesting_enabled = true; static bool oracle_enabled = true; static bool did_we_check_sse2 = false; -#ifdef DEBUG +#if defined(DEBUG) || defined(INCLUDE_VERBOSE_OUTPUT) static bool verbose_debug = getenv("TRACEMONKEY") && strstr(getenv("TRACEMONKEY"), "verbose"); #define debug_only_v(x) if (verbose_debug) { x; } #else @@ -282,7 +282,7 @@ static bool isi2f(LInsp i) if (i->isop(LIR_i2f)) return true; -#ifdef NANOJIT_ARM +#if defined(NANOJIT_ARM) && defined(NJ_SOFTFLOAT) if (i->isop(LIR_qjoin) && i->oprnd1()->isop(LIR_call) && i->oprnd2()->isop(LIR_callh)) @@ -300,7 +300,7 @@ static bool isu2f(LInsp i) if (i->isop(LIR_u2f)) return true; -#ifdef NANOJIT_ARM +#if defined(NANOJIT_ARM) && defined(NJ_SOFTFLOAT) if (i->isop(LIR_qjoin) && i->oprnd1()->isop(LIR_call) && i->oprnd2()->isop(LIR_callh)) @@ -315,7 +315,7 @@ static bool isu2f(LInsp i) static LInsp iu2fArg(LInsp i) { -#ifdef NANOJIT_ARM +#if defined(NANOJIT_ARM) && defined(NJ_SOFTFLOAT) if (i->isop(LIR_qjoin)) return i->oprnd1()->arg(0); #endif @@ -371,7 +371,7 @@ static bool overflowSafe(LIns* i) ((c->constval() > 0))); } -#ifdef NANOJIT_ARM +#if defined(NJ_SOFTFLOAT) class SoftFloatFilter: public LirWriter { @@ -428,19 +428,6 @@ public: return out->ins2(LIR_eq, bv, out->insImm(1)); } - // not really a softfloat filter, but needed on ARM -- - // arm doesn't mask shifts to 31 like x86 does - if (v == LIR_lsh || - v == LIR_rsh || - v == LIR_ush) - { - if (s1->isconst()) - s1->setimm16(s1->constval() & 31); - else - s1 = out->ins2(LIR_and, s1, out->insImm(31)); - return out->ins2(v, s0, s1); - } - return out->ins2(v, s0, s1); } @@ -455,7 +442,7 @@ public: } }; -#endif +#endif // NJ_SOFTFLOAT class FuncFilter: public LirWriter { @@ -550,6 +537,20 @@ public: return out->ins2(LIR_add, x, y); } } +#ifdef NANOJIT_ARM + else if (v == LIR_lsh || + v == LIR_rsh || + v == LIR_ush) + { + // needed on ARM -- arm doesn't mask shifts to 31 like x86 does + if (s1->isconst()) + s1->setimm16(s1->constval() & 31); + else + s1 = out->ins2(LIR_and, s1, out->insImm(31)); + return out->ins2(v, s0, s1); + } +#endif + return out->ins2(v, s0, s1); } @@ -604,7 +605,7 @@ public: /* In debug mode vpname contains a textual description of the type of the slot during the forall iteration over all slots.
*/ -#ifdef DEBUG +#if defined(DEBUG) || defined(INCLUDE_VERBOSE_OUTPUT) #define DEF_VPNAME const char* vpname; unsigned vpnum #define SET_VPNAME(name) do { vpname = name; vpnum = 0; } while(0) #define INC_VPNUM() do { ++vpnum; } while(0) @@ -821,7 +822,7 @@ TraceRecorder::TraceRecorder(JSContext* cx, GuardRecord* _anchor, Fragment* _fra if (verbose_debug) lir = verbose_filter = new (&gc) VerboseWriter(&gc, lir, lirbuf->names); #endif -#ifdef NANOJIT_ARM +#ifdef NJ_SOFTFLOAT lir = float_filter = new (&gc) SoftFloatFilter(lir); #endif lir = cse_filter = new (&gc) CseFilter(lir, &gc); @@ -867,7 +868,7 @@ TraceRecorder::~TraceRecorder() delete cse_filter; delete expr_filter; delete func_filter; -#ifdef NANOJIT_ARM +#ifdef NJ_SOFTFLOAT delete float_filter; #endif delete lir_buf_writer; @@ -2277,8 +2278,10 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount, union { NIns *code; GuardRecord* (FASTCALL *func)(InterpState*, Fragment*); } u; u.code = f->code(); -#if defined(DEBUG) && defined(NANOJIT_IA32) +#ifdef DEBUG +#if defined(NANOJIT_IA32) uint64 start = rdtsc(); +#endif #endif /* @@ -2362,19 +2365,18 @@ js_ExecuteTree(JSContext* cx, Fragment** treep, uintN& inlineCallCount, js_ReconstructStackDepth(cx, fp->script, fp->regs->pc) == fp->regs->sp); #if defined(DEBUG) && defined(NANOJIT_IA32) - if (verbose_debug) { - printf("leaving trace at %s:%u@%u, op=%s, lr=%p, exitType=%d, sp=%d, ip=%p, " - "cycles=%llu\n", - fp->script->filename, js_PCToLineNumber(cx, fp->script, fp->regs->pc), - fp->regs->pc - fp->script->code, - js_CodeName[*fp->regs->pc], - lr, - lr->exit->exitType, - fp->regs->sp - StackBase(fp), lr->jmp, - (rdtsc() - start)); - } + uint64 cycles = rdtsc() - start; +#else + uint64 cycles = 0; #endif + debug_only_v(printf("leaving trace at %s:%u@%u, exitType=%d, sp=%d, ip=%p, cycles=%llu\n", + fp->script->filename, js_PCToLineNumber(cx, fp->script, fp->regs->pc), + fp->regs->pc - fp->script->code, + lr->exit->exitType, + fp->regs->sp - StackBase(fp), lr->jmp, + cycles)); + /* If this trace is part of a tree, later branches might have added additional globals for which we don't have any type information available in the side exit. We merge in this information from the entry type-map. See also comment in the constructor of TraceRecorder diff --git a/js/src/jstracer.h b/js/src/jstracer.h index 8e287495000..866e4601016 100644 --- a/js/src/jstracer.h +++ b/js/src/jstracer.h @@ -221,7 +221,7 @@ class TraceRecorder { nanojit::LirWriter* cse_filter; nanojit::LirWriter* expr_filter; nanojit::LirWriter* func_filter; -#ifdef NANOJIT_ARM +#ifdef NJ_SOFTFLOAT nanojit::LirWriter* float_filter; #endif nanojit::LIns* cx_ins; diff --git a/js/src/nanojit/Assembler.cpp b/js/src/nanojit/Assembler.cpp index a90cd409202..b34d36e9e6f 100755 --- a/js/src/nanojit/Assembler.cpp +++ b/js/src/nanojit/Assembler.cpp @@ -44,6 +44,7 @@ #if defined(AVMPLUS_LINUX) && defined(AVMPLUS_ARM) #include <asm/unistd.h> +extern "C" void __clear_cache(char *BEG, char *END); #endif namespace nanojit @@ -178,6 +179,8 @@ namespace nanojit // nothing free, steal one // LSRA says pick the one with the furthest use LIns* vic = findVictim(regs,allow,prefer); + NanoAssert(vic != NULL); + Reservation* resv = getresv(vic); // restore vic @@ -446,25 +449,37 @@ namespace nanojit Reservation* resv = getresv(i); Register r; + // if we have an existing reservation and it has a non-unknown + // register allocated, and that register is in our allowed mask, + // return it.
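+ // For example (illustrative values, not from this patch): if i already + // lives in R4 and allow == rmask(R4)|rmask(R5), the test below succeeds + // and no new register needs to be allocated.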
if (resv && (r=resv->reg) != UnknownReg && (rmask(r) & allow)) { return r; } + // figure out what registers are preferred for this instruction RegisterMask prefer = hint(i, allow); + + // if we didn't have a reservation, allocate one now if (!resv) resv = reserveAlloc(i); + // if the reservation doesn't have a register assigned to it... if ((r=resv->reg) == UnknownReg) { + // .. if the cost is 2 and the allowed mask includes + // the saved regs, then prefer just those. if (resv->cost == 2 && (allow&SavedRegs)) prefer = allow&SavedRegs; + // grab one. r = resv->reg = registerAlloc(prefer); _allocator.addActive(r, i); return r; } else { - // r not allowed + // the already-allocated register isn't in the allowed mask; + // we need to grab a new one and then copy over the old + // contents to the new. resv->reg = UnknownReg; _allocator.retire(r); if (resv->cost == 2 && (allow&SavedRegs)) @@ -795,12 +810,15 @@ namespace nanojit # if defined(UNDER_CE) FlushInstructionCache(GetCurrentProcess(), NULL, NULL); # elif defined(AVMPLUS_LINUX) - // XXX fixme flush adjacent pages together for (int i = 0; i < 2; i++) { Page *p = (i == 0) ? _nativePages : _nativeExitPages; + Page *first = p; while (p) { - flushCache((NIns*)p, (NIns*)((intptr_t)(p) + NJ_PAGE_SIZE)); + if (!p->next || p->next != p+1) { + __clear_cache((char*)first, (char*)(p+1)); + first = p->next; + } p = p->next; } } @@ -852,7 +870,7 @@ namespace nanojit switch(op) { default: - NanoAssertMsgf(false, ("unsupported LIR instruction: %d (~0x40: %d)\n",op, op&~LIR64)); + NanoAssertMsgf(false, "unsupported LIR instruction: %d (~0x40: %d)\n", op, op&~LIR64); break; case LIR_short: @@ -1208,13 +1226,20 @@ namespace nanojit LIns* cond = ins->oprnd1(); LOpcode condop = cond->opcode(); NanoAssert(cond->isCond()); -#ifndef NJ_SOFTFLOAT +#if !defined(NJ_SOFTFLOAT) if (condop >= LIR_feq && condop <= LIR_fge) { +#if defined(NJ_ARM_VFP) + if (op == LIR_xf) + JNE(exit); + else + JE(exit); +#else if (op == LIR_xf) JP(exit); else JNP(exit); +#endif asm_fcmp(cond); break; } @@ -1313,9 +1338,13 @@ namespace nanojit { // only want certain regs Register r = prepResultReg(ins, AllowableFlagRegs); +#ifdef NJ_ARM_VFP + SETE(r); +#else // SETcc only sets low 8 bits, so extend MOVZX8(r,r); SETNP(r); +#endif asm_fcmp(ins); break; } @@ -1437,8 +1466,13 @@ namespace nanojit uint32_t Assembler::arFree(uint32_t idx) { + // nothing to free + if (idx == 0) + return 0; + if (idx > 0 && _activation.entry[idx] == _activation.entry[idx+stack_direction(1)]) _activation.entry[idx+stack_direction(1)] = 0; // clear 2 slots for doubles + _activation.entry[idx] = 0; return 0; } diff --git a/js/src/nanojit/LIR.cpp b/js/src/nanojit/LIR.cpp index 8e803952811..758a326b8d4 100755 --- a/js/src/nanojit/LIR.cpp +++ b/js/src/nanojit/LIR.cpp @@ -376,8 +376,6 @@ namespace nanojit return l; } -#define isS24(x) (((int32_t(x)<<8)>>8) == (x)) - LInsp LirBufWriter::insFar(LOpcode op, LInsp target) { NanoAssert(op == LIR_skip || op == LIR_tramp); diff --git a/js/src/nanojit/NativeARM.cpp b/js/src/nanojit/NativeARM.cpp index 4345cfba786..a954507168b 100644 --- a/js/src/nanojit/NativeARM.cpp +++ b/js/src/nanojit/NativeARM.cpp @@ -49,14 +49,17 @@ #if defined(AVMPLUS_LINUX) #include <asm/unistd.h> +extern "C" void __clear_cache(char *BEG, char *END); #endif +#ifdef FEATURE_NANOJIT + namespace nanojit { -#ifdef FEATURE_NANOJIT #ifdef NJ_VERBOSE -const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","r11","IP","SP","LR","PC"}; +const char* regNames[] =
{"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","FP","IP","SP","LR","PC", + "d0","d1","d2","d3","d4","d5","d6","d7","s14"}; #endif const Register Assembler::argRegs[] = { R0, R1, R2, R3 }; @@ -122,6 +125,7 @@ Assembler::nFragExit(LInsp guard) // for us; always force a far jump here. BL_far(_epilogue); + // stick the jmp pointer to the start of the sequence lr->jmp = _nIns; } @@ -155,18 +159,26 @@ void Assembler::asm_call(LInsp ins) { const CallInfo* call = callInfoFor(ins->fid()); + Reservation *callRes = getresv(ins); + uint32_t atypes = call->_argtypes; uint32_t roffset = 0; + // skip return type +#ifdef NJ_ARM_VFP + ArgSize rsize = (ArgSize)(atypes & 3); +#endif + atypes >>= 2; + // we need to detect if we have arg0 as LO followed by arg1 as F; // in that case, we need to skip using r1 -- the F needs to be // loaded in r2/r3, at least according to the ARM EABI and gcc 4.2's // generated code. bool arg0IsInt32FollowedByFloat = false; while ((atypes & 3) != ARGSIZE_NONE) { - if (((atypes >> 4) & 3) == ARGSIZE_LO && - ((atypes >> 2) & 3) == ARGSIZE_F && - ((atypes >> 6) & 3) == ARGSIZE_NONE) + if (((atypes >> 2) & 3) == ARGSIZE_LO && + ((atypes >> 0) & 3) == ARGSIZE_F && + ((atypes >> 4) & 3) == ARGSIZE_NONE) { arg0IsInt32FollowedByFloat = true; break; @@ -174,17 +186,68 @@ Assembler::asm_call(LInsp ins) atypes >>= 2; } +#ifdef NJ_ARM_VFP + if (rsize == ARGSIZE_F) { + NanoAssert(ins->opcode() == LIR_fcall); + NanoAssert(callRes); + + //fprintf (stderr, "call ins: %p callRes: %p reg: %d ar: %d\n", ins, callRes, callRes->reg, callRes->arIndex); + + Register rr = callRes->reg; + int d = disp(callRes); + freeRsrcOf(ins, rr != UnknownReg); + + if (rr != UnknownReg) { + NanoAssert(IsFpReg(rr)); + FMDRR(rr,R0,R1); + } else { + NanoAssert(d); + //fprintf (stderr, "call ins d: %d\n", d); + STMIA(Scratch, 1<<R0 | 1<<R1); + arm_ADDi(Scratch, FP, d); + } + } +#endif + CALL(call); ArgSize sizes[10]; uint32_t argc = call->get_sizes(sizes); - for(uint32_t i=0; i < argc; i++) { + for(uint32_t i = 0; i < argc; i++) { uint32_t j = argc - i - 1; ArgSize sz = sizes[j]; - NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q); + LInsp arg = ins->arg(j); // pre-assign registers R0-R3 for arguments (if they fit) - Register r = (i+roffset) < 4 ? argRegs[i+roffset] : UnknownReg; - asm_arg(sz, ins->arg(j), r); + + Register r = (i + roffset) < 4 ? argRegs[i+roffset] : UnknownReg; +#ifdef NJ_ARM_VFP + if (sz == ARGSIZE_F) { + if (r == R0 || r == R2) { + roffset++; + } else if (r == R1) { + r = R2; + roffset++; + } else { + r = UnknownReg; + } + + // XXX move this into asm_farg + Register sr = findRegFor(arg, FpRegs); + + if (r != UnknownReg) { + // stick it into our scratch fp reg, and then copy into the base reg + //fprintf (stderr, "FMRRD: %d %d <- %d\n", r, nextreg(r), sr); + FMRRD(r, nextreg(r), sr); + } else { + asm_pusharg(arg); + } + } else { + asm_arg(sz, arg, r); + } +#else + NanoAssert(sz == ARGSIZE_LO || sz == ARGSIZE_Q); + asm_arg(sz, arg, r); +#endif if (i == 0 && arg0IsInt32FollowedByFloat) roffset = 1; @@ -238,7 +301,7 @@ Assembler::nRegisterResetAll(RegAlloc& a) // add scratch registers to our free list for the allocator a.clear(); a.used = 0; - a.free = rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) | rmask(R5); + a.free = rmask(R0) | rmask(R1) | rmask(R2) | rmask(R3) | rmask(R4) | rmask(R5) | FpRegs; debug_only(a.managed = a.free); } @@ -251,16 +314,15 @@ Assembler::nPatchBranch(NIns* branch, NIns* target) // Which is really 2 instructions, so we need to modify both // XXX -- this is B, not BL, at least on non-Thumb..
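+ // (e.g. the far form ends up as the pair: "ldr pc, [pc, #-4]" followed by + // the literal target word; since pc reads as the instruction address + 8, + // the load picks up the word immediately after the ldr)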
- // branch+2 because PC is always 2 instructions ahead on ARM/Thumb - int32_t offset = int(target) - int(branch+2); + int32_t offset = PC_OFFSET_FROM(target, branch); //printf("---patching branch at 0x%08x to location 0x%08x (%d-0x%08x)\n", branch, target, offset, offset); // We have 2 words to work with here -- if offset is in range of a 24-bit // relative jump, emit that; otherwise, we do a pc-relative load into pc. - if (-(1<<24) <= offset & offset < (1<<24)) { + if (isS24(offset)) { // ARM goodness, using unconditional B - *branch = (NIns)( COND_AL | (0xA<<24) | ((offset >>2) & 0xFFFFFF) ); + *branch = (NIns)( COND_AL | (0xA<<24) | ((offset>>2) & 0xFFFFFF) ); } else { // LDR pc,[pc] *branch++ = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | ( 0x004 ) ); @@ -295,11 +357,11 @@ Assembler::asm_qjoin(LIns *ins) LIns* hi = ins->oprnd2(); Register r = findRegFor(hi, GpRegs); - ST(FP, d+4, r); + STR(r, FP, d+4); // okay if r gets recycled. r = findRegFor(lo, GpRegs); - ST(FP, d, r); + STR(r, FP, d); freeRsrcOf(ins, false); // if we had a reg in use, emit a ST to flush it to mem } @@ -311,7 +373,7 @@ Assembler::asm_store32(LIns *value, int dr, LIns *base) findRegFor2(GpRegs, value, rA, base, rB); Register ra = rA->reg; Register rb = rB->reg; - ST(rb, dr, ra); + STR(ra, rb, dr); } void @@ -319,7 +381,17 @@ Assembler::asm_restore(LInsp i, Reservation *resv, Register r) { (void)resv; int d = findMemFor(i); - LD(r, d, FP); + + if (IsFpReg(r)) { + if (isS8(d >> 2)) { + FLDD(r, FP, d); + } else { + FLDD(r, Scratch, 0); + arm_ADDi(Scratch, FP, d); + } + } else { + LDR(r, FP, d); + } verbose_only( if (_verbose) @@ -332,12 +404,21 @@ Assembler::asm_spill(LInsp i, Reservation *resv, bool pop) { (void)i; (void)pop; - + //fprintf (stderr, "resv->arIndex: %d\n", resv->arIndex); if (resv->arIndex) { int d = disp(resv); // save to spill location Register rr = resv->reg; - ST(FP, d, rr); + if (IsFpReg(rr)) { + if (isS8(d >> 2)) { + FSTD(rr, FP, d); + } else { + FSTD(rr, Scratch, 0); + arm_ADDi(Scratch, FP, d); + } + } else { + STR(rr, FP, d); + } verbose_only(if (_verbose){ outputf(" spill %s",_thisfrag->lirbuf->names->formatRef(i)); @@ -349,38 +430,164 @@ Assembler::asm_spill(LInsp i, Reservation *resv, bool pop) void Assembler::asm_load64(LInsp ins) { - LIns* base = ins->oprnd1(); - int db = ins->oprnd2()->constval(); - Reservation *resv = getresv(ins); - int dr = disp(resv); - NanoAssert(resv->reg == UnknownReg && dr != 0); + ///asm_output("<<< load64"); + + LIns* base = ins->oprnd1(); + int offset = ins->oprnd2()->constval(); + + Reservation *resv = getresv(ins); + Register rr = resv->reg; + int d = disp(resv); - Register rb = findRegFor(base, GpRegs); - resv->reg = UnknownReg; - asm_mmq(FP, dr, rb, db); freeRsrcOf(ins, false); + +#ifdef NJ_ARM_VFP + Register rb = findRegFor(base, GpRegs); + + NanoAssert(rb != UnknownReg); + NanoAssert(rr == UnknownReg || IsFpReg(rr)); + + if (rr != UnknownReg) { + if (!isS8(offset >> 2) || (offset&3) != 0) { + underrunProtect(LD32_size + 8); + FLDD(rr,Scratch,0); + ADD(Scratch, rb); + LD32_nochk(Scratch, offset); + } else { + FLDD(rr,rb,offset); + } + } else { + asm_mmq(FP, d, rb, offset); + } + + // *(FP+dr) <- *(rb+db) +#else + NanoAssert(resv->reg == UnknownReg && d != 0); + Register rb = findRegFor(base, GpRegs); + asm_mmq(FP, d, rb, offset); +#endif + + //asm_output(">>> load64"); } void Assembler::asm_store64(LInsp value, int dr, LInsp base) { + //asm_output1("<<< store64 (dr: %d)", dr); + +#ifdef NJ_ARM_VFP + Reservation *valResv = getresv(value); + + 
Register rb = findRegFor(base, GpRegs); + Register rv = findRegFor(value, FpRegs); + + NanoAssert(rb != UnknownReg); + NanoAssert(rv != UnknownReg); + + Register baseReg = rb; + intptr_t baseOffset = dr; + + if (!isS8(dr)) { + baseReg = Scratch; + baseOffset = 0; + } + + FSTD(rv, baseReg, baseOffset); + + if (!isS8(dr)) { + underrunProtect(4 + LD32_size); + ADD(Scratch, rb); + LD32_nochk(Scratch, dr); + } + + // if it's a constant, make sure our baseReg/baseOffset location + // has the right value + if (value->isconstq()) { + const int32_t* p = (const int32_t*) (value-2); + + underrunProtect(12 + LD32_size); + + asm_quad_nochk(rv, p); + } +#else int da = findMemFor(value); Register rb = findRegFor(base, GpRegs); asm_mmq(rb, dr, FP, da); +#endif + //asm_output(">>> store64"); +} + +// stick a quad into register rr, where p points to the two +// 32-bit parts of the quad, optionally also storing at FP+d +void +Assembler::asm_quad_nochk(Register rr, const int32_t* p) +{ + *(++_nSlot) = p[0]; + *(++_nSlot) = p[1]; + + intptr_t constAddr = (intptr_t) (_nSlot-1); + intptr_t realOffset = PC_OFFSET_FROM(constAddr, _nIns-1); + intptr_t offset = realOffset; + Register baseReg = PC; + + //int32_t *q = (int32_t*) constAddr; + //fprintf (stderr, "asm_quad_nochk: rr = %d cAddr: 0x%x quad: %08x:%08x q: %f @0x%08x\n", rr, constAddr, p[0], p[1], *(double*)q, _nIns); + + // for FLDD, we only get a left-shifted 8-bit offset + if (!isS8(realOffset >> 2)) { + offset = 0; + baseReg = Scratch; + } + + FLDD(rr, baseReg, offset); + + if (!isS8(realOffset >> 2)) + LD32_nochk(Scratch, constAddr); } void Assembler::asm_quad(LInsp ins) { - Reservation *rR = getresv(ins); - int d = disp(rR); + //asm_output(">>> asm_quad"); + + Reservation *res = getresv(ins); + int d = disp(res); + Register rr = res->reg; + + NanoAssert(d || rr != UnknownReg); + + const int32_t* p = (const int32_t*) (ins-2); + +#ifdef NJ_ARM_VFP freeRsrcOf(ins, false); + // XXX We probably want nochk versions of FLDD/FSTD + underrunProtect(16 + LD32_size); + + // grab a register to do the load into if we don't have one already; + // XXX -- maybe do a mmq in this case? We're going to use our + // D7 register that's never allocated (since it's the one we use + // for int-to-double conversions), so we don't have to worry about + // spilling something in a fp reg. + if (rr == UnknownReg) + rr = D7; + + if (d) + FSTD(rr, FP, d); + + asm_quad_nochk(rr, p); +#else + freeRsrcOf(ins, false); if (d) { - const int32_t* p = (const int32_t*) (ins-2); - STi(FP,d+4,p[1]); - STi(FP,d,p[0]); + underrunProtect(LD32_size * 2 + 8); + STR(Scratch, FP, d+4); + LD32_nochk(Scratch, p[1]); + STR(Scratch, FP, d); + LD32_nochk(Scratch, p[0]); } +#endif + + //asm_output("<<< asm_quad"); } bool @@ -393,9 +600,17 @@ Assembler::asm_qlo(LInsp ins, LInsp q) void Assembler::asm_nongp_copy(Register r, Register s) { - // we will need this for VFP support - (void)r; (void)s; - NanoAssert(false); + if ((rmask(r) & FpRegs) && (rmask(s) & FpRegs)) { + // fp->fp + FCPYD(r, s); + } else if ((rmask(r) & GpRegs) && (rmask(s) & FpRegs)) { + // fp->gp + // who's doing this and why?
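+ // (an fp->gp copy would presumably be an FMRS from the low single + // of the source double, as the commented-out call below suggests, + // but nothing generates this case yet, so just assert)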
+ NanoAssert(0); + // FMRS(r, loSingleVfp(s)); + } else { + NanoAssert(0); + } } Register @@ -416,31 +631,41 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds) // get a scratch reg Register t = registerAlloc(GpRegs & ~(rmask(rd)|rmask(rs))); _allocator.addFree(t); - ST(rd, dd+4, t); - LD(t, ds+4, rs); - ST(rd, dd, t); - LD(t, ds, rs); + // XXX use LDM,STM + STR(t, rd, dd+4); + LDR(t, rs, ds+4); + STR(t, rd, dd); + LDR(t, rs, ds); } void -Assembler::asm_pusharg(LInsp p) +Assembler::asm_pusharg(LInsp arg) { - // arg goes on stack - Reservation* rA = getresv(p); - if (rA == 0) - { - Register ra = findRegFor(p, GpRegs); - ST(SP,0,ra); - } - else if (rA->reg == UnknownReg) - { - ST(SP,0,Scratch); - LD(Scratch,disp(rA),FP); - } + Reservation* argRes = getresv(arg); + bool quad = arg->isQuad(); + intptr_t stack_growth = quad ? 8 : 4; + + Register ra; + + if (argRes) + ra = argRes->reg; else - { - ST(SP,0,rA->reg); + ra = findRegFor(arg, quad ? FpRegs : GpRegs); + + if (ra == UnknownReg) { + STR(Scratch, SP, 0); + LDR(Scratch, FP, disp(argRes)); + } else { + if (!quad) { + Register ra = findRegFor(arg, GpRegs); + STR(ra, SP, 0); + } else { + Register ra = findRegFor(arg, FpRegs); + FSTD(ra, SP, 0); + } } + + SUBi(SP, stack_growth); } void @@ -470,22 +695,6 @@ Assembler::nativePageSetup() } } -void -Assembler::flushCache(NIns* n1, NIns* n2) { -#if defined(UNDER_CE) - // we changed the code, so we need to do this (sadly) - FlushInstructionCache(GetCurrentProcess(), NULL, NULL); -#elif defined(AVMPLUS_LINUX) - // Just need to clear this one page (not even the whole page really) - //Page *page = (Page*)pageTop(_nIns); - register unsigned long _beg __asm("a1") = (unsigned long)(n1); - register unsigned long _end __asm("a2") = (unsigned long)(n2); - register unsigned long _flg __asm("a3") = 0; - register unsigned long _swi __asm("r7") = 0xF0002; - __asm __volatile ("swi 0 @ sys_cacheflush" : "=r" (_beg) : "0" (_beg), "r" (_end), "r" (_flg), "r" (_swi)); -#endif -} - NIns* Assembler::asm_adjustBranch(NIns* at, NIns* target) { @@ -497,9 +706,16 @@ Assembler::asm_adjustBranch(NIns* at, NIns* target) NIns* was = (NIns*) at[3]; + //fprintf (stderr, "Adjusting branch @ 0x%8x: 0x%x -> 0x%x\n", at+3, at[3], target); + at[3] = (NIns)target; - flushCache(at, at+4); +#if defined(UNDER_CE) + // we changed the code, so we need to do this (sadly) + FlushInstructionCache(GetCurrentProcess(), NULL, NULL); +#elif defined(AVMPLUS_LINUX) + __clear_cache((char*)at, (char*)(at+4)); +#endif #ifdef AVMPLUS_PORTING_API NanoJIT_PortAPI_FlushInstructionCache(at, at+4); @@ -550,6 +766,9 @@ Assembler::BL_far(NIns* addr) // point to the right spot before branching underrunProtect(16); + // TODO use a slot in const pool for address, but emit single insn + // for branch if offset fits + // the address *(--_nIns) = (NIns)((addr)); // bx ip // branch to the address we loaded earlier *(--_nIns) = (NIns)( COND_AL | (0x12<<20) | (0xFFF<<8) | (1<<4) | (IP) ); // add lr, pc, #4 // set the return address to the instruction after the bx *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | (PC<<16) | (LR<<12) | (4) ); // ldr ip, [pc + #4] // load the address into ip, reading it from [pc+4] *(--_nIns) = (NIns)( COND_AL | (0x59<<20) | (PC<<16) | (IP<<12) | (4)); + + //fprintf (stderr, "BL_far sequence @ 0x%08x\n", _nIns); + asm_output1("bl %p (32-bit)", addr); } void Assembler::BL(NIns* addr) { - intptr_t offs = PC_OFFSET_FROM(addr,(intptr_t)_nIns-4); - if (JMP_S24_OFFSET_OK(offs)) { - // we can do this with a single BL call + intptr_t offs = PC_OFFSET_FROM(addr,_nIns-1); + + //fprintf (stderr, "BL: 0x%x (offs: %d [%x]) @ 
0x%08x\n", addr, offs, offs, (intptr_t)(_nIns-1)); + + if (isS24(offs)) { + // try to do this with a single S24 call; + // recompute offset in case underrunProtect had to allocate a new page underrunProtect(4); - *(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((offs)>>2) & 0xFFFFFF) ); \ + offs = PC_OFFSET_FROM(addr,_nIns-1); + } + + if (isS24(offs)) { + // already did underrunProtect above + *(--_nIns) = (NIns)( COND_AL | (0xB<<24) | (((offs)>>2) & 0xFFFFFF) ); asm_output1("bl %p", addr); } else { BL_far(addr); @@ -579,6 +810,7 @@ void Assembler::CALL(const CallInfo *ci) { intptr_t addr = ci->_address; + BL((NIns*)addr); asm_output1(" (call %s)", ci->_name); } @@ -586,21 +818,226 @@ Assembler::CALL(const CallInfo *ci) void Assembler::LD32_nochk(Register r, int32_t imm) { - // We can always reach the const pool, since it's on the same page (<4096) - underrunProtect(8); + // We should always reach the const pool, since it's on the same page (<4096); + // if we can't, someone didn't underrunProtect enough. *(++_nSlot) = (int)imm; //fprintf (stderr, "wrote slot(2) %p with %08x, jmp @ %p\n", _nSlot, (intptr_t)imm, _nIns-1); - int offset = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4); + int offset = PC_OFFSET_FROM(_nSlot,_nIns-1); - NanoAssert(JMP_S24_OFFSET_OK(offset) && (offset < 0)); + NanoAssert(isS12(offset) && (offset < 0)); - *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | ((r)<<12) | ((-offset) & 0xFFFFFF) ); - asm_output2("ld %s,%d",gpn(r),imm); + asm_output2(" (%d(PC) = 0x%x)", offset, imm); + + LDR_nochk(r,PC,offset); } + +// Branch to target address _t with condition _c, doing underrun +// checks (_chk == 1) or skipping them (_chk == 0). +// +// If the jump fits in a relative jump (+/-32MB), emit that. +// If the jump is unconditional, emit the dest address inline in +// the instruction stream and load it into pc. +// If the jump has a condition, but no one's mucked with _nIns and our _nSlot +// pointer is valid, stick the constant in the slot and emit a conditional +// load into pc. +// Otherwise, emit the conditional load into pc from a nearby constant, +// and emit a jump to jump over it in case the condition fails. +// +// NB: JMP_nochk depends on this not calling samepage() when _c == AL +void +Assembler::B_cond_chk(ConditionCode _c, NIns* _t, bool _chk) +{ + int32 offs = PC_OFFSET_FROM(_t,_nIns-1); + //fprintf(stderr, "B_cond_chk target: 0x%08x offset: %d @0x%08x\n", _t, offs, _nIns-1); + if (isS24(offs)) { + if (_chk) underrunProtect(4); + offs = PC_OFFSET_FROM(_t,_nIns-1); + } + + if (isS24(offs)) { + *(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) ); + } else if (_c == AL) { + if(_chk) underrunProtect(8); + *(--_nIns) = (NIns)(_t); + *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 ); + } else if (samepage(_nIns,_nSlot)) { + if(_chk) underrunProtect(8); + *(++_nSlot) = (NIns)(_t); + offs = PC_OFFSET_FROM(_nSlot,_nIns-1); + NanoAssert(offs < 0); + *(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFFFFF) ); + } else { + if(_chk) underrunProtect(12); + *(--_nIns) = (NIns)(_t); + *(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((-4)>>2) & 0xFFFFFF ); + *(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 ); + } + + asm_output2("%s %p", _c == AL ?
"jmp" : "b(cnd)", (void*)(_t)); +} + +/* + * VFP + */ + +#ifdef NJ_ARM_VFP + +void +Assembler::asm_i2f(LInsp ins) +{ + Register rr = prepResultReg(ins, FpRegs); + Register srcr = findRegFor(ins->oprnd1(), GpRegs); + + // todo: support int value in memory, as per x86 + NanoAssert(srcr != UnknownReg); + + FSITOD(rr, FpSingleScratch); + FMSR(FpSingleScratch, srcr); +} + +void +Assembler::asm_u2f(LInsp ins) +{ + Register rr = prepResultReg(ins, FpRegs); + Register sr = findRegFor(ins->oprnd1(), GpRegs); + + // todo: support int value in memory, as per x86 + NanoAssert(sr != UnknownReg); + + FUITOD(rr, FpSingleScratch); + FMSR(FpSingleScratch, sr); +} + +void +Assembler::asm_fneg(LInsp ins) +{ + LInsp lhs = ins->oprnd1(); + Register rr = prepResultReg(ins, FpRegs); + + Reservation* rA = getresv(lhs); + Register sr; + + if (!rA || rA->reg == UnknownReg) + sr = findRegFor(lhs, FpRegs); + else + sr = rA->reg; + + FNEGD(rr, sr); +} + +void +Assembler::asm_fop(LInsp ins) +{ + LInsp lhs = ins->oprnd1(); + LInsp rhs = ins->oprnd2(); + LOpcode op = ins->opcode(); + + NanoAssert(op >= LIR_fadd && op <= LIR_fdiv); + + // rr = ra OP rb + + Register rr = prepResultReg(ins, FpRegs); + + Register ra = findRegFor(lhs, FpRegs); + Register rb = (rhs == lhs) ? ra : findRegFor(rhs, FpRegs); + + // XXX special-case 1.0 and 0.0 + + if (op == LIR_fadd) + FADDD(rr,ra,rb); + else if (op == LIR_fsub) + FSUBD(rr,ra,rb); + else if (op == LIR_fmul) + FMULD(rr,ra,rb); + else //if (op == LIR_fdiv) + FDIVD(rr,ra,rb); +} + +void +Assembler::asm_fcmp(LInsp ins) +{ + LInsp lhs = ins->oprnd1(); + LInsp rhs = ins->oprnd2(); + LOpcode op = ins->opcode(); + + NanoAssert(op >= LIR_feq && op <= LIR_fge); + + Register ra = findRegFor(lhs, FpRegs); + Register rb = findRegFor(rhs, FpRegs); + + // We can't uniquely identify fge/fle via a single bit + // pattern (since equality and lt/gt are separate bits); + // so convert to the single-bit variant. + if (op == LIR_fge) { + Register temp = ra; + ra = rb; + rb = temp; + op = LIR_flt; + } else if (op == LIR_fle) { + Register temp = ra; + ra = rb; + rb = temp; + op = LIR_fgt; + } + + // There is no way to test for an unordered result using + // the conditional form of an instruction; the encoding (C=1 V=1) + // ends up having overlaps with a few other tests. So, test for + // the explicit mask. + uint8_t mask = 0x0; + + // NZCV + // for a valid ordered result, V is always 0 from VFP + if (op == LIR_feq) + // ZC // cond EQ (both equal and "not less than") + mask = 0x6; + else if (op == LIR_flt) + // N // cond MI + mask = 0x8; + else if (op == LIR_fgt) + // C // cond CS + mask = 0x2; + else + NanoAssert(0); +/* + // these were converted into gt and lt above. + if (op == LIR_fle) + // NZ // cond LE + mask = 0xC; + else if (op == LIR_fge) + // ZC // cond fail? + mask = 0x6; +*/ + + // TODO XXX could do this as fcmpd; fmstat; tstvs rX, #0 -- the tstvs + // would reset the status bits if V (NaN flag) is set, but that + // doesn't work for NE. For NE could teqvs rX, #1. rX needs to + // be any register that has lsb == 0, such as sp/fp/pc. + + // Test explicitly with the full mask; if V is set, test will fail. + // Assumption is that this will be followed up by a BEQ/BNE + CMPi(Scratch, mask); + // grab just the condition fields + SHRi(Scratch, 28); + MRS(Scratch); + + // do the comparison and get results loaded in ARM status register + FMSTAT(); + FCMPD(ra, rb); +} + +Register +Assembler::asm_prep_fcall(Reservation* rR, LInsp ins) +{ + // We have nothing to do here; we do it all in asm_call. 
+ return UnknownReg; +} + +#endif /* NJ_ARM_VFP */ + +} #endif /* FEATURE_NANOJIT */ - -} diff --git a/js/src/nanojit/NativeARM.h b/js/src/nanojit/NativeARM.h index 5ec87cd2575..20ed5f3fa2f 100644 --- a/js/src/nanojit/NativeARM.h +++ b/js/src/nanojit/NativeARM.h @@ -47,14 +47,28 @@ namespace nanojit const int NJ_LOG2_PAGE_SIZE = 12; // 4K -#define NJ_MAX_REGISTERS 11 +// If NJ_ARM_VFP is defined, then VFP is assumed to +// be present. If it's not defined, then softfloat +// is used, and NJ_SOFTFLOAT is defined. +#define NJ_ARM_VFP + +#ifdef NJ_ARM_VFP + +// only d0-d7; we'll use d7 as s14-s15 for i2f/u2f/etc. +#define NJ_VFP_MAX_REGISTERS 8 + +#else + +#define NJ_VFP_MAX_REGISTERS 0 +#define NJ_SOFTFLOAT + +#endif + +#define NJ_MAX_REGISTERS (11 + NJ_VFP_MAX_REGISTERS) #define NJ_MAX_STACK_ENTRY 256 #define NJ_MAX_PARAMETERS 16 #define NJ_ALIGN_STACK 8 -#define NJ_STACK_OFFSET 8 - -#define NJ_SOFTFLOAT -#define NJ_STACK_GROWTH_UP +#define NJ_STACK_OFFSET 0 #define NJ_CONSTANT_POOLS const int NJ_MAX_CPOOL_OFFSET = 4096; @@ -75,25 +89,40 @@ typedef enum { R8 = 8, R9 = 9, R10 = 10, - //FP =11, + FP = 11, IP = 12, SP = 13, LR = 14, PC = 15, - FP = 13, - - // Pseudo-register for floating point - F0 = 0, + // FP regs + D0 = 16, + D1 = 17, + D2 = 18, + D3 = 19, + D4 = 20, + D5 = 21, + D6 = 22, + D7 = 23, + + FirstFloatReg = 16, + LastFloatReg = 22, // helpers FRAME_PTR = 11, - ESP = 13, + ESP = SP, FirstReg = 0, +#ifdef NJ_ARM_VFP + LastReg = 23, +#else LastReg = 10, - Scratch = 12, - UnknownReg = 11 +#endif + Scratch = IP, + UnknownReg = 31, + + // special value referring to S14 + FpSingleScratch = 24 } Register; /* ARM condition codes */ @@ -123,13 +152,30 @@ typedef struct _FragInfo { NIns* epilogue; } FragInfo; -static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10; +static const RegisterMask SavedRegs = 1<<R4 | 1<<R5 | 1<<R6 | 1<<R7 | 1<<R8 | 1<<R9 | 1<<R10; + +#ifdef NJ_ARM_VFP +// D0-D6 are allocatable; D7 is reserved as a scratch for i2f/u2f and quads +static const RegisterMask FpRegs = 1<<D0 | 1<<D1 | 1<<D2 | 1<<D3 | 1<<D4 | 1<<D5 | 1<<D6; +#else +static const RegisterMask FpRegs = 0; +#endif + +#define IsFpReg(_r) ((rmask(_r) & (FpRegs | (1<<D7))) != 0) +#define IsGpReg(_r) (!IsFpReg(_r)) +#define FpRegNum(_fpr) ((_fpr) - FirstFloatReg) -#define ADDi(_r,_imm) do { \ - if ((_imm)>-256 && (_imm)<256) { \ +// _l = _l + _r +#define ADD(_l,_r) arm_ADD(_l,_l,_r) + +// TODO: we can do better here, since we can rotate the 8-bit immediate left by +// an even number of bits; should count zeros at the end. + +// Note that this sometimes converts negative immediate values to a sub.
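+// A sketch of the check that TODO implies -- ARM data-processing immediates +// are an 8-bit value rotated right by an even amount (hypothetical helper, +// not used below): +// static inline bool isOp2Imm(uint32_t v) { +// for (int rot = 0; rot < 32; rot += 2) { +// if ((v & ~0xffU) == 0) return true; // fits in 8 bits at this rotation +// v = (v << 2) | (v >> 30); // undo a rotate-right of 2 +// } +// return false; +// }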
+// _d = _r + _imm +#define arm_ADDi(_d,_n,_imm) do { \ + if ((_imm) > -256 && (_imm) < 256) { \ underrunProtect(4); \ if ((_imm)>=0) \ - *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | ((_imm)&0xFF) ); \ + *(--_nIns) = (NIns)( COND_AL | OP_IMM | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | ((_imm)&0xFF) ); \ else \ - *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_r)<<16) | ((_r)<<12) | ((-(_imm))&0xFF) ); \ + *(--_nIns) = (NIns)( COND_AL | OP_IMM | OP_STAT | (1<<22) | ((_n)<<16) | ((_d)<<12) | ((-(_imm))&0xFF) ); \ } else { \ if ((_imm)>=0) { \ if ((_imm)<=1020 && (((_imm)&3)==0) ) { \ underrunProtect(4); \ - *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<23) | ((_r)<<16) | ((_r)<<12) | (15<<8)| ((_imm)>>2) ); \ + *(--_nIns) = (NIns)( COND_AL | OP_IMM | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | (15<<8)| ((_imm)>>2) ); \ } else { \ underrunProtect(4+LD32_size); \ - *(--_nIns) = (NIns)( COND_AL | (1<<23) | ((_r)<<16) | ((_r)<<12) | (Scratch)); \ + *(--_nIns) = (NIns)( COND_AL | OP_STAT | (1<<23) | ((_n)<<16) | ((_d)<<12) | (Scratch)); \ LD32_nochk(Scratch, _imm); \ } \ } else { \ + underrunProtect(4+LD32_size); \ + *(--_nIns) = (NIns)( COND_AL | OP_STAT | (1<<22) | ((_n)<<16) | ((_d)<<12) | (Scratch)); \ + LD32_nochk(Scratch, -(_imm)); \ + } \ + } \ + asm_output3("add %s,%s,%d",gpn(_d),gpn(_n),(_imm)); \ + } while(0) + +/* + * There used to be a case here: if ((_imm)>=-510) { \ underrunProtect(8); \ int rem = -(_imm) - 255; \ *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_n)<<16) | ((_d)<<12) | ((rem)&0xFF) ); \ *(--_nIns) = (NIns)( COND_AL | OP_IMM | (1<<22) | ((_n)<<16) | ((_d)<<12) | (0xFF) ); \ } else { + * above, but if we do that we can't really update the status registers. So don't do that. 
+ */ + +#define ADDi(_r,_imm) arm_ADDi(_r,_r,_imm) // _l = _l - _r #define SUB(_l,_r) do { \ @@ -402,6 +463,13 @@ typedef enum { *(--_nIns) = (NIns)( COND_AL | (0x11<<20) | ((_d)<<16) | (_s) ); \ asm_output2("test %s,%s",gpn(_d),gpn(_s)); } while(0) +#define TSTi(_d,_imm) do { \ + underrunProtect(4); \ + NanoAssert(((_imm) & 0xff) == (_imm)); \ + *(--_nIns) = (NIns)( COND_AL | OP_IMM | (0x11<<20) | ((_d) << 16) | (0xF<<12) | ((_imm) & 0xff) ); \ + asm_output2("tst %s,#0x%x", gpn(_d), _imm); \ + } while (0); + // CMP #define CMP(_l,_r) do { \ underrunProtect(4); \ @@ -429,7 +497,7 @@ typedef enum { LD32_nochk(Scratch, (_imm)); \ } \ } \ - asm_output2("cmp %s,%X",gpn(_r),(_imm)); \ + asm_output2("cmp %s,0x%x",gpn(_r),(_imm)); \ } while(0) // MOV @@ -457,25 +525,33 @@ typedef enum { #define MRNO(dr,sr) MR_cond(dr, sr, VC, "movvc") // overflow clear #define MRNC(dr,sr) MR_cond(dr, sr, CC, "movcc") // carry clear -#define LD(_d,_off,_b) do { \ - if ((_off)<0) { \ - underrunProtect(4); \ +#define LDR_chk(_d,_b,_off,_chk) do { \ + if (IsFpReg(_d)) { \ + FLDD_chk(_d,_b,_off,_chk); \ + } else if ((_off)<0) { \ + if (_chk) underrunProtect(4); \ NanoAssert((_off)>-4096); \ *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | ((_b)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \ } else { \ if (isS16(_off) || isU16(_off)) { \ - underrunProtect(4); \ + if (_chk) underrunProtect(4); \ NanoAssert((_off)<4096); \ *(--_nIns) = (NIns)( COND_AL | (0x59<<20) | ((_b)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \ } else { \ - underrunProtect(4+LD32_size); \ + if (_chk) underrunProtect(4+LD32_size); \ *(--_nIns) = (NIns)( COND_AL | (0x79<<20) | ((_b)<<16) | ((_d)<<12) | Scratch ); \ LD32_nochk(Scratch, _off); \ } \ } \ - asm_output3("ld %s,%d(%s)",gpn((_d)),(_off),gpn((_b))); \ + asm_output3("ldr %s,%d(%s)",gpn((_d)),(_off),gpn((_b))); \ } while(0) +#define LDR(_d,_b,_off) LDR_chk(_d,_b,_off,0) +#define LDR_nochk(_d,_b,_off) LDR_chk(_d,_b,_off,1) + +// i386 compat, for Assembler.cpp +#define LD(reg,offset,base) LDR_chk(reg,base,offset,1) +#define ST(base,offset,reg) STR(reg,base,offset) #define LDi(_d,_imm) do { \ if (isS8((_imm)) || isU8((_imm))) { \ @@ -486,7 +562,7 @@ typedef enum { underrunProtect(LD32_size); \ LD32_nochk(_d, (_imm)); \ } \ - asm_output2("ld %s,%d",gpn((_d)),(_imm)); \ + asm_output2("ld %s,0x%x",gpn((_d)),(_imm)); \ } while(0) @@ -501,29 +577,13 @@ typedef enum { asm_output3("ldrb %s,%d(%s)", gpn(_d),(_off),gpn(_b)); \ } while(0) -#define ST(_b,_off,_r) do { \ +#define STR(_d,_n,_off) do { \ + NanoAssert(!IsFpReg(_d) && isS12(_off)); \ underrunProtect(4); \ - if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_b)<<16) | ((_r)<<12) | ((-(_off))&0xFFF) ); \ - else *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((_r)<<12) | ((_off)&0xFFF) ); \ - asm_output3("str %s, %d(%s)",gpn(_r), (_off),gpn(_b)); } while(0) - - -#define STi(_b,_off,_imm) do { \ - NanoAssert((_off)>0); \ - if (isS8((_imm)) || isU8((_imm))) { \ - underrunProtect(8); \ - *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((Scratch)<<12) | ((_off)&0xFFF) ); \ - asm_output3("str %s, %d(%s)",gpn(Scratch), (_off),gpn(_b)); \ - if ((_imm)<0) *(--_nIns) = (NIns)( COND_AL | (0x3E<<20) | (Scratch<<12) | (((_imm)^0xFFFFFFFF)&0xFF) ); \ - else *(--_nIns) = (NIns)( COND_AL | (0x3B<<20) | (Scratch<<12) | ((_imm)&0xFF) ); \ - asm_output2("ld %s,%d",gpn((Scratch)),(_imm)); \ - } else { \ - underrunProtect(4+LD32_size); \ - *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_b)<<16) | ((Scratch)<<12) | ((_off)&0xFFF) ); \ - asm_output3("str 
%s, %d(%s)",gpn(Scratch), (_off),gpn(_b)); \ - LD32_nochk(Scratch, (_imm)); \ - } \ - } while(0); + if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \ + else *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \ + asm_output3("str %s, %d(%s)",gpn(_d), (_off), gpn(_n)); \ + } while(0) #define LEA(_r,_d,_b) do { \ @@ -548,7 +608,7 @@ typedef enum { //#define RET() INT3() #define BKPT_nochk() do { \ - *(--_nIns) = (NIns)( (0xE<<24) | (0x12<<20) | (0x7<<4) ); } while (0); + *(--_nIns) = (NIns)( (0xE<<24) | (0x12<<20) | (0x7<<4) ); } while (0) // this is pushing a reg #define PUSHr(_r) do { \ @@ -581,47 +641,10 @@ typedef enum { *(--_nIns) = (NIns)( COND_AL | (0x8B<<20) | (SP<<16) | (_mask) ); \ asm_output1("pop %x", (_mask));} while (0) +// PC always points to current instruction + 8, so when calculating pc-relative +// offsets, use PC+8. #define PC_OFFSET_FROM(target,frompc) ((intptr_t)(target) - ((intptr_t)(frompc) + 8)) -#define JMP_S24_OFFSET_OK(offs) ((-(1<<24)) <= (offs) && (offs) < (1<<24)) - -// (XXX This ought to be a function instead of a macro) -// -// Branch to target address _t with condition _c, doing underrun -// checks (_chk == 1) or skipping them (_chk == 0). -// -// If the jump fits in a relative jump (+/-32MB), emit that. -// If the jump is unconditional, emit the dest address inline in -// the instruction stream and load it into pc. -// If the jump has a condition, but noone's mucked with _nIns and our _nSlot -// pointer is valid, stick the constant in the slot and emit a conditional -// load into pc. -// Otherwise, emit the conditional load into pc from a nearby constant, -// and emit a jump to jump over it it in case the condition fails. -// -// NB: JMP_nochk depends on this not calling samepage() when _c == AL -#define B_cond_chk(_c,_t,_chk) do { \ - int32 offs = PC_OFFSET_FROM(_t,(intptr_t)(_nIns)-4); \ - if (JMP_S24_OFFSET_OK(offs)) { \ - if(_chk) underrunProtect(4); \ - *(--_nIns) = (NIns)( ((_c)<<28) | (0xA<<24) | (((offs)>>2) & 0xFFFFFF) ); \ - } else if (_c == AL) { \ - if(_chk) underrunProtect(8); \ - *(--_nIns) = (NIns)(_t); \ - *(--_nIns) = (NIns)( COND_AL | (0x51<<20) | (PC<<16) | (PC<<12) | 0x4 ); \ - } else if (samepage(_nIns,_nSlot)) { \ - if(_chk) underrunProtect(8); \ - *(++_nSlot) = (NIns)(_t); \ - offs = PC_OFFSET_FROM(_nSlot,(intptr_t)(_nIns)-4); \ - NanoAssert(offs < 0); \ - *(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | ((-offs) & 0xFFFFFF) ); \ - } else { \ - if(_chk) underrunProtect(24); \ - *(--_nIns) = (NIns)(_t); \ - *(--_nIns) = (NIns)( COND_AL | (0xA<<24) | ((-4)>>2) & 0xFFFFFF ); \ - *(--_nIns) = (NIns)( ((_c)<<28) | (0x51<<20) | (PC<<16) | (PC<<12) | 0x0 ); \ - } \ - asm_output2("%s %p\n", _c == AL ? "jmp" : "b(cnd)", (void*)(_t)); \ - } while(0) +#define isS12(offs) ((-(1<<12)) <= (offs) && (offs) < (1<<12)) #define B_cond(_c,_t) \ B_cond_chk(_c,_t,1) @@ -665,35 +688,12 @@ typedef enum { #define JO(t) do {B_cond(VS,t); asm_output1("bvs 0x%08x",(unsigned int)t); } while(0) #define JNO(t) do {B_cond(VC,t); asm_output1("bvc 0x%08x",(unsigned int)t); } while(0) -// used for testing result of an FP compare +// used for testing result of an FP compare on x86; not used on arm. 
// JP = comparison false -#define JP(t) do {B_cond(EQ,NE,t); asm_output1("jp 0x%08x",t); } while(0) +#define JP(t) do {NanoAssert(0); B_cond(NE,t); asm_output1("jp 0x%08x",t); } while(0) // JNP = comparison true -#define JNP(t) do {B_cond(NE,EQ,t); asm_output1("jnp 0x%08x",t); } while(0) - - -// floating point -#define FNSTSW_AX() do {NanoAssert(0); asm_output("fnstsw_ax"); } while(0) -#define FFREE(r) do {NanoAssert(0); asm_output1("ffree %s",gpn(b)); } while(0) -#define FSTQ(p,d,b) do {NanoAssert(0); asm_output2("fstq %d(%s)",d,gpn(b)); } while(0) -#define FSTPQ(d,b) FSTQ(1,d,b) -//#define FSTPQ(d,b) do {NanoAssert(0); asm_output2("fstpq %d(%s)",d,gpn(b)); } while(0) -#define FCOM(p,d,b) do {NanoAssert(0); asm_output2("fcom %d(%s)",d,gpn(b)); } while(0) -#define FCOMP(d,b) do {NanoAssert(0); asm_output2("fcomp %d(%s)",d,gpn(b)); } while(0) -#define FLDQ(d,b) do {NanoAssert(0); asm_output2("fldq %d(%s)",d,gpn(b)); } while(0) -#define FILDQ(d,b) do {NanoAssert(0); asm_output2("fildq %d(%s)",d,gpn(b)); } while(0) -#define FILD(d,b) do {NanoAssert(0); asm_output2("fild %d(%s)",d,gpn(b)); } while(0) -#define FADD(d,b) do {NanoAssert(0); asm_output2("faddq %d(%s)",d,gpn(b)); } while(0) -#define FSUB(d,b) do {NanoAssert(0); asm_output2("fsubq %d(%s)",d,gpn(b)); } while(0) -#define FSUBR(d,b) do {NanoAssert(0); asm_output2("fsubr %d(%s)",d,gpn(b)); } while(0) -#define FMUL(d,b) do {NanoAssert(0); asm_output2("fmulq %d(%s)",d,gpn(b)); } while(0) -#define FDIV(d,b) do {NanoAssert(0); asm_output2("fdivq %d(%s)",d,gpn(b)); } while(0) -#define FDIVR(d,b) do {NanoAssert(0); asm_output2("fdivr %d(%s)",d,gpn(b)); } while(0) -#define FSTP(r) do {NanoAssert(0); asm_output1("fst st(%d)",r); } while(0) -#define FLD1() do {NanoAssert(0); asm_output("fld1"); } while(0) -#define FLDZ() do {NanoAssert(0); asm_output("fldz"); } while(0) - +#define JNP(t) do {NanoAssert(0); B_cond(EQ,t); asm_output1("jnp 0x%08x",t); } while(0) // MOV(EQ) _r, #1 @@ -758,17 +758,147 @@ } while(0) #define STMIA(_b, _mask) do { \ - underrunProtect(2); \ + underrunProtect(4); \ NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask)); \ *(--_nIns) = (NIns)(COND_AL | (0x8A<<20) | ((_b)<<16) | (_mask)&0xFF); \ - asm_output2("stmia %s!,{%x}", gpn(_b), _mask); \ + asm_output2("stmia %s!,{0x%x}", gpn(_b), _mask); \ } while (0) #define LDMIA(_b, _mask) do { \ - underrunProtect(2); \ + underrunProtect(4); \ NanoAssert(((_mask)&rmask(_b))==0 && isU8(_mask)); \ *(--_nIns) = (NIns)(COND_AL | (0x8B<<20) | ((_b)<<16) | (_mask)&0xFF); \ - asm_output2("ldmia %s!,{%x}", gpn(_b), (_mask)); \ + asm_output2("ldmia %s!,{0x%x}", gpn(_b), (_mask)); \ + } while (0) + +#define MRS(_d) do { \ + underrunProtect(4); \ + *(--_nIns) = (NIns)(COND_AL | (0x10<<20) | (0xF<<16) | ((_d)<<12)); \ + asm_output1("mrs %s", gpn(_d)); \ + } while (0) + +/* + * VFP + */ + +#define FMDRR(_Dm,_Rd,_Rn) do { \ + underrunProtect(4); \ + NanoAssert(IsFpReg(_Dm) && IsGpReg(_Rd) && IsGpReg(_Rn)); \ + *(--_nIns) = (NIns)( COND_AL | (0xC4<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \ + asm_output3("fmdrr %s,%s,%s", gpn(_Dm), gpn(_Rd), gpn(_Rn)); \ + } while (0) + +#define FMRRD(_Rd,_Rn,_Dm) do { \ + underrunProtect(4); \ + NanoAssert(IsGpReg(_Rd) && IsGpReg(_Rn) && IsFpReg(_Dm)); \ + *(--_nIns) = (NIns)( COND_AL | (0xC5<<20) | ((_Rn)<<16) | ((_Rd)<<12) | (0xB1<<4) | (FpRegNum(_Dm)) ); \ + asm_output3("fmrrd %s,%s,%s", gpn(_Rd), gpn(_Rn), gpn(_Dm)); \ + } while (0) + +#define FSTD(_Dd,_Rn,_offs) do { \ + underrunProtect(4); \ + NanoAssert((((_offs) & 3)
== 0) && isS8((_offs) >> 2)); \ + NanoAssert(IsFpReg(_Dd) && !IsFpReg(_Rn)); \ + int negflag = 1<<23; \ + intptr_t offs = (_offs); \ + if (_offs < 0) { \ + negflag = 0<<23; \ + offs = -(offs); \ + } \ + *(--_nIns) = (NIns)( COND_AL | (0xD0<<20) | ((_Rn)<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | negflag | ((offs>>2)&0xff) ); \ + asm_output3("fstd %s,%s(%d)", gpn(_Dd), gpn(_Rn), _offs); \ + } while (0) + +#define FLDD_chk(_Dd,_Rn,_offs,_chk) do { \ + if(_chk) underrunProtect(4); \ + NanoAssert((((_offs) & 3) == 0) && isS8((_offs) >> 2)); \ + NanoAssert(IsFpReg(_Dd) && !IsFpReg(_Rn)); \ + int negflag = 1<<23; \ + intptr_t offs = (_offs); \ + if (_offs < 0) { \ + negflag = 0<<23; \ + offs = -(offs); \ + } \ + *(--_nIns) = (NIns)( COND_AL | (0xD1<<20) | ((_Rn)<<16) | (FpRegNum(_Dd)<<12) | (0xB<<8) | negflag | ((offs>>2)&0xff) ); \ + asm_output3("fldd %s,%s(%d)", gpn(_Dd), gpn(_Rn), _offs); \ + } while (0) +#define FLDD(_Dd,_Rn,_offs) FLDD_chk(_Dd,_Rn,_offs,1) + +#define FSITOD(_Dd,_Sm) do { \ + underrunProtect(4); \ + NanoAssert(IsFpReg(_Dd) && ((_Sm) == FpSingleScratch)); \ + *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2F<<6) | (0<<5) | (0x7) ); \ + asm_output2("fsitod %s,%s", gpn(_Dd), gpn(_Sm)); \ + } while (0) + + +#define FUITOD(_Dd,_Sm) do { \ + underrunProtect(4); \ + NanoAssert(IsFpReg(_Dd) && ((_Sm) == FpSingleScratch)); \ + *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2D<<6) | (0<<5) | (0x7) ); \ + asm_output2("fuitod %s,%s", gpn(_Dd), gpn(_Sm)); \ + } while (0) + +#define FMSR(_Sn,_Rd) do { \ + underrunProtect(4); \ + NanoAssert(((_Sn) == FpSingleScratch) && IsGpReg(_Rd)); \ + *(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \ + asm_output2("fmsr %s,%s", gpn(_Sn), gpn(_Rd)); \ + } while (0) + +#define FNEGD(_Dd,_Dm) do { \ + underrunProtect(4); \ + NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \ + *(--_nIns) = (NIns)( COND_AL | (0xEB1<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \ + asm_output2("fnegd %s,%s", gpn(_Dd), gpn(_Dm)); \ + } while (0) + +#define FADDD(_Dd,_Dn,_Dm) do { \ + underrunProtect(4); \ + NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \ + *(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \ + asm_output3("faddd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \ + } while (0) + +#define FSUBD(_Dd,_Dn,_Dm) do { \ + underrunProtect(4); \ + NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \ + *(--_nIns) = (NIns)( COND_AL | (0xE3<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \ + asm_output3("fsubd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \ + } while (0) + +#define FMULD(_Dd,_Dn,_Dm) do { \ + underrunProtect(4); \ + NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \ + *(--_nIns) = (NIns)( COND_AL | (0xE2<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \ + asm_output3("fmuld %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \ + } while (0) + +#define FDIVD(_Dd,_Dn,_Dm) do { \ + underrunProtect(4); \ + NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dn) && IsFpReg(_Dm)); \ + *(--_nIns) = (NIns)( COND_AL | (0xE8<<20) | (FpRegNum(_Dn)<<16) | (FpRegNum(_Dd)<<12) | (0xB0<<4) | (FpRegNum(_Dm)) ); \ + asm_output3("fdivd %s,%s,%s", gpn(_Dd), gpn(_Dn), gpn(_Dm)); \ + } while (0) + +#define FMSTAT() do { \ + underrunProtect(4); \ + *(--_nIns) = (NIns)( COND_AL | 0x0EF1FA10); \ + asm_output("fmstat"); \ + } while (0) + +#define 
FCMPD(_Dd,_Dm) do { \ + underrunProtect(4); \ + NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \ + *(--_nIns) = (NIns)( COND_AL | (0xEB4<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \ + asm_output2("fcmpd %s,%s", gpn(_Dd), gpn(_Dm)); \ + } while (0) + +#define FCPYD(_Dd,_Dm) do { \ + underrunProtect(4); \ + NanoAssert(IsFpReg(_Dd) && IsFpReg(_Dm)); \ + *(--_nIns) = (NIns)( COND_AL | (0xEB0<<16) | (FpRegNum(_Dd)<<12) | (0xB4<<4) | (FpRegNum(_Dm)) ); \ + asm_output2("fcpyd %s,%s", gpn(_Dd), gpn(_Dm)); \ } while (0) } #endif // __nanojit_NativeThumb__ diff --git a/js/src/nanojit/RegAlloc.h b/js/src/nanojit/RegAlloc.h index c18853ffc58..73d9f2dec02 100644 --- a/js/src/nanojit/RegAlloc.h +++ b/js/src/nanojit/RegAlloc.h @@ -68,7 +68,9 @@ namespace nanojit debug_only( uint32_t count; ) debug_only( RegisterMask managed; ) // bitfield of 0..NJ_MAX_REGISTERS denoting which are under our management - LIns* active[NJ_MAX_REGISTERS]; // active[r] = OP that defines r + // RegisterMask is a 32-bit value, so we can never have more than 32 active. + // hardcode 32 here in case we have non-contiguous register numbers + LIns* active[32]; // active[r] = OP that defines r RegisterMask free; RegisterMask used; diff --git a/js/src/nanojit/nanojit.h b/js/src/nanojit/nanojit.h index 42fd678b19f..ba5771071ed 100644 --- a/js/src/nanojit/nanojit.h +++ b/js/src/nanojit/nanojit.h @@ -151,6 +151,7 @@ namespace nanojit #define isU8(i) ( int32_t(i) == uint8_t(i) ) #define isS16(i) ( int32_t(i) == int16_t(i) ) #define isU16(i) ( int32_t(i) == uint16_t(i) ) +#define isS24(i) ( ((int32_t(i)<<8)>>8) == (i) ) #define alignTo(x,s) ((((uintptr_t)(x)))&~(((uintptr_t)s)-1)) #define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1))
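// A quick sanity check of the isS24 shift trick added above (illustrative
// values, assuming 32-bit int32_t with an arithmetic right shift):
//   isS24(0x007FFFFF)   // true:  (x<<8)>>8 == x            (2^23 - 1 fits)
//   isS24(0x00800000)   // false: (x<<8)>>8 == 0xFF800000   (2^23 does not)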