Backout the last NJ-to-TM merge (TM revisions b44daa2c0503 to b3167f8f9459, inclusive) because it caused lots of oranges. r=me.

This commit is contained in:
Nicholas Nethercote 2010-06-03 21:28:18 -07:00
Parent b89b4107c3
Commit 864fd5ca21
31 changed files: 320 additions and 850 deletions

View file

@ -329,11 +329,9 @@ private:
LIns *assemble_ret(ReturnType rt);
LIns *assemble_guard(bool isCond);
LIns *assemble_guard_xov();
LIns *assemble_jump_jov();
void bad(const string &msg);
void nyi(const string &opname);
void extract_any_label(string &lab, char lab_delim);
void resolve_forward_jumps(string &lab, LIns *ins);
void endFragment();
};
@ -747,6 +745,7 @@ FragmentAssembler::createGuardRecord(LasmSideExit *exit)
return rec;
}
LIns *
FragmentAssembler::assemble_guard(bool isCond)
{
@ -781,29 +780,6 @@ FragmentAssembler::assemble_guard_xov()
return mLir->insGuardXov(mOpcode, ref(mTokens[0]), ref(mTokens[1]), guard);
}
LIns *
FragmentAssembler::assemble_jump_jov()
{
need(3);
LIns *a = ref(mTokens[0]);
LIns *b = ref(mTokens[1]);
string name = mTokens[2];
if (mLabels.find(name) != mLabels.end()) {
LIns *target = ref(name);
return mLir->insBranchJov(mOpcode, a, b, target);
} else {
LIns *ins = mLir->insBranchJov(mOpcode, a, b, NULL);
#ifdef __SUNPRO_CC
mFwdJumps.insert(make_pair<const string, LIns *>(name, ins));
#else
mFwdJumps.insert(make_pair(name, ins));
#endif
return ins;
}
}
void
FragmentAssembler::endFragment()
{
@ -882,22 +858,6 @@ FragmentAssembler::extract_any_label(string &lab, char lab_delim)
}
}
void
FragmentAssembler::resolve_forward_jumps(string &lab, LIns *ins)
{
typedef multimap<string, LIns *> mulmap;
#ifdef __SUNPRO_CC
typedef mulmap::iterator ci;
#else
typedef mulmap::const_iterator ci;
#endif
pair<ci, ci> range = mFwdJumps.equal_range(lab);
for (ci i = range.first; i != range.second; ++i) {
i->second->setTarget(ins);
}
mFwdJumps.erase(lab);
}
void
FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, const LirToken *firstToken)
{
@ -937,7 +897,17 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
/* Save label and do any back-patching of deferred forward-jumps. */
if (!lab.empty()) {
ins = mLir->ins0(LIR_label);
resolve_forward_jumps(lab, ins);
typedef multimap<string, LIns *> mulmap;
#ifdef __SUNPRO_CC
typedef mulmap::iterator ci;
#else
typedef mulmap::const_iterator ci;
#endif
pair<ci, ci> range = mFwdJumps.equal_range(lab);
for (ci i = range.first; i != range.second; ++i) {
i->second->setTarget(ins);
}
mFwdJumps.erase(lab);
lab.clear();
}
extract_any_label(lab, '=');
@ -1131,14 +1101,6 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
ins = assemble_guard_xov();
break;
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
CASE64(LIR_addjovq:)
CASE64(LIR_subjovq:)
ins = assemble_jump_jov();
break;
case LIR_calli:
CASESF(LIR_hcalli:)
case LIR_calld:
@ -1155,12 +1117,6 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
break;
case LIR_label:
ins = mLir->ins0(LIR_label);
if (!lab.empty()) {
resolve_forward_jumps(lab, ins);
}
break;
case LIR_file:
case LIR_line:
case LIR_xtbl:

View file

@ -1,14 +0,0 @@
ptr = allocp 8
a = immi 2147483647
b = immi 0
c = addjovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res
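For orientation, the deleted test above exercises LIR_addjovi: add two 32-bit integers and branch to the 'ovf' label if the addition overflows; 2147483647 + 0 does not overflow, so the expected output (next file) is 2147483647. A rough C++ equivalent, offered only as an illustrative sketch (the __builtin_add_overflow intrinsic is a GCC/Clang feature and is not part of this commit):

#include <cstdint>
#include <cstdio>

int main() {
    int32_t a = 2147483647, b = 0, res;
    if (__builtin_add_overflow(a, b, &res))   // corresponds to the 'ovf' branch in the LIR test
        res = 12345678;
    std::printf("Output is: %d\n", res);      // prints "Output is: 2147483647" here
    return 0;
}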

View file

@ -1 +0,0 @@
Output is: 2147483647

View file

@ -1,14 +0,0 @@
ptr = allocp 8
a = immi 2147483647
b = immi 1
c = addjovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res

View file

@ -1 +0,0 @@
Output is: 12345678

View file

@ -1,15 +0,0 @@
ptr = allocp 8
a = immi 65536
b = immi 32767
c = muljovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res

View file

@ -1 +0,0 @@
Output is: 2147418112

View file

@ -1,14 +0,0 @@
ptr = allocp 8
a = immi 65536
b = immi 32768
c = muljovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res

View file

@ -1 +0,0 @@
Output is: 12345678

View file

@ -1,15 +0,0 @@
ptr = allocp 8
a = immi -2147483647
b = immi 1
c = subjovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res

View file

@ -1 +0,0 @@
Output is: -2147483648

View file

@ -1,14 +0,0 @@
ptr = allocp 8
a = immi -2147483647
b = immi 2
c = subjovi a b ovf
sti c ptr 0
j done
ovf: i = immi 12345678
sti i ptr 0
done: res = ldi ptr 0
reti res

View file

@ -1 +0,0 @@
Output is: 12345678

View file

@ -1 +1 @@
10b865fa5d413277c2e11c211302f75d87029f06
f96ca24c15235dba3282e3649356b6a7acf5d2e2

View file

@ -1216,8 +1216,6 @@ namespace nanojit
return;
}
// Changes to the logic below will likely need to be propagated to Assembler::asm_jov().
countlir_jcc();
LInsp to = ins->getTarget();
LabelState *label = _labels.get(to);
@ -1243,37 +1241,6 @@ namespace nanojit
}
}
void Assembler::asm_jov(LInsp ins, InsList& pending_lives)
{
// The caller is responsible for countlir_* profiling, unlike
// asm_jcc above. The reason for this is that asm_jov may not
// be called if the instruction is dead, and it is our convention
// to count such instructions anyway.
LOpcode op = ins->opcode();
LInsp to = ins->getTarget();
LabelState *label = _labels.get(to);
if (label && label->addr) {
// forward jump to known label. need to merge with label's register state.
unionRegisterState(label->regs);
asm_branch_ov(op, label->addr);
}
else {
// back edge.
handleLoopCarriedExprs(pending_lives);
if (!label) {
// evict all registers, most conservative approach.
evictAllActiveRegs();
_labels.add(to, 0, _allocator);
}
else {
// evict all registers, most conservative approach.
intersectRegisterState(label->regs);
}
NIns *branch = asm_branch_ov(op, 0);
_patches.put(branch,to);
}
}
void Assembler::asm_x(LInsp ins)
{
verbose_only( _thisfrag->nStaticExits++; )
@ -1540,7 +1507,6 @@ namespace nanojit
#if defined NANOJIT_64BIT
case LIR_addq:
case LIR_subq:
case LIR_andq:
case LIR_lshq:
case LIR_rshuq:
@ -1801,7 +1767,7 @@ namespace nanojit
case LIR_addxovi:
case LIR_subxovi:
case LIR_mulxovi:
case LIR_mulxovi: {
verbose_only( _thisfrag->nStaticExits++; )
countlir_xcc();
countlir_alu();
@ -1809,37 +1775,11 @@ namespace nanojit
ins->oprnd2()->setResultLive();
if (ins->isExtant()) {
NIns* exit = asm_exit(ins); // does intersectRegisterState()
asm_branch_ov(op, exit);
asm_branch_xov(op, exit);
asm_arith(ins);
}
break;
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
countlir_jcc();
countlir_alu();
ins->oprnd1()->setResultLive();
ins->oprnd2()->setResultLive();
if (ins->isExtant()) {
asm_jov(ins, pending_lives);
asm_arith(ins);
}
break;
#ifdef NANOJIT_64BIT
case LIR_addjovq:
case LIR_subjovq:
countlir_jcc();
countlir_alu();
ins->oprnd1()->setResultLive();
ins->oprnd2()->setResultLive();
if (ins->isExtant()) {
asm_jov(ins, pending_lives);
asm_qbinop(ins);
}
break;
#endif
}
case LIR_eqd:
case LIR_led:

View file

@ -412,7 +412,6 @@ namespace nanojit
void asm_mmq(Register rd, int dd, Register rs, int ds);
void asm_jmp(LInsp ins, InsList& pending_lives);
void asm_jcc(LInsp ins, InsList& pending_lives);
void asm_jov(LInsp ins, InsList& pending_lives);
void asm_x(LInsp ins);
void asm_xcc(LInsp ins);
NIns* asm_exit(LInsp guard);
@ -459,7 +458,7 @@ namespace nanojit
void asm_call(LInsp);
Register asm_binop_rhs_reg(LInsp ins);
NIns* asm_branch(bool branchOnFalse, LInsp cond, NIns* targ);
NIns* asm_branch_ov(LOpcode op, NIns* targ);
void asm_branch_xov(LOpcode op, NIns* targ);
void asm_switch(LIns* ins, NIns* target);
void asm_jtbl(LIns* ins, NIns** table);
void emitJumpTable(SwitchInfo* si, NIns* target);

View file

@ -428,18 +428,6 @@ extern "C" void sync_instruction_memory(caddr_t v, u_int len);
}
}
#ifdef PERFM
// This method is used only for profiling purposes.
// See CodegenLIR::emitMD() in Tamarin for an example.
size_t CodeAlloc::size(const CodeList* blocks) {
size_t size = 0;
for (const CodeList* b = blocks; b != 0; b = b->next)
size += int((uintptr_t)b->end - (uintptr_t)b);
return size;
}
#endif
size_t CodeAlloc::size() {
return totalAllocated;
}

View file

@ -188,11 +188,6 @@ namespace nanojit
/** add a block previously returned by alloc(), to code */
static void add(CodeList* &code, NIns* start, NIns* end);
/** return the number of bytes in all the code blocks in "code", including block overhead */
#ifdef PERFM
static size_t size(const CodeList* code);
#endif
/** return the total number of bytes held by this CodeAlloc. */
size_t size();

View file

@ -327,11 +327,6 @@ namespace nanojit
return ins2(op, condition, toLabel);
}
LInsp LirBufWriter::insBranchJov(LOpcode op, LInsp a, LInsp b, LInsp toLabel)
{
return ins3(op, a, b, toLabel);
}
LIns* LirBufWriter::insJtbl(LIns* index, uint32_t size)
{
LInsJtbl* insJtbl = (LInsJtbl*) _buf->makeRoom(sizeof(LInsJtbl));
@ -542,14 +537,6 @@ namespace nanojit
if (oprnd->isImmQ())
return insImmI(oprnd->immQlo());
break;
case LIR_i2q:
if (oprnd->isImmI())
return insImmQ(int64_t(int32_t(oprnd->immI())));
break;
case LIR_ui2uq:
if (oprnd->isImmI())
return insImmQ(uint64_t(uint32_t(oprnd->immI())));
break;
#endif
#if NJ_SOFTFLOAT_SUPPORTED
case LIR_dlo2i:
@ -811,11 +798,7 @@ namespace nanojit
return oprnd1;
case LIR_andi:
case LIR_muli:
case LIR_ltui: // unsigned < 0 -> always false
// note that we know that oprnd2 == insImmI(0), so just return that
return oprnd2;
case LIR_gtui: // unsigned >= 0 -> always true
return insImmI(1);
case LIR_eqi:
if (oprnd1->isop(LIR_ori) &&
oprnd1->oprnd2()->isImmI() &&
@ -826,40 +809,19 @@ namespace nanojit
default:
;
}
} else if (c == -1) {
} else if (c == -1 || (c == 1 && oprnd1->isCmp())) {
switch (v) {
case LIR_ori:
// x | -1 = -1
// x | -1 = -1, cmp | 1 = 1
return oprnd2;
case LIR_andi:
// x & -1 = x
// x & -1 = x, cmp & 1 = cmp
return oprnd1;
case LIR_gtui:
// u32 > 0xffffffff -> always false
return insImmI(0);
default:
;
}
} else if (c == 1) {
if (oprnd1->isCmp()) {
switch (v) {
case LIR_ori:
// cmp | 1 = 1
// note that we know that oprnd2 == insImmI(1), so just return that
return oprnd2;
case LIR_andi:
// cmp & 1 = cmp
return oprnd1;
case LIR_gtui:
// (0|1) > 1 -> always false
return insImmI(0);
default:
;
}
} else if (v == LIR_muli) {
// x * 1 = x
return oprnd1;
}
} else if (c == 1 && v == LIR_muli) {
return oprnd1;
}
}
@ -925,31 +887,18 @@ namespace nanojit
return out->insGuard(v, c, gr);
}
// Simplify operator if possible. Always return NULL if overflow is possible.
LIns* ExprFilter::simplifyOverflowArith(LOpcode op, LInsp *opnd1, LInsp *opnd2)
LIns* ExprFilter::insGuardXov(LOpcode op, LInsp oprnd1, LInsp oprnd2, GuardRecord *gr)
{
LInsp oprnd1 = *opnd1;
LInsp oprnd2 = *opnd2;
if (oprnd1->isImmI() && oprnd2->isImmI()) {
int32_t c1 = oprnd1->immI();
int32_t c2 = oprnd2->immI();
double d = 0.0;
// The code below attempts to perform the operation while
// detecting overflow. For multiplication, we may unnecessarily
// infer a possible overflow due to the insufficient integer
// range of the double type.
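// Worked example (illustrative, using the constants from the deleted
// jov tests above): c1 = 65536, c2 = 32767 gives d = 2147418112.0 and
// int32_t(d) == d, so the operation folds to a constant; c1 = 65536,
// c2 = 32768 gives d = 2147483648.0, which lies outside the int32_t
// range, so the r == d check fails and NULL is returned to signal a
// possible overflow.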
switch (op) {
case LIR_addjovi:
case LIR_addxovi: d = double(c1) + double(c2); break;
case LIR_subjovi:
case LIR_subxovi: d = double(c1) - double(c2); break;
case LIR_muljovi:
case LIR_mulxovi: d = double(c1) * double(c2); break;
default: NanoAssert(0); break;
default: NanoAssert(0); break;
}
int32_t r = int32_t(d);
if (r == d)
@ -957,20 +906,14 @@ namespace nanojit
} else if (oprnd1->isImmI() && !oprnd2->isImmI()) {
switch (op) {
case LIR_addjovi:
case LIR_addxovi:
case LIR_muljovi:
case LIR_mulxovi: {
// swap operands, moving const to rhs
// move const to rhs
LIns* t = oprnd2;
oprnd2 = oprnd1;
oprnd1 = t;
// swap actual arguments in caller as well
*opnd1 = oprnd1;
*opnd2 = oprnd2;
break;
}
case LIR_subjovi:
case LIR_subxovi:
break;
default:
@ -982,31 +925,19 @@ namespace nanojit
int c = oprnd2->immI();
if (c == 0) {
switch (op) {
case LIR_addjovi:
case LIR_addxovi:
case LIR_subjovi:
case LIR_subxovi:
return oprnd1;
case LIR_muljovi:
case LIR_mulxovi:
return oprnd2;
default:
;
}
} else if (c == 1 && (op == LIR_muljovi || op == LIR_mulxovi)) {
} else if (c == 1 && op == LIR_mulxovi) {
return oprnd1;
}
}
return NULL;
}
LIns* ExprFilter::insGuardXov(LOpcode op, LInsp oprnd1, LInsp oprnd2, GuardRecord *gr)
{
LIns* simplified = simplifyOverflowArith(op, &oprnd1, &oprnd2);
if (simplified)
return simplified;
return out->insGuardXov(op, oprnd1, oprnd2, gr);
}
@ -1038,15 +969,6 @@ namespace nanojit
return out->insBranch(v, c, t);
}
LIns* ExprFilter::insBranchJov(LOpcode op, LInsp oprnd1, LInsp oprnd2, LIns* target)
{
LIns* simplified = simplifyOverflowArith(op, &oprnd1, &oprnd2);
if (simplified)
return simplified;
return out->insBranchJov(op, oprnd1, oprnd2, target);
}
LIns* ExprFilter::insLoad(LOpcode op, LIns* base, int32_t off, AccSet accSet) {
if (base->isImmP() && !isS8(off)) {
// if the effective address is constant, then transform:
@ -1394,18 +1316,12 @@ namespace nanojit
case LIR_addxovi:
case LIR_subxovi:
case LIR_mulxovi:
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
CASE86(LIR_divi:)
case LIR_addd:
case LIR_subd:
case LIR_muld:
case LIR_divd:
CASE64(LIR_addq:)
CASE64(LIR_subq:)
CASE64(LIR_addjovq:)
CASE64(LIR_subjovq:)
case LIR_andi:
case LIR_ori:
case LIR_xori:
@ -1786,19 +1702,8 @@ namespace nanojit
formatGuardXov(buf, i);
break;
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
CASE64(LIR_addjovq:)
CASE64(LIR_subjovq:)
VMPI_snprintf(s, n, "%s = %s %s, %s ; ovf -> %s", formatRef(&b1, i), lirNames[op],
formatRef(&b2, i->oprnd1()),
formatRef(&b3, i->oprnd2()),
i->oprnd3() ? formatRef(&b4, i->oprnd3()) : "unpatched");
break;
case LIR_addi: CASE64(LIR_addq:)
case LIR_subi: CASE64(LIR_subq:)
case LIR_subi:
case LIR_muli:
CASE86(LIR_divi:)
case LIR_addd:
@ -1810,7 +1715,7 @@ namespace nanojit
case LIR_xori: CASE64(LIR_xorq:)
case LIR_lshi: CASE64(LIR_lshq:)
case LIR_rshi: CASE64(LIR_rshq:)
case LIR_rshui: CASE64(LIR_rshuq:)
case LIR_rshui: CASE64(LIR_rshuq:)
case LIR_eqi: CASE64(LIR_eqq:)
case LIR_lti: CASE64(LIR_ltq:)
case LIR_lei: CASE64(LIR_leq:)
@ -2496,8 +2401,6 @@ namespace nanojit
return ins;
}
// There is no CseFilter::insBranchJov(), as LIR_*jov* are not CSEable.
LInsp CseFilter::insCall(const CallInfo *ci, LInsp args[])
{
LInsp ins;
@ -3023,7 +2926,6 @@ namespace nanojit
case LIR_orq:
case LIR_xorq:
case LIR_addq:
case LIR_subq:
case LIR_eqq:
case LIR_ltq:
case LIR_gtq:
@ -3247,39 +3149,6 @@ namespace nanojit
return out->insBranch(op, cond, to);
}
LIns* ValidateWriter::insBranchJov(LOpcode op, LIns* a, LIns* b, LIns* to)
{
int nArgs = 2;
LTy formals[2];
LIns* args[2] = { a, b };
switch (op) {
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
formals[0] = LTy_I;
formals[1] = LTy_I;
break;
#ifdef NANOJIT_64BIT
case LIR_addjovq:
case LIR_subjovq:
formals[0] = LTy_Q;
formals[1] = LTy_Q;
break;
#endif
default:
NanoAssert(0);
}
// We check that target is a label in ValidateReader because it may
// not have been set here.
typeCheckArgs(op, nArgs, formals, args);
return out->insBranchJov(op, a, b, to);
}
LIns* ValidateWriter::insAlloc(int32_t size)
{
return out->insAlloc(size);
@ -3311,15 +3180,6 @@ namespace nanojit
case LIR_j:
NanoAssert(ins->getTarget() && ins->oprnd2()->isop(LIR_label));
break;
case LIR_addjovi:
case LIR_subjovi:
case LIR_muljovi:
CASE64(LIR_addjovq:)
CASE64(LIR_subjovq:)
NanoAssert(ins->getTarget() && ins->oprnd3()->isop(LIR_label));
break;
case LIR_jtbl: {
uint32_t tableSize = ins->getTableSize();
NanoAssert(tableSize > 0);

View file

@ -62,41 +62,39 @@ namespace nanojit
// Pointer-sized synonyms.
LIR_paramp = PTR_SIZE(LIR_parami, LIR_paramq),
LIR_paramp = PTR_SIZE(LIR_parami, LIR_paramq),
LIR_retp = PTR_SIZE(LIR_reti, LIR_retq),
LIR_retp = PTR_SIZE(LIR_reti, LIR_retq),
LIR_livep = PTR_SIZE(LIR_livei, LIR_liveq),
LIR_livep = PTR_SIZE(LIR_livei, LIR_liveq),
LIR_ldp = PTR_SIZE(LIR_ldi, LIR_ldq),
LIR_ldp = PTR_SIZE(LIR_ldi, LIR_ldq),
LIR_stp = PTR_SIZE(LIR_sti, LIR_stq),
LIR_stp = PTR_SIZE(LIR_sti, LIR_stq),
LIR_callp = PTR_SIZE(LIR_calli, LIR_callq),
LIR_callp = PTR_SIZE(LIR_calli, LIR_callq),
LIR_eqp = PTR_SIZE(LIR_eqi, LIR_eqq),
LIR_ltp = PTR_SIZE(LIR_lti, LIR_ltq),
LIR_gtp = PTR_SIZE(LIR_gti, LIR_gtq),
LIR_lep = PTR_SIZE(LIR_lei, LIR_leq),
LIR_gep = PTR_SIZE(LIR_gei, LIR_geq),
LIR_ltup = PTR_SIZE(LIR_ltui, LIR_ltuq),
LIR_gtup = PTR_SIZE(LIR_gtui, LIR_gtuq),
LIR_leup = PTR_SIZE(LIR_leui, LIR_leuq),
LIR_geup = PTR_SIZE(LIR_geui, LIR_geuq),
LIR_eqp = PTR_SIZE(LIR_eqi, LIR_eqq),
LIR_ltp = PTR_SIZE(LIR_lti, LIR_ltq),
LIR_gtp = PTR_SIZE(LIR_gti, LIR_gtq),
LIR_lep = PTR_SIZE(LIR_lei, LIR_leq),
LIR_gep = PTR_SIZE(LIR_gei, LIR_geq),
LIR_ltup = PTR_SIZE(LIR_ltui, LIR_ltuq),
LIR_gtup = PTR_SIZE(LIR_gtui, LIR_gtuq),
LIR_leup = PTR_SIZE(LIR_leui, LIR_leuq),
LIR_geup = PTR_SIZE(LIR_geui, LIR_geuq),
LIR_addp = PTR_SIZE(LIR_addi, LIR_addq),
LIR_subp = PTR_SIZE(LIR_subi, LIR_subq),
LIR_addjovp = PTR_SIZE(LIR_addjovi, LIR_addjovq),
LIR_addp = PTR_SIZE(LIR_addi, LIR_addq),
LIR_andp = PTR_SIZE(LIR_andi, LIR_andq),
LIR_orp = PTR_SIZE(LIR_ori, LIR_orq),
LIR_xorp = PTR_SIZE(LIR_xori, LIR_xorq),
LIR_andp = PTR_SIZE(LIR_andi, LIR_andq),
LIR_orp = PTR_SIZE(LIR_ori, LIR_orq),
LIR_xorp = PTR_SIZE(LIR_xori, LIR_xorq),
LIR_lshp = PTR_SIZE(LIR_lshi, LIR_lshq),
LIR_rshp = PTR_SIZE(LIR_rshi, LIR_rshq),
LIR_rshup = PTR_SIZE(LIR_rshui, LIR_rshuq),
LIR_lshp = PTR_SIZE(LIR_lshi, LIR_lshq),
LIR_rshp = PTR_SIZE(LIR_rshi, LIR_rshq),
LIR_rshup = PTR_SIZE(LIR_rshui, LIR_rshuq),
LIR_cmovp = PTR_SIZE(LIR_cmovi, LIR_cmovq)
LIR_cmovp = PTR_SIZE(LIR_cmovi, LIR_cmovq)
};
// 32-bit integer comparisons must be contiguous, as must 64-bit integer
@ -323,7 +321,7 @@ namespace nanojit
uintptr_t _address;
uint32_t _typesig:27; // 9 3-bit fields indicating arg type, by ARGTYPE above (including ret type): a1 a2 a3 a4 a5 ret
AbiKind _abi:3;
uint32_t _isPure:1; // _isPure=1 means no side-effects, result only depends on args
uint8_t _isPure:1; // _isPure=1 means no side-effects, result only depends on args
AccSet _storeAccSet; // access regions stored by the function
verbose_only ( const char* _name; )
@ -871,13 +869,6 @@ namespace nanojit
isop(LIR_xbarrier) || isop(LIR_xtbl) ||
isop(LIR_addxovi) || isop(LIR_subxovi) || isop(LIR_mulxovi);
}
bool isJov() const {
return
#ifdef NANOJIT_64BIT
isop(LIR_addjovq) || isop(LIR_subjovq) ||
#endif
isop(LIR_addjovi) || isop(LIR_subjovi) || isop(LIR_muljovi);
}
// True if the instruction is a 32-bit integer immediate.
bool isImmI() const {
return isop(LIR_immi);
@ -920,7 +911,7 @@ namespace nanojit
}
bool isBranch() const {
return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl) || isJov();
return isop(LIR_jt) || isop(LIR_jf) || isop(LIR_j) || isop(LIR_jtbl);
}
LTy retType() const {
@ -1013,7 +1004,7 @@ namespace nanojit
LIns* getLIns() { return &ins; };
};
// 3-operand form. Used for conditional moves, jov branches, and xov guards.
// 3-operand form. Used for conditional moves and xov guards.
class LInsOp3
{
private:
@ -1277,19 +1268,13 @@ namespace nanojit
LIns* LIns::getTarget() const {
NanoAssert(isBranch() && !isop(LIR_jtbl));
if (isJov())
return oprnd3();
else
return oprnd2();
return oprnd2();
}
void LIns::setTarget(LIns* label) {
NanoAssert(label && label->isop(LIR_label));
NanoAssert(isBranch() && !isop(LIR_jtbl));
if (isJov())
toLInsOp3()->oprnd_3 = label;
else
toLInsOp2()->oprnd_2 = label;
toLInsOp2()->oprnd_2 = label;
}
LIns* LIns::getTarget(uint32_t index) const {
@ -1445,12 +1430,9 @@ namespace nanojit
virtual LInsp insGuardXov(LOpcode v, LIns *a, LIns* b, GuardRecord *gr) {
return out->insGuardXov(v, a, b, gr);
}
virtual LInsp insBranch(LOpcode v, LIns* condition, LIns* to) {
virtual LInsp insBranch(LOpcode v, LInsp condition, LInsp to) {
return out->insBranch(v, condition, to);
}
virtual LInsp insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to) {
return out->insBranchJov(v, a, b, to);
}
// arg: 0=first, 1=second, ...
// kind: 0=arg 1=saved-reg
virtual LInsp insParam(int32_t arg, int32_t kind) {
@ -1717,10 +1699,6 @@ namespace nanojit
return add_flush(out->insBranch(v, condition, to));
}
LIns* insBranchJov(LOpcode v, LInsp a, LInsp b, LInsp to) {
return add_flush(out->insBranchJov(v, a, b, to));
}
LIns* insJtbl(LIns* index, uint32_t size) {
return add_flush(out->insJtbl(index, size));
}
@ -1778,13 +1756,10 @@ namespace nanojit
LIns* ins1(LOpcode v, LIns* a);
LIns* ins2(LOpcode v, LIns* a, LIns* b);
LIns* ins3(LOpcode v, LIns* a, LIns* b, LIns* c);
LIns* insGuard(LOpcode, LIns* cond, GuardRecord *);
LIns* insGuard(LOpcode, LIns *cond, GuardRecord *);
LIns* insGuardXov(LOpcode, LIns* a, LIns* b, GuardRecord *);
LIns* insBranch(LOpcode, LIns* cond, LIns* target);
LIns* insBranchJov(LOpcode, LIns* a, LIns* b, LIns* target);
LIns* insBranch(LOpcode, LIns *cond, LIns *target);
LIns* insLoad(LOpcode op, LInsp base, int32_t off, AccSet accSet);
private:
LIns* simplifyOverflowArith(LOpcode op, LInsp *opnd1, LInsp *opnd2);
};
class CseFilter: public LirWriter
@ -1979,7 +1954,6 @@ namespace nanojit
LInsp insGuard(LOpcode op, LInsp cond, GuardRecord *gr);
LInsp insGuardXov(LOpcode op, LInsp a, LInsp b, GuardRecord *gr);
LInsp insBranch(LOpcode v, LInsp condition, LInsp to);
LInsp insBranchJov(LOpcode v, LInsp a, LInsp b, LInsp to);
LInsp insAlloc(int32_t size);
LInsp insJtbl(LIns* index, uint32_t size);
};
@ -2127,7 +2101,6 @@ namespace nanojit
LIns* insGuard(LOpcode v, LIns *c, GuardRecord *gr);
LIns* insGuardXov(LOpcode v, LIns* a, LIns* b, GuardRecord* gr);
LIns* insBranch(LOpcode v, LIns* condition, LIns* to);
LIns* insBranchJov(LOpcode v, LIns* a, LIns* b, LIns* to);
LIns* insAlloc(int32_t size);
LIns* insJtbl(LIns* index, uint32_t size);
};

View file

@ -270,30 +270,30 @@ OP___(rshi, 90, Op2, I, 1) // right shift int (>>)
OP___(rshui, 91, Op2, I, 1) // right shift unsigned int (>>>)
OP_64(addq, 92, Op2, Q, 1) // add quad
OP_64(subq, 93, Op2, Q, 1) // subtract quad
OP_64(andq, 94, Op2, Q, 1) // bitwise-AND quad
OP_64(orq, 95, Op2, Q, 1) // bitwise-OR quad
OP_64(xorq, 96, Op2, Q, 1) // bitwise-XOR quad
OP_64(andq, 93, Op2, Q, 1) // bitwise-AND quad
OP_64(orq, 94, Op2, Q, 1) // bitwise-OR quad
OP_64(xorq, 95, Op2, Q, 1) // bitwise-XOR quad
OP_64(lshq, 97, Op2, Q, 1) // left shift quad; 2nd operand is an int
OP_64(rshq, 98, Op2, Q, 1) // right shift quad; 2nd operand is an int
OP_64(rshuq, 99, Op2, Q, 1) // right shift unsigned quad; 2nd operand is an int
OP_64(lshq, 96, Op2, Q, 1) // left shift quad; 2nd operand is an int
OP_64(rshq, 97, Op2, Q, 1) // right shift quad; 2nd operand is an int
OP_64(rshuq, 98, Op2, Q, 1) // right shift unsigned quad; 2nd operand is an int
OP___(negd, 100, Op1, D, 1) // negate double
OP___(addd, 101, Op2, D, 1) // add double
OP___(subd, 102, Op2, D, 1) // subtract double
OP___(muld, 103, Op2, D, 1) // multiply double
OP___(divd, 104, Op2, D, 1) // divide double
OP___(negd, 99, Op1, D, 1) // negate double
OP___(addd, 100, Op2, D, 1) // add double
OP___(subd, 101, Op2, D, 1) // subtract double
OP___(muld, 102, Op2, D, 1) // multiply double
OP___(divd, 103, Op2, D, 1) // divide double
// LIR_modd is just a place-holder opcode, ie. the back-ends cannot generate
// code for it. It's used in TraceMonkey briefly but is always demoted to a
// LIR_modl or converted to a function call before Nanojit has to do anything
// serious with it.
OP___(modd, 105, Op2, D, 1) // modulo double
OP___(modd, 104, Op2, D, 1) // modulo double
OP___(cmovi, 106, Op3, I, 1) // conditional move int
OP_64(cmovq, 107, Op3, Q, 1) // conditional move quad
OP___(cmovi, 105, Op3, I, 1) // conditional move int
OP_64(cmovq, 106, Op3, Q, 1) // conditional move quad
OP_UN(107)
OP_UN(108)
//---------------------------------------------------------------------------
@ -307,36 +307,35 @@ OP___(i2d, 112, Op1, D, 1) // convert int to double
OP___(ui2d, 113, Op1, D, 1) // convert unsigned int to double
OP___(d2i, 114, Op1, I, 1) // convert double to int (no exceptions raised, platform rounding rules)
OP_UN(115)
OP_UN(116)
//---------------------------------------------------------------------------
// Overflow arithmetic
//---------------------------------------------------------------------------
// These all exit if overflow occurred. The result is valid on either path.
OP___(addxovi, 115, Op3, I, 1) // add int and exit on overflow
OP___(subxovi, 116, Op3, I, 1) // subtract int and exit on overflow
OP___(mulxovi, 117, Op3, I, 1) // multiply int and exit on overflow
// These all exit if overflow occurred. The result is valid on either path.
OP___(addxovi, 117, Op3, I, 1) // add int and exit on overflow
OP___(subxovi, 118, Op3, I, 1) // sub int and exit on overflow
OP___(mulxovi, 119, Op3, I, 1) // multiply int and exit on overflow
// These all branch if overflow occurred. The result is valid on either path.
OP___(addjovi, 118, Op3, I, 1) // add int and branch on overflow
OP___(subjovi, 119, Op3, I, 1) // subtract int and branch on overflow
OP___(muljovi, 120, Op3, I, 1) // multiply int and branch on overflow
OP_64(addjovq, 121, Op3, Q, 1) // add quad and branch on overflow
OP_64(subjovq, 122, Op3, Q, 1) // subtract quad and branch on overflow
OP_UN(120)
//---------------------------------------------------------------------------
// SoftFloat
//---------------------------------------------------------------------------
OP_SF(dlo2i, 123, Op1, I, 1) // get the low 32 bits of a double as an int
OP_SF(dhi2i, 124, Op1, I, 1) // get the high 32 bits of a double as an int
OP_SF(ii2d, 125, Op2, D, 1) // join two ints (1st arg is low bits, 2nd is high)
OP_SF(dlo2i, 121, Op1, I, 1) // get the low 32 bits of a double as an int
OP_SF(dhi2i, 122, Op1, I, 1) // get the high 32 bits of a double as an int
OP_SF(ii2d, 123, Op2, D, 1) // join two ints (1st arg is low bits, 2nd is high)
// LIR_hcalli is a hack that's only used on 32-bit platforms that use
// SoftFloat. Its operand is always a LIR_calli, but one that specifies a
// function that returns a double. It indicates that the double result is
// returned via two 32-bit integer registers. The result is always used as the
// second operand of a LIR_ii2d.
OP_SF(hcalli, 126, Op1, I, 1)
OP_SF(hcalli, 124, Op1, I, 1)
OP_UN(125)
OP_UN(126)
OP_UN(127)
#undef OP_UN

View file

@ -23,7 +23,6 @@
* Contributor(s):
* Adobe AS3 Team
* Vladimir Vukicevic <vladimir@pobox.com>
* Jacob Bramley <Jacob.Bramley@arm.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -270,13 +269,6 @@ Assembler::asm_add_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
NanoAssert(IsGpReg(rn));
NanoAssert((stat & 1) == stat);
// As a special case to simplify code elsewhere, emit nothing where we
// don't want to update the flags (stat == 0), the second operand is 0 and
// (rd == rn). Such instructions are effectively NOPs.
if ((imm == 0) && (stat == 0) && (rd == rn)) {
return;
}
// Try to encode the value directly as an operand 2 immediate value, then
// fall back to loading the value into a register.
if (encOp2Imm(imm, &op2imm)) {
@ -317,13 +309,6 @@ Assembler::asm_sub_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
NanoAssert(IsGpReg(rd));
NanoAssert(IsGpReg(rn));
NanoAssert((stat & 1) == stat);
// As a special case to simplify code elsewhere, emit nothing where we
// don't want to update the flags (stat == 0), the second operand is 0 and
// (rd == rn). Such instructions are effectively NOPs.
if ((imm == 0) && (stat == 0) && (rd == rn)) {
return;
}
// Try to encode the value directly as an operand 2 immediate value, then
// fall back to loading the value into a register.
@ -786,7 +771,7 @@ Assembler::asm_stkarg(LInsp arg, int stkd)
if (!_config.arm_vfp || !isF64) {
NanoAssert(IsGpReg(rr));
asm_str(rr, SP, stkd);
STR(rr, SP, stkd);
} else {
// According to the comments in asm_arg_64, LIR_ii2d
// can have a 64-bit argument even if VFP is disabled. However,
@ -809,7 +794,7 @@ Assembler::asm_stkarg(LInsp arg, int stkd)
// memory for it and then copy it onto the stack.
int d = findMemFor(arg);
if (!isF64) {
asm_str(IP, SP, stkd);
STR(IP, SP, stkd);
if (arg->isop(LIR_allocp)) {
asm_add_imm(IP, FP, d);
} else {
@ -821,9 +806,9 @@ Assembler::asm_stkarg(LInsp arg, int stkd)
NanoAssert((stkd & 7) == 0);
#endif
asm_str(IP, SP, stkd+4);
STR(IP, SP, stkd+4);
LDR(IP, FP, d+4);
asm_str(IP, SP, stkd);
STR(IP, SP, stkd);
LDR(IP, FP, d);
}
}
@ -891,8 +876,8 @@ Assembler::asm_call(LInsp ins)
// The result doesn't have a register allocated, so store the
// result (in R0,R1) directly to its stack slot.
asm_str(R0, FP, d+0);
asm_str(R1, FP, d+4);
STR(R0, FP, d+0);
STR(R1, FP, d+4);
} else {
NanoAssert(IsFpReg(rr));
@ -1199,11 +1184,11 @@ Assembler::asm_qjoin(LIns *ins)
LIns* hi = ins->oprnd2();
Register r = findRegFor(hi, GpRegs);
asm_str(r, FP, d+4);
STR(r, FP, d+4);
// okay if r gets recycled.
r = findRegFor(lo, GpRegs);
asm_str(r, FP, d);
STR(r, FP, d);
deprecated_freeRsrcOf(ins); // if we had a reg in use, emit a ST to flush it to mem
}
@ -1272,11 +1257,6 @@ Assembler::canRemat(LIns* ins)
void
Assembler::asm_restore(LInsp i, Register r)
{
// The following registers should never be restored:
NanoAssert(r != PC);
NanoAssert(r != IP);
NanoAssert(r != SP);
if (i->isop(LIR_allocp)) {
asm_add_imm(r, FP, deprecated_disp(i));
} else if (i->isImmI()) {
@ -1328,10 +1308,8 @@ Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
(void) pop;
(void) quad;
NanoAssert(d);
// The following registers should never be spilled:
NanoAssert(rr != PC);
NanoAssert(rr != IP);
NanoAssert(rr != SP);
// fixme: bug 556175 this code doesn't appear to handle
// values of d outside the 12-bit range.
if (_config.arm_vfp && IsFpReg(rr)) {
if (isS8(d >> 2)) {
FSTD(rr, FP, d);
@ -1341,20 +1319,17 @@ Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
}
} else {
NIns merged;
// asm_str always succeeds, but returns '1' to indicate that it emitted
// a simple, easy-to-merge STR.
if (asm_str(rr, FP, d)) {
// See if we can merge this store into an immediately following
// one, by creating or extending a STM instruction.
if (/* is it safe to poke _nIns[1] ? */
does_next_instruction_exist(_nIns, codeStart, codeEnd,
exitStart, exitEnd)
&& /* can we merge _nIns[0] into _nIns[1] ? */
do_peep_2_1(&merged, _nIns[0], _nIns[1])) {
_nIns[1] = merged;
_nIns++;
verbose_only( asm_output("merge next into STMDB"); )
}
STR(rr, FP, d);
// See if we can merge this store into an immediately following
// one, by creating or extending a STM instruction.
if (/* is it safe to poke _nIns[1] ? */
does_next_instruction_exist(_nIns, codeStart, codeEnd,
exitStart, exitEnd)
&& /* can we merge _nIns[0] into _nIns[1] ? */
do_peep_2_1(&merged, _nIns[0], _nIns[1])) {
_nIns[1] = merged;
_nIns++;
verbose_only( asm_output("merge next into STMDB"); )
}
}
}
@ -1457,9 +1432,9 @@ Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
underrunProtect(LD32_size*2 + 8);
// XXX use another reg, get rid of dependency
asm_str(IP, rb, dr);
STR(IP, rb, dr);
asm_ld_imm(IP, value->immDlo(), false);
asm_str(IP, rb, dr+4);
STR(IP, rb, dr+4);
asm_ld_imm(IP, value->immDhi(), false);
return;
@ -1506,9 +1481,9 @@ Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
underrunProtect(LD32_size*2 + 8);
// XXX use another reg, get rid of dependency
asm_str(IP, rb, dr);
STR(IP, rb, dr);
asm_ld_imm(IP, value->immDlo(), false);
asm_str(IP, rb, dr+4);
STR(IP, rb, dr+4);
asm_ld_imm(IP, value->immDhi(), false);
return;
@ -1580,6 +1555,8 @@ Assembler::asm_immd_nochk(Register rr, int32_t immDlo, int32_t immDhi)
void
Assembler::asm_immd(LInsp ins)
{
//asm_output(">>> asm_immd");
int d = deprecated_disp(ins);
Register rr = ins->deprecated_getReg();
@ -1593,12 +1570,17 @@ Assembler::asm_immd(LInsp ins)
asm_immd_nochk(rr, ins->immDlo(), ins->immDhi());
} else {
NanoAssert(d);
// asm_mmq might spill a reg, so don't call it;
// instead do the equivalent directly.
//asm_mmq(FP, d, PC, -16);
asm_str(IP, FP, d+4);
STR(IP, FP, d+4);
asm_ld_imm(IP, ins->immDhi());
asm_str(IP, FP, d);
STR(IP, FP, d);
asm_ld_imm(IP, ins->immDlo());
}
//asm_output("<<< asm_immd");
}
void
@ -1656,9 +1638,6 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
// STR ip, [rd, #dd]
// LDR ip, [rs, #(ds+4)]
// STR ip, [rd, #(dd+4)]
//
// Note that if rs+4 or rd+4 is outside the LDR or STR range, extra
// instructions will be emitted as required to make the code work.
// Ensure that the PC is not used as either base register. The instruction
// generation macros call underrunProtect, and a side effect of this is
@ -1667,50 +1646,18 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
NanoAssert(rs != PC);
NanoAssert(rd != PC);
// We use IP as a swap register, so check that it isn't used for something
// else by the caller.
NanoAssert(rs != IP);
NanoAssert(rd != IP);
// Find the list of free registers from the allocator's free list and the
// GpRegs mask. This excludes any floating-point registers that may be on
// the free list.
RegisterMask free = _allocator.free & AllowableFlagRegs;
// Ensure that ds and dd are within the +/-4095 offset range of STR and
// LDR. If either is out of range, adjust and modify rd or rs so that the
// load works correctly.
// The modification here is performed after the LDR/STR block (because code
// is emitted backwards), so this one is the reverse operation.
int32_t dd_adj = 0;
int32_t ds_adj = 0;
if ((dd+4) >= 0x1000) {
dd_adj = ((dd+4) & ~0xfff);
} else if (dd <= -0x1000) {
dd_adj = -((-dd) & ~0xfff);
}
if ((ds+4) >= 0x1000) {
ds_adj = ((ds+4) & ~0xfff);
} else if (ds <= -0x1000) {
ds_adj = -((-ds) & ~0xfff);
}
// These will emit no code if d*_adj is 0.
asm_sub_imm(rd, rd, dd_adj);
asm_sub_imm(rs, rs, ds_adj);
ds -= ds_adj;
dd -= dd_adj;
if (free) {
// There is at least one register on the free list, so grab one for
// temporary use. There is no need to allocate it explicitly because
// we won't need it after this function returns.
// The CountLeadingZeroes utility can be used to quickly find a set bit
// in the free mask.
// The CountLeadingZeroes can be used to quickly find a set bit in the
// free mask.
Register rr = (Register)(31-CountLeadingZeroes(free));
// Note: Not every register in GpRegs is usable here. However, these
@ -1722,6 +1669,7 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
NanoAssert((free & rmask(FP)) == 0);
// Emit the actual instruction sequence.
STR(IP, rd, dd+4);
STR(rr, rd, dd);
LDR(IP, rs, ds+4);
@ -1733,10 +1681,6 @@ Assembler::asm_mmq(Register rd, int dd, Register rs, int ds)
STR(IP, rd, dd);
LDR(IP, rs, ds);
}
// Re-adjust the base registers. (These will emit no code if d*_adj is 0.)
asm_add_imm(rd, rd, dd_adj);
asm_add_imm(rs, rs, ds_adj);
}
// Increment the 32-bit profiling counter at pCtr, without
@ -1984,72 +1928,6 @@ Assembler::asm_ldr_chk(Register d, Register b, int32_t off, bool chk)
asm_output("ldr %s, [%s, #%d]",gpn(d),gpn(b),(off));
}
// Emit a store, using a register base and an arbitrary immediate offset. This
// behaves like a STR instruction, but doesn't care about the offset range, and
// emits one of the following instruction sequences:
//
// ----
// STR rt, [rr, #offset]
// ----
// asm_add_imm ip, rr, #(offset & ~0xfff)
// STR rt, [ip, #(offset & 0xfff)]
// ----
// # This one's fairly horrible, but should be rare.
// asm_add_imm rr, rr, #(offset & ~0xfff)
// STR rt, [ip, #(offset & 0xfff)]
// asm_sub_imm rr, rr, #(offset & ~0xfff)
// ----
// SUB-based variants (for negative offsets) are also supported.
// ----
//
// The return value is 1 if a simple STR could be emitted, or 0 if the required
// sequence was more complex.
int32_t
Assembler::asm_str(Register rt, Register rr, int32_t offset)
{
// We can't do PC-relative stores, and we can't store the PC value, because
// we use macros (such as STR) which call underrunProtect, and this can
// push _nIns to a new page, thus making any PC value impractical to
// predict.
NanoAssert(rr != PC);
NanoAssert(rt != PC);
if (offset >= 0) {
// The offset is positive, so use ADD (and variants).
if (isU12(offset)) {
STR(rt, rr, offset);
return 1;
}
if (rt != IP) {
STR(rt, IP, offset & 0xfff);
asm_add_imm(IP, rr, offset & ~0xfff);
} else {
int32_t adj = offset & ~0xfff;
asm_sub_imm(rr, rr, adj);
STR(rt, rr, offset-adj);
asm_add_imm(rr, rr, adj);
}
} else {
// The offset is negative, so use SUB (and variants).
if (isU12(-offset)) {
STR(rt, rr, offset);
return 1;
}
if (rt != IP) {
STR(rt, IP, -((-offset) & 0xfff));
asm_sub_imm(IP, rr, (-offset) & ~0xfff);
} else {
int32_t adj = -((-offset) & 0xfff);
asm_add_imm(IP, rr, adj);
STR(rt, rr, offset-adj);
asm_sub_imm(rr, rr, adj);
}
}
return 0;
}
// Emit the code required to load an immediate value (imm) into general-purpose
// register d. Optimal (MOV-based) mechanisms are used if the immediate can be
// encoded using ARM's operand 2 encoding. Otherwise, a slot is used on the
@ -2391,7 +2269,7 @@ Assembler::asm_branch(bool branchOnFalse, LInsp cond, NIns* targ)
return at;
}
NIns* Assembler::asm_branch_ov(LOpcode op, NIns* target)
void Assembler::asm_branch_xov(LOpcode op, NIns* target)
{
// Because MUL can't set the V flag, we use SMULL and CMP to set the Z flag
// to detect overflow on multiply. Thus, if we have a LIR_mulxovi, we must
@ -2400,7 +2278,6 @@ NIns* Assembler::asm_branch_ov(LOpcode op, NIns* target)
// Emit a suitable branch instruction.
B_cond(cc, target);
return _nIns;
}
void

View file

@ -23,7 +23,6 @@
* Contributor(s):
* Adobe AS3 Team
* Vladimir Vukicevic <vladimir@pobox.com>
* Jacob Bramley <Jacob.Bramley@arm.com>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -66,7 +65,12 @@ namespace nanojit
// only d0-d6 are actually used; we'll use d7 as s14-s15 for i2d/u2f/etc.
#define NJ_VFP_MAX_REGISTERS 8
#define NJ_MAX_REGISTERS (11 + NJ_VFP_MAX_REGISTERS)
#define NJ_MAX_STACK_ENTRY 4096
// fixme: bug 556175: this can't be over 1024, because
// the ARM backend cannot support more than 12-bit displacements
// in a single load/store instruction, for spilling. see asm_spill().
#define NJ_MAX_STACK_ENTRY 1024
#define NJ_MAX_PARAMETERS 16
#define NJ_ALIGN_STACK 8
@ -225,7 +229,6 @@ verbose_only( extern const char* shiftNames[]; )
void asm_stkarg(LInsp p, int stkd); \
void asm_cmpi(Register, int32_t imm); \
void asm_ldr_chk(Register d, Register b, int32_t off, bool chk); \
int32_t asm_str(Register rt, Register rr, int32_t off); \
void asm_cmp(LIns *cond); \
void asm_cmpd(LIns *cond); \
void asm_ld_imm(Register d, int32_t imm, bool chk = true); \
@ -607,8 +610,8 @@ enum {
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if (_off < 0) { \
NanoAssert(isU12(-(_off))); \
*(--_nIns) = (NIns)( COND_AL | (0x55<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xfff) ); \
NanoAssert(isU12(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x55<<20) | ((_n)<<16) | ((_d)<<12) | ((-_off)&0xfff) ); \
} else { \
NanoAssert(isU12(_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x5D<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xfff) ); \
@ -622,8 +625,8 @@ enum {
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if (_off < 0) { \
NanoAssert(isU8(-(_off))); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((-(_off))&0xf0)<<4) | ((-(_off))&0xf) ); \
NanoAssert(isU8(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
} else { \
NanoAssert(isU8(_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xD)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
@ -638,8 +641,8 @@ enum {
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if (_off < 0) { \
NanoAssert(isU8(-(_off))); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((-(_off))&0xf0)<<4) | ((-(_off))&0xf) ); \
NanoAssert(isU8(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
} else { \
NanoAssert(isU8(_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xB)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
@ -654,8 +657,8 @@ enum {
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if (_off < 0) { \
NanoAssert(isU8(-(_off))); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((-(_off))&0xf0)<<4) | ((-(_off))&0xf) ); \
NanoAssert(isU8(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x15<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((-_off)&0xf0)<<4) | ((-_off)&0xf) ); \
} else { \
NanoAssert(isU8(_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x1D<<20) | ((_n)<<16) | ((_d)<<12) | ((0xF)<<4) | (((_off)&0xf0)<<4) | ((_off)&0xf) ); \
@ -666,7 +669,7 @@ enum {
// Valid offset for STR and STRB is +/- 4095, STRH only has +/- 255
#define STR(_d,_n,_off) do { \
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
NanoAssert(isU12(_off) || isU12(-(_off))); \
NanoAssert(isU12(_off) || isU12(-_off)); \
underrunProtect(4); \
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x50<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x58<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
@ -675,7 +678,7 @@ enum {
#define STRB(_d,_n,_off) do { \
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
NanoAssert(isU12(_off) || isU12(-(_off))); \
NanoAssert(isU12(_off) || isU12(-_off)); \
underrunProtect(4); \
if ((_off)<0) *(--_nIns) = (NIns)( COND_AL | (0x54<<20) | ((_n)<<16) | ((_d)<<12) | ((-(_off))&0xFFF) ); \
else *(--_nIns) = (NIns)( COND_AL | (0x5C<<20) | ((_n)<<16) | ((_d)<<12) | ((_off)&0xFFF) ); \
@ -687,7 +690,7 @@ enum {
NanoAssert(IsGpReg(_d) && IsGpReg(_n)); \
underrunProtect(4); \
if ((_off)<0) { \
NanoAssert(isU8(-(_off))); \
NanoAssert(isU8(-_off)); \
*(--_nIns) = (NIns)( COND_AL | (0x14<<20) | ((_n)<<16) | ((_d)<<12) | (((-(_off))&0xF0)<<4) | (0xB<<4) | ((-(_off))&0xF) ); \
} else { \
NanoAssert(isU8(_off)); \

View file

@ -954,7 +954,6 @@ namespace nanojit
// MIPS arith immediate ops sign-extend the imm16 value
switch (op) {
case LIR_addxovi:
case LIR_addjovi:
SLT(AT, rr, ra);
ADDIU(rr, ra, rhsc);
goto done;
@ -962,7 +961,6 @@ namespace nanojit
ADDIU(rr, ra, rhsc);
goto done;
case LIR_subxovi:
case LIR_subjovi:
if (isS16(-rhsc)) {
SLT(AT, ra, rr);
ADDIU(rr, ra, -rhsc);
@ -976,7 +974,6 @@ namespace nanojit
}
break;
case LIR_mulxovi:
case LIR_muljovi:
case LIR_muli:
// FIXME: optimise constant multiply by 2^n
// if ((rhsc & (rhsc-1)) == 0)
@ -1027,7 +1024,6 @@ namespace nanojit
switch (op) {
case LIR_addxovi:
case LIR_addjovi:
SLT(AT, rr, ra);
ADDU(rr, ra, rb);
break;
@ -1044,7 +1040,6 @@ namespace nanojit
XOR(rr, ra, rb);
break;
case LIR_subxovi:
case LIR_subjovi:
SLT(AT,ra,rr);
SUBU(rr, ra, rb);
break;
@ -1064,7 +1059,6 @@ namespace nanojit
ANDI(rb, rb, 31);
break;
case LIR_mulxovi:
case LIR_muljovi:
t = registerAllocTmp(allow);
// Overflow indication required
// Do a 32x32 signed multiply generating a 64 bit result
@ -1487,15 +1481,14 @@ namespace nanojit
return patch;
}
NIns* Assembler::asm_branch_ov(LOpcode op, NIns* target)
void Assembler::asm_branch_xov(LOpcode op, NIns* target)
{
USE(op);
NanoAssert(target != NULL);
NIns* patch = asm_bxx(true, LIR_eqi, AT, ZERO, target);
(void) asm_bxx(true, LIR_eqi, AT, ZERO, target);
TAG("asm_branch_ov(op=%s, target=%p)", lirNames[op], target);
return patch;
TAG("asm_branch_xov(op=%s, target=%p)", lirNames[op], target);
}
NIns* Assembler::asm_branch(bool branchOnFalse, LIns *cond, NIns * const targ)

View file

@ -545,9 +545,8 @@ namespace nanojit
return _nIns;
}
NIns* Assembler::asm_branch_ov(LOpcode, NIns*) {
TODO(asm_branch_ov);
return _nIns;
void Assembler::asm_branch_xov(LOpcode, NIns*) {
TODO(asm_branch_xov);
}
void Assembler::asm_cmp(LOpcode condop, LIns *a, LIns *b, ConditionRegister cr) {

View file

@ -535,15 +535,13 @@ namespace nanojit
return at;
}
NIns* Assembler::asm_branch_ov(LOpcode, NIns* targ)
void Assembler::asm_branch_xov(LOpcode, NIns* targ)
{
NIns* at = 0;
underrunProtect(32);
intptr_t tt = ((intptr_t)targ - (intptr_t)_nIns + 8) >> 2;
// !targ means that it needs patch.
if( !(isIMM22((int32_t)tt)) || !targ ) {
JMP_long_nocheck((intptr_t)targ);
at = _nIns;
NOP();
BA(0, 5);
tt = 4;
@ -551,7 +549,6 @@ namespace nanojit
NOP();
BVS(0, tt);
return at;
}
void Assembler::asm_cmp(LIns *cond)
@ -875,8 +872,9 @@ namespace nanojit
LIns *rhs = ins->oprnd2();
RegisterMask allow = FpRegs;
Register ra, rb;
findRegFor2(allow, lhs, ra, allow, rhs, rb);
Register ra = findRegFor(lhs, FpRegs);
Register rb = (rhs == lhs) ? ra : findRegFor(rhs, FpRegs);
Register rr = deprecated_prepResultReg(ins, allow);
if (op == LIR_addd)

View file

@ -700,7 +700,7 @@ namespace nanojit
int32_t imm = getImm32(b);
LOpcode op = ins->opcode();
Register rr, ra;
if (op == LIR_muli || op == LIR_muljovi || op == LIR_mulxovi) {
if (op == LIR_muli || op == LIR_mulxovi) {
// Special case: imul-by-imm has true 3-addr form. So we don't
// need the MR(rr, ra) after the IMULI.
beginOp1Regs(ins, GpRegs, rr, ra);
@ -714,18 +714,13 @@ namespace nanojit
switch (ins->opcode()) {
default: TODO(arith_imm8);
case LIR_addi:
case LIR_addjovi:
case LIR_addxovi: ADDLR8(rr, imm); break; // XXX: bug 547125: could use LEA for LIR_addi
case LIR_andi: ANDLR8(rr, imm); break;
case LIR_ori: ORLR8( rr, imm); break;
case LIR_subi:
case LIR_subjovi:
case LIR_subxovi: SUBLR8(rr, imm); break;
case LIR_xori: XORLR8(rr, imm); break;
case LIR_addq:
case LIR_addjovq: ADDQR8(rr, imm); break;
case LIR_subq:
case LIR_subjovq: SUBQR8(rr, imm); break;
case LIR_addq: ADDQR8(rr, imm); break;
case LIR_andq: ANDQR8(rr, imm); break;
case LIR_orq: ORQR8( rr, imm); break;
case LIR_xorq: XORQR8(rr, imm); break;
@ -734,18 +729,13 @@ namespace nanojit
switch (ins->opcode()) {
default: TODO(arith_imm);
case LIR_addi:
case LIR_addjovi:
case LIR_addxovi: ADDLRI(rr, imm); break; // XXX: bug 547125: could use LEA for LIR_addi
case LIR_andi: ANDLRI(rr, imm); break;
case LIR_ori: ORLRI( rr, imm); break;
case LIR_subi:
case LIR_subjovi:
case LIR_subxovi: SUBLRI(rr, imm); break;
case LIR_xori: XORLRI(rr, imm); break;
case LIR_addq:
case LIR_addjovq: ADDQRI(rr, imm); break;
case LIR_subq:
case LIR_subjovq: SUBQRI(rr, imm); break;
case LIR_addq: ADDQRI(rr, imm); break;
case LIR_andq: ANDQRI(rr, imm); break;
case LIR_orq: ORQRI( rr, imm); break;
case LIR_xorq: XORQRI(rr, imm); break;
@ -844,23 +834,17 @@ namespace nanojit
default: TODO(asm_arith);
case LIR_ori: ORLRR(rr, rb); break;
case LIR_subi:
case LIR_subjovi:
case LIR_subxovi: SUBRR(rr, rb); break;
case LIR_addi:
case LIR_addjovi:
case LIR_addxovi: ADDRR(rr, rb); break; // XXX: bug 547125: could use LEA for LIR_addi
case LIR_andi: ANDRR(rr, rb); break;
case LIR_xori: XORRR(rr, rb); break;
case LIR_muli:
case LIR_muljovi:
case LIR_mulxovi: IMUL(rr, rb); break;
case LIR_xorq: XORQRR(rr, rb); break;
case LIR_orq: ORQRR(rr, rb); break;
case LIR_andq: ANDQRR(rr, rb); break;
case LIR_addq:
case LIR_addjovq: ADDQRR(rr, rb); break;
case LIR_subq:
case LIR_subjovq: SUBQRR(rr, rb); break;
case LIR_addq: ADDQRR(rr, rb); break;
}
if (rr != ra)
MR(rr, ra);
@ -1220,7 +1204,7 @@ namespace nanojit
return patch;
}
NIns* Assembler::asm_branch_ov(LOpcode, NIns* target) {
void Assembler::asm_branch_xov(LOpcode, NIns* target) {
if (target && !isTargetWithinS32(target)) {
setError(ConditionalBranchTooFar);
NanoAssert(0);
@ -1231,7 +1215,6 @@ namespace nanojit
JO8(8, target);
else
JO( 8, target);
return _nIns;
}
// WARNING: this function cannot generate code that will affect the

View file

@ -1492,10 +1492,9 @@ namespace nanojit
return at;
}
NIns* Assembler::asm_branch_ov(LOpcode, NIns* target)
void Assembler::asm_branch_xov(LOpcode, NIns* target)
{
JO(target);
return _nIns;
}
void Assembler::asm_switch(LIns* ins, NIns* exit)
@ -1714,7 +1713,6 @@ namespace nanojit
evictIfActive(EDX);
break;
case LIR_muli:
case LIR_muljovi:
case LIR_mulxovi:
isConstRhs = false;
if (lhs != rhs) {
@ -1752,13 +1750,10 @@ namespace nanojit
switch (op) {
case LIR_addi:
case LIR_addjovi:
case LIR_addxovi: ADD(rr, rb); break; // XXX: bug 547125: could use LEA for LIR_addi
case LIR_subi:
case LIR_subjovi:
case LIR_subxovi: SUB(rr, rb); break;
case LIR_muli:
case LIR_muljovi:
case LIR_mulxovi: MUL(rr, rb); break;
case LIR_andi: AND(rr, rb); break;
case LIR_ori: OR( rr, rb); break;
@ -1781,10 +1776,8 @@ namespace nanojit
LEA(rr, c, ra);
ra = rr; // suppress mov
break;
case LIR_addjovi:
case LIR_addxovi: ADDi(rr, c); break;
case LIR_subi:
case LIR_subjovi:
case LIR_subxovi: SUBi(rr, c); break;
case LIR_andi: ANDi(rr, c); break;
case LIR_ori: ORi( rr, c); break;
@ -2217,7 +2210,7 @@ namespace nanojit
}
} else {
debug_only( Register rr = ) prepareResultReg(ins, x87Regs);
verbose_only( Register rr = ) prepareResultReg(ins, x87Regs);
NanoAssert(FST0 == rr);
NanoAssert(!lhs->isInReg() || FST0 == lhs->getReg());

View file

@ -148,7 +148,9 @@ namespace nanojit
}
#ifdef AVMPLUS_VERBOSE
#define NJ_VERBOSE 1
#ifndef NJ_VERBOSE_DISABLED
#define NJ_VERBOSE 1
#endif
#endif
#ifdef NJ_NO_VARIADIC_MACROS

View file

@ -143,7 +143,6 @@ static void dumpProfile (void)
entries = reverse(entries);
vprof_printf ("event avg [min : max] total count\n");
for (e = entries; e; e = e->next) {
if (e->count == 0) continue; // ignore entries with zero count.
vprof_printf ("%s", e->file);
if (e->line >= 0) {
vprof_printf (":%d", e->line);
@ -180,6 +179,23 @@ static void dumpProfile (void)
entries = reverse(entries);
}
int _profileEntryValue (void* id, int64_t value)
{
entry_t e = (entry_t) id;
long* lock = &(e->lock);
LOCK (lock);
e->value = value;
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
if (e->func) e->func (e);
UNLOCK (lock);
return 0;
}
inline static entry_t findEntry (char* file, int line)
{
for (entry_t e = entries; e; e = e->next) {
@ -190,11 +206,7 @@ inline static entry_t findEntry (char* file, int line)
return NULL;
}
// Initialize the location pointed to by 'id' to a new value profile entry
// associated with 'file' and 'line', or do nothing if already initialized.
// An optional final argument provides a user-defined probe function.
int initValueProfile(void** id, char* file, int line, ...)
int profileValue(void** id, char* file, int line, int64_t value, ...)
{
DO_LOCK (&glock);
entry_t e = (entry_t) *id;
@ -208,7 +220,7 @@ int initValueProfile(void** id, char* file, int line, ...)
if (e) {
*id = e;
}
}
}
if (e == NULL) {
va_list va;
@ -216,58 +228,72 @@ int initValueProfile(void** id, char* file, int line, ...)
e->lock = LOCK_IS_FREE;
e->file = file;
e->line = line;
e->value = 0;
e->sum = 0;
e->count = 0;
e->min = 0;
e->max = 0;
// optional probe function argument
va_start (va, line);
e->value = value;
e->sum = value;
e->count = 1;
e->min = value;
e->max = value;
va_start (va, value);
e->func = (void (__cdecl*)(void*)) va_arg (va, void*);
va_end (va);
e->h = NULL;
e->genptr = NULL;
VMPI_memset (&e->ivar, 0, sizeof(e->ivar));
VMPI_memset (&e->i64var, 0, sizeof(e->i64var));
VMPI_memset (&e->dvar, 0, sizeof(e->dvar));
e->next = entries;
entries = e;
if (e->func) e->func (e);
*id = e;
} else {
long* lock = &(e->lock);
LOCK (lock);
e->value = value;
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
if (e->func) e->func (e);
UNLOCK (lock);
}
DO_UNLOCK (&glock);
return 0;
}
// Record a value profile event.
int profileValue(void* id, int64_t value)
int _histEntryValue (void* id, int64_t value)
{
entry_t e = (entry_t) id;
long* lock = &(e->lock);
hist_t h = e->h;
int nbins = h->nbins;
int64_t* lb = h->lb;
int b;
for (b = 0; b < nbins; b ++) {
if (value < lb[b]) break;
}
LOCK (lock);
e->value = value;
if (e->count == 0) {
e->sum = value;
e->count = 1;
e->min = value;
e->max = value;
} else {
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
}
if (e->func) e->func (e);
e->value = value;
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
h->count[b] ++;
UNLOCK (lock);
return 0;
}
// Initialize the location pointed to by 'id' to a new histogram profile entry
// associated with 'file' and 'line', or do nothing if already initialized.
int initHistProfile(void** id, char* file, int line, int nbins, ...)
int histValue(void** id, char* file, int line, int64_t value, int nbins, ...)
{
DO_LOCK (&glock);
entry_t e = (entry_t) *id;
@ -293,11 +319,11 @@ int initHistProfile(void** id, char* file, int line, int nbins, ...)
e->lock = LOCK_IS_FREE;
e->file = file;
e->line = line;
e->value = 0;
e->sum = 0;
e->count = 0;
e->min = 0;
e->max = 0;
e->value = value;
e->sum = value;
e->count = 1;
e->min = value;
e->max = value;
e->func = NULL;
e->h = h = (hist_t) malloc (sizeof(hist));
n = 1+MAX(nbins,0);
@ -317,60 +343,51 @@ int initHistProfile(void** id, char* file, int line, int nbins, ...)
lb[b] = MAXINT64;
va_end (va);
for (b = 0; b < nbins; b ++) {
if (value < lb[b]) break;
}
h->count[b] ++;
e->genptr = NULL;
VMPI_memset (&e->ivar, 0, sizeof(e->ivar));
VMPI_memset (&e->i64var, 0, sizeof(e->i64var));
VMPI_memset (&e->dvar, 0, sizeof(e->dvar));
e->next = entries;
entries = e;
*id = e;
} else {
int b;
long* lock = &(e->lock);
hist_t h=e->h;
int64_t* lb = h->lb;
LOCK (lock);
e->value = value;
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
for (b = 0; b < nbins; b ++) {
if (value < lb[b]) break;
}
h->count[b] ++;
UNLOCK (lock);
}
DO_UNLOCK (&glock);
return 0;
}
// Record a histogram profile event.
int histValue(void* id, int64_t value)
{
entry_t e = (entry_t) id;
long* lock = &(e->lock);
hist_t h = e->h;
int nbins = h->nbins;
int64_t* lb = h->lb;
int b;
LOCK (lock);
e->value = value;
if (e->count == 0) {
e->sum = value;
e->count = 1;
e->min = value;
e->max = value;
} else {
e->sum += value;
e->count ++;
e->min = MIN (e->min, value);
e->max = MAX (e->max, value);
}
for (b = 0; b < nbins; b ++) {
if (value < lb[b]) break;
}
h->count[b] ++;
UNLOCK (lock);
return 0;
}
#if defined(_MSC_VER) && defined(_M_IX86)
uint64_t readTimestampCounter()
inline uint64_t _rdtsc()
{
// read the cpu cycle counter. 1 tick = 1 cycle on IA32
_asm rdtsc;
}
#elif defined(__GNUC__) && (__i386__ || __x86_64__)
uint64_t readTimestampCounter()
inline uint64_t _rdtsc()
{
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
@ -378,6 +395,16 @@ uint64_t readTimestampCounter()
}
#else
// add stub for platforms without it, so fat builds don't fail
uint64_t readTimestampCounter() { return 0; }
inline uint64_t _rdtsc() { return 0; }
#endif
void* _tprof_before_id=0;
static uint64_t _tprof_before = 0;
int64_t _tprof_time()
{
uint64_t now = _rdtsc();
uint64_t v = _tprof_before ? now-_tprof_before : 0;
_tprof_before = now;
return v/2600; // v = microseconds on a 2.6ghz cpu
}

View file

@ -105,11 +105,12 @@
extern "C" {
#endif
int initValueProfile(void** id, char* file, int line, ...);
int profileValue(void* id, int64_t value);
int initHistProfile(void** id, char* file, int line, int nbins, ...);
int histValue(void* id, int64_t value);
uint64_t readTimestampCounter();
int profileValue (void** id, char* file, int line, int64_t value, ...);
int _profileEntryValue (void* id, int64_t value);
int histValue(void** id, char* file, int line, int64_t value, int nbins, ...);
int _histEntryValue (void* id, int64_t value);
int64_t _tprof_time();
extern void* _tprof_before_id;
#ifdef __cplusplus
}
@ -123,97 +124,73 @@ uint64_t readTimestampCounter();
#define _vprof(v,...)
#define _hprof(h,n,...)
#define _nhprof(e,v,n,...)
#define _ntprof_begin(e)
#define _ntprof_end(e)
#define _jvprof_init(id,...)
#define _jnvprof_init(id,e,...)
#define _jvprof(id,v)
#define _ntprof(e)
#define _tprof_end()
#endif // ! VMCFG_SYMBIAN
#else
// Historical/compatibility note:
// The macros below were originally written using conditional expressions, not if/else. The original author
// said that this was done to allow _vprof and _nvprof to be used in an expression context, but the old code
// had already wrapped the macro bodies in { }, so it is not clear how this could have worked. At present,
// the profiling macros must appear in a statement context only.
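// For example, with the current { ... } bodies a call such as
//     if (enabled) _vprof(n); else skip();
// does not compile: the ';' after the expanded '}' terminates the 'if' and
// leaves the 'else' dangling, so the macros must be used as stand-alone
// statements. (Illustrative example, not part of the original header.)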
#define _vprof(v,...) \
do { \
{ \
static void* id = 0; \
if (id == 0) \
initValueProfile(&id, __FILE__, __LINE__, ##__VA_ARGS__, NULL); \
profileValue(id, (int64_t) (v)); \
} while (0)
(id != 0) ? \
_profileEntryValue (id, (int64_t) (v)) \
: \
profileValue (&id, __FILE__, __LINE__, (int64_t) (v), ##__VA_ARGS__, NULL) \
;\
}
#define _nvprof(e,v) \
do { \
{ \
static void* id = 0; \
if (id == 0) \
initValueProfile(&id, (char*) (e), -1, NULL); \
profileValue(id, (int64_t) (v)); \
} while (0)
(id != 0) ? \
_profileEntryValue (id, (int64_t) (v)) \
: \
profileValue (&id, (char*) (e), -1, (int64_t) (v), NULL) \
; \
}
#define _hprof(v,n,...) \
do { \
{ \
static void* id = 0; \
if (id == 0) \
initHistProfile(&id, __FILE__, __LINE__, (int) (n), ##__VA_ARGS__); \
histValue(id, (int64_t) (v)); \
} while (0)
(id != 0) ? \
_histEntryValue (id, (int64_t) (v)) \
: \
histValue (&id, __FILE__, __LINE__, (int64_t) (v), (int) (n), ##__VA_ARGS__) \
; \
}
#define _nhprof(e,v,n,...) \
do { \
{ \
static void* id = 0; \
if (id == 0) \
initHistProfile(&id, (char*) (e), -1, (int) (n), ##__VA_ARGS__); \
histValue(id, (int64_t) (v)); \
} while (0)
(id != 0) ? \
_histEntryValue (id, (int64_t) (v)) \
: \
histValue (&id, (char*) (e), -1, (int64_t) (v), (int) (n), ##__VA_ARGS__) \
; \
}
// Profile execution time between _ntprof_begin(e) and _ntprof_end(e).
// The tag 'e' must match at the beginning and end of the region to
// be timed. Regions may be nested or overlap arbitrarily, as it is
// the tag alone that defines the begin/end correspondence.
#define _ntprof_begin(e) \
do { \
#define _ntprof(e) \
{ \
uint64_t v = _tprof_time();\
(_tprof_before_id != 0) ? \
_profileEntryValue(_tprof_before_id, v)\
: 0;\
static void* id = 0; \
if (id == 0) \
initValueProfile(&id, (char*)(e), -1, NULL); \
((entry_t)id)->i64var[0] = readTimestampCounter(); \
} while (0)
(id != 0) ? \
_profileEntryValue (id, (int64_t) 0) \
: \
profileValue (&id, (char*)(e), -1, (int64_t) 0, NULL) \
;\
_tprof_before_id = id;\
}
// Assume 2.6 Ghz CPU
#define TICKS_PER_USEC 2600
#define _ntprof_end(e) \
do { \
static void* id = 0; \
uint64_t stop = readTimestampCounter(); \
if (id == 0) \
initValueProfile(&id, (char*)(e), -1, NULL); \
uint64_t start = ((entry_t)id)->i64var[0]; \
uint64_t usecs = (stop - start) / TICKS_PER_USEC; \
profileValue(id, usecs); \
} while (0)
// These macros separate the creation of a profile record from its later usage.
// They are intended for profiling JIT-generated code. Once created, the JIT can
// bind a pointer to the profile record into the generated code, which can then
// record profile events during execution.
#define _jvprof_init(id,...) \
if (*(id) == 0) \
initValueProfile((id), __FILE__, __LINE__, ##__VA_ARGS__, NULL)
#define _jnvprof_init(id,e,...) \
if (*(id) == 0) \
initValueProfile((id), (char*) (e), -1, ##__VA_ARGS__, NULL)
// Calls to the _jvprof macro must be wrapped in an actual function
// in order to be invoked from JIT-compiled code.
#define _jvprof(id,v) \
profileValue((id), (int64_t) (v))
#define _tprof_end() \
{\
uint64_t v = _tprof_time();\
if (_tprof_before_id)\
_profileEntryValue(_tprof_before_id, v);\
_tprof_before_id = 0;\
}
#endif
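For context, the value-profiling entry points keep the same call-site shape on both sides of this backout: _vprof records a value keyed by __FILE__/__LINE__, and _nvprof records a value under an explicit name. A minimal usage sketch, illustrative only (the event name "fragment-size" and the helper function are made up for this example):

#include "vprof.h"

static void recordFragmentSize(int nInstructions)
{
    _nvprof("fragment-size", nInstructions);  // value profile keyed by the given name
    _vprof(nInstructions);                    // value profile keyed by __FILE__/__LINE__
}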