зеркало из https://github.com/mozilla/gecko-dev.git
merge
This commit is contained in:
Коммит
eae82f529a
|
@ -1,12 +1,12 @@
|
||||||
ptr = allocp 8
|
ptr = allocp 8
|
||||||
a = immi 65
|
a = immi 65
|
||||||
sti a ptr 0
|
sti2c a ptr 0
|
||||||
b = immi 66
|
b = immi 66
|
||||||
sti b ptr 1
|
sti2c b ptr 1
|
||||||
c = immi 67
|
c = immi 67
|
||||||
sti c ptr 2
|
sti2c c ptr 2
|
||||||
zero = immi 0
|
zero = immi 0
|
||||||
sti zero ptr 3
|
sti2c zero ptr 3
|
||||||
ss = calli puts cdecl ptr
|
ss = calli puts cdecl ptr
|
||||||
nn = gei ss zero
|
nn = gei ss zero
|
||||||
reti nn
|
reti nn
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
.begin a
|
.begin a
|
||||||
ptr = allocp 8
|
ptr = allocp 8
|
||||||
a = immi 65
|
a = immi 65
|
||||||
sti a ptr 0
|
sti2c a ptr 0
|
||||||
b = immi 66
|
b = immi 66
|
||||||
sti b ptr 1
|
sti2c b ptr 1
|
||||||
c = immi 67
|
c = immi 67
|
||||||
sti c ptr 2
|
sti2c c ptr 2
|
||||||
zero = immi 0
|
zero = immi 0
|
||||||
sti zero ptr 3
|
sti2c zero ptr 3
|
||||||
ss = calli puts cdecl ptr
|
ss = calli puts cdecl ptr
|
||||||
nn = gei ss zero
|
nn = gei ss zero
|
||||||
reti nn
|
reti nn
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
982cd218ddb049bdbbcdda4fa3a9d7e40e45e0be
|
c7009f5cd83ea028b98f59e1f8830a76ba27c1dd
|
||||||
|
|
|
@ -41,7 +41,7 @@
|
||||||
|
|
||||||
#ifdef FEATURE_NANOJIT
|
#ifdef FEATURE_NANOJIT
|
||||||
|
|
||||||
#ifdef VTUNE
|
#ifdef VMCFG_VTUNE
|
||||||
#include "../core/CodegenLIR.h"
|
#include "../core/CodegenLIR.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -50,6 +50,18 @@
|
||||||
#pragma warning(disable:4310) // cast truncates constant value
|
#pragma warning(disable:4310) // cast truncates constant value
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef VMCFG_VTUNE
|
||||||
|
namespace vtune {
|
||||||
|
using namespace nanojit;
|
||||||
|
void vtuneStart(void*, NIns*);
|
||||||
|
void vtuneEnd(void*, NIns*);
|
||||||
|
void vtuneLine(void*, int, NIns*);
|
||||||
|
void vtuneFile(void*, void*);
|
||||||
|
}
|
||||||
|
using namespace vtune;
|
||||||
|
#endif // VMCFG_VTUNE
|
||||||
|
|
||||||
|
|
||||||
namespace nanojit
|
namespace nanojit
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
|
@ -74,8 +86,8 @@ namespace nanojit
|
||||||
#if PEDANTIC
|
#if PEDANTIC
|
||||||
, pedanticTop(NULL)
|
, pedanticTop(NULL)
|
||||||
#endif
|
#endif
|
||||||
#ifdef VTUNE
|
#ifdef VMCFG_VTUNE
|
||||||
, cgen(NULL)
|
, vtuneHandle(NULL)
|
||||||
#endif
|
#endif
|
||||||
, _config(config)
|
, _config(config)
|
||||||
{
|
{
|
||||||
|
@ -186,10 +198,11 @@ namespace nanojit
|
||||||
void Assembler::registerResetAll()
|
void Assembler::registerResetAll()
|
||||||
{
|
{
|
||||||
nRegisterResetAll(_allocator);
|
nRegisterResetAll(_allocator);
|
||||||
|
_allocator.managed = _allocator.free;
|
||||||
|
|
||||||
// At start, should have some registers free and none active.
|
// At start, should have some registers free and none active.
|
||||||
NanoAssert(0 != _allocator.free);
|
NanoAssert(0 != _allocator.free);
|
||||||
NanoAssert(0 == _allocator.countActive());
|
NanoAssert(0 == _allocator.activeMask());
|
||||||
#ifdef NANOJIT_IA32
|
#ifdef NANOJIT_IA32
|
||||||
debug_only(_fpuStkDepth = 0; )
|
debug_only(_fpuStkDepth = 0; )
|
||||||
#endif
|
#endif
|
||||||
|
@ -273,14 +286,6 @@ namespace nanojit
|
||||||
verbose_only( nBytes += (end - start) * sizeof(NIns); )
|
verbose_only( nBytes += (end - start) * sizeof(NIns); )
|
||||||
NanoAssert(uintptr_t(end) - uintptr_t(start) >= (size_t)LARGEST_UNDERRUN_PROT);
|
NanoAssert(uintptr_t(end) - uintptr_t(start) >= (size_t)LARGEST_UNDERRUN_PROT);
|
||||||
eip = end;
|
eip = end;
|
||||||
|
|
||||||
#ifdef VTUNE
|
|
||||||
if (_nIns && _nExitIns) {
|
|
||||||
//cgen->jitAddRecord((uintptr_t)list->code, 0, 0, true); // add placeholder record for top of page
|
|
||||||
cgen->jitCodePosUpdate((uintptr_t)list->code);
|
|
||||||
cgen->jitPushInfo(); // new page requires new entry
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::reset()
|
void Assembler::reset()
|
||||||
|
@ -360,23 +365,26 @@ namespace nanojit
|
||||||
void Assembler::registerConsistencyCheck()
|
void Assembler::registerConsistencyCheck()
|
||||||
{
|
{
|
||||||
RegisterMask managed = _allocator.managed;
|
RegisterMask managed = _allocator.managed;
|
||||||
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
|
for (Register r = lsReg(managed); managed; r = nextLsReg(managed, r)) {
|
||||||
if (rmask(r) & managed) {
|
// A register managed by register allocation must be either
|
||||||
// A register managed by register allocation must be either
|
// free or active, but not both.
|
||||||
// free or active, but not both.
|
if (_allocator.isFree(r)) {
|
||||||
if (_allocator.isFree(r)) {
|
NanoAssertMsgf(_allocator.getActive(r)==0,
|
||||||
NanoAssertMsgf(_allocator.getActive(r)==0,
|
"register %s is free but assigned to ins", gpn(r));
|
||||||
"register %s is free but assigned to ins", gpn(r));
|
|
||||||
} else {
|
|
||||||
// An LIns defining a register must have that register in
|
|
||||||
// its reservation.
|
|
||||||
LIns* ins = _allocator.getActive(r);
|
|
||||||
NanoAssert(ins);
|
|
||||||
NanoAssertMsg(r == ins->getReg(), "Register record mismatch");
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// A register not managed by register allocation must be
|
// An LIns defining a register must have that register in
|
||||||
// neither free nor active.
|
// its reservation.
|
||||||
|
LIns* ins = _allocator.getActive(r);
|
||||||
|
NanoAssert(ins);
|
||||||
|
NanoAssertMsg(r == ins->getReg(), "Register record mismatch");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
RegisterMask not_managed = ~_allocator.managed;
|
||||||
|
for (Register r = lsReg(not_managed); not_managed; r = nextLsReg(not_managed, r)) {
|
||||||
|
// A register not managed by register allocation must be
|
||||||
|
// neither free nor active.
|
||||||
|
if (r <= LastReg) {
|
||||||
NanoAssert(!_allocator.isFree(r));
|
NanoAssert(!_allocator.isFree(r));
|
||||||
NanoAssert(!_allocator.getActive(r));
|
NanoAssert(!_allocator.getActive(r));
|
||||||
}
|
}
|
||||||
|
@ -1108,6 +1116,15 @@ namespace nanojit
|
||||||
// save entry point pointers
|
// save entry point pointers
|
||||||
frag->fragEntry = fragEntry;
|
frag->fragEntry = fragEntry;
|
||||||
frag->setCode(_nIns);
|
frag->setCode(_nIns);
|
||||||
|
|
||||||
|
#ifdef VMCFG_VTUNE
|
||||||
|
if (vtuneHandle)
|
||||||
|
{
|
||||||
|
vtuneEnd(vtuneHandle, codeEnd);
|
||||||
|
vtuneStart(vtuneHandle, _nIns);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
PERFM_NVPROF("code", CodeAlloc::size(codeList));
|
PERFM_NVPROF("code", CodeAlloc::size(codeList));
|
||||||
|
|
||||||
#ifdef NANOJIT_IA32
|
#ifdef NANOJIT_IA32
|
||||||
|
@ -1120,15 +1137,14 @@ namespace nanojit
|
||||||
|
|
||||||
void Assembler::releaseRegisters()
|
void Assembler::releaseRegisters()
|
||||||
{
|
{
|
||||||
for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
|
RegisterMask active = _allocator.activeMask();
|
||||||
|
for (Register r = lsReg(active); active; r = nextLsReg(active, r))
|
||||||
{
|
{
|
||||||
LIns *ins = _allocator.getActive(r);
|
LIns *ins = _allocator.getActive(r);
|
||||||
if (ins) {
|
// Clear reg allocation, preserve stack allocation.
|
||||||
// Clear reg allocation, preserve stack allocation.
|
_allocator.retire(r);
|
||||||
_allocator.retire(r);
|
NanoAssert(r == ins->getReg());
|
||||||
NanoAssert(r == ins->getReg());
|
ins->clearReg();
|
||||||
ins->clearReg();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1731,7 +1747,7 @@ namespace nanojit
|
||||||
// Out of range indices aren't allowed or checked.
|
// Out of range indices aren't allowed or checked.
|
||||||
// Code after this jtbl instruction is unreachable.
|
// Code after this jtbl instruction is unreachable.
|
||||||
releaseRegisters();
|
releaseRegisters();
|
||||||
NanoAssert(_allocator.countActive() == 0);
|
NanoAssert(_allocator.activeMask() == 0);
|
||||||
|
|
||||||
uint32_t count = ins->getTableSize();
|
uint32_t count = ins->getTableSize();
|
||||||
bool has_back_edges = false;
|
bool has_back_edges = false;
|
||||||
|
@ -1756,7 +1772,7 @@ namespace nanojit
|
||||||
// to reconcile registers. So, frontends *must* insert LIR_regfence at labels of
|
// to reconcile registers. So, frontends *must* insert LIR_regfence at labels of
|
||||||
// forward jtbl jumps. Check here to make sure no registers were picked up from
|
// forward jtbl jumps. Check here to make sure no registers were picked up from
|
||||||
// any forward edges.
|
// any forward edges.
|
||||||
NanoAssert(_allocator.countActive() == 0);
|
NanoAssert(_allocator.activeMask() == 0);
|
||||||
|
|
||||||
if (has_back_edges) {
|
if (has_back_edges) {
|
||||||
handleLoopCarriedExprs(pending_lives);
|
handleLoopCarriedExprs(pending_lives);
|
||||||
|
@ -1928,27 +1944,28 @@ namespace nanojit
|
||||||
asm_call(ins);
|
asm_call(ins);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#ifdef VTUNE
|
#ifdef VMCFG_VTUNE
|
||||||
case LIR_file: {
|
case LIR_file: {
|
||||||
// we traverse backwards so we are now hitting the file
|
// we traverse backwards so we are now hitting the file
|
||||||
// that is associated with a bunch of LIR_lines we already have seen
|
// that is associated with a bunch of LIR_lines we already have seen
|
||||||
ins->oprnd1()->setResultLive();
|
if (vtuneHandle) {
|
||||||
uintptr_t currentFile = ins->oprnd1()->immI();
|
void * currentFile = (void *) ins->oprnd1()->immI();
|
||||||
cgen->jitFilenameUpdate(currentFile);
|
vtuneFile(vtuneHandle, currentFile);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case LIR_line: {
|
case LIR_line: {
|
||||||
// add a new table entry, we don't yet knwo which file it belongs
|
// add a new table entry, we don't yet knwo which file it belongs
|
||||||
// to so we need to add it to the update table too
|
// to so we need to add it to the update table too
|
||||||
// note the alloc, actual act is delayed; see above
|
// note the alloc, actual act is delayed; see above
|
||||||
ins->oprnd1()->setResultLive();
|
if (vtuneHandle) {
|
||||||
uint32_t currentLine = (uint32_t) ins->oprnd1()->immI();
|
uint32_t currentLine = (uint32_t) ins->oprnd1()->immI();
|
||||||
cgen->jitLineNumUpdate(currentLine);
|
vtuneLine(vtuneHandle, currentLine, _nIns);
|
||||||
cgen->jitAddRecord((uintptr_t)_nIns, 0, currentLine, true);
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif // VTUNE
|
#endif // VMCFG_VTUNE
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef NJ_VERBOSE
|
#ifdef NJ_VERBOSE
|
||||||
|
@ -1968,10 +1985,6 @@ namespace nanojit
|
||||||
if (error())
|
if (error())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
#ifdef VTUNE
|
|
||||||
cgen->jitCodePosUpdate((uintptr_t)_nIns);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// check that all is well (don't check in exit paths since its more complicated)
|
// check that all is well (don't check in exit paths since its more complicated)
|
||||||
debug_only( pageValidate(); )
|
debug_only( pageValidate(); )
|
||||||
debug_only( resourceConsistencyCheck(); )
|
debug_only( resourceConsistencyCheck(); )
|
||||||
|
@ -2073,24 +2086,23 @@ namespace nanojit
|
||||||
VMPI_sprintf(s, "RR");
|
VMPI_sprintf(s, "RR");
|
||||||
s += VMPI_strlen(s);
|
s += VMPI_strlen(s);
|
||||||
|
|
||||||
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
|
RegisterMask active = _allocator.activeMask();
|
||||||
|
for (Register r = lsReg(active); active != 0; r = nextLsReg(active, r)) {
|
||||||
LIns *ins = _allocator.getActive(r);
|
LIns *ins = _allocator.getActive(r);
|
||||||
if (ins) {
|
NanoAssertMsg(!_allocator.isFree(r),
|
||||||
NanoAssertMsg(!_allocator.isFree(r),
|
"Coding error; register is both free and active! " );
|
||||||
"Coding error; register is both free and active! " );
|
RefBuf b;
|
||||||
RefBuf b;
|
const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
|
||||||
const char* n = _thisfrag->lirbuf->printer->formatRef(&b, ins);
|
|
||||||
|
|
||||||
if (ins->isop(LIR_paramp) && ins->paramKind()==1 &&
|
if (ins->isop(LIR_paramp) && ins->paramKind()==1 &&
|
||||||
r == Assembler::savedRegs[ins->paramArg()])
|
r == Assembler::savedRegs[ins->paramArg()])
|
||||||
{
|
{
|
||||||
// dont print callee-saved regs that arent used
|
// dont print callee-saved regs that arent used
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
VMPI_sprintf(s, " %s(%s)", gpn(r), n);
|
|
||||||
s += VMPI_strlen(s);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VMPI_sprintf(s, " %s(%s)", gpn(r), n);
|
||||||
|
s += VMPI_strlen(s);
|
||||||
}
|
}
|
||||||
output();
|
output();
|
||||||
}
|
}
|
||||||
|
@ -2236,26 +2248,23 @@ namespace nanojit
|
||||||
Register tosave[LastReg-FirstReg+1];
|
Register tosave[LastReg-FirstReg+1];
|
||||||
int len=0;
|
int len=0;
|
||||||
RegAlloc *regs = &_allocator;
|
RegAlloc *regs = &_allocator;
|
||||||
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
|
RegisterMask evict_set = regs->activeMask() & GpRegs & ~ignore;
|
||||||
if (rmask(r) & GpRegs & ~ignore) {
|
for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r)) {
|
||||||
LIns *ins = regs->getActive(r);
|
LIns *ins = regs->getActive(r);
|
||||||
if (ins) {
|
if (canRemat(ins)) {
|
||||||
if (canRemat(ins)) {
|
NanoAssert(ins->getReg() == r);
|
||||||
NanoAssert(ins->getReg() == r);
|
evict(ins);
|
||||||
evict(ins);
|
}
|
||||||
}
|
else {
|
||||||
else {
|
int32_t pri = regs->getPriority(r);
|
||||||
int32_t pri = regs->getPriority(r);
|
// add to heap by adding to end and bubbling up
|
||||||
// add to heap by adding to end and bubbling up
|
int j = len++;
|
||||||
int j = len++;
|
while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
|
||||||
while (j > 0 && pri > regs->getPriority(tosave[j/2])) {
|
tosave[j] = tosave[j/2];
|
||||||
tosave[j] = tosave[j/2];
|
j /= 2;
|
||||||
j /= 2;
|
|
||||||
}
|
|
||||||
NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
|
|
||||||
tosave[j] = r;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
NanoAssert(size_t(j) < sizeof(tosave)/sizeof(tosave[0]));
|
||||||
|
tosave[j] = r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2297,24 +2306,12 @@ namespace nanojit
|
||||||
evictSomeActiveRegs(~(SavedRegs | ignore));
|
evictSomeActiveRegs(~(SavedRegs | ignore));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::evictAllActiveRegs()
|
// Generate code to restore any registers in 'regs' that are currently active,
|
||||||
{
|
|
||||||
// generate code to restore callee saved registers
|
|
||||||
// @todo speed this up
|
|
||||||
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
|
|
||||||
evictIfActive(r);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Assembler::evictSomeActiveRegs(RegisterMask regs)
|
void Assembler::evictSomeActiveRegs(RegisterMask regs)
|
||||||
{
|
{
|
||||||
// generate code to restore callee saved registers
|
RegisterMask evict_set = regs & _allocator.activeMask();
|
||||||
// @todo speed this up
|
for (Register r = lsReg(evict_set); evict_set; r = nextLsReg(evict_set, r))
|
||||||
for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) {
|
evict(_allocator.getActive(r));
|
||||||
if ((rmask(r) & regs)) {
|
|
||||||
evictIfActive(r);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2337,19 +2334,13 @@ namespace nanojit
|
||||||
// Do evictions and pops first.
|
// Do evictions and pops first.
|
||||||
verbose_only(bool shouldMention=false; )
|
verbose_only(bool shouldMention=false; )
|
||||||
// The obvious thing to do here is to iterate from FirstReg to LastReg.
|
// The obvious thing to do here is to iterate from FirstReg to LastReg.
|
||||||
// viz: for (Register r = FirstReg; r <= LastReg; r = nextreg(r)) ...
|
|
||||||
// However, on ARM that causes lower-numbered integer registers
|
// However, on ARM that causes lower-numbered integer registers
|
||||||
// to be be saved at higher addresses, which inhibits the formation
|
// to be be saved at higher addresses, which inhibits the formation
|
||||||
// of load/store multiple instructions. Hence iterate the loop the
|
// of load/store multiple instructions. Hence iterate the loop the
|
||||||
// other way. The "r <= LastReg" guards against wraparound in
|
// other way.
|
||||||
// the case where Register is treated as unsigned and FirstReg is zero.
|
RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
|
||||||
//
|
for (Register r = msReg(reg_set); reg_set; r = nextMsReg(reg_set, r))
|
||||||
// Note, the loop var is deliberately typed as int (*not* Register)
|
|
||||||
// to outsmart compilers that will otherwise report
|
|
||||||
// "error: comparison is always true due to limited range of data type".
|
|
||||||
for (int ri = LastReg; ri >= FirstReg && ri <= LastReg; ri = int(prevreg(Register(ri))))
|
|
||||||
{
|
{
|
||||||
Register const r = Register(ri);
|
|
||||||
LIns* curins = _allocator.getActive(r);
|
LIns* curins = _allocator.getActive(r);
|
||||||
LIns* savedins = saved.getActive(r);
|
LIns* savedins = saved.getActive(r);
|
||||||
if (curins != savedins)
|
if (curins != savedins)
|
||||||
|
@ -2403,7 +2394,8 @@ namespace nanojit
|
||||||
|
|
||||||
// Do evictions and pops first.
|
// Do evictions and pops first.
|
||||||
verbose_only(bool shouldMention=false; )
|
verbose_only(bool shouldMention=false; )
|
||||||
for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
|
RegisterMask reg_set = _allocator.activeMask() | saved.activeMask();
|
||||||
|
for (Register r = lsReg(reg_set); reg_set; r = nextLsReg(reg_set, r))
|
||||||
{
|
{
|
||||||
LIns* curins = _allocator.getActive(r);
|
LIns* curins = _allocator.getActive(r);
|
||||||
LIns* savedins = saved.getActive(r);
|
LIns* savedins = saved.getActive(r);
|
||||||
|
@ -2453,15 +2445,14 @@ namespace nanojit
|
||||||
NanoAssert(allow);
|
NanoAssert(allow);
|
||||||
LIns *ins, *vic = 0;
|
LIns *ins, *vic = 0;
|
||||||
int allow_pri = 0x7fffffff;
|
int allow_pri = 0x7fffffff;
|
||||||
for (Register r = FirstReg; r <= LastReg; r = nextreg(r))
|
RegisterMask vic_set = allow & _allocator.activeMask();
|
||||||
|
for (Register r = lsReg(vic_set); vic_set; r = nextLsReg(vic_set, r))
|
||||||
{
|
{
|
||||||
if ((allow & rmask(r)) && (ins = _allocator.getActive(r)) != 0)
|
ins = _allocator.getActive(r);
|
||||||
{
|
int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);
|
||||||
int pri = canRemat(ins) ? 0 : _allocator.getPriority(r);
|
if (!vic || pri < allow_pri) {
|
||||||
if (!vic || pri < allow_pri) {
|
vic = ins;
|
||||||
vic = ins;
|
allow_pri = pri;
|
||||||
allow_pri = pri;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
NanoAssert(vic != 0);
|
NanoAssert(vic != 0);
|
||||||
|
|
|
@ -196,7 +196,7 @@ namespace nanojit
|
||||||
typedef HashMap<uint64_t, uint64_t*> ImmDPoolMap;
|
typedef HashMap<uint64_t, uint64_t*> ImmDPoolMap;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef VTUNE
|
#ifdef VMCFG_VTUNE
|
||||||
class avmplus::CodegenLIR;
|
class avmplus::CodegenLIR;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -271,8 +271,8 @@ namespace nanojit
|
||||||
#endif // NJ_VERBOSE
|
#endif // NJ_VERBOSE
|
||||||
|
|
||||||
public:
|
public:
|
||||||
#ifdef VTUNE
|
#ifdef VMCFG_VTUNE
|
||||||
avmplus::CodegenLIR *cgen;
|
void* vtuneHandle;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc, AvmCore* core, LogControl* logc, const Config& config);
|
Assembler(CodeAlloc& codeAlloc, Allocator& dataAlloc, Allocator& alloc, AvmCore* core, LogControl* logc, const Config& config);
|
||||||
|
@ -315,7 +315,11 @@ namespace nanojit
|
||||||
Register registerAlloc(LIns* ins, RegisterMask allow, RegisterMask prefer);
|
Register registerAlloc(LIns* ins, RegisterMask allow, RegisterMask prefer);
|
||||||
Register registerAllocTmp(RegisterMask allow);
|
Register registerAllocTmp(RegisterMask allow);
|
||||||
void registerResetAll();
|
void registerResetAll();
|
||||||
void evictAllActiveRegs();
|
void evictAllActiveRegs() {
|
||||||
|
// The evicted set will be be intersected with activeSet(),
|
||||||
|
// so use an all-1s mask to avoid an extra load or call.
|
||||||
|
evictSomeActiveRegs(~RegisterMask(0));
|
||||||
|
}
|
||||||
void evictSomeActiveRegs(RegisterMask regs);
|
void evictSomeActiveRegs(RegisterMask regs);
|
||||||
void evictScratchRegsExcept(RegisterMask ignore);
|
void evictScratchRegsExcept(RegisterMask ignore);
|
||||||
void intersectRegisterState(RegAlloc& saved);
|
void intersectRegisterState(RegAlloc& saved);
|
||||||
|
|
|
@ -47,7 +47,11 @@
|
||||||
namespace nanojit
|
namespace nanojit
|
||||||
{
|
{
|
||||||
static const bool verbose = false;
|
static const bool verbose = false;
|
||||||
#if defined(NANOJIT_ARM)
|
#ifdef VMCFG_VTUNE
|
||||||
|
// vtune jit profiling api can't handle non-contiguous methods,
|
||||||
|
// so make the allocation size huge to avoid non-contiguous methods
|
||||||
|
static const int pagesPerAlloc = 128; // 1MB
|
||||||
|
#elif defined(NANOJIT_ARM)
|
||||||
// ARM requires single-page allocations, due to the constant pool that
|
// ARM requires single-page allocations, due to the constant pool that
|
||||||
// lives on each page that must be reachable by a 4kb pcrel load.
|
// lives on each page that must be reachable by a 4kb pcrel load.
|
||||||
static const int pagesPerAlloc = 1;
|
static const int pagesPerAlloc = 1;
|
||||||
|
|
|
@ -1976,13 +1976,16 @@ namespace nanojit
|
||||||
m_capNL[LIns3] = 16;
|
m_capNL[LIns3] = 16;
|
||||||
m_capNL[LInsCall] = 64;
|
m_capNL[LInsCall] = 64;
|
||||||
|
|
||||||
for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind))
|
for (NLKind nlkind = LInsFirst; nlkind <= LInsLast; nlkind = nextNLKind(nlkind)) {
|
||||||
m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]];
|
m_listNL[nlkind] = new (alloc) LIns*[m_capNL[nlkind]];
|
||||||
|
m_usedNL[nlkind] = 1; // Force memset in clearAll().
|
||||||
|
}
|
||||||
|
|
||||||
// Note that this allocates the CONST and MULTIPLE tables as well.
|
// Note that this allocates the CONST and MULTIPLE tables as well.
|
||||||
for (CseAcc a = 0; a < CSE_NUM_USED_ACCS; a++) {
|
for (CseAcc a = 0; a < CSE_NUM_USED_ACCS; a++) {
|
||||||
m_capL[a] = 16;
|
m_capL[a] = 16;
|
||||||
m_listL[a] = new (alloc) LIns*[m_capL[a]];
|
m_listL[a] = new (alloc) LIns*[m_capL[a]];
|
||||||
|
m_usedL[a] = 1; // Force memset(0) in first clearAll().
|
||||||
}
|
}
|
||||||
|
|
||||||
clearAll();
|
clearAll();
|
||||||
|
@ -2484,7 +2487,7 @@ namespace nanojit
|
||||||
// this function.
|
// this function.
|
||||||
AccSet a = storesSinceLastLoad & ((1 << EMB_NUM_USED_ACCS) - 1);
|
AccSet a = storesSinceLastLoad & ((1 << EMB_NUM_USED_ACCS) - 1);
|
||||||
while (a) {
|
while (a) {
|
||||||
int acc = msbSet(a);
|
int acc = msbSet32(a);
|
||||||
clearL((CseAcc)acc);
|
clearL((CseAcc)acc);
|
||||||
a &= ~(1 << acc);
|
a &= ~(1 << acc);
|
||||||
}
|
}
|
||||||
|
@ -3038,7 +3041,7 @@ namespace nanojit
|
||||||
|
|
||||||
case LIR_file:
|
case LIR_file:
|
||||||
case LIR_line:
|
case LIR_line:
|
||||||
// XXX: not sure about these ones. Ignore for the moment.
|
// These will never get hit since VTUNE implies !DEBUG. Ignore for the moment.
|
||||||
nArgs = 0;
|
nArgs = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
@ -289,32 +289,9 @@ namespace nanojit
|
||||||
struct MiniAccSet { MiniAccSetVal val; };
|
struct MiniAccSet { MiniAccSetVal val; };
|
||||||
static const MiniAccSet MINI_ACCSET_MULTIPLE = { 99 };
|
static const MiniAccSet MINI_ACCSET_MULTIPLE = { 99 };
|
||||||
|
|
||||||
#if defined(_WIN32) && (_MSC_VER >= 1300) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
|
|
||||||
extern "C" unsigned char _BitScanReverse(unsigned long * Index, unsigned long Mask);
|
|
||||||
# pragma intrinsic(_BitScanReverse)
|
|
||||||
|
|
||||||
// Returns the index of the most significant bit that is set.
|
|
||||||
static int msbSet(uint32_t x) {
|
|
||||||
unsigned long idx;
|
|
||||||
_BitScanReverse(&idx, (unsigned long)(x | 1)); // the '| 1' ensures a 0 result when x==0
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
#elif (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
|
|
||||||
static int msbSet(uint32_t x) {
|
|
||||||
return 31 - __builtin_clz(x | 1);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static int msbSet(uint32_t x) { // slow fallback version
|
|
||||||
for (int i = 31; i >= 0; i--)
|
|
||||||
if ((1 << i) & x)
|
|
||||||
return i;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static MiniAccSet compressAccSet(AccSet accSet) {
|
static MiniAccSet compressAccSet(AccSet accSet) {
|
||||||
if (isSingletonAccSet(accSet)) {
|
if (isSingletonAccSet(accSet)) {
|
||||||
MiniAccSet ret = { uint8_t(msbSet(accSet)) };
|
MiniAccSet ret = { uint8_t(msbSet32(accSet)) };
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1143,8 +1120,12 @@ namespace nanojit
|
||||||
// Nb: the types of these bitfields are all 32-bit integers to ensure
|
// Nb: the types of these bitfields are all 32-bit integers to ensure
|
||||||
// they are fully packed on Windows, sigh. Also, 'loadQual' is
|
// they are fully packed on Windows, sigh. Also, 'loadQual' is
|
||||||
// unsigned to ensure the values 0, 1, and 2 all fit in 2 bits.
|
// unsigned to ensure the values 0, 1, and 2 all fit in 2 bits.
|
||||||
int32_t disp:16;
|
//
|
||||||
int32_t miniAccSetVal:8;
|
// Nb: explicit signed keyword for bitfield types is required,
|
||||||
|
// some compilers may treat them as unsigned without it.
|
||||||
|
// See Bugzilla 584219 comment #18
|
||||||
|
signed int disp:16;
|
||||||
|
signed int miniAccSetVal:8;
|
||||||
uint32_t loadQual:2;
|
uint32_t loadQual:2;
|
||||||
|
|
||||||
LIns* oprnd_1;
|
LIns* oprnd_1;
|
||||||
|
|
|
@ -99,15 +99,6 @@
|
||||||
|
|
||||||
namespace nanojit {
|
namespace nanojit {
|
||||||
|
|
||||||
inline Register nextreg(Register r) {
|
|
||||||
return Register(r+1);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Register prevreg(Register r) {
|
|
||||||
return Register(r-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class Fragment;
|
class Fragment;
|
||||||
struct SideExit;
|
struct SideExit;
|
||||||
struct SwitchInfo;
|
struct SwitchInfo;
|
||||||
|
@ -152,9 +143,9 @@ namespace nanojit {
|
||||||
#define asm_output(...) do { \
|
#define asm_output(...) do { \
|
||||||
if (_logc->lcbits & LC_Native) { \
|
if (_logc->lcbits & LC_Native) { \
|
||||||
outline[0]='\0'; \
|
outline[0]='\0'; \
|
||||||
VMPI_sprintf(outline, "%p ", _nIns); \
|
VMPI_sprintf(outline, "%p ", _nIns); \
|
||||||
sprintf(&outline[13], ##__VA_ARGS__); \
|
VMPI_sprintf(outline+VMPI_strlen(outline), ##__VA_ARGS__); \
|
||||||
output(); \
|
output(); \
|
||||||
} \
|
} \
|
||||||
} while (0) /* no semi */
|
} while (0) /* no semi */
|
||||||
#define gpn(r) regNames[(r)]
|
#define gpn(r) regNames[(r)]
|
||||||
|
|
|
@ -43,7 +43,6 @@
|
||||||
|
|
||||||
#ifdef UNDER_CE
|
#ifdef UNDER_CE
|
||||||
#include <cmnintrin.h>
|
#include <cmnintrin.h>
|
||||||
extern "C" bool blx_lr_broken();
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(FEATURE_NANOJIT) && defined(NANOJIT_ARM)
|
#if defined(FEATURE_NANOJIT) && defined(NANOJIT_ARM)
|
||||||
|
@ -114,13 +113,14 @@ Assembler::CountLeadingZeroes(uint32_t data)
|
||||||
// ARMCC can do this with an intrinsic.
|
// ARMCC can do this with an intrinsic.
|
||||||
leading_zeroes = __clz(data);
|
leading_zeroes = __clz(data);
|
||||||
|
|
||||||
// current Android GCC compiler incorrectly refuses to compile 'clz' for armv5
|
#elif defined(__GNUC__) && (NJ_COMPILER_ARM_ARCH >= 5)
|
||||||
// (even though this is a legal instruction there). Since we currently only compile for ARMv5
|
|
||||||
// for emulation, we don't care too much (but we DO care for ARMv6+ since those are "real"
|
|
||||||
// devices).
|
|
||||||
#elif defined(__GNUC__) && !(defined(ANDROID) && __ARM_ARCH__ <= 5)
|
|
||||||
// GCC can use inline assembler to insert a CLZ instruction.
|
// GCC can use inline assembler to insert a CLZ instruction.
|
||||||
__asm (
|
__asm (
|
||||||
|
#if defined(ANDROID) && (NJ_COMPILER_ARM_ARCH < 7)
|
||||||
|
// On Android gcc compiler, the clz instruction is not supported with a
|
||||||
|
// target smaller than armv7, despite it being legal for armv5+.
|
||||||
|
" .arch armv7-a\n"
|
||||||
|
#endif
|
||||||
" clz %0, %1 \n"
|
" clz %0, %1 \n"
|
||||||
: "=r" (leading_zeroes)
|
: "=r" (leading_zeroes)
|
||||||
: "r" (data)
|
: "r" (data)
|
||||||
|
@ -463,11 +463,6 @@ Assembler::asm_eor_imm(Register rd, Register rn, int32_t imm, int stat /* =0 */)
|
||||||
void
|
void
|
||||||
Assembler::nInit(AvmCore*)
|
Assembler::nInit(AvmCore*)
|
||||||
{
|
{
|
||||||
#ifdef UNDER_CE
|
|
||||||
blx_lr_bug = blx_lr_broken();
|
|
||||||
#else
|
|
||||||
blx_lr_bug = 0;
|
|
||||||
#endif
|
|
||||||
nHints[LIR_calli] = rmask(retRegs[0]);
|
nHints[LIR_calli] = rmask(retRegs[0]);
|
||||||
nHints[LIR_hcalli] = rmask(retRegs[1]);
|
nHints[LIR_hcalli] = rmask(retRegs[1]);
|
||||||
nHints[LIR_paramp] = PREFER_SPECIAL;
|
nHints[LIR_paramp] = PREFER_SPECIAL;
|
||||||
|
@ -628,7 +623,7 @@ Assembler::asm_arg(ArgType ty, LIns* arg, Register& r, int& stkd)
|
||||||
// pre-assign registers R0-R3 for arguments (if they fit)
|
// pre-assign registers R0-R3 for arguments (if they fit)
|
||||||
if (r < R4) {
|
if (r < R4) {
|
||||||
asm_regarg(ty, arg, r);
|
asm_regarg(ty, arg, r);
|
||||||
r = nextreg(r);
|
r = Register(r + 1);
|
||||||
} else {
|
} else {
|
||||||
asm_stkarg(arg, stkd);
|
asm_stkarg(arg, stkd);
|
||||||
stkd += 4;
|
stkd += 4;
|
||||||
|
@ -662,14 +657,14 @@ Assembler::asm_arg_64(LIns* arg, Register& r, int& stkd)
|
||||||
// R3 if r is R3 to start with, and will force the argument to go on
|
// R3 if r is R3 to start with, and will force the argument to go on
|
||||||
// the stack.
|
// the stack.
|
||||||
if ((r == R1) || (r == R3)) {
|
if ((r == R1) || (r == R3)) {
|
||||||
r = nextreg(r);
|
r = Register(r + 1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (r < R3) {
|
if (r < R3) {
|
||||||
Register ra = r;
|
Register ra = r;
|
||||||
Register rb = nextreg(r);
|
Register rb = Register(r + 1);
|
||||||
r = nextreg(rb);
|
r = Register(rb + 1);
|
||||||
|
|
||||||
#ifdef NJ_ARM_EABI
|
#ifdef NJ_ARM_EABI
|
||||||
// EABI requires that 64-bit arguments are aligned on even-numbered
|
// EABI requires that 64-bit arguments are aligned on even-numbered
|
||||||
|
@ -692,12 +687,8 @@ Assembler::asm_arg_64(LIns* arg, Register& r, int& stkd)
|
||||||
// We only have one register left, but the legacy ABI requires that we
|
// We only have one register left, but the legacy ABI requires that we
|
||||||
// put 32 bits of the argument in the register (R3) and the remaining
|
// put 32 bits of the argument in the register (R3) and the remaining
|
||||||
// 32 bits on the stack.
|
// 32 bits on the stack.
|
||||||
Register ra = r;
|
Register ra = r; // R3
|
||||||
r = nextreg(r);
|
r = R4;
|
||||||
|
|
||||||
// This really just checks that nextreg() works properly, as we know
|
|
||||||
// that r was previously R3.
|
|
||||||
NanoAssert(r == R4);
|
|
||||||
|
|
||||||
// We're splitting the argument between registers and the stack. This
|
// We're splitting the argument between registers and the stack. This
|
||||||
// must be the first time that the stack is used, so stkd must be at 0.
|
// must be the first time that the stack is used, so stkd must be at 0.
|
||||||
|
@ -912,26 +903,17 @@ Assembler::asm_call(LIns* ins)
|
||||||
outputf(" %p:", _nIns);
|
outputf(" %p:", _nIns);
|
||||||
)
|
)
|
||||||
|
|
||||||
// Direct call: on v5 and above (where the calling sequence doesn't
|
|
||||||
// corrupt LR until the actual branch instruction), we can avoid an
|
|
||||||
// interlock in the "long" branch sequence by manually loading the
|
|
||||||
// target address into LR ourselves before setting up the parameters
|
|
||||||
// in other registers.
|
|
||||||
BranchWithLink((NIns*)ci->_address);
|
BranchWithLink((NIns*)ci->_address);
|
||||||
} else {
|
} else {
|
||||||
// Indirect call: we assign the address arg to LR since it's not
|
// Indirect call: we assign the address arg to LR
|
||||||
// used for regular arguments, and is otherwise scratch since it's
|
#ifdef UNDER_CE
|
||||||
// clobbered by the call. On v4/v4T, where we have to manually do
|
// workaround for msft device emulator bug (blx lr emulated as no-op)
|
||||||
// the equivalent of a BLX, move LR into IP before corrupting LR
|
underrunProtect(8);
|
||||||
// with the return address.
|
BLX(IP);
|
||||||
if (blx_lr_bug) {
|
MOV(IP, LR);
|
||||||
// workaround for msft device emulator bug (blx lr emulated as no-op)
|
#else
|
||||||
underrunProtect(8);
|
BLX(LR);
|
||||||
BLX(IP);
|
#endif
|
||||||
MOV(IP,LR);
|
|
||||||
} else {
|
|
||||||
BLX(LR);
|
|
||||||
}
|
|
||||||
asm_regarg(ARGTYPE_I, ins->arg(--argc), LR);
|
asm_regarg(ARGTYPE_I, ins->arg(--argc), LR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -981,8 +963,6 @@ Assembler::nRegisterResetAll(RegAlloc& a)
|
||||||
rmask(R10) | rmask(LR);
|
rmask(R10) | rmask(LR);
|
||||||
if (_config.arm_vfp)
|
if (_config.arm_vfp)
|
||||||
a.free |= FpRegs;
|
a.free |= FpRegs;
|
||||||
|
|
||||||
debug_only(a.managed = a.free);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline ConditionCode
|
static inline ConditionCode
|
||||||
|
@ -1925,17 +1905,19 @@ Assembler::BLX(Register addr, bool chk /* = true */)
|
||||||
NanoAssert(_config.arm_arch >= 5);
|
NanoAssert(_config.arm_arch >= 5);
|
||||||
|
|
||||||
NanoAssert(IsGpReg(addr));
|
NanoAssert(IsGpReg(addr));
|
||||||
|
#ifdef UNDER_CE
|
||||||
// There is a bug in the WinCE device emulator which stops "BLX LR" from
|
// There is a bug in the WinCE device emulator which stops "BLX LR" from
|
||||||
// working as expected. Assert that we never do that!
|
// working as expected. Assert that we never do that!
|
||||||
if (blx_lr_bug) { NanoAssert(addr != LR); }
|
NanoAssert(addr != LR);
|
||||||
|
#endif
|
||||||
|
|
||||||
if (chk) {
|
if (chk) {
|
||||||
underrunProtect(4);
|
underrunProtect(4);
|
||||||
}
|
}
|
||||||
|
|
||||||
// BLX IP
|
// BLX reg
|
||||||
*(--_nIns) = (NIns)( (COND_AL) | (0x12<<20) | (0xFFF<<8) | (0x3<<4) | (addr) );
|
*(--_nIns) = (NIns)( (COND_AL) | (0x12<<20) | (0xFFF<<8) | (0x3<<4) | (addr) );
|
||||||
asm_output("blx ip");
|
asm_output("blx %s", gpn(addr));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit the code required to load a memory address into a register as follows:
|
// Emit the code required to load a memory address into a register as follows:
|
||||||
|
@ -2777,14 +2759,13 @@ Assembler::asm_cmov(LIns* ins)
|
||||||
|
|
||||||
Register rf = findRegFor(iffalse, allow & ~rmask(rr));
|
Register rf = findRegFor(iffalse, allow & ~rmask(rr));
|
||||||
|
|
||||||
|
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
|
||||||
|
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
|
||||||
|
|
||||||
if (ins->isop(LIR_cmovd)) {
|
if (ins->isop(LIR_cmovd)) {
|
||||||
NIns* target = _nIns;
|
NIns* target = _nIns;
|
||||||
asm_nongp_copy(rr, rf);
|
asm_nongp_copy(rr, rf);
|
||||||
asm_branch(false, condval, target);
|
asm_branch(false, condval, target);
|
||||||
|
|
||||||
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
|
|
||||||
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
|
|
||||||
|
|
||||||
if (rr != rt)
|
if (rr != rt)
|
||||||
asm_nongp_copy(rr, rt);
|
asm_nongp_copy(rr, rt);
|
||||||
freeResourcesOf(ins);
|
freeResourcesOf(ins);
|
||||||
|
@ -2795,9 +2776,6 @@ Assembler::asm_cmov(LIns* ins)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
|
|
||||||
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
|
|
||||||
|
|
||||||
// WARNING: We cannot generate any code that affects the condition
|
// WARNING: We cannot generate any code that affects the condition
|
||||||
// codes between the MRcc generation here and the asm_cmp() call
|
// codes between the MRcc generation here and the asm_cmp() call
|
||||||
// below. See asm_cmp() for more details.
|
// below. See asm_cmp() for more details.
|
||||||
|
|
|
@ -240,7 +240,6 @@ verbose_only( extern const char* shiftNames[]; )
|
||||||
inline uint32_t CountLeadingZeroes(uint32_t data); \
|
inline uint32_t CountLeadingZeroes(uint32_t data); \
|
||||||
int * _nSlot; \
|
int * _nSlot; \
|
||||||
int * _nExitSlot; \
|
int * _nExitSlot; \
|
||||||
bool blx_lr_bug; \
|
|
||||||
int max_out_args; /* bytes */
|
int max_out_args; /* bytes */
|
||||||
|
|
||||||
#define IMM32(imm) *(--_nIns) = (NIns)((imm));
|
#define IMM32(imm) *(--_nIns) = (NIns)((imm));
|
||||||
|
|
|
@ -481,8 +481,8 @@ namespace nanojit
|
||||||
// where we are
|
// where we are
|
||||||
if (stkd & 4) {
|
if (stkd & 4) {
|
||||||
if (stkd < 16) {
|
if (stkd < 16) {
|
||||||
r = nextreg(r);
|
r = Register(r + 1);
|
||||||
fr = nextreg(fr);
|
fr = Register(fr + 1);
|
||||||
}
|
}
|
||||||
stkd += 4;
|
stkd += 4;
|
||||||
}
|
}
|
||||||
|
@ -496,11 +496,11 @@ namespace nanojit
|
||||||
// Move it to the integer pair
|
// Move it to the integer pair
|
||||||
Register fpupair = arg->getReg();
|
Register fpupair = arg->getReg();
|
||||||
Register intpair = fr;
|
Register intpair = fr;
|
||||||
MFC1(mswregpair(intpair), nextreg(fpupair)); // Odd fpu register contains sign,expt,manthi
|
MFC1(mswregpair(intpair), Register(fpupair + 1)); // Odd fpu register contains sign,expt,manthi
|
||||||
MFC1(lswregpair(intpair), fpupair); // Even fpu register contains mantlo
|
MFC1(lswregpair(intpair), fpupair); // Even fpu register contains mantlo
|
||||||
}
|
}
|
||||||
r = nextreg(nextreg(r));
|
r = Register(r + 2);
|
||||||
fr = nextreg(nextreg(fr));
|
fr = Register(fr + 2);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
asm_stkarg(arg, stkd);
|
asm_stkarg(arg, stkd);
|
||||||
|
@ -1578,8 +1578,8 @@ namespace nanojit
|
||||||
NanoAssert(ty == ARGTYPE_I || ty == ARGTYPE_UI);
|
NanoAssert(ty == ARGTYPE_I || ty == ARGTYPE_UI);
|
||||||
if (stkd < 16) {
|
if (stkd < 16) {
|
||||||
asm_regarg(ty, arg, r);
|
asm_regarg(ty, arg, r);
|
||||||
fr = nextreg(fr);
|
fr = Register(fr + 1);
|
||||||
r = nextreg(r);
|
r = Register(r + 1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
asm_stkarg(arg, stkd);
|
asm_stkarg(arg, stkd);
|
||||||
|
@ -1684,7 +1684,6 @@ namespace nanojit
|
||||||
regs.free = GpRegs;
|
regs.free = GpRegs;
|
||||||
if (cpu_has_fpu)
|
if (cpu_has_fpu)
|
||||||
regs.free |= FpRegs;
|
regs.free |= FpRegs;
|
||||||
debug_only(regs.managed = regs.free;)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define signextend16(s) ((int32_t(s)<<16)>>16)
|
#define signextend16(s) ((int32_t(s)<<16)>>16)
|
||||||
|
|
|
@ -736,7 +736,7 @@ namespace nanojit
|
||||||
// GP arg
|
// GP arg
|
||||||
if (r <= R10) {
|
if (r <= R10) {
|
||||||
asm_regarg(ty, arg, r);
|
asm_regarg(ty, arg, r);
|
||||||
r = nextreg(r);
|
r = Register(r + 1);
|
||||||
param_size += sizeof(void*);
|
param_size += sizeof(void*);
|
||||||
} else {
|
} else {
|
||||||
// put arg on stack
|
// put arg on stack
|
||||||
|
@ -746,11 +746,11 @@ namespace nanojit
|
||||||
// double
|
// double
|
||||||
if (fr <= F13) {
|
if (fr <= F13) {
|
||||||
asm_regarg(ty, arg, fr);
|
asm_regarg(ty, arg, fr);
|
||||||
fr = nextreg(fr);
|
fr = Register(fr + 1);
|
||||||
#ifdef NANOJIT_64BIT
|
#ifdef NANOJIT_64BIT
|
||||||
r = nextreg(r);
|
r = Register(r + 1);
|
||||||
#else
|
#else
|
||||||
r = nextreg(nextreg(r)); // skip 2 gpr's
|
r = Register(r + 2); // skip 2 gpr's
|
||||||
#endif
|
#endif
|
||||||
param_size += sizeof(double);
|
param_size += sizeof(double);
|
||||||
} else {
|
} else {
|
||||||
|
@ -1040,11 +1040,11 @@ namespace nanojit
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::asm_dasq(LIns *ins) {
|
void Assembler::asm_dasq(LIns*) {
|
||||||
TODO(asm_dasq);
|
TODO(asm_dasq);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::asm_qasd(LIns *ins) {
|
void Assembler::asm_qasd(LIns*) {
|
||||||
TODO(asm_qasd);
|
TODO(asm_qasd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1390,7 +1390,6 @@ namespace nanojit
|
||||||
void Assembler::nRegisterResetAll(RegAlloc ®s) {
|
void Assembler::nRegisterResetAll(RegAlloc ®s) {
|
||||||
regs.clear();
|
regs.clear();
|
||||||
regs.free = SavedRegs | 0x1ff8 /* R3-12 */ | 0x3ffe00000000LL /* F1-13 */;
|
regs.free = SavedRegs | 0x1ff8 /* R3-12 */ | 0x3ffe00000000LL /* F1-13 */;
|
||||||
debug_only(regs.managed = regs.free);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef NANOJIT_64BIT
|
#ifdef NANOJIT_64BIT
|
||||||
|
|
|
@ -234,7 +234,6 @@ namespace nanojit
|
||||||
{
|
{
|
||||||
a.clear();
|
a.clear();
|
||||||
a.free = GpRegs | FpRegs;
|
a.free = GpRegs | FpRegs;
|
||||||
debug_only( a.managed = a.free; )
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::nPatchBranch(NIns* branch, NIns* location)
|
void Assembler::nPatchBranch(NIns* branch, NIns* location)
|
||||||
|
@ -537,7 +536,7 @@ namespace nanojit
|
||||||
return at;
|
return at;
|
||||||
}
|
}
|
||||||
|
|
||||||
NIns* Assembler::asm_branch_ov(LOpcode, NIns* targ)
|
NIns* Assembler::asm_branch_ov(LOpcode op, NIns* targ)
|
||||||
{
|
{
|
||||||
NIns* at = 0;
|
NIns* at = 0;
|
||||||
underrunProtect(32);
|
underrunProtect(32);
|
||||||
|
@ -552,7 +551,10 @@ namespace nanojit
|
||||||
}
|
}
|
||||||
NOP();
|
NOP();
|
||||||
|
|
||||||
BVS(0, tt);
|
if( op == LIR_mulxovi || op == LIR_muljovi )
|
||||||
|
BNE(0, tt);
|
||||||
|
else
|
||||||
|
BVS(0, tt);
|
||||||
return at;
|
return at;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -645,7 +647,7 @@ namespace nanojit
|
||||||
|
|
||||||
Register rb = deprecated_UnknownReg;
|
Register rb = deprecated_UnknownReg;
|
||||||
RegisterMask allow = GpRegs;
|
RegisterMask allow = GpRegs;
|
||||||
bool forceReg = (op == LIR_muli || op == LIR_mulxovi || !rhs->isImmI());
|
bool forceReg = (op == LIR_muli || op == LIR_mulxovi || op == LIR_muljovi || !rhs->isImmI());
|
||||||
|
|
||||||
if (lhs != rhs && forceReg)
|
if (lhs != rhs && forceReg)
|
||||||
{
|
{
|
||||||
|
@ -679,8 +681,14 @@ namespace nanojit
|
||||||
ADDCC(rr, rb, rr);
|
ADDCC(rr, rb, rr);
|
||||||
else if (op == LIR_subi || op == LIR_subxovi)
|
else if (op == LIR_subi || op == LIR_subxovi)
|
||||||
SUBCC(rr, rb, rr);
|
SUBCC(rr, rb, rr);
|
||||||
else if (op == LIR_muli || op == LIR_mulxovi)
|
else if (op == LIR_muli)
|
||||||
MULX(rr, rb, rr);
|
SMULCC(rr, rb, rr);
|
||||||
|
else if (op == LIR_mulxovi || op == LIR_muljovi) {
|
||||||
|
SUBCC(L4, L6, L4);
|
||||||
|
SRAI(rr, 31, L6);
|
||||||
|
RDY(L4);
|
||||||
|
SMULCC(rr, rb, rr);
|
||||||
|
}
|
||||||
else if (op == LIR_andi)
|
else if (op == LIR_andi)
|
||||||
AND(rr, rb, rr);
|
AND(rr, rb, rr);
|
||||||
else if (op == LIR_ori)
|
else if (op == LIR_ori)
|
||||||
|
|
|
@ -737,10 +737,10 @@ namespace nanojit
|
||||||
asm_output("movvs %d, %s", simm11, gpn(rd)); \
|
asm_output("movvs %d, %s", simm11, gpn(rd)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define MULX(rs1, rs2, rd) \
|
#define SMULCC(rs1, rs2, rd) \
|
||||||
do { \
|
do { \
|
||||||
Format_3_1(2, rd, 0x9, rs1, 0, rs2); \
|
Format_3_1(2, rd, 0x1b, rs1, 0, rs2); \
|
||||||
asm_output("mul %s, %s, %s", gpn(rs1), gpn(rs2), gpn(rd)); \
|
asm_output("smulcc %s, %s, %s", gpn(rs1), gpn(rs2), gpn(rd)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define NOP() \
|
#define NOP() \
|
||||||
|
@ -773,6 +773,12 @@ namespace nanojit
|
||||||
asm_output("andcc %s, %s, %s", gpn(rs1), gpn(rs2), gpn(rd)); \
|
asm_output("andcc %s, %s, %s", gpn(rs1), gpn(rs2), gpn(rd)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define RDY(rd) \
|
||||||
|
do { \
|
||||||
|
Format_3_1(2, rd, 0x28, 0, 0, 0); \
|
||||||
|
asm_output("rdy %s", gpn(rd)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#define RESTORE(rs1, rs2, rd) \
|
#define RESTORE(rs1, rs2, rd) \
|
||||||
do { \
|
do { \
|
||||||
Format_3_1(2, rd, 0x3D, rs1, 0, rs2); \
|
Format_3_1(2, rd, 0x3D, rs1, 0, rs2); \
|
||||||
|
@ -809,6 +815,12 @@ namespace nanojit
|
||||||
asm_output("sra %s, %s, %s", gpn(rs1), gpn(rs2), gpn(rd)); \
|
asm_output("sra %s, %s, %s", gpn(rs1), gpn(rs2), gpn(rd)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define SRAI(rs1, shcnt32, rd) \
|
||||||
|
do { \
|
||||||
|
Format_3_6(2, rd, 0x27, rs1, shcnt32); \
|
||||||
|
asm_output("sra %s, %d, %s", gpn(rs1), shcnt32, gpn(rd)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#define SRL(rs1, rs2, rd) \
|
#define SRL(rs1, rs2, rd) \
|
||||||
do { \
|
do { \
|
||||||
Format_3_5(2, rd, 0x26, rs1, 0, rs2); \
|
Format_3_5(2, rd, 0x26, rs1, 0, rs2); \
|
||||||
|
|
|
@ -966,7 +966,7 @@ namespace nanojit
|
||||||
else if (ty == ARGTYPE_D && fr < XMM8) {
|
else if (ty == ARGTYPE_D && fr < XMM8) {
|
||||||
// double goes in next available XMM register
|
// double goes in next available XMM register
|
||||||
asm_regarg(ty, arg, fr);
|
asm_regarg(ty, arg, fr);
|
||||||
fr = nextreg(fr);
|
fr = Register(fr + 1);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
else {
|
else {
|
||||||
|
@ -1119,14 +1119,13 @@ namespace nanojit
|
||||||
|
|
||||||
Register rf = findRegFor(iffalse, allow & ~rmask(rr));
|
Register rf = findRegFor(iffalse, allow & ~rmask(rr));
|
||||||
|
|
||||||
|
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
|
||||||
|
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
|
||||||
|
|
||||||
if (ins->isop(LIR_cmovd)) {
|
if (ins->isop(LIR_cmovd)) {
|
||||||
NIns* target = _nIns;
|
NIns* target = _nIns;
|
||||||
asm_nongp_copy(rr, rf);
|
asm_nongp_copy(rr, rf);
|
||||||
asm_branch(false, cond, target);
|
asm_branch(false, cond, target);
|
||||||
|
|
||||||
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
|
|
||||||
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
|
|
||||||
|
|
||||||
if (rr != rt)
|
if (rr != rt)
|
||||||
asm_nongp_copy(rr, rt);
|
asm_nongp_copy(rr, rt);
|
||||||
freeResourcesOf(ins);
|
freeResourcesOf(ins);
|
||||||
|
@ -1137,9 +1136,6 @@ namespace nanojit
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
|
|
||||||
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
|
|
||||||
|
|
||||||
// WARNING: We cannot generate any code that affects the condition
|
// WARNING: We cannot generate any code that affects the condition
|
||||||
// codes between the MRcc generation here and the asm_cmp() call
|
// codes between the MRcc generation here and the asm_cmp() call
|
||||||
// below. See asm_cmp() for more details.
|
// below. See asm_cmp() for more details.
|
||||||
|
@ -1905,7 +1901,6 @@ namespace nanojit
|
||||||
#else
|
#else
|
||||||
a.free = 0xffffffff & ~(1<<RSP | 1<<RBP);
|
a.free = 0xffffffff & ~(1<<RSP | 1<<RBP);
|
||||||
#endif
|
#endif
|
||||||
debug_only( a.managed = a.free; )
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::nPatchBranch(NIns *patch, NIns *target) {
|
void Assembler::nPatchBranch(NIns *patch, NIns *target) {
|
||||||
|
|
|
@ -1112,7 +1112,6 @@ namespace nanojit
|
||||||
a.free = SavedRegs | ScratchRegs;
|
a.free = SavedRegs | ScratchRegs;
|
||||||
if (!_config.i386_sse2)
|
if (!_config.i386_sse2)
|
||||||
a.free &= ~XmmRegs;
|
a.free &= ~XmmRegs;
|
||||||
debug_only( a.managed = a.free; )
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Assembler::nPatchBranch(NIns* branch, NIns* targ)
|
void Assembler::nPatchBranch(NIns* branch, NIns* targ)
|
||||||
|
@ -2059,14 +2058,13 @@ namespace nanojit
|
||||||
|
|
||||||
Register rf = findRegFor(iffalse, allow & ~rmask(rr));
|
Register rf = findRegFor(iffalse, allow & ~rmask(rr));
|
||||||
|
|
||||||
|
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
|
||||||
|
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
|
||||||
|
|
||||||
if (ins->isop(LIR_cmovd)) {
|
if (ins->isop(LIR_cmovd)) {
|
||||||
NIns* target = _nIns;
|
NIns* target = _nIns;
|
||||||
asm_nongp_copy(rr, rf);
|
asm_nongp_copy(rr, rf);
|
||||||
asm_branch(false, condval, target);
|
asm_branch(false, condval, target);
|
||||||
|
|
||||||
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
|
|
||||||
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
|
|
||||||
|
|
||||||
if (rr != rt)
|
if (rr != rt)
|
||||||
asm_nongp_copy(rr, rt);
|
asm_nongp_copy(rr, rt);
|
||||||
freeResourcesOf(ins);
|
freeResourcesOf(ins);
|
||||||
|
@ -2077,9 +2075,6 @@ namespace nanojit
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If 'iftrue' isn't in a register, it can be clobbered by 'ins'.
|
|
||||||
Register rt = iftrue->isInReg() ? iftrue->getReg() : rr;
|
|
||||||
|
|
||||||
NanoAssert(ins->isop(LIR_cmovi));
|
NanoAssert(ins->isop(LIR_cmovi));
|
||||||
|
|
||||||
// WARNING: We cannot generate any code that affects the condition
|
// WARNING: We cannot generate any code that affects the condition
|
||||||
|
|
|
@ -45,14 +45,6 @@ namespace nanojit
|
||||||
|
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
|
|
||||||
uint32_t RegAlloc::countActive()
|
|
||||||
{
|
|
||||||
int cnt = 0;
|
|
||||||
for(Register i=FirstReg; i <= LastReg; i = nextreg(i))
|
|
||||||
cnt += active[i] ? 1 : 0;
|
|
||||||
return cnt;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool RegAlloc::isConsistent(Register r, LIns* i) const
|
bool RegAlloc::isConsistent(Register r, LIns* i) const
|
||||||
{
|
{
|
||||||
NanoAssert(r != deprecated_UnknownReg);
|
NanoAssert(r != deprecated_UnknownReg);
|
||||||
|
|
|
@ -120,9 +120,13 @@ namespace nanojit
|
||||||
return active[r];
|
return active[r];
|
||||||
}
|
}
|
||||||
|
|
||||||
debug_only( uint32_t countActive(); )
|
// Return a mask containing the active registers. For each register
|
||||||
|
// in this set, getActive(register) will be a nonzero LIns pointer.
|
||||||
|
RegisterMask activeMask() const {
|
||||||
|
return ~free & managed;
|
||||||
|
}
|
||||||
|
|
||||||
debug_only( bool isConsistent(Register r, LIns* v) const; )
|
debug_only( bool isConsistent(Register r, LIns* v) const; )
|
||||||
debug_only( RegisterMask managed; ) // the registers managed by the register allocator
|
|
||||||
|
|
||||||
// Some basics:
|
// Some basics:
|
||||||
//
|
//
|
||||||
|
@ -171,10 +175,41 @@ namespace nanojit
|
||||||
//
|
//
|
||||||
LIns* active[LastReg + 1]; // active[r] = LIns that defines r
|
LIns* active[LastReg + 1]; // active[r] = LIns that defines r
|
||||||
int32_t usepri[LastReg + 1]; // used priority. lower = more likely to spill.
|
int32_t usepri[LastReg + 1]; // used priority. lower = more likely to spill.
|
||||||
RegisterMask free;
|
RegisterMask free; // Registers currently free.
|
||||||
|
RegisterMask managed; // Registers under management (invariant).
|
||||||
int32_t priority;
|
int32_t priority;
|
||||||
|
|
||||||
DECLARE_PLATFORM_REGALLOC()
|
DECLARE_PLATFORM_REGALLOC()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Return the lowest numbered Register in mask.
|
||||||
|
inline Register lsReg(RegisterMask mask) {
|
||||||
|
// This is faster than it looks; we rely on the C++ optimizer
|
||||||
|
// to strip the dead branch and inline just one alternative.
|
||||||
|
if (sizeof(RegisterMask) == 4)
|
||||||
|
return (Register) lsbSet32(mask);
|
||||||
|
else
|
||||||
|
return (Register) lsbSet64(mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the highest numbered Register in mask.
|
||||||
|
inline Register msReg(RegisterMask mask) {
|
||||||
|
// This is faster than it looks; we rely on the C++ optimizer
|
||||||
|
// to strip the dead branch and inline just one alternative.
|
||||||
|
if (sizeof(RegisterMask) == 4)
|
||||||
|
return (Register) msbSet32(mask);
|
||||||
|
else
|
||||||
|
return (Register) msbSet64(mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear bit r in mask, then return lsReg(mask).
|
||||||
|
inline Register nextLsReg(RegisterMask& mask, Register r) {
|
||||||
|
return lsReg(mask &= ~rmask(r));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear bit r in mask, then return msReg(mask).
|
||||||
|
inline Register nextMsReg(RegisterMask& mask, Register r) {
|
||||||
|
return msReg(mask &= ~rmask(r));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif // __nanojit_RegAlloc__
|
#endif // __nanojit_RegAlloc__
|
||||||
|
|
|
@ -41,13 +41,6 @@
|
||||||
typedef void *maddr_ptr;
|
typedef void *maddr_ptr;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(AVMPLUS_ARM) && defined(UNDER_CE)
|
|
||||||
extern "C" bool
|
|
||||||
blx_lr_broken() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
using namespace avmplus;
|
using namespace avmplus;
|
||||||
|
|
||||||
nanojit::Config AvmCore::config;
|
nanojit::Config AvmCore::config;
|
||||||
|
|
|
@ -189,6 +189,121 @@ static inline bool isU32(uintptr_t i) {
|
||||||
#define alignTo(x,s) ((((uintptr_t)(x)))&~(((uintptr_t)s)-1))
|
#define alignTo(x,s) ((((uintptr_t)(x)))&~(((uintptr_t)s)-1))
|
||||||
#define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1))
|
#define alignUp(x,s) ((((uintptr_t)(x))+(((uintptr_t)s)-1))&~(((uintptr_t)s)-1))
|
||||||
|
|
||||||
|
namespace nanojit
|
||||||
|
{
|
||||||
|
// Define msbSet32(), lsbSet32(), msbSet64(), and lsbSet64() functions using
|
||||||
|
// fast find-first-bit intrinsics when available.
|
||||||
|
// The fall-back implementations use iteration.
|
||||||
|
#if defined(_WIN32) && (_MSC_VER >= 1300) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
|
||||||
|
|
||||||
|
extern "C" unsigned char _BitScanForward(unsigned long * Index, unsigned long Mask);
|
||||||
|
extern "C" unsigned char _BitScanReverse(unsigned long * Index, unsigned long Mask);
|
||||||
|
# pragma intrinsic(_BitScanForward)
|
||||||
|
# pragma intrinsic(_BitScanReverse)
|
||||||
|
|
||||||
|
// Returns the index of the most significant bit that is set.
|
||||||
|
static inline int msbSet32(uint32_t x) {
|
||||||
|
unsigned long idx;
|
||||||
|
_BitScanReverse(&idx, (unsigned long)(x | 1)); // the '| 1' ensures a 0 result when x==0
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the index of the least significant bit that is set.
|
||||||
|
static inline int lsbSet32(uint32_t x) {
|
||||||
|
unsigned long idx;
|
||||||
|
_BitScanForward(&idx, (unsigned long)(x | 0x80000000)); // the '| 0x80000000' ensures a defined result (31) when x==0
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(_M_AMD64) || defined(_M_X64)
|
||||||
|
extern "C" unsigned char _BitScanForward64(unsigned long * Index, unsigned __int64 Mask);
|
||||||
|
extern "C" unsigned char _BitScanReverse64(unsigned long * Index, unsigned __int64 Mask);
|
||||||
|
# pragma intrinsic(_BitScanForward64)
|
||||||
|
# pragma intrinsic(_BitScanReverse64)
|
||||||
|
|
||||||
|
// Returns the index of the most significant bit that is set.
|
||||||
|
static inline int msbSet64(uint64_t x) {
|
||||||
|
unsigned long idx;
|
||||||
|
_BitScanReverse64(&idx, (unsigned __int64)(x | 1)); // the '| 1' ensures a 0 result when x==0
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the index of the least significant bit that is set.
|
||||||
|
static inline int lsbSet64(uint64_t x) {
|
||||||
|
unsigned long idx;
|
||||||
|
_BitScanForward64(&idx, (unsigned __int64)(x | 0x8000000000000000LL)); // the '| 0x8000000000000000LL' ensures a defined result (63) when x==0
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// Returns the index of the most significant bit that is set.
|
||||||
|
static int msbSet64(uint64_t x) {
|
||||||
|
return (x & 0xffffffff00000000LL) ? msbSet32(uint32_t(x >> 32)) + 32 : msbSet32(uint32_t(x));
|
||||||
|
}
|
||||||
|
// Returns the index of the least significant bit that is set.
|
||||||
|
static int lsbSet64(uint64_t x) {
|
||||||
|
return (x & 0x00000000ffffffffLL) ? lsbSet32(uint32_t(x)) : lsbSet32(uint32_t(x >> 32)) + 32;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#elif (__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
|
||||||
|
|
||||||
|
// Returns the index of the most significant bit that is set.
|
||||||
|
static inline int msbSet32(uint32_t x) {
|
||||||
|
return 31 - __builtin_clz(x | 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the index of the least significant bit that is set.
|
||||||
|
static inline int lsbSet32(uint32_t x) {
|
||||||
|
return __builtin_ctz(x | 0x80000000);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the index of the most significant bit that is set.
|
||||||
|
static inline int msbSet64(uint64_t x) {
|
||||||
|
return 63 - __builtin_clzll(x | 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the index of the least significant bit that is set.
|
||||||
|
static inline int lsbSet64(uint64_t x) {
|
||||||
|
return __builtin_ctzll(x | 0x8000000000000000LL);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Slow fall-back: return most significant bit set by searching iteratively.
|
||||||
|
static int msbSet32(uint32_t x) {
|
||||||
|
for (int i = 31; i >= 0; i--)
|
||||||
|
if ((1 << i) & x)
|
||||||
|
return i;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Slow fall-back: return least significant bit set by searching iteratively.
|
||||||
|
static int lsbSet32(uint32_t x) {
|
||||||
|
for (int i = 0; i < 32; i++)
|
||||||
|
if ((1 << i) & x)
|
||||||
|
return i;
|
||||||
|
return 31;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Slow fall-back: return most significant bit set by searching iteratively.
|
||||||
|
static int msbSet64(uint64_t x) {
|
||||||
|
for (int i = 63; i >= 0; i--)
|
||||||
|
if ((1LL << i) & x)
|
||||||
|
return i;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Slow fall-back: return least significant bit set by searching iteratively.
|
||||||
|
static int lsbSet64(uint64_t x) {
|
||||||
|
for (int i = 0; i < 64; i++)
|
||||||
|
if ((1LL << i) & x)
|
||||||
|
return i;
|
||||||
|
return 63;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // select compiler
|
||||||
|
} // namespace nanojit
|
||||||
|
|
||||||
// -------------------------------------------------------------------
|
// -------------------------------------------------------------------
|
||||||
// START debug-logging definitions
|
// START debug-logging definitions
|
||||||
// -------------------------------------------------------------------
|
// -------------------------------------------------------------------
|
||||||
|
|
Загрузка…
Ссылка в новой задаче