зеркало из https://github.com/mozilla/pjs.git
Bug 520714 - nanojit: distinguish 64-bit int and float loads/stores. r=gal,rreitmai.
--HG-- extra : convert_revision : a19809f7ba60b4381b77b84363bebf0ff7cf9629
This commit is contained in:
Родитель
05e53a9c70
Коммит
6bfb46c1b1
|
@ -99,14 +99,13 @@ CLASS( LOP_F_I, 0, 2) // 67% LIR_i2f, LIR_u2f
|
|||
CLASS( LOP_I_F, 0, 2) // 69% LIR_qlo, LIR_qhi
|
||||
CLASS( LOP_F_II, 0, 1) // 70% LIR_qjoin
|
||||
|
||||
// XXX: "QorF" because the same opcode is used for both 64-bit int and
|
||||
// 64-bit float loads. Ditto for stores. That should be fixed, see
|
||||
// bug 520714.
|
||||
CLASS( LLD_I, 0, 4) // 74% LIR_ld
|
||||
CLASS( LLD_QorF, 0, 4) // 78% LIR_ldq
|
||||
CLASS( LLD_I, 0, 3) // 73% LIR_ld, LIR_ldc, LIR_ld*b, LIR_ld*s
|
||||
CLASS( LLD_Q, 1, 2) // 75% LIR_ldq, LIR_ldqc
|
||||
CLASS( LLD_F, 0, 3) // 78% LIR_ldf, LIR_ldfc
|
||||
|
||||
CLASS( LST_I, 0, 7) // 85% LIR_sti
|
||||
CLASS( LST_QorF, 0, 7) // 92% LIR_stqi
|
||||
CLASS( LST_I, 0, 5) // 83% LIR_sti
|
||||
CLASS( LST_Q, 1, 4) // 87% LIR_stqi
|
||||
CLASS( LST_F, 0, 5) // 92% LIR_stfi
|
||||
|
||||
CLASS( LCALL_I_I1, 0, 1) // 93% LIR_icall
|
||||
CLASS( LCALL_I_I6, 0, 1) // 94% LIR_icall
|
||||
|
|
|
@ -991,6 +991,7 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
|
|||
#endif
|
||||
case LIR_sti:
|
||||
case LIR_stqi:
|
||||
case LIR_stfi:
|
||||
need(3);
|
||||
ins = mLir->insStore(mOpcode, ref(mTokens[0]),
|
||||
ref(mTokens[1]),
|
||||
|
@ -1011,6 +1012,8 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
|
|||
case LIR_ldc:
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
case LIR_ldcb:
|
||||
case LIR_ldcs:
|
||||
ins = assemble_load();
|
||||
|
@ -1357,7 +1360,7 @@ FragmentAssembler::assembleRandomFragment(int nIns)
|
|||
F_II_ops.push_back(LIR_qjoin);
|
||||
|
||||
vector<LOpcode> I_loads;
|
||||
I_loads.push_back(LIR_ld); // weight LIR_ld the heaviest
|
||||
I_loads.push_back(LIR_ld); // weight LIR_ld more heavily
|
||||
I_loads.push_back(LIR_ld);
|
||||
I_loads.push_back(LIR_ld);
|
||||
I_loads.push_back(LIR_ldc);
|
||||
|
@ -1372,14 +1375,20 @@ FragmentAssembler::assembleRandomFragment(int nIns)
|
|||
I_loads.push_back(LIR_ldcss);
|
||||
#endif
|
||||
|
||||
vector<LOpcode> QorF_loads;
|
||||
QorF_loads.push_back(LIR_ldq); // weight LIR_ldq the heaviest
|
||||
QorF_loads.push_back(LIR_ldq);
|
||||
QorF_loads.push_back(LIR_ldqc);
|
||||
vector<LOpcode> Q_loads;
|
||||
Q_loads.push_back(LIR_ldq); // weight LIR_ldq more heavily
|
||||
Q_loads.push_back(LIR_ldq);
|
||||
Q_loads.push_back(LIR_ldqc);
|
||||
|
||||
vector<LOpcode> F_loads;
|
||||
F_loads.push_back(LIR_ldf); // weight LIR_ldf more heavily
|
||||
F_loads.push_back(LIR_ldf);
|
||||
F_loads.push_back(LIR_ldfc);
|
||||
#if NJ_EXPANDED_LOADSTORE_SUPPORTED
|
||||
// this loads a 32-bit float and expands to 64-bit float
|
||||
QorF_loads.push_back(LIR_ld32f);
|
||||
QorF_loads.push_back(LIR_ldc32f);
|
||||
F_loads.push_back(LIR_ld32f); // weight LIR_ld32f more heavily
|
||||
F_loads.push_back(LIR_ld32f);
|
||||
F_loads.push_back(LIR_ldc32f);
|
||||
#endif
|
||||
|
||||
enum LInsClass {
|
||||
|
@ -1700,15 +1709,23 @@ FragmentAssembler::assembleRandomFragment(int nIns)
|
|||
break;
|
||||
}
|
||||
|
||||
case LLD_QorF: {
|
||||
case LLD_Q:
|
||||
if (!M8ps.empty()) {
|
||||
LIns* base = rndPick(M8ps);
|
||||
ins = mLir->insLoad(rndPick(QorF_loads), base, rndOffset64(base->size()));
|
||||
addOrReplace((rnd(2) ? Qs : Fs), ins);
|
||||
ins = mLir->insLoad(rndPick(Q_loads), base, rndOffset64(base->size()));
|
||||
addOrReplace(Qs, ins);
|
||||
n++;
|
||||
}
|
||||
break;
|
||||
|
||||
case LLD_F:
|
||||
if (!M8ps.empty()) {
|
||||
LIns* base = rndPick(M8ps);
|
||||
ins = mLir->insLoad(rndPick(F_loads), base, rndOffset64(base->size()));
|
||||
addOrReplace(Fs, ins);
|
||||
n++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case LST_I: {
|
||||
vector<LIns*> Ms = rnd(2) ? M4s : M8ps;
|
||||
|
@ -1720,14 +1737,21 @@ FragmentAssembler::assembleRandomFragment(int nIns)
|
|||
break;
|
||||
}
|
||||
|
||||
case LST_QorF: {
|
||||
case LST_Q:
|
||||
if (!M8ps.empty() && !Qs.empty()) {
|
||||
LIns* base = rndPick(M8ps);
|
||||
mLir->insStorei(rndPick(Qs), base, rndOffset64(base->size()));
|
||||
n++;
|
||||
}
|
||||
break;
|
||||
|
||||
case LST_F:
|
||||
if (!M8ps.empty() && !Fs.empty()) {
|
||||
LIns* base = rndPick(M8ps);
|
||||
mLir->insStorei(rndPick(Fs), base, rndOffset64(base->size()));
|
||||
n++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case LCALL_I_I1:
|
||||
if (!Is.empty()) {
|
||||
|
|
|
@ -1205,6 +1205,8 @@ namespace nanojit
|
|||
case LIR_ldc32f:
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
{
|
||||
countlir_ldq();
|
||||
asm_load64(ins);
|
||||
|
@ -1300,14 +1302,15 @@ namespace nanojit
|
|||
}
|
||||
case LIR_st32f:
|
||||
case LIR_stqi:
|
||||
case LIR_stfi:
|
||||
{
|
||||
countlir_stq();
|
||||
LIns* value = ins->oprnd1();
|
||||
LIns* base = ins->oprnd2();
|
||||
int dr = ins->disp();
|
||||
if (value->isop(LIR_qjoin) && op != LIR_st32f)
|
||||
if (value->isop(LIR_qjoin) && op == LIR_stfi)
|
||||
{
|
||||
// this is correct for little-endian only
|
||||
// This is correct for little-endian only.
|
||||
asm_store32(LIR_sti, value->oprnd1(), dr, base);
|
||||
asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
|
||||
}
|
||||
|
|
|
@ -884,7 +884,16 @@ namespace nanojit
|
|||
|
||||
LIns* LirWriter::insStorei(LIns* value, LIns* base, int32_t d)
|
||||
{
|
||||
LOpcode op = value->isQuad() ? LIR_stqi : LIR_sti;
|
||||
// Determine which kind of store should be used for 'value' based on
|
||||
// its type.
|
||||
LOpcode op = LOpcode(0);
|
||||
switch (retTypes[value->opcode()]) {
|
||||
case LTy_I32: op = LIR_sti; break;
|
||||
case LTy_I64: op = LIR_stqi; break;
|
||||
case LTy_F64: op = LIR_stfi; break;
|
||||
case LTy_Void: NanoAssert(0); break;
|
||||
default: NanoAssert(0); break;
|
||||
}
|
||||
return insStore(op, value, base, d);
|
||||
}
|
||||
|
||||
|
@ -1509,6 +1518,8 @@ namespace nanojit
|
|||
case LIR_ldc:
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
case LIR_ldzb:
|
||||
case LIR_ldzs:
|
||||
case LIR_ldcb:
|
||||
|
@ -1545,6 +1556,7 @@ namespace nanojit
|
|||
|
||||
case LIR_sti:
|
||||
case LIR_stqi:
|
||||
case LIR_stfi:
|
||||
case LIR_stb:
|
||||
case LIR_sts:
|
||||
case LIR_eq:
|
||||
|
@ -1925,6 +1937,8 @@ namespace nanojit
|
|||
case LIR_ldc:
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
case LIR_ldzb:
|
||||
case LIR_ldzs:
|
||||
case LIR_ldcb:
|
||||
|
@ -1942,6 +1956,7 @@ namespace nanojit
|
|||
|
||||
case LIR_sti:
|
||||
case LIR_stqi:
|
||||
case LIR_stfi:
|
||||
case LIR_stb:
|
||||
case LIR_sts:
|
||||
case LIR_st32f:
|
||||
|
@ -1952,7 +1967,7 @@ namespace nanojit
|
|||
break;
|
||||
|
||||
default:
|
||||
VMPI_sprintf(s, "?");
|
||||
NanoAssertMsgf(0, "Can't handle opcode %s\n", lirNames[op]);
|
||||
break;
|
||||
}
|
||||
NanoAssert(VMPI_strlen(sbuf) < sizeof(sbuf)-1);
|
||||
|
@ -2273,6 +2288,7 @@ namespace nanojit
|
|||
{
|
||||
case LIR_ld:
|
||||
case LIR_ldq:
|
||||
case LIR_ldf:
|
||||
case LIR_ld32f:
|
||||
case LIR_ldsb:
|
||||
case LIR_ldss:
|
||||
|
|
|
@ -178,13 +178,13 @@ OPDEF(__69, 69, None, Void)
|
|||
OPDEF(__70, 70, None, Void)
|
||||
OPDEF(qaddp, 71, Op2, I64) // integer addition for temp pointer calculations (64bit only)
|
||||
OPDEF(qparam, 72, P, I64) // load a parameter (64bit register or stk location)
|
||||
OPDEF(__73, 73, None, Void)
|
||||
|
||||
OPDEF(ldq, 74, Ld, I64) // 64-bit (quad) load
|
||||
OPDEF(ldf, 73, Ld, F64) // 64-bit float load
|
||||
OPDEF(ldq, 74, Ld, I64) // 64-bit integer load
|
||||
|
||||
OPDEF(qalloc, 75, I, I64) // allocate some stack space (value is 64bit address)
|
||||
|
||||
OPDEF(stqi, 76, Sti, Void) // 64-bit (quad) store
|
||||
OPDEF(stqi, 76, Sti, Void) // 64-bit integer store
|
||||
OPDEF(fret, 77, Op1, Void)
|
||||
|
||||
OPDEF(st32f, 78, Sti, Void) // store 64-bit float as a 32-bit float (dropping precision)
|
||||
|
@ -193,7 +193,8 @@ OPDEF(ld32f, 79, Ld, F64) // load 32-bit float and widen to 64-bit float
|
|||
OPDEF(fcall, 80, C, F64) // subroutine call returning 64-bit (quad) double value
|
||||
OPDEF(qcall, 81, C, I64) // subroutine call returning 64-bit (quad) integer value
|
||||
|
||||
OPDEF(__82, 82, None, Void)
|
||||
OPDEF(stfi, 82, Sti, Void) // 64-bit float store
|
||||
|
||||
OPDEF(__83, 83, None, Void)
|
||||
OPDEF(__84, 84, None, Void)
|
||||
OPDEF(__85, 85, None, Void)
|
||||
|
@ -202,7 +203,7 @@ OPDEF(__87, 87, None, Void)
|
|||
|
||||
// All opcodes below this marker are subject to CSE.
|
||||
|
||||
OPDEF(quad, 88, N64, I64) // 64-bit (quad) constant value
|
||||
OPDEF(quad, 88, N64, I64) // 64-bit integer constant value
|
||||
OPDEF(qcmov, 89, Op3, I64) // 64-bit conditional move
|
||||
|
||||
OPDEF(i2q, 90, Op1, I64) // sign-extend i32 to i64
|
||||
|
@ -213,9 +214,9 @@ OPDEF(u2f, 93, Op1, F64) // convert an unsigned 32-bit integer to a float
|
|||
OPDEF(__94, 94, None, Void)
|
||||
OPDEF(__95, 95, None, Void)
|
||||
OPDEF(__96, 96, None, Void)
|
||||
OPDEF(__97, 97, None, Void)
|
||||
|
||||
OPDEF(ldqc, 98, Ld, I64) // non-volatile 64-bit load
|
||||
OPDEF(ldfc, 97, Ld, F64) // non-volatile 64-bit float load
|
||||
OPDEF(ldqc, 98, Ld, I64) // non-volatile 64-bit integer load
|
||||
|
||||
OPDEF(fneg, 99, Op1, F64) // floating-point negation
|
||||
OPDEF(fadd, 100, Op2, F64) // floating-point addition
|
||||
|
@ -238,7 +239,7 @@ OPDEF(qjoin, 114, Op2, F64) // join two 32-bit values (1st arg is low bits,
|
|||
OPDEF(__115, 115, None, Void)
|
||||
OPDEF(__116, 116, None, Void)
|
||||
OPDEF(__117, 117, None, Void)
|
||||
OPDEF(float, 118, N64, F64)
|
||||
OPDEF(float, 118, N64, F64) // 64-bit float constant value
|
||||
|
||||
// Integer (64-bit) relational operators.
|
||||
// NB: These opcodes must remain continuous so that comparison-opcode detection
|
||||
|
|
|
@ -1310,13 +1310,16 @@ Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
|
|||
void
|
||||
Assembler::asm_load64(LInsp ins)
|
||||
{
|
||||
NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc));
|
||||
|
||||
//asm_output("<<< load64");
|
||||
|
||||
switch (ins->opcode()) {
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
// handled by mainline code below for now
|
||||
break;
|
||||
|
||||
case LIR_ld32f:
|
||||
case LIR_ldc32f:
|
||||
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
|
||||
|
@ -1370,15 +1373,19 @@ Assembler::asm_load64(LInsp ins)
|
|||
void
|
||||
Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
|
||||
{
|
||||
NanoAssert(op != LIR_stqi);
|
||||
|
||||
//asm_output("<<< store64 (dr: %d)", dr);
|
||||
|
||||
switch (op) {
|
||||
case LIR_stqi:
|
||||
case LIR_stfi:
|
||||
// handled by mainline code below for now
|
||||
break;
|
||||
|
||||
case LIR_st32f:
|
||||
NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
|
||||
return;
|
||||
|
||||
default:
|
||||
NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
|
||||
return;
|
||||
|
|
|
@ -221,6 +221,8 @@ namespace nanojit
|
|||
void Assembler::asm_load64(LIns *ins) {
|
||||
|
||||
switch (ins->opcode()) {
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
// handled by mainline code below for now
|
||||
|
@ -314,6 +316,7 @@ namespace nanojit
|
|||
NanoAssert(value->isQuad());
|
||||
|
||||
switch (op) {
|
||||
case LIR_stfi:
|
||||
case LIR_stqi:
|
||||
// handled by mainline code below for now
|
||||
break;
|
||||
|
|
|
@ -365,9 +365,11 @@ namespace nanojit
|
|||
|
||||
void Assembler::asm_load64(LInsp ins)
|
||||
{
|
||||
NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc));
|
||||
|
||||
switch (ins->opcode()) {
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
// handled by mainline code below for now
|
||||
break;
|
||||
case LIR_ld32f:
|
||||
|
@ -410,8 +412,10 @@ namespace nanojit
|
|||
|
||||
void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
|
||||
{
|
||||
NanoAssert(op != LIR_stqi);
|
||||
|
||||
switch (op) {
|
||||
case LIR_stqi:
|
||||
case LIR_stfi:
|
||||
// handled by mainline code below for now
|
||||
break;
|
||||
case LIR_st32f:
|
||||
|
@ -435,7 +439,7 @@ namespace nanojit
|
|||
return;
|
||||
}
|
||||
|
||||
if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin))
|
||||
if (value->isop(LIR_ldf) || value->isop(LIR_ldfc) || value->isop(LIR_qjoin))
|
||||
{
|
||||
// value is 64bit struct or int64_t, or maybe a double.
|
||||
// it may be live in an FPU reg. Either way, don't
|
||||
|
@ -458,6 +462,7 @@ namespace nanojit
|
|||
return;
|
||||
}
|
||||
|
||||
NanoAssert(!value->isop(LIR_ldq) || !value->isop(LIR_ldqc));
|
||||
Register rb;
|
||||
if (base->isop(LIR_alloc)) {
|
||||
rb = FP;
|
||||
|
|
|
@ -631,7 +631,11 @@ namespace nanojit
|
|||
// To make sure floating point operations stay in FPU registers
|
||||
// as much as possible, make sure that only a few opcodes are
|
||||
// reserving GPRs.
|
||||
NanoAssert(a->isop(LIR_quad) || a->isop(LIR_ldq) || a->isop(LIR_ldqc)|| a->isop(LIR_ld32f) || a->isop(LIR_ldc32f)|| a->isop(LIR_u2f) || a->isop(LIR_float) || a->isop(LIR_fcall));
|
||||
NanoAssert(a->isop(LIR_quad) || a->isop(LIR_float) ||
|
||||
a->isop(LIR_ldf) || a->isop(LIR_ldfc) ||
|
||||
a->isop(LIR_ldq) || a->isop(LIR_ldqc) ||
|
||||
a->isop(LIR_ld32f) || a->isop(LIR_ldc32f) ||
|
||||
a->isop(LIR_u2f) || a->isop(LIR_fcall));
|
||||
allow &= ~rmask(rr);
|
||||
ra = findRegFor(a, allow);
|
||||
} else {
|
||||
|
@ -1387,6 +1391,8 @@ namespace nanojit
|
|||
switch (ins->opcode()) {
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
regalloc_load(ins, GpRegs, rr, dr, rb);
|
||||
if (IsGpReg(rr)) {
|
||||
// general 64bit load, 32bit const displacement
|
||||
|
@ -1468,6 +1474,7 @@ namespace nanojit
|
|||
|
||||
switch (op) {
|
||||
case LIR_stqi:
|
||||
case LIR_stfi:
|
||||
{
|
||||
if (IsGpReg(r)) {
|
||||
// gpr store
|
||||
|
|
|
@ -555,6 +555,8 @@ namespace nanojit
|
|||
|
||||
void Assembler::asm_load64(LInsp ins)
|
||||
{
|
||||
NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc));
|
||||
|
||||
LIns* base = ins->oprnd1();
|
||||
int db = ins->disp();
|
||||
Register rr = ins->getReg();
|
||||
|
@ -564,8 +566,8 @@ namespace nanojit
|
|||
freeRsrcOf(ins, false);
|
||||
Register rb = getBaseReg(base, db, GpRegs);
|
||||
switch (ins->opcode()) {
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
SSE_LDQ(rr, db, rb);
|
||||
break;
|
||||
case LIR_ld32f:
|
||||
|
@ -593,8 +595,8 @@ namespace nanojit
|
|||
ins->setReg(UnknownReg);
|
||||
|
||||
switch (ins->opcode()) {
|
||||
case LIR_ldq:
|
||||
case LIR_ldqc:
|
||||
case LIR_ldf:
|
||||
case LIR_ldfc:
|
||||
// don't use an fpu reg to simply load & store the value.
|
||||
if (dr)
|
||||
asm_mmq(FP, dr, rb, db);
|
||||
|
@ -637,6 +639,8 @@ namespace nanojit
|
|||
|
||||
void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
|
||||
{
|
||||
NanoAssert(op != LIR_stqi);
|
||||
|
||||
Register rb = getBaseReg(base, dr, GpRegs);
|
||||
|
||||
if (op == LIR_st32f) {
|
||||
|
@ -662,7 +666,7 @@ namespace nanojit
|
|||
STi(rb, dr+4, value->imm64_1());
|
||||
STi(rb, dr, value->imm64_0());
|
||||
|
||||
} else if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin)) {
|
||||
} else if (value->isop(LIR_ldf) || value->isop(LIR_ldfc) || value->isop(LIR_qjoin)) {
|
||||
// value is 64bit struct or int64_t, or maybe a double.
|
||||
// It may be live in an FPU reg. Either way, don't put it in an
|
||||
// FPU reg just to load & store it.
|
||||
|
@ -681,6 +685,7 @@ namespace nanojit
|
|||
}
|
||||
|
||||
} else {
|
||||
NanoAssert(!value->isop(LIR_ldq) && !value->isop(LIR_ldqc));
|
||||
bool pop = value->isUnusedOrHasUnknownReg();
|
||||
Register rv = ( pop
|
||||
? findRegFor(value, config.sse2 ? XmmRegs : FpRegs)
|
||||
|
@ -900,7 +905,7 @@ namespace nanojit
|
|||
default: NanoAssert(0); break;
|
||||
}
|
||||
|
||||
freeResourcesOf(ins); // njn: move after asm_cmp?
|
||||
freeResourcesOf(ins);
|
||||
|
||||
asm_cmp(ins);
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче