Bug 520714 - nanojit: distinguish 64-bit int and float loads/stores. r=gal,rreitmai.

--HG-- extra : convert_revision : a19809f7ba60b4381b77b84363bebf0ff7cf9629
2010-01-05 14:03:49 +11:00 · 2010-01-05 14:03:49 +11:00 · 6bfb46c1b1
--- a/js/src/lirasm/LInsClasses.tbl
+++ b/js/src/lirasm/LInsClasses.tbl
@ -99,14 +99,13 @@ CLASS(  LOP_F_I,        0,  2)  // 67%  LIR_i2f, LIR_u2f
 CLASS(  LOP_I_F,        0,  2)  // 69%  LIR_qlo, LIR_qhi
 CLASS(  LOP_F_II,       0,  1)  // 70%  LIR_qjoin

-    // XXX: "QorF" because the same opcode is used for both 64-bit int and
-    // 64-bit float loads.  Ditto for stores.  That should be fixed, see
-    // bug 520714. 
-CLASS(  LLD_I,          0,  4)  // 74%  LIR_ld
-CLASS(  LLD_QorF,       0,  4)  // 78%  LIR_ldq
+CLASS(  LLD_I,          0,  3)  // 73%  LIR_ld, LIR_ldc, LIR_ld*b, LIR_ld*s
+CLASS(  LLD_Q,          1,  2)  // 75%  LIR_ldq, LIR_ldqc
+CLASS(  LLD_F,          0,  3)  // 78%  LIR_ldf, LIR_ldfc

-CLASS(  LST_I,          0,  7)  // 85%  LIR_sti
-CLASS(  LST_QorF,       0,  7)  // 92%  LIR_stqi
+CLASS(  LST_I,          0,  5)  // 83%  LIR_sti
+CLASS(  LST_Q,          1,  4)  // 87%  LIR_stqi
+CLASS(  LST_F,          0,  5)  // 92%  LIR_stfi

 CLASS(  LCALL_I_I1,     0,  1)  // 93%  LIR_icall
 CLASS(  LCALL_I_I6,     0,  1)  // 94%  LIR_icall
--- a/js/src/lirasm/lirasm.cpp
+++ b/js/src/lirasm/lirasm.cpp
@ -991,6 +991,7 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
 #endif
          case LIR_sti:
          case LIR_stqi:
+          case LIR_stfi:
            need(3);
            ins = mLir->insStore(mOpcode, ref(mTokens[0]),
                                  ref(mTokens[1]),
@ -1011,6 +1012,8 @@ FragmentAssembler::assembleFragment(LirTokenStream &in, bool implicitBegin, cons
          case LIR_ldc:
          case LIR_ldq:
          case LIR_ldqc:
+          case LIR_ldf:
+          case LIR_ldfc:
          case LIR_ldcb:
          case LIR_ldcs:
            ins = assemble_load();
@ -1357,7 +1360,7 @@ FragmentAssembler::assembleRandomFragment(int nIns)
    F_II_ops.push_back(LIR_qjoin);

    vector<LOpcode> I_loads;
-    I_loads.push_back(LIR_ld);          // weight LIR_ld the heaviest
+    I_loads.push_back(LIR_ld);          // weight LIR_ld more heavily
    I_loads.push_back(LIR_ld);
    I_loads.push_back(LIR_ld);
    I_loads.push_back(LIR_ldc);
@ -1372,14 +1375,20 @@ FragmentAssembler::assembleRandomFragment(int nIns)
    I_loads.push_back(LIR_ldcss);
 #endif

-    vector<LOpcode> QorF_loads;
-    QorF_loads.push_back(LIR_ldq);      // weight LIR_ldq the heaviest
-    QorF_loads.push_back(LIR_ldq);
-    QorF_loads.push_back(LIR_ldqc);
+    vector<LOpcode> Q_loads;
+    Q_loads.push_back(LIR_ldq);      // weight LIR_ldq more heavily
+    Q_loads.push_back(LIR_ldq);
+    Q_loads.push_back(LIR_ldqc);
+
+    vector<LOpcode> F_loads;
+    F_loads.push_back(LIR_ldf);      // weight LIR_ldf more heavily
+    F_loads.push_back(LIR_ldf);
+    F_loads.push_back(LIR_ldfc);
 #if NJ_EXPANDED_LOADSTORE_SUPPORTED
    // this loads a 32-bit float and expands to 64-bit float
-    QorF_loads.push_back(LIR_ld32f); 
-    QorF_loads.push_back(LIR_ldc32f);
+    F_loads.push_back(LIR_ld32f);    // weight LIR_ld32f more heavily
+    F_loads.push_back(LIR_ld32f); 
+    F_loads.push_back(LIR_ldc32f);
 #endif

    enum LInsClass {
@ -1700,15 +1709,23 @@ FragmentAssembler::assembleRandomFragment(int nIns)
            break;
        }

-        case LLD_QorF: {
+        case LLD_Q:
            if (!M8ps.empty()) {
                LIns* base = rndPick(M8ps);
-                ins = mLir->insLoad(rndPick(QorF_loads), base, rndOffset64(base->size()));
-                addOrReplace((rnd(2) ? Qs : Fs), ins);
+                ins = mLir->insLoad(rndPick(Q_loads), base, rndOffset64(base->size()));
+                addOrReplace(Qs, ins);
+                n++;
+            }
+            break;
+
+        case LLD_F:
+            if (!M8ps.empty()) {
+                LIns* base = rndPick(M8ps);
+                ins = mLir->insLoad(rndPick(F_loads), base, rndOffset64(base->size()));
+                addOrReplace(Fs, ins);
                n++;
            }
            break;
-        }

        case LST_I: {
            vector<LIns*> Ms = rnd(2) ? M4s : M8ps;
@ -1720,14 +1737,21 @@ FragmentAssembler::assembleRandomFragment(int nIns)
            break;
        }

-        case LST_QorF: {
+        case LST_Q:
+            if (!M8ps.empty() && !Qs.empty()) {
+                LIns* base = rndPick(M8ps);
+                mLir->insStorei(rndPick(Qs), base, rndOffset64(base->size()));
+                n++;
+            }
+            break;
+
+        case LST_F:
            if (!M8ps.empty() && !Fs.empty()) {
                LIns* base = rndPick(M8ps);
                mLir->insStorei(rndPick(Fs), base, rndOffset64(base->size()));
                n++;
            }
            break;
-        }

        case LCALL_I_I1:
            if (!Is.empty()) {
--- a/js/src/nanojit/Assembler.cpp
+++ b/js/src/nanojit/Assembler.cpp
@ -1205,6 +1205,8 @@ namespace nanojit
                case LIR_ldc32f:
                case LIR_ldq:
                case LIR_ldqc:
+                case LIR_ldf:
+                case LIR_ldfc:
                {
                    countlir_ldq();
                    asm_load64(ins);
@ -1300,14 +1302,15 @@ namespace nanojit
                }
                case LIR_st32f:
                case LIR_stqi:
+                case LIR_stfi:
                {
                    countlir_stq();
                    LIns* value = ins->oprnd1();
                    LIns* base = ins->oprnd2();
                    int dr = ins->disp();
-                    if (value->isop(LIR_qjoin) && op != LIR_st32f)
+                    if (value->isop(LIR_qjoin) && op == LIR_stfi)
                    {
-                        // this is correct for little-endian only
+                        // This is correct for little-endian only.
                        asm_store32(LIR_sti, value->oprnd1(), dr, base);
                        asm_store32(LIR_sti, value->oprnd2(), dr+4, base);
                    }
--- a/js/src/nanojit/LIR.cpp
+++ b/js/src/nanojit/LIR.cpp
@ -884,7 +884,16 @@ namespace nanojit

    LIns* LirWriter::insStorei(LIns* value, LIns* base, int32_t d)
    {
-        LOpcode op = value->isQuad() ? LIR_stqi : LIR_sti;
+        // Pick the store opcode from the return type of 'value': LIR_sti for
+        // I32, LIR_stqi for I64, LIR_stfi for F64; Void/unknown types assert.
+        LOpcode op = LOpcode(0);
+        switch (retTypes[value->opcode()]) {
+        case LTy_I32:   op = LIR_sti;   break;
+        case LTy_I64:   op = LIR_stqi;  break;
+        case LTy_F64:   op = LIR_stfi;  break;
+        case LTy_Void:  NanoAssert(0);  break; 
+        default:        NanoAssert(0);  break;
+        }
        return insStore(op, value, base, d);
    }

@ -1509,6 +1518,8 @@ namespace nanojit
                case LIR_ldc:
                case LIR_ldq:
                case LIR_ldqc:
+                case LIR_ldf:
+                case LIR_ldfc:
                case LIR_ldzb:
                case LIR_ldzs:
                case LIR_ldcb:
@ -1545,6 +1556,7 @@ namespace nanojit

                case LIR_sti:
                case LIR_stqi:
+                case LIR_stfi:
                case LIR_stb:
                case LIR_sts:
                case LIR_eq:
@ -1925,6 +1937,8 @@ namespace nanojit
            case LIR_ldc:
            case LIR_ldq:
            case LIR_ldqc:
+            case LIR_ldf:
+            case LIR_ldfc:
            case LIR_ldzb:
            case LIR_ldzs:
            case LIR_ldcb:
@ -1942,6 +1956,7 @@ namespace nanojit

            case LIR_sti:
            case LIR_stqi:
+            case LIR_stfi:
            case LIR_stb:
            case LIR_sts:
            case LIR_st32f:
@ -1952,7 +1967,7 @@ namespace nanojit
                break;

            default:
-                VMPI_sprintf(s, "?");
+                NanoAssertMsgf(0, "Can't handle opcode %s\n", lirNames[op]);
                break;
        }
        NanoAssert(VMPI_strlen(sbuf) < sizeof(sbuf)-1);
@ -2273,6 +2288,7 @@ namespace nanojit
            {
                case LIR_ld:
                case LIR_ldq:
+                case LIR_ldf:
                case LIR_ld32f:
                case LIR_ldsb:
                case LIR_ldss:
--- a/js/src/nanojit/LIRopcode.tbl
+++ b/js/src/nanojit/LIRopcode.tbl
@ -178,13 +178,13 @@ OPDEF(__69,     69, None, Void)
 OPDEF(__70,     70, None, Void)
 OPDEF(qaddp,    71, Op2,  I64)  // integer addition for temp pointer calculations (64bit only)
 OPDEF(qparam,   72, P,    I64)  // load a parameter (64bit register or stk location)
-OPDEF(__73,     73, None, Void)

-OPDEF(ldq,      74, Ld,   I64)  // 64-bit (quad) load
+OPDEF(ldf,      73, Ld,   F64)  // 64-bit float load
+OPDEF(ldq,      74, Ld,   I64)  // 64-bit integer load

 OPDEF(qalloc,   75, I,    I64)  // allocate some stack space (value is 64bit address)

-OPDEF(stqi,     76, Sti,  Void) // 64-bit (quad) store
+OPDEF(stqi,     76, Sti,  Void) // 64-bit integer store
 OPDEF(fret,     77, Op1,  Void)

 OPDEF(st32f,    78, Sti,  Void) // store 64-bit float as a 32-bit float (dropping precision)
@ -193,7 +193,8 @@ OPDEF(ld32f,    79, Ld,   F64)  // load 32-bit float and widen to 64-bit float
 OPDEF(fcall,    80, C,    F64)  // subroutine call returning 64-bit (quad) double value
 OPDEF(qcall,    81, C,    I64)  // subroutine call returning 64-bit (quad) integer value

-OPDEF(__82,     82, None, Void)
+OPDEF(stfi,     82, Sti,  Void) // 64-bit float store
+
 OPDEF(__83,     83, None, Void)
 OPDEF(__84,     84, None, Void)
 OPDEF(__85,     85, None, Void)
@ -202,7 +203,7 @@ OPDEF(__87,     87, None, Void)

 // All opcodes below this marker are subject to CSE.

-OPDEF(quad,     88, N64,  I64)  // 64-bit (quad) constant value
+OPDEF(quad,     88, N64,  I64)  // 64-bit integer constant value
 OPDEF(qcmov,    89, Op3,  I64)  // 64-bit conditional move

 OPDEF(i2q,      90, Op1,  I64)  // sign-extend i32 to i64
@ -213,9 +214,9 @@ OPDEF(u2f,      93, Op1,  F64)  // convert an unsigned 32-bit integer to a float
 OPDEF(__94,     94, None, Void)
 OPDEF(__95,     95, None, Void)
 OPDEF(__96,     96, None, Void)
-OPDEF(__97,     97, None, Void)

-OPDEF(ldqc,     98, Ld,   I64)  // non-volatile 64-bit load
+OPDEF(ldfc,     97, Ld,   F64)  // non-volatile 64-bit float load
+OPDEF(ldqc,     98, Ld,   I64)  // non-volatile 64-bit integer load

 OPDEF(fneg,     99, Op1,  F64)  // floating-point negation
 OPDEF(fadd,    100, Op2,  F64)  // floating-point addition
@ -238,7 +239,7 @@ OPDEF(qjoin,   114, Op2,  F64)  // join two 32-bit values (1st arg is low bits,
 OPDEF(__115,   115, None, Void)
 OPDEF(__116,   116, None, Void)
 OPDEF(__117,   117, None, Void)
-OPDEF(float,   118, N64,  F64)
+OPDEF(float,   118, N64,  F64)  // 64-bit float constant value

 // Integer (64-bit) relational operators.
 // NB: These opcodes must remain continuous so that comparison-opcode detection
--- a/js/src/nanojit/NativeARM.cpp
+++ b/js/src/nanojit/NativeARM.cpp
@ -1310,13 +1310,16 @@ Assembler::asm_spill(Register rr, int d, bool pop, bool quad)
 void
 Assembler::asm_load64(LInsp ins)
 {
+    NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc));
+
    //asm_output("<<< load64");

    switch (ins->opcode()) {
-        case LIR_ldq:
-        case LIR_ldqc:
+        case LIR_ldf:
+        case LIR_ldfc:
            // handled by mainline code below for now
            break;
+
        case LIR_ld32f:
        case LIR_ldc32f:
            NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
@ -1370,15 +1373,19 @@ Assembler::asm_load64(LInsp ins)
 void
 Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
 {
+    NanoAssert(op != LIR_stqi);
+
    //asm_output("<<< store64 (dr: %d)", dr);

    switch (op) {
-        case LIR_stqi:
+        case LIR_stfi:
            // handled by mainline code below for now
            break;
+
        case LIR_st32f:
            NanoAssertMsg(0, "NJ_EXPANDED_LOADSTORE_SUPPORTED not yet supported for this architecture");
            return;
+
        default:
            NanoAssertMsg(0, "asm_store64 should never receive this LIR opcode");
            return;
--- a/js/src/nanojit/NativePPC.cpp
+++ b/js/src/nanojit/NativePPC.cpp
@ -221,6 +221,8 @@ namespace nanojit
    void Assembler::asm_load64(LIns *ins) {

        switch (ins->opcode()) {
+            case LIR_ldf:
+            case LIR_ldfc:
            case LIR_ldq:
            case LIR_ldqc:
                // handled by mainline code below for now
@ -314,6 +316,7 @@ namespace nanojit
        NanoAssert(value->isQuad());

        switch (op) {
+            case LIR_stfi:
            case LIR_stqi:
                // handled by mainline code below for now
                break;
--- a/js/src/nanojit/NativeSparc.cpp
+++ b/js/src/nanojit/NativeSparc.cpp
@ -365,9 +365,11 @@ namespace nanojit

    void Assembler::asm_load64(LInsp ins)
    {
+        NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc));
+
        switch (ins->opcode()) {
-            case LIR_ldq:
-            case LIR_ldqc:
+            case LIR_ldf:
+            case LIR_ldfc:
                // handled by mainline code below for now
                break;
            case LIR_ld32f:
@ -410,8 +412,10 @@ namespace nanojit

    void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
    {
+        NanoAssert(op != LIR_stqi);
+
        switch (op) {
-            case LIR_stqi:
+            case LIR_stfi:
                // handled by mainline code below for now
                break;
            case LIR_st32f:
@ -435,7 +439,7 @@ namespace nanojit
                return;
            }

-        if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin))
+        if (value->isop(LIR_ldf) || value->isop(LIR_ldfc) || value->isop(LIR_qjoin))
            {
                // value is 64bit struct or int64_t, or maybe a double.
                // it may be live in an FPU reg.  Either way, don't
@ -458,6 +462,7 @@ namespace nanojit
                return;
            }

+        NanoAssert(!value->isop(LIR_ldq) && !value->isop(LIR_ldqc));  // '&&': reject both 64-bit int loads ('||' is always true)
        Register rb;
        if (base->isop(LIR_alloc)) {
            rb = FP;
--- a/js/src/nanojit/NativeX64.cpp
+++ b/js/src/nanojit/NativeX64.cpp
@ -631,7 +631,11 @@ namespace nanojit
            // To make sure floating point operations stay in FPU registers
            // as much as possible, make sure that only a few opcodes are
            // reserving GPRs.
-            NanoAssert(a->isop(LIR_quad) || a->isop(LIR_ldq) || a->isop(LIR_ldqc)|| a->isop(LIR_ld32f) || a->isop(LIR_ldc32f)|| a->isop(LIR_u2f) || a->isop(LIR_float) || a->isop(LIR_fcall));
+            NanoAssert(a->isop(LIR_quad) || a->isop(LIR_float) ||
+                       a->isop(LIR_ldf) || a->isop(LIR_ldfc) ||
+                       a->isop(LIR_ldq) || a->isop(LIR_ldqc) ||
+                       a->isop(LIR_ld32f) || a->isop(LIR_ldc32f) ||
+                       a->isop(LIR_u2f) || a->isop(LIR_fcall));
            allow &= ~rmask(rr);
            ra = findRegFor(a, allow);
        } else {
@ -1387,6 +1391,8 @@ namespace nanojit
        switch (ins->opcode()) {
            case LIR_ldq:
            case LIR_ldqc:
+            case LIR_ldf:
+            case LIR_ldfc:
                regalloc_load(ins, GpRegs, rr, dr, rb);
                if (IsGpReg(rr)) {
                    // general 64bit load, 32bit const displacement
@ -1468,6 +1474,7 @@ namespace nanojit

        switch (op) {
            case LIR_stqi:
+            case LIR_stfi:
            {
                if (IsGpReg(r)) {
                    // gpr store
--- a/js/src/nanojit/Nativei386.cpp
+++ b/js/src/nanojit/Nativei386.cpp
@ -555,6 +555,8 @@ namespace nanojit

    void Assembler::asm_load64(LInsp ins)
    {
+        NanoAssert(!ins->isop(LIR_ldq) && !ins->isop(LIR_ldqc));
+
        LIns* base = ins->oprnd1();
        int db = ins->disp();
        Register rr = ins->getReg();
@ -564,8 +566,8 @@ namespace nanojit
            freeRsrcOf(ins, false);
            Register rb = getBaseReg(base, db, GpRegs);
            switch (ins->opcode()) {
-                case LIR_ldq:
-                case LIR_ldqc:
+                case LIR_ldf:
+                case LIR_ldfc:
                    SSE_LDQ(rr, db, rb);
                    break;
                case LIR_ld32f:
@ -593,8 +595,8 @@ namespace nanojit
            ins->setReg(UnknownReg);

            switch (ins->opcode()) {
-                case LIR_ldq:
-                case LIR_ldqc:
+                case LIR_ldf:
+                case LIR_ldfc:
                    // don't use an fpu reg to simply load & store the value.
                    if (dr)
                        asm_mmq(FP, dr, rb, db);
@ -637,6 +639,8 @@ namespace nanojit

    void Assembler::asm_store64(LOpcode op, LInsp value, int dr, LInsp base)
    {
+        NanoAssert(op != LIR_stqi);
+
        Register rb = getBaseReg(base, dr, GpRegs);

        if (op == LIR_st32f) {
@ -662,7 +666,7 @@ namespace nanojit
            STi(rb, dr+4, value->imm64_1());
            STi(rb, dr,   value->imm64_0());

-        } else if (value->isop(LIR_ldq) || value->isop(LIR_ldqc) || value->isop(LIR_qjoin)) {
+        } else if (value->isop(LIR_ldf) || value->isop(LIR_ldfc) || value->isop(LIR_qjoin)) {
            // value is 64bit struct or int64_t, or maybe a double.
            // It may be live in an FPU reg.  Either way, don't put it in an
            // FPU reg just to load & store it.
@ -681,6 +685,7 @@ namespace nanojit
            }

        } else {
+            NanoAssert(!value->isop(LIR_ldq) && !value->isop(LIR_ldqc));
            bool pop = value->isUnusedOrHasUnknownReg();
            Register rv = ( pop
                          ? findRegFor(value, config.sse2 ? XmmRegs : FpRegs)
@ -900,7 +905,7 @@ namespace nanojit
        default:        NanoAssert(0);  break;
        }

-        freeResourcesOf(ins);   // njn: move after asm_cmp?
+        freeResourcesOf(ins);

        asm_cmp(ins);
    }