Add support for ARM's 'hard' EABI variant. (FP arguments go in VFP registers.) [Bug 602834] [r=jbramley,rreitmai]

--HG-- extra : convert_revision : 113a2e56c62fca5adc557906dd729a4ec632d994
2010-10-25 09:51:59 +01:00 · 2010-10-25 09:51:59 +01:00 · 11c397d856
--- a/js/src/nanojit/NativeARM.cpp
+++ b/js/src/nanojit/NativeARM.cpp
@ -24,6 +24,7 @@
 *   Adobe AS3 Team
 *   Vladimir Vukicevic <vladimir@pobox.com>
 *   Jacob Bramley <Jacob.Bramley@arm.com>
+ *   Tero Koskinen <tero.koskinen@digia.com>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
@ -52,7 +53,7 @@ namespace nanojit

 #ifdef NJ_VERBOSE
 const char* regNames[] = {"r0","r1","r2","r3","r4","r5","r6","r7","r8","r9","r10","fp","ip","sp","lr","pc",
-                          "d0","d1","d2","d3","d4","d5","d6","d7","s14"};
+                          "d0","d1","d2","d3","d4","d5","d6","d7","s0"};
 const char* condNames[] = {"eq","ne","cs","cc","mi","pl","vs","vc","hi","ls","ge","lt","gt","le",""/*al*/,"nv"};
 const char* shiftNames[] = { "lsl", "lsl", "lsr", "lsr", "asr", "asr", "ror", "ror" };
 #endif
@ -613,6 +614,9 @@ Assembler::genEpilogue()
 * - 32-bit arguments are placed in registers and 32-bit aligned
 *   on the stack.
 *
+ * Under EABI with hardware floating-point procedure-call variant:
+ * - Same as EABI, but doubles are passed in D0..D7 registers.
+ *
 * Under legacy ABI:
 * - doubles are placed in subsequent arg registers; if the next
 *   available register is r3, the low order word goes into r3
@ -622,23 +626,23 @@ Assembler::genEpilogue()
 *   alignment.
 */
 void
-Assembler::asm_arg(ArgType ty, LIns* arg, Register& r, int& stkd)
+Assembler::asm_arg(ArgType ty, LIns* arg, ParameterRegisters& params)
 {
    // The stack pointer must always be at least aligned to 4 bytes.
-    NanoAssert((stkd & 3) == 0);
+    NanoAssert((params.stkd & 3) == 0);

    if (ty == ARGTYPE_D) {
        // This task is fairly complex and so is delegated to asm_arg_64.
-        asm_arg_64(arg, r, stkd);
+        asm_arg_64(arg, params);
    } else {
        NanoAssert(ty == ARGTYPE_I || ty == ARGTYPE_UI);
        // pre-assign registers R0-R3 for arguments (if they fit)
-        if (r < R4) {
-            asm_regarg(ty, arg, r);
-            r = Register(r + 1);
+        if (params.r < R4) {
+            asm_regarg(ty, arg, params.r);
+            params.r = Register(params.r + 1);
        } else {
-            asm_stkarg(arg, stkd);
-            stkd += 4;
+            asm_stkarg(arg, params.stkd);
+            params.stkd += 4;
        }
    }
 }
@ -646,11 +650,26 @@ Assembler::asm_arg(ArgType ty, LIns* arg, Register& r, int& stkd)
 // Encode a 64-bit floating-point argument using the appropriate ABI.
 // This function operates in the same way as asm_arg, except that it will only
 // handle arguments where (ArgType)ty == ARGTYPE_D.
+
+#ifdef NJ_ARM_EABI_HARD_FLOAT
 void
-Assembler::asm_arg_64(LIns* arg, Register& r, int& stkd)
+Assembler::asm_arg_64(LIns* arg, ParameterRegisters& params)
+{
+    NanoAssert(IsFpReg(params.float_r));
+    if (params.float_r <= D7) {
+        findSpecificRegFor(arg, params.float_r);
+        params.float_r = Register(params.float_r + 1);
+    } else {
+        NanoAssertMsg(0, "Only 8 floating point arguments supported");
+    }
+}
+
+#else
+void
+Assembler::asm_arg_64(LIns* arg, ParameterRegisters& params)
 {
    // The stack pointer must always be at least aligned to 4 bytes.
-    NanoAssert((stkd & 3) == 0);
+    NanoAssert((params.stkd & 3) == 0);
    // The only use for this function when we are using soft floating-point
    // is for LIR_ii2d.
    NanoAssert(ARM_VFP || arg->isop(LIR_ii2d));
@ -661,15 +680,15 @@ Assembler::asm_arg_64(LIns* arg, Register& r, int& stkd)
    // odd-numbered register, advance it. Note that this will push r past
    // R3 if r is R3 to start with, and will force the argument to go on
    // the stack.
-    if ((r == R1) || (r == R3)) {
-        r = Register(r + 1);
+    if ((params.r == R1) || (params.r == R3)) {
+        params.r = Register(params.r + 1);
    }
 #endif

-    if (r < R3) {
-        Register    ra = r;
-        Register    rb = Register(r + 1);
-        r = Register(rb + 1);
+    if (params.r < R3) {
+        Register    ra = params.r;
+        Register    rb = Register(params.r + 1);
+        params.r = Register(rb + 1);

 #ifdef NJ_ARM_EABI
        // EABI requires that 64-bit arguments are aligned on even-numbered
@ -693,12 +712,12 @@ Assembler::asm_arg_64(LIns* arg, Register& r, int& stkd)
        // We only have one register left, but the legacy ABI requires that we
        // put 32 bits of the argument in the register (R3) and the remaining
        // 32 bits on the stack.
-        Register    ra = r; // R3
-        r = R4;
+        Register    ra = params.r; // R3
+        params.r = R4;

        // We're splitting the argument between registers and the stack.  This
        // must be the first time that the stack is used, so stkd must be at 0.
-        NanoAssert(stkd == 0);
+        NanoAssert(params.stkd == 0);

        if (ARM_VFP) {
            Register dm = findRegFor(arg, FpRegs);
@ -717,27 +736,28 @@ Assembler::asm_arg_64(LIns* arg, Register& r, int& stkd)
            asm_regarg(ARGTYPE_I, arg->oprnd1(), ra);
            asm_stkarg(arg->oprnd2(), 0);
        }
-        stkd += 4;
+        params.stkd += 4;
 #endif
    } else {
        // The argument won't fit in registers, so pass on to asm_stkarg.
 #ifdef NJ_ARM_EABI
        // EABI requires that 64-bit arguments are 64-bit aligned.
-        if ((stkd & 7) != 0) {
+        if ((params.stkd & 7) != 0) {
            // stkd will always be aligned to at least 4 bytes; this was
            // asserted on entry to this function.
-            stkd += 4;
+            params.stkd += 4;
        }
 #endif
        if (ARM_VFP) {
-            asm_stkarg(arg, stkd);
+            asm_stkarg(arg, params.stkd);
        } else {
-            asm_stkarg(arg->oprnd1(), stkd);
-            asm_stkarg(arg->oprnd2(), stkd+4);
+            asm_stkarg(arg->oprnd1(), params.stkd);
+            asm_stkarg(arg->oprnd2(), params.stkd+4);
        }
-        stkd += 8;
+        params.stkd += 8;
    }
 }
+#endif // NJ_ARM_EABI_HARD_FLOAT

 void
 Assembler::asm_regarg(ArgType ty, LIns* p, Register rd)
@ -818,6 +838,14 @@ Assembler::asm_call(LIns* ins)
         * used here with the ultimate VFP register, and not R0/R1, which
         * potentially allows for R0/R1 to get corrupted as described.
         */
+#ifdef NJ_ARM_EABI_HARD_FLOAT
+        /* With ARM hardware floating point ABI, D0 is used to return the double
+         * from the function. We need to prepare it like we do for R0 in the else
+         * branch.
+         */
+        prepareResultReg(ins, rmask(D0));
+        freeResourcesOf(ins);
+#endif
    } else if (!ins->isop(LIR_callv)) {
        prepareResultReg(ins, rmask(retRegs[0]));
        // Immediately free the resources as we need to re-use the register for
@ -839,11 +867,12 @@ Assembler::asm_call(LIns* ins)
    // function call.
    NanoAssert(ARM_VFP || ins->isop(LIR_callv) || ins->isop(LIR_calli));

-    // If we're using VFP, and the return type is a double, it'll come back in
-    // R0/R1. We need to either place it in the result fp reg, or store it.
+    // If we're using VFP, but not hardware floating point ABI, and
+    // the return type is a double, it'll come back in R0/R1.
+    // We need to either place it in the result fp reg, or store it.
    // See comments above for more details as to why this is necessary here
    // for floating point calls, but not for integer calls.
-    if (ARM_VFP && ins->isExtant()) {
+    if (!ARM_EABI_HARD && ARM_VFP && ins->isExtant()) {
        // If the result size is a floating-point value, treat the result
        // specially, as described previously.
        if (ci->returnType() == ARGTYPE_D) {
@ -894,9 +923,9 @@ Assembler::asm_call(LIns* ins)
        asm_regarg(ARGTYPE_I, ins->arg(--argc), LR);
    }

-    // Encode the arguments, starting at R0 and with an empty argument stack.
-    Register    r = R0;
-    int         stkd = 0;
+    // Encode the arguments, starting at R0 and with an empty argument stack (0).
+    // With hardware fp ABI, floating point arguments start from D0.
+    ParameterRegisters params = init_params(0, R0, D0);

    // Iterate through the argument list and encode each argument according to
    // the ABI.
@ -904,11 +933,11 @@ Assembler::asm_call(LIns* ins)
    // in reverse order.
    uint32_t    i = argc;
    while(i--) {
-        asm_arg(argTypes[i], ins->arg(i), r, stkd);
+        asm_arg(argTypes[i], ins->arg(i), params);
    }

-    if (stkd > max_out_args) {
-        max_out_args = stkd;
+    if (params.stkd > max_out_args) {
+        max_out_args = params.stkd;
    }
 }

@ -941,7 +970,7 @@ Assembler::nRegisterResetAll(RegAlloc& a)
    if (ARM_VFP) {
        a.free |=
            rmask(D0) | rmask(D1) | rmask(D2) | rmask(D3) |
-            rmask(D4) | rmask(D5) | rmask(D6);
+            rmask(D4) | rmask(D5) | rmask(D6) | rmask(D7);
    }
 }

@ -1329,13 +1358,14 @@ Assembler::asm_load64(LIns* ins)
        int         offset = ins->disp();

        if (ins->isInReg()) {
-            dd = prepareResultReg(ins, FpRegs);
+            dd = prepareResultReg(ins, FpRegs & ~rmask(D0));
        } else {
            // If the result isn't already in a register, use the VFP scratch
            // register for the result and store it directly into memory.
            NanoAssert(ins->isInAr());
            int d = arDisp(ins);
-            dd = D7;
+            evictIfActive(D0);
+            dd = D0;
            // VFP can only do loads and stores with a range of ±1020, so we
            // might need to do some arithmetic to extend its range.
            if (isU8(d/4) || isU8(-d/4)) {
@ -1356,11 +1386,12 @@ Assembler::asm_load64(LIns* ins)
                }
                break;
            case LIR_ldf2d:
-                FCVTDS(dd, S14);
+                evictIfActive(D0);
+                FCVTDS(dd, S0);
                if (isU8(offset/4) || isU8(-offset/4)) {
-                    FLDS(S14, rn, offset);
+                    FLDS(S0, rn, offset);
                } else {
-                    FLDS(S14, IP, offset%1024);
+                    FLDS(S0, IP, offset%1024);
                    asm_add_imm(IP, rn, offset-(offset%1024));
                }
                break;
@ -1398,7 +1429,7 @@ Assembler::asm_store64(LOpcode op, LIns* value, int dr, LIns* base)
    NanoAssert(value->isD());

    if (ARM_VFP) {
-        Register dd = findRegFor(value, FpRegs);
+        Register dd = findRegFor(value, FpRegs & ~rmask(D0));
        Register rn = findRegFor(base, GpRegs);

        switch (op) {
@ -1416,14 +1447,15 @@ Assembler::asm_store64(LOpcode op, LIns* value, int dr, LIns* base)
            case LIR_std2f:
                // VFP can only do stores with a range of ±1020, so we might
                // need to do some arithmetic to extend its range.
+                evictIfActive(D0);
                if (isU8(dr/4) || isU8(-dr/4)) {
-                    FSTS(S14, rn, dr);
+                    FSTS(S0, rn, dr);
                } else {
-                    FSTS(S14, IP, dr%1024);
+                    FSTS(S0, IP, dr%1024);
                    asm_add_imm(IP, rn, dr-(dr%1024));
                }

-                FCVTSD(S14, dd);
+                FCVTSD(S0, dd);

                break;
            default:
@ -2123,11 +2155,12 @@ Assembler::B_cond_chk(ConditionCode _c, NIns* _t, bool _chk)
 void
 Assembler::asm_i2d(LIns* ins)
 {
-    Register dd = prepareResultReg(ins, FpRegs);
+    Register dd = prepareResultReg(ins, FpRegs & ~rmask(D0));
    Register rt = findRegFor(ins->oprnd1(), GpRegs);

-    FSITOD(dd, S14);
-    FMSR(S14, rt);
+    evictIfActive(D0);
+    FSITOD(dd, S0);
+    FMSR(S0, rt);

    freeResourcesOf(ins);
 }
@ -2135,20 +2168,22 @@ Assembler::asm_i2d(LIns* ins)
 void
 Assembler::asm_ui2d(LIns* ins)
 {
-    Register dd = prepareResultReg(ins, FpRegs);
+    Register dd = prepareResultReg(ins, FpRegs & ~rmask(D0));
    Register rt = findRegFor(ins->oprnd1(), GpRegs);

-    FUITOD(dd, S14);
-    FMSR(S14, rt);
+    evictIfActive(D0);
+    FUITOD(dd, S0);
+    FMSR(S0, rt);

    freeResourcesOf(ins);
 }

 void Assembler::asm_d2i(LIns* ins)
 {
+    evictIfActive(D0);
    if (ins->isInReg()) {
        Register rt = ins->getReg();
-        FMRS(rt, S14);
+        FMRS(rt, S0);
    } else {
        // There's no active result register, so store the result directly into
        // memory to avoid the FP->GP transfer cost on Cortex-A8.
@ -2156,16 +2191,16 @@ void Assembler::asm_d2i(LIns* ins)
        // VFP can only do stores with a range of ±1020, so we might need to do
        // some arithmetic to extend its range.
        if (isU8(d/4) || isU8(-d/4)) {
-            FSTS(S14, FP, d);
+            FSTS(S0, FP, d);
        } else {
-            FSTS(S14, IP, d%1024);
+            FSTS(S0, IP, d%1024);
            asm_add_imm(IP, FP, d-(d%1024));
        }
    }

-    Register dm = findRegFor(ins->oprnd1(), FpRegs);
+    Register dm = findRegFor(ins->oprnd1(), FpRegs & ~rmask(D0));

-    FTOSID(S14, dm);
+    FTOSID(S0, dm);

    freeResourcesOf(ins);
 }
@ -2832,8 +2867,11 @@ Assembler::asm_ret(LIns *ins)
    // we are intending for R0 is currently IP, not R0. This has to do with
    // the strange dual-nature of the patchable jump in a side-exit. See
    // nPatchBranch.
-
-    MOV(IP, R0);
+    //
+    // With hardware floating point ABI we can skip this for retd.
+    if (!(ARM_EABI_HARD && ins->isop(LIR_retd))) {
+        MOV(IP, R0);
+    }

    // Pop the stack frame.
    MOV(SP,FP);
@ -2847,8 +2885,12 @@ Assembler::asm_ret(LIns *ins)
    else {
        NanoAssert(ins->isop(LIR_retd));
        if (ARM_VFP) {
+#ifdef NJ_ARM_EABI_HARD_FLOAT
+            findSpecificRegFor(value, D0);
+#else
            Register reg = findRegFor(value, FpRegs);
            FMRRD(R0, R1, reg);
+#endif
        } else {
            NanoAssert(value->isop(LIR_ii2d));
            findSpecificRegFor(value->oprnd1(), R0); // lo
--- a/js/src/nanojit/NativeARM.h
+++ b/js/src/nanojit/NativeARM.h
@ -75,7 +75,19 @@ namespace nanojit
 #  define NJ_ARM_EABI  1
 #endif

-// only d0-d6 are actually used; we'll use d7 as s14-s15 for i2d/u2f/etc.
+// GCC defines __ARM_PCS_VFP if it uses hardware floating point ABI
+// See http://gcc.gnu.org/viewcvs?view=revision&revision=162637
+#ifdef __ARM_PCS_VFP
+#  define NJ_ARM_EABI_HARD_FLOAT 1
+#endif
+
+#ifdef NJ_ARM_EABI_HARD_FLOAT
+#  define ARM_EABI_HARD true
+#else
+#  define ARM_EABI_HARD false
+#endif
+
+// only d0-d7 are used; in addition, we'll use d0 as s0-s1 for i2d/u2f/etc.
 #define NJ_VFP_MAX_REGISTERS            8
 #define NJ_MAX_REGISTERS                (11 + NJ_VFP_MAX_REGISTERS)
 #define NJ_MAX_STACK_ENTRY              4096
@ -118,7 +130,7 @@ static const Register
    LR  = { 14 },
    PC  = { 15 },

-    // VFP regs (we currently only use D0-D6 and S14)
+    // VFP regs (we currently only use D0-D7 and S0)
    D0 = { 16 },
    D1 = { 17 },
    D2 = { 18 },
@ -126,23 +138,22 @@ static const Register
    D4 = { 20 },
    D5 = { 21 },
    D6 = { 22 },
-    // S14 overlaps with D7 and is hard-coded into i2d and u2f operations, but
-    // D7 is still listed here for completeness and to facilitate assertions.
    D7 = { 23 },
    // D8-D15 are caller-saved registers that we don't currently handle.

    FirstFloatReg = D0,
-    LastFloatReg = D6,
+    LastFloatReg = D7,

    deprecated_UnknownReg = { 32 },     // XXX: remove eventually, see bug 538924

-    S14 = { 24 },
+    // S0 overlaps with D0 and is hard-coded into i2d and u2f operations
+    S0 = { 24 },

    SBZ = { 0 } ;   // Used for 'should-be-zero' fields in instructions with
                    // unused register fields.

 static const uint32_t FirstRegNum = R0;
-static const uint32_t LastRegNum = D6;
+static const uint32_t LastRegNum = D7;
 }

 #define NJ_USE_UINT32_REGISTER 1
@ -189,6 +200,20 @@ typedef struct _FragInfo {
    NIns*           epilogue;
 } FragInfo;

+typedef struct _ParameterRegisters {
+    int stkd;
+    Register r;
+#ifdef NJ_ARM_EABI_HARD_FLOAT
+    Register float_r;
+#endif
+} ParameterRegisters;
+
+#ifdef NJ_ARM_EABI_HARD_FLOAT
+#define init_params(a,b,c) { (a), (b), (c) }
+#else
+#define init_params(a,b,c) { (a), (b) }
+#endif
+
 // D0-D7 are not saved; D8-D15 are, but we don't use those,
 // so we don't have to worry about saving/restoring them
 static const RegisterMask SavedFpRegs = 0;
@ -253,8 +278,8 @@ verbose_only( extern const char* shiftNames[]; )
    void        asm_cmp(LIns *cond);                                            \
    void        asm_cmpd(LIns *cond);                                           \
    void        asm_ld_imm(Register d, int32_t imm, bool chk = true);           \
-    void        asm_arg(ArgType ty, LIns* arg, Register& r, int& stkd);         \
-    void        asm_arg_64(LIns* arg, Register& r, int& stkd);                  \
+    void        asm_arg(ArgType ty, LIns* arg, ParameterRegisters& params);     \
+    void        asm_arg_64(LIns* arg, ParameterRegisters& params);              \
    void        asm_add_imm(Register rd, Register rn, int32_t imm, int stat = 0);   \
    void        asm_sub_imm(Register rd, Register rn, int32_t imm, int stat = 0);   \
    void        asm_and_imm(Register rd, Register rn, int32_t imm, int stat = 0);   \
@ -910,8 +935,8 @@ enum {
 #define FUITOD(_Dd,_Sm) do {                                            \
        underrunProtect(4);                                             \
        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(IsFpReg(_Dd) && ((_Sm) == S14));                     \
-        *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2D<<6) | (0<<5) | (0x7) ); \
+        NanoAssert(IsFpReg(_Dd) && ((_Sm) == S0));                     \
+        *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2D<<6) | (0<<5) | (0x0) ); \
        asm_output("fuitod %s,%s", gpn(_Dd), gpn(_Sm));                \
    } while (0)

@ -984,13 +1009,13 @@ enum {
 #define FMRS(_Rd,_Sn) do {                                              \
        underrunProtect(4);                                             \
        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(((_Sn) == S14) && IsGpReg(_Rd));                     \
-        *(--_nIns) = (NIns)( COND_AL | (0xE1<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
+        NanoAssert(((_Sn) == S0) && IsGpReg(_Rd));                     \
+        *(--_nIns) = (NIns)( COND_AL | (0xE1<<20) | (0x0<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
        asm_output("fmrs %s,%s", gpn(_Rd), gpn(_Sn));                  \
    } while (0)

 /*
- * The following instructions can only be used with S14 as the
+ * The following instructions can only be used with S0 as the
 * single-precision register; that limitation can be removed if
 * needed, but we'd have to teach NJ about all the single precision
 * regs, and their encoding is strange (top 4 bits usually in a block,
@ -1000,55 +1025,55 @@ enum {
 #define FSITOD(_Dd,_Sm) do {                                            \
        underrunProtect(4);                                             \
        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(IsFpReg(_Dd) && ((_Sm) == S14));                     \
-        *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2F<<6) | (0<<5) | (0x7) ); \
+        NanoAssert(IsFpReg(_Dd) && ((_Sm) == S0));                     \
+        *(--_nIns) = (NIns)( COND_AL | (0xEB8<<16) | (FpRegNum(_Dd)<<12) | (0x2F<<6) | (0<<5) | (0x0) ); \
        asm_output("fsitod %s,%s", gpn(_Dd), gpn(_Sm));                \
    } while (0)

 #define FMSR(_Sn,_Rd) do {                                              \
        underrunProtect(4);                                             \
        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(((_Sn) == S14) && IsGpReg(_Rd));                     \
-        *(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
+        NanoAssert(((_Sn) == S0) && IsGpReg(_Rd));                     \
+        *(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x0<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
        asm_output("fmsr %s,%s", gpn(_Sn), gpn(_Rd));                  \
    } while (0)

 #define FMRS(_Rd,_Sn) do {                                              \
        underrunProtect(4);                                             \
        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(((_Sn) == S14) && IsGpReg(_Rd));                     \
-        *(--_nIns) = (NIns)( COND_AL | (0xE1<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
+        NanoAssert(((_Sn) == S0) && IsGpReg(_Rd));                     \
+        *(--_nIns) = (NIns)( COND_AL | (0xE1<<20) | (0x0<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
        asm_output("fmrs %s,%s", gpn(_Rd), gpn(_Sn));                  \
    } while (0)

 #define FMSR(_Sn,_Rd) do {                                              \
        underrunProtect(4);                                             \
        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(((_Sn) == S14) && IsGpReg(_Rd));                     \
-        *(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x7<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
+        NanoAssert(((_Sn) == S0) && IsGpReg(_Rd));                     \
+        *(--_nIns) = (NIns)( COND_AL | (0xE0<<20) | (0x0<<16) | ((_Rd)<<12) | (0xA<<8) | (0<<7) | (0x1<<4) ); \
        asm_output("fmsr %s,%s", gpn(_Sn), gpn(_Rd));                  \
    } while (0)

 #define FCVTSD(_Sd,_Dm) do {                        \
        underrunProtect(4);                         \
        NanoAssert(ARM_VFP);                \
-        NanoAssert(((_Sd) == S14) && IsFpReg(_Dm)); \
-        *(--_nIns) = (NIns)( COND_AL | (0xEB7<<16) | (0x7<<12) | (0xBC<<4) | (FpRegNum(_Dm)) ); \
-        asm_output("[0x%08x] fcvtsd s14,%s", *_nIns, gpn(_Dm));                          \
+        NanoAssert(((_Sd) == S0) && IsFpReg(_Dm)); \
+        *(--_nIns) = (NIns)( COND_AL | (0xEB7<<16) | (0x0<<12) | (0xBC<<4) | (FpRegNum(_Dm)) ); \
+        asm_output("[0x%08x] fcvtsd s0,%s", *_nIns, gpn(_Dm));                          \
    } while (0)

 #define FCVTDS(_Dd,_Sm) do {                                    \
        underrunProtect(4);                                     \
        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(((_Sm) == S14) && IsFpReg(_Dd));             \
-        *(--_nIns) = (NIns)( COND_AL | (0xEB7<<16) | (FpRegNum(_Dd)<<12) | (0xAC<<4) | (0x7) ); \
-        asm_output("fcvtds %s,s14", gpn(_Dd));                  \
+        NanoAssert(((_Sm) == S0) && IsFpReg(_Dd));             \
+        *(--_nIns) = (NIns)( COND_AL | (0xEB7<<16) | (FpRegNum(_Dd)<<12) | (0xAC<<4) | (0x0) ); \
+        asm_output("fcvtds %s,s0", gpn(_Dd));                  \
    } while(0)

 #define FLDS(_Sd,_Rn,_offs) do {                                \
        underrunProtect(4);                                     \
        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(((_Sd) == S14) && !IsFpReg(_Rn));            \
+        NanoAssert(((_Sd) == S0) && !IsFpReg(_Rn));            \
        NanoAssert(((_offs)%4) == 0);                           \
        NanoAssert((isU8((_offs)/4)) || isU8(-(_offs)/4));      \
        int addflag = 1<<23;                                    \
@ -1057,14 +1082,14 @@ enum {
            addflag = 0;                                        \
            offs = -offs;                                       \
        }                                                       \
-        *(--_nIns) = (NIns)( COND_AL | (0xD1<<20) | ((_Rn)<<16) | (0x7<<12) | (0xA << 8) | addflag | ((offs>>2)&0xff) ); \
-        asm_output("flds s14, [%s, #%d]", gpn(_Rn), (_offs));   \
+        *(--_nIns) = (NIns)( COND_AL | (0xD1<<20) | ((_Rn)<<16) | (0x0<<12) | (0xA << 8) | addflag | ((offs>>2)&0xff) ); \
+        asm_output("flds s0, [%s, #%d]", gpn(_Rn), (_offs));   \
    } while (0)

 #define FSTS(_Sd,_Rn,_offs) do {                                \
        underrunProtect(4);                                     \
        NanoAssert(ARM_VFP);                                    \
-        NanoAssert(((_Sd) == S14) && !IsFpReg(_Rn));            \
+        NanoAssert(((_Sd) == S0) && !IsFpReg(_Rn));            \
        NanoAssert(((_offs)%4) == 0);                           \
        NanoAssert((isU8((_offs)/4)) || isU8(-(_offs)/4));      \
        int addflag = 1<<23;                                    \
@ -1073,16 +1098,16 @@ enum {
            addflag = 0;                                        \
            offs = -offs;                                       \
        }                                                       \
-        *(--_nIns) = (NIns)( COND_AL | (0xD0<<20) | ((_Rn)<<16) | (0x7<<12) | (0xA << 8) | addflag | ((offs>>2)&0xff) ); \
-        asm_output("fsts s14, [%s, #%d]", gpn(_Rn), (_offs));   \
+        *(--_nIns) = (NIns)( COND_AL | (0xD0<<20) | ((_Rn)<<16) | (0x0<<12) | (0xA << 8) | addflag | ((offs>>2)&0xff) ); \
+        asm_output("fsts s0, [%s, #%d]", gpn(_Rn), (_offs));   \
    } while (0)

 #define FTOSID(_Sd,_Dm) do {                                   \
        underrunProtect(4);                                    \
        NanoAssert(ARM_VFP);                           \
-        NanoAssert(((_Sd) == S14) && IsFpReg(_Dm));            \
-        *(--_nIns) = (NIns)( COND_AL | (0xEBD<<16) | (0x7<<12) | (0xB4<<4) | FpRegNum(_Dm) ); \
-        asm_output("ftosid s14, %s", gpn(_Dm));                \
+        NanoAssert(((_Sd) == S0) && IsFpReg(_Dm));            \
+        *(--_nIns) = (NIns)( COND_AL | (0xEBD<<16) | (0x0<<12) | (0xB4<<4) | FpRegNum(_Dm) ); \
+        asm_output("ftosid s0, %s", gpn(_Dm));                \
    } while (0)

 } // namespace nanojit