Optimize DoubleToECMAInt32 for ARM. [Bug 551837][r=vlad]

2010-04-27 08:31:36 +01:00 · 2010-04-27 08:31:36 +01:00 · 228379824a
--- a/js/src/jsnum.h
+++ b/js/src/jsnum.h
@ -442,6 +442,127 @@ js_DoubleToECMAInt32(jsdouble d)
    }
    return int32(du.d);
 #elif defined (__arm__) && defined (__GNUC__)
    int32_t i;
    uint32_t    tmp0;
    uint32_t    tmp1;
    uint32_t    tmp2;
    asm (
    // Convert the double 'd' to an int32 result in 'i' using only integer
    // instructions.
    //
    // We use a pure integer solution here. In the 'softfp' ABI, the argument
    // will start in r0 and r1, and VFP can't do all of the necessary ECMA
    // conversions by itself so some integer code will be required anyway. A
    // hybrid solution is faster on A9, but this pure integer solution is
    // notably faster for A8.
    //
    // Operands:
    //   %0  is the result register. It is only written by the last
    //       instruction of each path through the code.
    //   %1, %2 and %3 are used as temporary values.
    //   %Q4 holds the lower part of the mantissa.
    //   %R4 holds the sign, exponent, and the upper part of the mantissa.
    // Both %Q4 and %R4 are overwritten below, so operand 4 is declared as a
    // read-write ("+r") operand: an asm statement must not modify an operand
    // that is declared as input-only.
    //
    // Extract the exponent.
 "   mov     %1, %R4, LSR #20\n"
 "   bic     %1, %1, #(1 << 11)\n"  // Clear the sign.
    // Set the implicit top bit of the mantissa. This clobbers a bit of the
    // exponent, but we have already extracted that.
 "   orr     %R4, %R4, #(1 << 20)\n"
    // Special Cases
    //   We should return zero in the following special cases:
    //    - Exponent is 0x000 - 1023: +/-0 or subnormal.
    //    - Exponent is 0x7ff - 1023: +/-INFINITY or NaN
    //      - This case is implicitly handled by the standard code path anyway,
    //        as shifting the mantissa up by the exponent will result in '0'.
    //
    // The result is composed of the mantissa, prepended with '1' and
    // bit-shifted left by the (decoded) exponent. Note that because %R4[20]
    // is the bit with value '1', %R4 is effectively already shifted (left) by
    // 20 bits, and %Q4 is already shifted by 52 bits.
    // Adjust the exponent to remove the encoding offset (0xff + 0x300 = 1023).
    // If the decoded exponent is negative, quickly bail out with '0' as such
    // values round to zero anyway. This also catches +/-0 and subnormals.
 "   sub     %1, %1, #0xff\n"
 "   subs    %1, %1, #0x300\n"
 "   bmi     8f\n"
    //  %1 = (decoded) exponent >= 0
    //  %R4 = upper mantissa and sign
    // ---- Lower Mantissa ----
 "   subs    %3, %1, #52\n"         // Calculate exp-52
 "   bmi     1f\n"
    // Shift %Q4 left by exp-52.
    // Ensure that we don't overflow ARM's 8-bit shift operand range.
    // We need to handle anything up to an 11-bit value here as we know that
    // 52 <= exp <= 1024 (0x400). Any shift beyond 31 bits results in zero
    // anyway, so as long as we don't touch the bottom 5 bits, we can use
    // a logical OR to push long shifts into the 32 <= (exp&0xff) <= 255 range.
 "   bic     %2, %3, #0xff\n"
 "   orr     %3, %3, %2, LSR #3\n"
    // We can now perform a straight shift, avoiding the need for any
    // conditional instructions or extra branches.
 "   mov     %Q4, %Q4, LSL %3\n"
 "   b       2f\n"
 "1:\n" // Shift %Q4 right by 52-exp.
    // We know that 0 <= exp < 52, and we can shift up to 255 bits so 52-exp
    // will always be a valid shift and we can skip the range check for this case.
 "   rsb     %3, %1, #52\n"
 "   mov     %Q4, %Q4, LSR %3\n"
    //  %1 = (decoded) exponent
    //  %R4 = upper mantissa and sign
    //  %Q4 = partially-converted integer
 "2:\n"
    // ---- Upper Mantissa ----
    // This is much the same as the lower mantissa, with a few different
    // boundary checks and some masking to hide the exponent & sign bit in the
    // upper word.
    // Note that the upper mantissa is pre-shifted by 20 in %R4, but we shift
    // it left more to remove the sign and exponent so it is effectively
    // pre-shifted by 31 bits.
 "   subs    %3, %1, #31\n"          // Calculate exp-31
 "   mov     %1, %R4, LSL #11\n"     // Re-use %1 as a temporary register.
 "   bmi     3f\n"
    // Shift the pre-shifted upper mantissa (%1) left by exp-31.
    // Avoid overflowing the 8-bit shift range, as before.
 "   bic     %2, %3, #0xff\n"
 "   orr     %3, %3, %2, LSR #3\n"
    // Perform the shift.
 "   mov     %2, %1, LSL %3\n"
 "   b       4f\n"
 "3:\n" // Shift the pre-shifted upper mantissa (%1) right by 31-exp.
    // We know that 0 <= exp < 31, and we can shift up to 255 bits so 31-exp
    // will always be a valid shift and we can skip the range check for this case.
 "   rsb     %3, %3, #0\n"          // Calculate 31-exp from -(exp-31)
 "   mov     %2, %1, LSR %3\n"      // Thumb-2 can't do "LSR %3" in "orr".
    //  %Q4 = partially-converted integer (lower)
    //  %R4 = upper mantissa and sign
    //  %2 = partially-converted integer (upper)
 "4:\n"
    // Combine the converted parts.
 "   orr     %Q4, %Q4, %2\n"
    // Negate the result if we have to, and move it to %0 in the process. To
    // avoid conditionals, we can do this by inverting on %R4[31], then adding
    // %R4[31]>>31.
 "   eor     %Q4, %Q4, %R4, ASR #31\n"
 "   add     %0, %Q4, %R4, LSR #31\n"
 "   b       9f\n"
 "8:\n"
    // +/-0, subnormals, and anything else with a negative decoded exponent:
    // the conversion result is '0'.
 "   mov     %0, #0\n"
 "9:\n"
    // 'd' is operand 4 and is read-write because the code above modifies both
    // of its halves in place. There are no input-only operands.
    : "=r" (i), "=&r" (tmp0), "=&r" (tmp1), "=&r" (tmp2), "+r" (d)
    :
    : "cc"
        );
    return i;
 #else
    int32 i;
    jsdouble two32, two31;