From 228379824a47f0c20a44c919a06d78d279514daf Mon Sep 17 00:00:00 2001 From: Jacob Bramley Date: Tue, 27 Apr 2010 08:31:36 +0100 Subject: [PATCH] Optimize DoubleToECMAInt32 for ARM. [Bug 551837][r=vlad] --- js/src/jsnum.h | 121 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/js/src/jsnum.h b/js/src/jsnum.h index 1a0580679ef..d782ca8f587 100644 --- a/js/src/jsnum.h +++ b/js/src/jsnum.h @@ -442,6 +442,127 @@ js_DoubleToECMAInt32(jsdouble d) } return int32(du.d); +#elif defined (__arm__) && defined (__GNUC__) + int32_t i; + uint32_t tmp0; + uint32_t tmp1; + uint32_t tmp2; + asm ( + // We use a pure integer solution here. In the 'softfp' ABI, the argument + // will start in r0 and r1, and VFP can't do all of the necessary ECMA + // conversions by itself so some integer code will be required anyway. A + // hybrid solution is faster on A9, but this pure integer solution is + // notably faster for A8. + + // %0 is the result register, and may alias either of the %[QR]1 registers. + // %Q4 holds the lower part of the mantissa. + // %R4 holds the sign, exponent, and the upper part of the mantissa. + // %1, %2 and %3 are used as temporary values. + + // Extract the exponent. +" mov %1, %R4, LSR #20\n" +" bic %1, %1, #(1 << 11)\n" // Clear the sign. + + // Set the implicit top bit of the mantissa. This clobbers a bit of the + // exponent, but we have already extracted that. +" orr %R4, %R4, #(1 << 20)\n" + + // Special Cases + // We should return zero in the following special cases: + // - Exponent is 0x000 - 1023: +/-0 or subnormal. + // - Exponent is 0x7ff - 1023: +/-INFINITY or NaN + // - This case is implicitly handled by the standard code path anyway, + // as shifting the mantissa up by the exponent will result in '0'. + // + // The result is composed of the mantissa, prepended with '1' and + // bit-shifted left by the (decoded) exponent. Note that because the r1[20] + // is the bit with value '1', r1 is effectively already shifted (left) by + // 20 bits, and r0 is already shifted by 52 bits. + + // Adjust the exponent to remove the encoding offset. If the decoded + // exponent is negative, quickly bail out with '0' as such values round to + // zero anyway. This also catches +/-0 and subnormals. +" sub %1, %1, #0xff\n" +" subs %1, %1, #0x300\n" +" bmi 8f\n" + + // %1 = (decoded) exponent >= 0 + // %R4 = upper mantissa and sign + + // ---- Lower Mantissa ---- +" subs %3, %1, #52\n" // Calculate exp-52 +" bmi 1f\n" + + // Shift r0 left by exp-52. + // Ensure that we don't overflow ARM's 8-bit shift operand range. + // We need to handle anything up to an 11-bit value here as we know that + // 52 <= exp <= 1024 (0x400). Any shift beyond 31 bits results in zero + // anyway, so as long as we don't touch the bottom 5 bits, we can use + // a logical OR to push long shifts into the 32 <= (exp&0xff) <= 255 range. +" bic %2, %3, #0xff\n" +" orr %3, %3, %2, LSR #3\n" + // We can now perform a straight shift, avoiding the need for any + // conditional instructions or extra branches. +" mov %Q4, %Q4, LSL %3\n" +" b 2f\n" +"1:\n" // Shift r0 right by 52-exp. + // We know that 0 <= exp < 52, and we can shift up to 255 bits so 52-exp + // will always be a valid shift and we can sk%3 the range check for this case. +" rsb %3, %1, #52\n" +" mov %Q4, %Q4, LSR %3\n" + + // %1 = (decoded) exponent + // %R4 = upper mantissa and sign + // %Q4 = partially-converted integer + +"2:\n" + // ---- Upper Mantissa ---- + // This is much the same as the lower mantissa, with a few different + // boundary checks and some masking to hide the exponent & sign bit in the + // upper word. + // Note that the upper mantissa is pre-shifted by 20 in %R4, but we shift + // it left more to remove the sign and exponent so it is effectively + // pre-shifted by 31 bits. +" subs %3, %1, #31\n" // Calculate exp-31 +" mov %1, %R4, LSL #11\n" // Re-use %1 as a temporary register. +" bmi 3f\n" + + // Shift %R4 left by exp-31. + // Avoid overflowing the 8-bit shift range, as before. +" bic %2, %3, #0xff\n" +" orr %3, %3, %2, LSR #3\n" + // Perform the shift. +" mov %2, %1, LSL %3\n" +" b 4f\n" +"3:\n" // Shift r1 right by 31-exp. + // We know that 0 <= exp < 31, and we can shift up to 255 bits so 31-exp + // will always be a valid shift and we can skip the range check for this case. +" rsb %3, %3, #0\n" // Calculate 31-exp from -(exp-31) +" mov %2, %1, LSR %3\n" // Thumb-2 can't do "LSR %3" in "orr". + + // %Q4 = partially-converted integer (lower) + // %R4 = upper mantissa and sign + // %2 = partially-converted integer (upper) + +"4:\n" + // Combine the converted parts. +" orr %Q4, %Q4, %2\n" + // Negate the result if we have to, and move it to %0 in the process. To + // avoid conditionals, we can do this by inverting on %R4[31], then adding + // %R4[31]>>31. +" eor %Q4, %Q4, %R4, ASR #31\n" +" add %0, %Q4, %R4, LSR #31\n" +" b 9f\n" +"8:\n" + // +/-INFINITY, +/-0, subnormals, NaNs, and anything else out-of-range that + // will result in a conversion of '0'. +" mov %0, #0\n" +"9:\n" + : "=r" (i), "=&r" (tmp0), "=&r" (tmp1), "=&r" (tmp2) + : "r" (d) + : "cc" + ); + return i; #else int32 i; jsdouble two32, two31;