Optimize DoubleToECMAInt32 for ARM. [Bug 551837][r=vlad]

This commit is contained in:
Jacob Bramley 2010-04-27 08:31:36 +01:00
Родитель 2153048481
Коммит 228379824a
1 изменённых файлов: 121 добавлений и 0 удалений

Просмотреть файл

@ -442,6 +442,127 @@ js_DoubleToECMAInt32(jsdouble d)
}
return int32(du.d);
#elif defined (__arm__) && defined (__GNUC__)
int32_t i;
uint32_t tmp0;
uint32_t tmp1;
uint32_t tmp2;
asm (
// We use a pure integer solution here. In the 'softfp' ABI, the argument
// will start in r0 and r1, and VFP can't do all of the necessary ECMA
// conversions by itself so some integer code will be required anyway. A
// hybrid solution is faster on A9, but this pure integer solution is
// notably faster for A8.
// %0 is the result register, and may alias either of the %[QR]4 registers.
// %Q4 holds the lower part of the mantissa.
// %R4 holds the sign, exponent, and the upper part of the mantissa.
// %1, %2 and %3 are used as temporary values.
// Extract the exponent.
" mov %1, %R4, LSR #20\n"
" bic %1, %1, #(1 << 11)\n" // Clear the sign.
// Set the implicit top bit of the mantissa. This clobbers a bit of the
// exponent, but we have already extracted that.
" orr %R4, %R4, #(1 << 20)\n"
// Special Cases
// We should return zero in the following special cases:
// - Exponent is 0x000 - 1023: +/-0 or subnormal.
// - Exponent is 0x7ff - 1023: +/-INFINITY or NaN
// - This case is implicitly handled by the standard code path anyway,
// as shifting the mantissa up by the exponent will result in '0'.
//
// The result is composed of the mantissa, prepended with '1' and
// bit-shifted left by the (decoded) exponent. Note that because the r1[20]
// is the bit with value '1', r1 is effectively already shifted (left) by
// 20 bits, and r0 is already shifted by 52 bits.
// Adjust the exponent to remove the encoding offset. If the decoded
// exponent is negative, quickly bail out with '0' as such values round to
// zero anyway. This also catches +/-0 and subnormals.
" sub %1, %1, #0xff\n"
" subs %1, %1, #0x300\n"
" bmi 8f\n"
// %1 = (decoded) exponent >= 0
// %R4 = upper mantissa and sign
// ---- Lower Mantissa ----
" subs %3, %1, #52\n" // Calculate exp-52
" bmi 1f\n"
// Shift r0 left by exp-52.
// Ensure that we don't overflow ARM's 8-bit shift operand range.
// We need to handle anything up to an 11-bit value here as we know that
// 52 <= exp <= 1024 (0x400). Any shift beyond 31 bits results in zero
// anyway, so as long as we don't touch the bottom 5 bits, we can use
// a logical OR to push long shifts into the 32 <= (exp&0xff) <= 255 range.
" bic %2, %3, #0xff\n"
" orr %3, %3, %2, LSR #3\n"
// We can now perform a straight shift, avoiding the need for any
// conditional instructions or extra branches.
" mov %Q4, %Q4, LSL %3\n"
" b 2f\n"
"1:\n" // Shift r0 right by 52-exp.
// We know that 0 <= exp < 52, and we can shift up to 255 bits so 52-exp
// will always be a valid shift and we can skip the range check for this case.
" rsb %3, %1, #52\n"
" mov %Q4, %Q4, LSR %3\n"
// %1 = (decoded) exponent
// %R4 = upper mantissa and sign
// %Q4 = partially-converted integer
"2:\n"
// ---- Upper Mantissa ----
// This is much the same as the lower mantissa, with a few different
// boundary checks and some masking to hide the exponent & sign bit in the
// upper word.
// Note that the upper mantissa is pre-shifted by 20 in %R4, but we shift
// it left more to remove the sign and exponent so it is effectively
// pre-shifted by 31 bits.
" subs %3, %1, #31\n" // Calculate exp-31
" mov %1, %R4, LSL #11\n" // Re-use %1 as a temporary register.
" bmi 3f\n"
// Shift %R4 left by exp-31.
// Avoid overflowing the 8-bit shift range, as before.
" bic %2, %3, #0xff\n"
" orr %3, %3, %2, LSR #3\n"
// Perform the shift.
" mov %2, %1, LSL %3\n"
" b 4f\n"
"3:\n" // Shift r1 right by 31-exp.
// We know that 0 <= exp < 31, and we can shift up to 255 bits so 31-exp
// will always be a valid shift and we can skip the range check for this case.
" rsb %3, %3, #0\n" // Calculate 31-exp from -(exp-31)
" mov %2, %1, LSR %3\n" // Thumb-2 can't do "LSR %3" in "orr".
// %Q4 = partially-converted integer (lower)
// %R4 = upper mantissa and sign
// %2 = partially-converted integer (upper)
"4:\n"
// Combine the converted parts.
" orr %Q4, %Q4, %2\n"
// Negate the result if we have to, and move it to %0 in the process. To
// avoid conditionals, we can do this by inverting on %R4[31], then adding
// %R4[31]>>31.
" eor %Q4, %Q4, %R4, ASR #31\n"
" add %0, %Q4, %R4, LSR #31\n"
" b 9f\n"
"8:\n"
// +/-INFINITY, +/-0, subnormals, NaNs, and anything else out-of-range that
// will result in a conversion of '0'.
" mov %0, #0\n"
"9:\n"
: "=r" (i), "=&r" (tmp0), "=&r" (tmp1), "=&r" (tmp2)
: "r" (d)
: "cc"
);
return i;
#else
int32 i;
jsdouble two32, two31;