зеркало из https://github.com/mozilla/pjs.git
Optimize DoubleToECMAInt32 for ARM. [Bug 551837][r=vlad]
This commit is contained in:
Родитель
2153048481
Коммит
228379824a
121
js/src/jsnum.h
121
js/src/jsnum.h
|
@ -442,6 +442,127 @@ js_DoubleToECMAInt32(jsdouble d)
|
||||||
}
|
}
|
||||||
|
|
||||||
return int32(du.d);
|
return int32(du.d);
|
||||||
|
#elif defined (__arm__) && defined (__GNUC__)
|
||||||
|
int32_t i;
|
||||||
|
uint32_t tmp0;
|
||||||
|
uint32_t tmp1;
|
||||||
|
uint32_t tmp2;
|
||||||
|
asm (
|
||||||
|
// We use a pure integer solution here. In the 'softfp' ABI, the argument
|
||||||
|
// will start in r0 and r1, and VFP can't do all of the necessary ECMA
|
||||||
|
// conversions by itself so some integer code will be required anyway. A
|
||||||
|
// hybrid solution is faster on A9, but this pure integer solution is
|
||||||
|
// notably faster for A8.
|
||||||
|
|
||||||
|
// %0 is the result register, and may alias either of the %[QR]4 registers.
|
||||||
|
// %Q4 holds the lower part of the mantissa.
|
||||||
|
// %R4 holds the sign, exponent, and the upper part of the mantissa.
|
||||||
|
// %1, %2 and %3 are used as temporary values.
|
||||||
|
|
||||||
|
// Extract the exponent.
|
||||||
|
" mov %1, %R4, LSR #20\n"
|
||||||
|
" bic %1, %1, #(1 << 11)\n" // Clear the sign.
|
||||||
|
|
||||||
|
// Set the implicit top bit of the mantissa. This clobbers a bit of the
|
||||||
|
// exponent, but we have already extracted that.
|
||||||
|
" orr %R4, %R4, #(1 << 20)\n"
|
||||||
|
|
||||||
|
// Special Cases
|
||||||
|
// We should return zero in the following special cases:
|
||||||
|
// - Exponent is 0x000 - 1023: +/-0 or subnormal.
|
||||||
|
// - Exponent is 0x7ff - 1023: +/-INFINITY or NaN
|
||||||
|
// - This case is implicitly handled by the standard code path anyway,
|
||||||
|
// as shifting the mantissa up by the exponent will result in '0'.
|
||||||
|
//
|
||||||
|
// The result is composed of the mantissa, prepended with '1' and
|
||||||
|
// bit-shifted left by the (decoded) exponent. Note that because the r1[20]
|
||||||
|
// is the bit with value '1', r1 is effectively already shifted (left) by
|
||||||
|
// 20 bits, and r0 is already shifted by 52 bits.
|
||||||
|
|
||||||
|
// Adjust the exponent to remove the encoding offset. If the decoded
|
||||||
|
// exponent is negative, quickly bail out with '0' as such values round to
|
||||||
|
// zero anyway. This also catches +/-0 and subnormals.
|
||||||
|
" sub %1, %1, #0xff\n"
|
||||||
|
" subs %1, %1, #0x300\n"
|
||||||
|
" bmi 8f\n"
|
||||||
|
|
||||||
|
// %1 = (decoded) exponent >= 0
|
||||||
|
// %R4 = upper mantissa and sign
|
||||||
|
|
||||||
|
// ---- Lower Mantissa ----
|
||||||
|
" subs %3, %1, #52\n" // Calculate exp-52
|
||||||
|
" bmi 1f\n"
|
||||||
|
|
||||||
|
// Shift %Q4 left by exp-52.
|
||||||
|
// Ensure that we don't overflow ARM's 8-bit shift operand range.
|
||||||
|
// We need to handle anything up to an 11-bit value here as we know that
|
||||||
|
// 52 <= exp <= 1024 (0x400). Any shift beyond 31 bits results in zero
|
||||||
|
// anyway, so as long as we don't touch the bottom 5 bits, we can use
|
||||||
|
// a logical OR to push long shifts into the 32 <= (exp&0xff) <= 255 range.
|
||||||
|
" bic %2, %3, #0xff\n"
|
||||||
|
" orr %3, %3, %2, LSR #3\n"
|
||||||
|
// We can now perform a straight shift, avoiding the need for any
|
||||||
|
// conditional instructions or extra branches.
|
||||||
|
" mov %Q4, %Q4, LSL %3\n"
|
||||||
|
" b 2f\n"
|
||||||
|
"1:\n" // Shift r0 right by 52-exp.
|
||||||
|
// We know that 0 <= exp < 52, and we can shift up to 255 bits so 52-exp
|
||||||
|
// will always be a valid shift and we can skip the range check for this case.
|
||||||
|
" rsb %3, %1, #52\n"
|
||||||
|
" mov %Q4, %Q4, LSR %3\n"
|
||||||
|
|
||||||
|
// %1 = (decoded) exponent
|
||||||
|
// %R4 = upper mantissa and sign
|
||||||
|
// %Q4 = partially-converted integer
|
||||||
|
|
||||||
|
"2:\n"
|
||||||
|
// ---- Upper Mantissa ----
|
||||||
|
// This is much the same as the lower mantissa, with a few different
|
||||||
|
// boundary checks and some masking to hide the exponent & sign bit in the
|
||||||
|
// upper word.
|
||||||
|
// Note that the upper mantissa is pre-shifted by 20 in %R4, but we shift
|
||||||
|
// it left more to remove the sign and exponent so it is effectively
|
||||||
|
// pre-shifted by 31 bits.
|
||||||
|
" subs %3, %1, #31\n" // Calculate exp-31
|
||||||
|
" mov %1, %R4, LSL #11\n" // Re-use %1 as a temporary register.
|
||||||
|
" bmi 3f\n"
|
||||||
|
|
||||||
|
// Shift %R4 left by exp-31.
|
||||||
|
// Avoid overflowing the 8-bit shift range, as before.
|
||||||
|
" bic %2, %3, #0xff\n"
|
||||||
|
" orr %3, %3, %2, LSR #3\n"
|
||||||
|
// Perform the shift.
|
||||||
|
" mov %2, %1, LSL %3\n"
|
||||||
|
" b 4f\n"
|
||||||
|
"3:\n" // Shift r1 right by 31-exp.
|
||||||
|
// We know that 0 <= exp < 31, and we can shift up to 255 bits so 31-exp
|
||||||
|
// will always be a valid shift and we can skip the range check for this case.
|
||||||
|
" rsb %3, %3, #0\n" // Calculate 31-exp from -(exp-31)
|
||||||
|
" mov %2, %1, LSR %3\n" // Thumb-2 can't do "LSR %3" in "orr".
|
||||||
|
|
||||||
|
// %Q4 = partially-converted integer (lower)
|
||||||
|
// %R4 = upper mantissa and sign
|
||||||
|
// %2 = partially-converted integer (upper)
|
||||||
|
|
||||||
|
"4:\n"
|
||||||
|
// Combine the converted parts.
|
||||||
|
" orr %Q4, %Q4, %2\n"
|
||||||
|
// Negate the result if we have to, and move it to %0 in the process. To
|
||||||
|
// avoid conditionals, we can do this by inverting on %R4[31], then adding
|
||||||
|
// %R4[31]>>31.
|
||||||
|
" eor %Q4, %Q4, %R4, ASR #31\n"
|
||||||
|
" add %0, %Q4, %R4, LSR #31\n"
|
||||||
|
" b 9f\n"
|
||||||
|
"8:\n"
|
||||||
|
// +/-INFINITY, +/-0, subnormals, NaNs, and anything else out-of-range that
|
||||||
|
// will result in a conversion of '0'.
|
||||||
|
" mov %0, #0\n"
|
||||||
|
"9:\n"
|
||||||
|
: "=r" (i), "=&r" (tmp0), "=&r" (tmp1), "=&r" (tmp2)
|
||||||
|
: "r" (d)
|
||||||
|
: "cc"
|
||||||
|
);
|
||||||
|
return i;
|
||||||
#else
|
#else
|
||||||
int32 i;
|
int32 i;
|
||||||
jsdouble two32, two31;
|
jsdouble two32, two31;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче