Force gcc to use x86's built-in 64/32->32,32 division instruction

rather than swanning off to a helper function. This seems to make
woefully little actual performance difference, which annoys me, but
it's a just-about-detectable improvement so I'll leave it in.

[originally from svn r3136]
This commit is contained in:
Simon Tatham 2003-04-23 15:25:45 +00:00
Parent afd4b4d662
Commit 51fa8d6294
1 changed file: 12 additions and 5 deletions

17
sshbn.c
View file

@ -15,6 +15,10 @@ typedef unsigned long long BignumDblInt;
#define BIGNUM_TOP_BIT 0x80000000UL #define BIGNUM_TOP_BIT 0x80000000UL
#define BIGNUM_INT_BITS 32 #define BIGNUM_INT_BITS 32
#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2) #define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
/*
 * DIVMOD_WORD(q, r, hi, lo, w): divide the double-word value hi:lo by
 * the single word w using the x86 `div` instruction directly, storing
 * the quotient in q and the remainder in r.
 *
 * Operand constraints: hi is loaded into edx ("d") and lo into eax
 * ("a") to form the dividend, and w is placed in a general register
 * ("r", referenced as %2). Afterwards the quotient is read back from
 * eax ("=a") and the remainder from edx ("=d").
 *
 * NOTE(review): x86 `div` raises a divide-error fault if w is zero or
 * if the quotient does not fit in one word, so callers presumably
 * guarantee hi < w -- confirm at each call site.
 */
#define DIVMOD_WORD(q, r, hi, lo, w) \
__asm__("div %2" : \
"=d" (r), "=a" (q) : \
"r" (w), "d" (hi), "a" (lo))
#else #else
typedef unsigned short BignumInt; typedef unsigned short BignumInt;
typedef unsigned long BignumDblInt; typedef unsigned long BignumDblInt;
@ -22,6 +26,11 @@ typedef unsigned long BignumDblInt;
#define BIGNUM_TOP_BIT 0x8000U #define BIGNUM_TOP_BIT 0x8000U
#define BIGNUM_INT_BITS 16 #define BIGNUM_INT_BITS 16
#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2) #define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2)
/*
 * DIVMOD_WORD(q, r, hi, lo, w): plain-C double-word by single-word
 * division. Forms the dividend hi:lo in a BignumDblInt, then stores
 * dividend / w in q and dividend % w in r.
 *
 * q and r must be assignable lvalues. Every argument is parenthesized
 * and w is evaluated exactly once, so arbitrary expressions may be
 * passed safely. The temporaries carry a trailing-underscore name so
 * they cannot capture a caller variable such as `n`.
 *
 * w must be non-zero; division by zero is undefined behaviour.
 */
#define DIVMOD_WORD(q, r, hi, lo, w) do { \
    BignumDblInt divmod_n_ = \
        (((BignumDblInt)(hi)) << BIGNUM_INT_BITS) | (lo); \
    BignumDblInt divmod_w_ = (w); \
    (q) = divmod_n_ / divmod_w_; \
    (r) = divmod_n_ % divmod_w_; \
} while (0)
#endif #endif
#define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8) #define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
@ -175,13 +184,11 @@ static void internal_mod(BignumInt *a, int alen,
ai1 = a[i + 1]; ai1 = a[i + 1];
/* Find q = h:a[i] / m0 */ /* Find q = h:a[i] / m0 */
t = ((BignumDblInt) h << BIGNUM_INT_BITS) + a[i]; DIVMOD_WORD(q, r, h, a[i], m0);
q = t / m0;
r = t % m0;
/* Refine our estimate of q by looking at /* Refine our estimate of q by looking at
h:a[i]:a[i+1] / m0:m1 */ h:a[i]:a[i+1] / m0:m1 */
t = (BignumDblInt) m1 * (BignumDblInt) q; t = MUL_WORD(m1, q);
if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) { if (t > ((BignumDblInt) r << BIGNUM_INT_BITS) + ai1) {
q--; q--;
t -= m1; t -= m1;
@ -193,7 +200,7 @@ static void internal_mod(BignumInt *a, int alen,
/* Subtract q * m from a[i...] */ /* Subtract q * m from a[i...] */
c = 0; c = 0;
for (k = mlen - 1; k >= 0; k--) { for (k = mlen - 1; k >= 0; k--) {
t = (BignumDblInt) q * (BignumDblInt) m[k]; t = MUL_WORD(q, m[k]);
t += c; t += c;
c = t >> BIGNUM_INT_BITS; c = t >> BIGNUM_INT_BITS;
if ((BignumInt) t > a[i + k]) if ((BignumInt) t > a[i + k])