On DEC Alpha/OSF1, following the examples in <c_asm.h>, use inline
assembly code to get 64-bit products and squares from 32-bit multipliers
in just two instructions.  Speeds up multiplication and squaring a lot.
This commit is contained in:
nelsonb%netscape.com 2000-12-09 03:36:41 +00:00
Родитель 04688e0249
Коммит 5073b4c76e
1 изменённых файлов: 40 добавлений и 17 удалений

Просмотреть файл

@ -35,10 +35,13 @@
* the GPL. If you do not delete the provisions above, a recipient
* may use your version of this file under either the MPL or the GPL.
*
* $Id: mpi.c,v 1.27 2000/10/24 21:32:53 nelsonb%netscape.com Exp $
* $Id: mpi.c,v 1.28 2000/12/09 03:36:41 nelsonb%netscape.com Exp $
*/
#include "mpi-priv.h"
#if defined(OSF1)
#include <c_asm.h>
#endif
#if MP_LOGTAB
/*
@ -3794,6 +3797,10 @@ mp_err s_mp_mul(mp_int *a, const mp_int *b)
{ unsigned long long product = (unsigned long long)a * b; \
Plo = (mp_digit)product; \
Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
#elif defined(OSF1)
/* OSF1 variant of MP_MUL_DxD(a, b, Phi, Plo): multiply the digits a and b
 * using two asm() intrinsics (declared via <c_asm.h>, which this file
 * includes when OSF1 is defined).  Plo is assigned the result of the mulq
 * instruction (low half of the product) and Phi the result of umulh (high
 * half of the unsigned product).
 * NOTE(review): %a0/%a1 presumably name the registers holding the first and
 * second asm() arguments and %v0 the return-value register -- confirm
 * against the compiler's <c_asm.h> documentation.
 * a and b are each named in both asm() calls, so pass side-effect-free
 * expressions.
 */
#define MP_MUL_DxD(a, b, Phi, Plo) \
{ Plo = asm ("mulq %a0, %a1, %v0", a, b);\
Phi = asm ("umulh %a0, %a1, %v0", a, b); }
#else
#define MP_MUL_DxD(a, b, Phi, Plo) \
{ mp_digit a0b1, a1b0; \
@ -3927,6 +3934,30 @@ void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit
}
#endif
/*
 * MP_SQR_D(a, Phi, Plo): square the single digit a and store the
 * double-width result in two digits, Phi (high half) and Plo (low half).
 * Phi and Plo must be assignable mp_digit lvalues.
 *
 * The SOLARIS and generic variants read a exactly once, into a local, so
 * any expression may be passed as a (e.g. x + 1).  The OSF1 variant names
 * a in both asm() calls, so pass side-effect-free expressions there.
 */
#if defined(SOLARIS) && (ULONG_MAX == UINT_MAX)
/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
#define MP_SQR_D(a, Phi, Plo) \
  { unsigned long long square = (unsigned long long)(a); \
    square *= square; \
    Plo = (mp_digit)square; \
    Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
#elif defined(OSF1)
/* Two asm() intrinsics: Plo takes the mulq result, Phi the umulh result. */
#define MP_SQR_D(a, Phi, Plo) \
  { Plo = asm ("mulq %a0, %a0, %v0", a);\
    Phi = asm ("umulh %a0, %a0, %v0", a); }
#else
/*
 * Portable variant: split a into half digits a_lo and a_hi, so that
 *   a*a = a_hi*a_hi * R*R  +  2*a_lo*a_hi * R  +  a_lo*a_lo
 * with R = 2**MP_HALF_DIGIT_BIT.  The doubled middle product is folded in
 * by shifting one bit further than a plain half-digit shift.
 */
#define MP_SQR_D(a, Phi, Plo) \
  { mp_digit Pmid; \
    const mp_digit Pa_lo = (mp_digit)(a) & MP_HALF_DIGIT_MAX; \
    const mp_digit Pa_hi = (mp_digit)(a) >> MP_HALF_DIGIT_BIT; \
    Plo  = Pa_lo * Pa_lo; \
    Phi  = Pa_hi * Pa_hi; \
    Pmid = Pa_lo * Pa_hi; \
    /* upper part of 2*Pmid goes to the high digit */ \
    Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1); \
    /* lower part of 2*Pmid, aligned to the low digit */ \
    Pmid <<= (MP_HALF_DIGIT_BIT + 1); \
    Plo += Pmid; \
    /* unsigned wrap-around means a carry out of the low digit */ \
    if (Plo < Pmid) \
      ++Phi; \
  }
#endif
#if !defined(MP_ASSEMBLY_SQUARE)
/* Add the squares of the digits of a to the digits of b. */
void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
@ -3972,30 +4003,22 @@ void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
mp_digit carry = 0;
while (a_len--) {
mp_digit a_i = *pa++;
mp_digit a0 = a_i & MP_HALF_DIGIT_MAX;
mp_digit a1 = a_i >> MP_HALF_DIGIT_BIT;
mp_digit a0a0, a0a1, a1a1;
mp_digit a0a0, a1a1;
a0a0 = a0 * a0;
a1a1 = a1 * a1;
a0a1 = a0 * a1;
MP_SQR_D(a_i, a1a1, a0a0);
a1a1 += a0a1 >> (MP_HALF_DIGIT_BIT - 1);
a0a1 <<= (MP_HALF_DIGIT_BIT + 1);
a0a0 += a0a1;
if (a0a0 < a0a1)
++a1a1;
/* here a1a1 and a0a0 constitute a_i ** 2 */
a0a0 += carry;
if (a0a0 < carry)
++a1a1;
/* here a1a1 and a0a0 constitute a_i ** 2 */
/* now add to ps */
a0a0 += a0a1 = *ps;
if (a0a0 < a0a1)
a0a0 += a_i = *ps;
if (a0a0 < a_i)
++a1a1;
*ps++ = a0a0;
a1a1 += a0a1 = *ps;
carry = (a1a1 < a0a1);
a1a1 += a_i = *ps;
carry = (a1a1 < a_i);
*ps++ = a1a1;
}
while (carry) {