зеркало из https://github.com/mozilla/pjs.git
Big changes for performance.
- In mpi-priv.h, declare new 3 argument versions of s_mp_add and s_mp_sub. Also declare new set of s_mpv_ functions that operate on vectors (arrays) of mp_digits instead of on mp_ints. These functions are candidates for implementation in assembler. - In mpi.c reimplement mp_add and mp_sub using the new 3arg functions. Implement 3 argument versions of s_mp_add and s_mp_sub. This eliminates all need for temporary variables in mp_add and mp_sub. Implement c language reference implementations of new s_mpv vector multiply and multiply and add functions. Change mp_mul and mp_sqr so they no longer pre-zero the output variable. It's no longer nececssary with the new s_mpv functions. s_mp_pad no longer zeros out the new padded space. -In mpmontg.c, implement variable width exponetiation windows. Implement a new function to compute the multiply and Montgomery reduction in a single pass. This is "Improvement 2" from Dusse' and Kaliski's paper "A Cryptographic Library for the Motorola DSP56000". Performance impact is negligible in this c implementation. However, this function is another target for assembly language optimization.
This commit is contained in:
Родитель
a10342172b
Коммит
f9eed96546
|
@ -1,8 +1,11 @@
|
|||
/*
|
||||
* mpi-priv.h
|
||||
*
|
||||
* mpi-priv.h - Private header file for MPI
|
||||
* Arbitrary precision integer arithmetic library
|
||||
*
|
||||
* NOTE WELL: the content of this header file is NOT part of the "public"
|
||||
* API for the MPI library, and may change at any time.
|
||||
* Application programs that use libmpi should NOT include this header file.
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public
|
||||
* License Version 1.1 (the "License"); you may not use this file
|
||||
* except in compliance with the License. You may obtain a copy of
|
||||
|
@ -35,7 +38,7 @@
|
|||
* the GPL. If you do not delete the provisions above, a recipient
|
||||
* may use your version of this file under either the MPL or the GPL.
|
||||
*
|
||||
* $Id: mpi-priv.h,v 1.8 2000-08-09 20:42:18 nelsonb%netscape.com Exp $
|
||||
* $Id: mpi-priv.h,v 1.9 2000-08-22 01:57:33 nelsonb%netscape.com Exp $
|
||||
*/
|
||||
#ifndef _MPI_PRIV_H_
|
||||
#define _MPI_PRIV_H_ 1
|
||||
|
@ -142,6 +145,7 @@ extern const float s_logv_2[];
|
|||
void s_mp_free(void *ptr); /* general free function */
|
||||
extern unsigned long mp_allocs;
|
||||
extern unsigned long mp_frees;
|
||||
extern unsigned long mp_copies;
|
||||
#else
|
||||
|
||||
/* Even if these are defined as macros, we need to respect the settings
|
||||
|
@ -198,9 +202,11 @@ mp_err s_mp_mod_d(mp_int *mp, mp_digit d, mp_digit *r);
|
|||
mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu);
|
||||
/* Barrett reduction */
|
||||
mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */
|
||||
mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c);
|
||||
mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
|
||||
mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c);
|
||||
mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset);
|
||||
/* a += b * RADIX^offset */
|
||||
mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
|
||||
mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */
|
||||
#if MP_SQUARE
|
||||
mp_err s_mp_sqr(mp_int *a); /* magnitude square */
|
||||
|
@ -219,11 +225,17 @@ int s_mp_tovalue(char ch, int r); /* convert ch to value */
|
|||
char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */
|
||||
int s_mp_outlen(int bits, int r); /* output length in bytes */
|
||||
mp_digit s_mp_invmod_32b(mp_digit P); /* returns (P ** -1) mod (2 ** 32) */
|
||||
void s_mp_mul_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c);
|
||||
/* c += a * b * (MP_RADIX ** offset); */
|
||||
|
||||
/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */
|
||||
void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c);
|
||||
void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
|
||||
mp_digit *c);
|
||||
void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
|
||||
mp_digit *c);
|
||||
|
||||
/* c += a * b * (MP_RADIX ** offset); */
|
||||
#define s_mp_mul_d_add_offset(a, b, c, off) \
|
||||
(s_mp_mul_add(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off), MP_OKAY)
|
||||
(s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off), MP_OKAY)
|
||||
|
||||
/* }}} */
|
||||
#endif
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
* the GPL. If you do not delete the provisions above, a recipient
|
||||
* may use your version of this file under either the MPL or the GPL.
|
||||
*
|
||||
* $Id: mpi.c,v 1.18 2000-08-11 01:58:20 nelsonb%netscape.com Exp $
|
||||
* $Id: mpi.c,v 1.19 2000-08-22 01:57:34 nelsonb%netscape.com Exp $
|
||||
*/
|
||||
|
||||
#include "mpi-priv.h"
|
||||
|
@ -81,8 +81,8 @@ static const char *s_dmap_1 =
|
|||
|
||||
unsigned long mp_allocs;
|
||||
unsigned long mp_frees;
|
||||
unsigned long mp_copies;
|
||||
|
||||
#define MP_CHECKOK(x) if (MP_OKAY > (res = (x))) goto CLEANUP
|
||||
|
||||
/* {{{ Default precision manipulation */
|
||||
|
||||
|
@ -200,6 +200,7 @@ mp_err mp_copy(const mp_int *from, mp_int *to)
|
|||
if(from == to)
|
||||
return MP_OKAY;
|
||||
|
||||
++mp_copies;
|
||||
{ /* copy */
|
||||
mp_digit *tmp;
|
||||
|
||||
|
@ -702,38 +703,22 @@ mp_err mp_neg(const mp_int *a, mp_int *b)
|
|||
|
||||
mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
|
||||
{
|
||||
mp_int tmp;
|
||||
mp_err res;
|
||||
|
||||
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
|
||||
|
||||
MP_DIGITS(&tmp) = 0;
|
||||
if (c == a) {
|
||||
MP_CHECKOK( mp_init_copy(&tmp, a) );
|
||||
a = &tmp;
|
||||
if (c == b)
|
||||
b = &tmp;
|
||||
} else if (c == b) {
|
||||
MP_CHECKOK( mp_init_copy(&tmp, b) );
|
||||
b = &tmp;
|
||||
}
|
||||
|
||||
if(SIGN(a) == SIGN(b)) { /* same sign: add values, keep sign */
|
||||
MP_CHECKOK( mp_copy(a, c) );
|
||||
MP_CHECKOK( s_mp_add(c, b) );
|
||||
MP_CHECKOK( s_mp_add_3arg(a, b, c) );
|
||||
} else if(s_mp_cmp(a, b) >= 0) { /* different sign: a >= b */
|
||||
MP_CHECKOK( mp_copy(a, c) );
|
||||
MP_CHECKOK( s_mp_sub(c, b) );
|
||||
MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
|
||||
} else { /* different sign: a < b */
|
||||
MP_CHECKOK( mp_copy(b, c) );
|
||||
MP_CHECKOK( s_mp_sub(c, a) );
|
||||
MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
|
||||
}
|
||||
|
||||
if (s_mp_cmp_d(c, 0) == MP_EQ)
|
||||
SIGN(c) = ZPOS;
|
||||
|
||||
CLEANUP:
|
||||
mp_clear(&tmp);
|
||||
return res;
|
||||
|
||||
} /* end mp_add() */
|
||||
|
@ -750,37 +735,25 @@ CLEANUP:
|
|||
|
||||
mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
|
||||
{
|
||||
mp_int tmp;
|
||||
mp_err res;
|
||||
int diffSign;
|
||||
int magDiff;
|
||||
|
||||
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
|
||||
|
||||
MP_DIGITS(&tmp) = 0;
|
||||
if (a == b) {
|
||||
mp_zero(c);
|
||||
return MP_OKAY;
|
||||
}
|
||||
if (c == a) {
|
||||
MP_CHECKOK( mp_init_copy(&tmp, a) );
|
||||
a = &tmp;
|
||||
} else if (c == b) {
|
||||
MP_CHECKOK( mp_init_copy(&tmp, b) );
|
||||
b = &tmp;
|
||||
}
|
||||
|
||||
if (MP_SIGN(a) != MP_SIGN(b)) {
|
||||
MP_CHECKOK( mp_copy(a, c) );
|
||||
MP_CHECKOK( s_mp_add(c, b) );
|
||||
} else if (!(diffSign = s_mp_cmp(a, b))) {
|
||||
MP_CHECKOK( s_mp_add_3arg(a, b, c) );
|
||||
} else if (!(magDiff = s_mp_cmp(a, b))) {
|
||||
mp_zero(c);
|
||||
res = MP_OKAY;
|
||||
} else if (diffSign > 0) {
|
||||
MP_CHECKOK( mp_copy(a, c) );
|
||||
MP_CHECKOK( s_mp_sub(c, b) );
|
||||
} else if (magDiff > 0) {
|
||||
MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
|
||||
} else {
|
||||
MP_CHECKOK( mp_copy(b, c) );
|
||||
MP_CHECKOK( s_mp_sub(c, a) );
|
||||
MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
|
||||
MP_SIGN(c) = !MP_SIGN(a);
|
||||
}
|
||||
|
||||
|
@ -788,7 +761,6 @@ mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
|
|||
MP_SIGN(c) = MP_ZPOS;
|
||||
|
||||
CLEANUP:
|
||||
mp_clear(&tmp);
|
||||
return res;
|
||||
|
||||
} /* end mp_sub() */
|
||||
|
@ -808,6 +780,7 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
|
|||
mp_int tmp;
|
||||
mp_err res;
|
||||
mp_size ib;
|
||||
mp_size useda, usedb;
|
||||
|
||||
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
|
||||
|
||||
|
@ -831,21 +804,24 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
|
|||
a = xch;
|
||||
}
|
||||
|
||||
/* This has the effect of left-padding with zeroes... */
|
||||
MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
|
||||
if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
|
||||
goto CLEANUP;
|
||||
|
||||
pb = MP_DIGITS(b);
|
||||
s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
|
||||
|
||||
/* Outer loop: Digits of b */
|
||||
for(ib = 0; ib < USED(b); ib++) {
|
||||
useda = MP_USED(a);
|
||||
usedb = MP_USED(b);
|
||||
for (ib = 1; ib < usedb; ib++) {
|
||||
mp_digit b_i = *pb++;
|
||||
|
||||
if(b_i == 0)
|
||||
continue;
|
||||
|
||||
/* Inner product: Digits of a */
|
||||
s_mp_mul_d_add_offset(a, b_i, c, ib);
|
||||
if (b_i)
|
||||
s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
|
||||
else
|
||||
MP_DIGIT(c, ib + useda) = b_i;
|
||||
}
|
||||
|
||||
s_mp_clamp(c);
|
||||
|
@ -876,12 +852,13 @@ CLEANUP:
|
|||
/* sqr = a^2; Caller provides both a and tmp; */
|
||||
mp_err mp_sqr(const mp_int *a, mp_int *sqr)
|
||||
{
|
||||
mp_digit *pa, *ps, *alim;
|
||||
mp_digit *pa, *ps;
|
||||
mp_word w;
|
||||
mp_digit d, k;
|
||||
mp_digit d;
|
||||
mp_err res;
|
||||
mp_size ix;
|
||||
mp_int tmp;
|
||||
int count;
|
||||
|
||||
ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
|
||||
|
||||
|
@ -893,56 +870,32 @@ mp_err mp_sqr(const mp_int *a, mp_int *sqr)
|
|||
DIGITS(&tmp) = 0;
|
||||
}
|
||||
|
||||
/* Left-pad with zeroes */
|
||||
MP_USED(sqr) = 1; MP_DIGIT(sqr, 0) = 0;
|
||||
if((res = s_mp_pad(sqr, 2 * USED(a))) != MP_OKAY)
|
||||
goto CLEANUP;
|
||||
|
||||
/*
|
||||
The inner product is computed as:
|
||||
(C, S) = t[i,j] + 2 a[i] a[j] + C
|
||||
*/
|
||||
ix = 2 * MP_USED(a);
|
||||
if (ix > MP_ALLOC(sqr)) {
|
||||
MP_USED(sqr) = 1;
|
||||
MP_CHECKOK( s_mp_grow(sqr, ix) );
|
||||
}
|
||||
MP_USED(sqr) = ix;
|
||||
MP_DIGIT(sqr, 0) = 0;
|
||||
|
||||
pa = MP_DIGITS(a);
|
||||
alim = pa + MP_USED(a);
|
||||
for (ix = 0; pa < alim; ++ix) {
|
||||
count = MP_USED(a) - 1;
|
||||
if (count > 0) {
|
||||
d = *pa++;
|
||||
ps = MP_DIGITS(sqr) + 1 + (ix << 1);
|
||||
s_mp_mul_add(pa, alim - pa, d, ps);
|
||||
} /* for(ix ...) */
|
||||
s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
|
||||
for (ix = 3; --count > 0; ix += 2) {
|
||||
d = *pa++;
|
||||
s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
|
||||
} /* for(ix ...) */
|
||||
MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
|
||||
|
||||
/* now sqr *= 2 */
|
||||
ps = MP_DIGITS(sqr);
|
||||
k = 0;
|
||||
#define SHIFT_1_BIT(n) \
|
||||
d = ps[n]; ps[n] = (d << 1) | k; k = d >> (DIGIT_BIT - 1)
|
||||
for (ix = MP_USED(sqr); ix >= 8; ix -= 8) {
|
||||
SHIFT_1_BIT(0);
|
||||
SHIFT_1_BIT(1);
|
||||
SHIFT_1_BIT(2);
|
||||
SHIFT_1_BIT(3);
|
||||
SHIFT_1_BIT(4);
|
||||
SHIFT_1_BIT(5);
|
||||
SHIFT_1_BIT(6);
|
||||
SHIFT_1_BIT(7);
|
||||
ps += 8;
|
||||
}
|
||||
if (ix) {
|
||||
ps += ix;
|
||||
switch (ix) { /* all fallthru */
|
||||
case 7: SHIFT_1_BIT(-7); /* FALLTHRU */
|
||||
case 6: SHIFT_1_BIT(-6); /* FALLTHRU */
|
||||
case 5: SHIFT_1_BIT(-5); /* FALLTHRU */
|
||||
case 4: SHIFT_1_BIT(-4); /* FALLTHRU */
|
||||
case 3: SHIFT_1_BIT(-3); /* FALLTHRU */
|
||||
case 2: SHIFT_1_BIT(-2); /* FALLTHRU */
|
||||
case 1: SHIFT_1_BIT(-1); /* FALLTHRU */
|
||||
case 0: break;
|
||||
}
|
||||
/* now sqr *= 2 */
|
||||
s_mp_mul_2(sqr);
|
||||
} else {
|
||||
MP_DIGIT(sqr, 1) = 0;
|
||||
}
|
||||
|
||||
pa = MP_DIGITS(a);
|
||||
alim = pa + MP_USED(a);
|
||||
ps = MP_DIGITS(sqr);
|
||||
w = 0;
|
||||
#define ADD_SQUARE(n) \
|
||||
|
@ -2544,7 +2497,7 @@ mp_err s_mp_pad(mp_int *mp, mp_size min)
|
|||
if ((res = s_mp_grow(mp, min)) != MP_OKAY)
|
||||
return res;
|
||||
} else {
|
||||
s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
|
||||
/* s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp)); */
|
||||
}
|
||||
|
||||
/* Increase precision; should already be 0-filled */
|
||||
|
@ -2770,12 +2723,15 @@ void s_mp_rshd(mp_int *mp, mp_size p)
|
|||
for (ix = USED(mp) - p; ix > 0; ix--)
|
||||
*dst++ = *src++;
|
||||
|
||||
MP_USED(mp) -= p;
|
||||
/* Fill the top digits with zeroes */
|
||||
while (p-- > 0)
|
||||
*dst++ = 0;
|
||||
|
||||
#if 0
|
||||
/* Strip off any leading zeroes */
|
||||
s_mp_clamp(mp);
|
||||
#endif
|
||||
|
||||
} /* end s_mp_rshd() */
|
||||
|
||||
|
@ -2796,21 +2752,23 @@ void s_mp_div_2(mp_int *mp)
|
|||
|
||||
mp_err s_mp_mul_2(mp_int *mp)
|
||||
{
|
||||
int ix;
|
||||
mp_digit kin = 0, kout;
|
||||
mp_err res;
|
||||
mp_digit *pd;
|
||||
int ix, used;
|
||||
mp_digit kin = 0;
|
||||
|
||||
/* Shift digits leftward by 1 bit */
|
||||
for(ix = 0; ix < USED(mp); ix++) {
|
||||
kout = (DIGIT(mp, ix) >> (DIGIT_BIT - 1)) & 1;
|
||||
DIGIT(mp, ix) = (DIGIT(mp, ix) << 1) | kin;
|
||||
|
||||
kin = kout;
|
||||
used = MP_USED(mp);
|
||||
pd = MP_DIGITS(mp);
|
||||
for (ix = 0; ix < used; ix++) {
|
||||
mp_digit d = *pd;
|
||||
*pd++ = (d << 1) | kin;
|
||||
kin = (d >> (DIGIT_BIT - 1));
|
||||
}
|
||||
|
||||
/* Deal with rollover from last digit */
|
||||
if(kin) {
|
||||
if(ix >= ALLOC(mp)) {
|
||||
if (kin) {
|
||||
if (ix >= ALLOC(mp)) {
|
||||
mp_err res;
|
||||
if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
|
||||
return res;
|
||||
}
|
||||
|
@ -3139,8 +3097,10 @@ mp_err s_mp_mod_d(mp_int *mp, mp_digit d, mp_digit *r)
|
|||
/* Compute a = |a| + |b| */
|
||||
mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
|
||||
{
|
||||
mp_word w, k = 0;
|
||||
mp_digit *pa, *pb;
|
||||
mp_word w = 0;
|
||||
mp_size ix;
|
||||
mp_size used;
|
||||
mp_err res;
|
||||
|
||||
/* Make sure a has enough precision for the output value */
|
||||
|
@ -3154,19 +3114,23 @@ mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
|
|||
less precision, we'll have to make sure the carry out is duly
|
||||
propagated upward among the higher-order digits of the sum.
|
||||
*/
|
||||
for(ix = 0; ix < USED(b); ix++) {
|
||||
w = (mp_word)DIGIT(a, ix) + DIGIT(b, ix) + k;
|
||||
DIGIT(a, ix) = ACCUM(w);
|
||||
k = CARRYOUT(w);
|
||||
pa = MP_DIGITS(a);
|
||||
pb = MP_DIGITS(b);
|
||||
used = MP_USED(b);
|
||||
for(ix = 0; ix < used; ix++) {
|
||||
w = w + *pa + *pb++;
|
||||
*pa++ = ACCUM(w);
|
||||
w = CARRYOUT(w);
|
||||
}
|
||||
|
||||
/* If we run out of 'b' digits before we're actually done, make
|
||||
sure the carries get propagated upward...
|
||||
*/
|
||||
while(k && ix < USED(a)) {
|
||||
w = (mp_word)DIGIT(a, ix) + k;
|
||||
DIGIT(a, ix) = ACCUM(w);
|
||||
k = CARRYOUT(w);
|
||||
used = MP_USED(a);
|
||||
while (w && ix < used) {
|
||||
w = w + *pa;
|
||||
*pa++ = ACCUM(w);
|
||||
w = CARRYOUT(w);
|
||||
++ix;
|
||||
}
|
||||
|
||||
|
@ -3174,11 +3138,11 @@ mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
|
|||
it. We could have done this initially, but why touch the memory
|
||||
allocator unless we're sure we have to?
|
||||
*/
|
||||
if(k) {
|
||||
if (w) {
|
||||
if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
|
||||
return res;
|
||||
|
||||
DIGIT(a, ix) = (mp_digit)k;
|
||||
DIGIT(a, ix) = (mp_digit)w;
|
||||
}
|
||||
|
||||
return MP_OKAY;
|
||||
|
@ -3187,6 +3151,69 @@ mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
|
|||
|
||||
/* }}} */
|
||||
|
||||
/* Compute c = |a| + |b| */ /* magnitude addition */
|
||||
mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
|
||||
{
|
||||
mp_digit *pa, *pb, *pc;
|
||||
mp_word w = 0;
|
||||
mp_size ix;
|
||||
mp_size used;
|
||||
mp_err res;
|
||||
|
||||
MP_SIGN(c) = MP_SIGN(a);
|
||||
if (MP_USED(a) < MP_USED(b)) {
|
||||
const mp_int *xch = a;
|
||||
a = b;
|
||||
b = xch;
|
||||
}
|
||||
|
||||
/* Make sure a has enough precision for the output value */
|
||||
if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
|
||||
return res;
|
||||
|
||||
/*
|
||||
Add up all digits up to the precision of b. If b had initially
|
||||
the same precision as a, or greater, we took care of it by the
|
||||
exchange step above, so there is no problem. If b had initially
|
||||
less precision, we'll have to make sure the carry out is duly
|
||||
propagated upward among the higher-order digits of the sum.
|
||||
*/
|
||||
pa = MP_DIGITS(a);
|
||||
pb = MP_DIGITS(b);
|
||||
pc = MP_DIGITS(c);
|
||||
used = MP_USED(b);
|
||||
for (ix = 0; ix < used; ix++) {
|
||||
w = w + *pa++ + *pb++;
|
||||
*pc++ = ACCUM(w);
|
||||
w = CARRYOUT(w);
|
||||
}
|
||||
|
||||
/* If we run out of 'b' digits before we're actually done, make
|
||||
sure the carries get propagated upward...
|
||||
*/
|
||||
used = MP_USED(a);
|
||||
while (ix < used) {
|
||||
w = w + *pa++;
|
||||
*pc++ = ACCUM(w);
|
||||
w = CARRYOUT(w);
|
||||
++ix;
|
||||
}
|
||||
|
||||
/* If there's an overall carry out, increase precision and include
|
||||
it. We could have done this initially, but why touch the memory
|
||||
allocator unless we're sure we have to?
|
||||
*/
|
||||
if (w) {
|
||||
if((res = s_mp_pad(c, ix + 1)) != MP_OKAY)
|
||||
return res;
|
||||
|
||||
DIGIT(c, ix) = (mp_digit)w;
|
||||
++ix;
|
||||
}
|
||||
MP_USED(c) = ix;
|
||||
return MP_OKAY;
|
||||
|
||||
}
|
||||
/* {{{ s_mp_add_offset(a, b, offset) */
|
||||
|
||||
/* Compute a = |a| + ( |b| * (RADIX ** offset) ) */
|
||||
|
@ -3288,6 +3315,52 @@ mp_err s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract */
|
|||
|
||||
/* }}} */
|
||||
|
||||
/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract */
|
||||
mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
|
||||
{
|
||||
mp_digit *pa, *pb, *pc;
|
||||
mp_sword w = 0;
|
||||
int ix, limit;
|
||||
mp_err res;
|
||||
|
||||
MP_SIGN(c) = MP_SIGN(a);
|
||||
|
||||
/* Make sure a has enough precision for the output value */
|
||||
if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
|
||||
return res;
|
||||
|
||||
/*
|
||||
Subtract and propagate borrow. Up to the precision of b, this
|
||||
accounts for the digits of b; after that, we just make sure the
|
||||
carries get to the right place. This saves having to pad b out to
|
||||
the precision of a just to make the loops work right...
|
||||
*/
|
||||
pa = MP_DIGITS(a);
|
||||
pb = MP_DIGITS(b);
|
||||
pc = MP_DIGITS(c);
|
||||
limit = MP_USED(b);
|
||||
for (ix = 0; ix < limit; ++ix) {
|
||||
w = w + *pa++ - *pb++;
|
||||
*pc++ = ACCUM(w);
|
||||
w >>= MP_DIGIT_BIT;
|
||||
}
|
||||
for (limit = MP_USED(a); ix < limit; ++ix) {
|
||||
w = w + *pa++;
|
||||
*pc++ = ACCUM(w);
|
||||
w >>= MP_DIGIT_BIT;
|
||||
}
|
||||
|
||||
/* Clobber any leading zeroes we created */
|
||||
MP_USED(c) = ix;
|
||||
s_mp_clamp(c);
|
||||
|
||||
/*
|
||||
If there was a borrow out, then |b| > |a| in violation
|
||||
of our input invariant. We've already done the work,
|
||||
but we'll at least complain about it...
|
||||
*/
|
||||
return w ? MP_RANGE : MP_OKAY;
|
||||
}
|
||||
/* {{{ s_mp_mul(a, b) */
|
||||
|
||||
/* Compute a = |a| * |b| */
|
||||
|
@ -3298,24 +3371,55 @@ mp_err s_mp_mul(mp_int *a, const mp_int *b)
|
|||
|
||||
/* }}} */
|
||||
|
||||
/* c += a * b */
|
||||
void s_mp_mul_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
#if !defined(MP_ASSEMBLY_MULTIPLY)
|
||||
/* c = a * b */
|
||||
void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
mp_word w = 0;
|
||||
mp_digit d = 0;
|
||||
|
||||
/* Inner product: Digits of a */
|
||||
while (a_len--) {
|
||||
w += ((mp_word)b * *a++) + *c;
|
||||
mp_word w = ((mp_word)b * *a++) + d;
|
||||
*c++ = ACCUM(w);
|
||||
w = CARRYOUT(w);
|
||||
d = CARRYOUT(w);
|
||||
}
|
||||
*c = d;
|
||||
}
|
||||
|
||||
/* c += a * b */
|
||||
void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
|
||||
mp_digit *c)
|
||||
{
|
||||
mp_digit d = 0;
|
||||
|
||||
/* Inner product: Digits of a */
|
||||
while (a_len--) {
|
||||
mp_word w = ((mp_word)b * *a++) + *c + d;
|
||||
*c++ = ACCUM(w);
|
||||
d = CARRYOUT(w);
|
||||
}
|
||||
*c = d;
|
||||
}
|
||||
|
||||
/* c += a * b */
|
||||
void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
|
||||
{
|
||||
mp_digit d = 0;
|
||||
|
||||
/* Inner product: Digits of a */
|
||||
while (a_len--) {
|
||||
mp_word w = ((mp_word)b * *a++) + *c + d;
|
||||
*c++ = ACCUM(w);
|
||||
d = CARRYOUT(w);
|
||||
}
|
||||
|
||||
while (w) {
|
||||
w += *c;
|
||||
while (d) {
|
||||
mp_word w = (mp_word)*c + d;
|
||||
*c++ = ACCUM(w);
|
||||
w = CARRYOUT(w);
|
||||
d = CARRYOUT(w);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if MP_SQUARE
|
||||
/* {{{ s_mp_sqr(a) */
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
* may use your version of this file under either the MPL or the
|
||||
* GPL.
|
||||
*
|
||||
* $Id: mpi.h,v 1.7 2000-08-04 19:57:24 nelsonb%netscape.com Exp $
|
||||
* $Id: mpi.h,v 1.8 2000-08-22 01:57:33 nelsonb%netscape.com Exp $
|
||||
*/
|
||||
|
||||
#ifndef _H_MPI_
|
||||
|
@ -68,7 +68,7 @@
|
|||
#define MP_UNDEF -5 /* answer is undefined */
|
||||
#define MP_LAST_CODE MP_UNDEF
|
||||
|
||||
typedef char mp_sign;
|
||||
typedef unsigned int mp_sign;
|
||||
typedef unsigned int mp_size;
|
||||
typedef int mp_err;
|
||||
|
||||
|
@ -245,6 +245,9 @@ mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxle
|
|||
mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
|
||||
mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len);
|
||||
|
||||
#define MP_CHECKOK(x) if (MP_OKAY > (res = (x))) goto CLEANUP
|
||||
#define MP_CHECKERR(x) if (MP_OKAY > (res = (x))) goto CLEANUP
|
||||
|
||||
#if defined(MP_API_COMPATIBLE)
|
||||
#define NEG MP_NEG
|
||||
#define ZPOS MP_ZPOS
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
* the GPL. If you do not delete the provisions above, a recipient
|
||||
* may use your version of this file under either the MPL or the
|
||||
* GPL.
|
||||
* $Id: mpmontg.c,v 1.6 2000-08-08 03:20:35 nelsonb%netscape.com Exp $
|
||||
* $Id: mpmontg.c,v 1.7 2000-08-22 01:57:34 nelsonb%netscape.com Exp $
|
||||
*/
|
||||
|
||||
/* This file implements moduluar exponentiation using Montgomery's
|
||||
|
@ -46,13 +46,11 @@
|
|||
#include "mplogic.h"
|
||||
#include "mpprime.h"
|
||||
|
||||
#define MP_CHECKOK(x) if (MP_OKAY != (rv = (x))) goto loser
|
||||
#define MP_CHECKERR(x) if (0 > (rv = (x))) goto loser
|
||||
#define STATIC
|
||||
/* #define DEBUG 1 */
|
||||
|
||||
#define WINDOW_BITS 5
|
||||
#define ODD_INTS 16 /* 2 ** (WINDOW_BITS - 1) */
|
||||
#define MAX_WINDOW_BITS 6
|
||||
#define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */
|
||||
|
||||
typedef struct {
|
||||
mp_int N; /* modulus N */
|
||||
|
@ -60,17 +58,22 @@ typedef struct {
|
|||
mp_size b; /* R == 2 ** b, also b = # significant bits in N */
|
||||
} mp_mont_modulus;
|
||||
|
||||
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
|
||||
mp_mont_modulus *mmm);
|
||||
|
||||
/* computes T = REDC(T), 2^b == R */
|
||||
STATIC
|
||||
mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
|
||||
{
|
||||
mp_err rv;
|
||||
int i;
|
||||
mp_err res;
|
||||
mp_size i;
|
||||
#ifdef DEBUG
|
||||
mp_int m;
|
||||
MP_DIGITS(&m) = 0;
|
||||
#endif
|
||||
|
||||
MP_CHECKOK( s_mp_pad(T, MP_USED(T) + MP_USED(&mmm->N) + 2) );
|
||||
i = MP_USED(T) + MP_USED(&mmm->N) + 2;
|
||||
MP_CHECKOK( s_mp_pad(T, i) );
|
||||
for (i = 0; i < MP_USED(&mmm->N); ++i ) {
|
||||
mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime;
|
||||
/* T += N * m_i * (MP_RADIX ** i); */
|
||||
|
@ -84,39 +87,100 @@ mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
|
|||
MP_CHECKOK( mp_div_2d(T, mmm->b, T, &m));
|
||||
/* here, remainder m should be equal to zero */
|
||||
if (mp_cmp_z(&m) != 0) {
|
||||
rv = MP_UNDEF;
|
||||
goto loser;
|
||||
res = MP_UNDEF;
|
||||
goto CLEANUP;
|
||||
}
|
||||
#else
|
||||
s_mp_div_2d(T, mmm->b);
|
||||
#endif
|
||||
|
||||
if ((rv = s_mp_cmp(T, &mmm->N)) >= 0) {
|
||||
if ((res = s_mp_cmp(T, &mmm->N)) >= 0) {
|
||||
/* T = T - N */
|
||||
MP_CHECKOK( s_mp_sub(T, &mmm->N) );
|
||||
#ifdef DEBUG
|
||||
if ((rv = mp_cmp(T, &mmm->N)) >= 0) {
|
||||
rv = MP_UNDEF;
|
||||
if ((res = mp_cmp(T, &mmm->N)) >= 0) {
|
||||
res = MP_UNDEF;
|
||||
goto CLEANUP;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
rv = MP_OKAY;
|
||||
loser:
|
||||
res = MP_OKAY;
|
||||
CLEANUP:
|
||||
#ifdef DEBUG
|
||||
mp_clear(&m);
|
||||
#endif
|
||||
return rv;
|
||||
return res;
|
||||
}
|
||||
|
||||
mp_err mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
|
||||
#if !defined(MP_ASSEMBLY_MUL_MONT) && !defined(MP_MONT_USE_MP_MUL)
|
||||
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
|
||||
mp_mont_modulus *mmm)
|
||||
{
|
||||
mp_err rv;
|
||||
mp_digit *pb;
|
||||
mp_digit m_i;
|
||||
mp_err res;
|
||||
mp_size ib;
|
||||
mp_size useda, usedb;
|
||||
|
||||
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
|
||||
|
||||
if (MP_USED(a) < MP_USED(b)) {
|
||||
const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
|
||||
b = a;
|
||||
a = xch;
|
||||
}
|
||||
|
||||
MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
|
||||
ib = MP_USED(a) + MP_MAX(MP_USED(b), MP_USED(&mmm->N)) + 2;
|
||||
if((res = s_mp_pad(c, ib)) != MP_OKAY)
|
||||
goto CLEANUP;
|
||||
|
||||
useda = MP_USED(a);
|
||||
pb = MP_DIGITS(b);
|
||||
s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
|
||||
s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
|
||||
m_i = MP_DIGIT(c, 0) * mmm->n0prime;
|
||||
s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);
|
||||
|
||||
/* Outer loop: Digits of b */
|
||||
usedb = MP_USED(b);
|
||||
for (ib = 1; ib < usedb; ib++) {
|
||||
mp_digit b_i = *pb++;
|
||||
|
||||
/* Inner product: Digits of a */
|
||||
if (b_i)
|
||||
s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
|
||||
m_i = MP_DIGIT(c, ib) * mmm->n0prime;
|
||||
s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
|
||||
}
|
||||
if (usedb < MP_USED(&mmm->N)) {
|
||||
for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib ) {
|
||||
m_i = MP_DIGIT(c, ib) * mmm->n0prime;
|
||||
s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
|
||||
}
|
||||
}
|
||||
s_mp_clamp(c);
|
||||
s_mp_div_2d(c, mmm->b);
|
||||
if (s_mp_cmp(c, &mmm->N) >= 0) {
|
||||
MP_CHECKOK( s_mp_sub(c, &mmm->N) );
|
||||
}
|
||||
res = MP_OKAY;
|
||||
|
||||
CLEANUP:
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
STATIC
|
||||
mp_err s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
|
||||
{
|
||||
mp_err res;
|
||||
|
||||
/* xMont = x * R mod N where N is modulus */
|
||||
MP_CHECKOK( mpl_lsh(x, xMont, mmm->b) ); /* xMont = x << b */
|
||||
MP_CHECKOK( mp_div(xMont, &mmm->N, 0, xMont) ); /* mod N */
|
||||
loser:
|
||||
return rv;
|
||||
CLEANUP:
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
@ -124,28 +188,34 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
|
|||
const mp_int *modulus, mp_int *result)
|
||||
{
|
||||
const mp_int *base;
|
||||
mp_int *pa1, *pa2, *ptmp;
|
||||
mp_size bits_in_exponent;
|
||||
mp_size i;
|
||||
mp_err rv;
|
||||
mp_int square, accum, goodBase, tmp;
|
||||
mp_size window_bits, odd_ints;
|
||||
mp_err res;
|
||||
mp_int square, accum1, accum2, goodBase;
|
||||
mp_mont_modulus mmm;
|
||||
|
||||
/* function for computing n0prime only works if n0 is odd */
|
||||
if (!mp_isodd(modulus))
|
||||
return s_mp_exptmod(inBase, exponent, modulus, result);
|
||||
|
||||
MP_DIGITS(&square) = 0;
|
||||
MP_DIGITS(&accum1) = 0;
|
||||
MP_DIGITS(&accum2) = 0;
|
||||
MP_DIGITS(&goodBase) = 0;
|
||||
|
||||
if (mp_cmp(inBase, modulus) < 0) {
|
||||
base = inBase;
|
||||
MP_DIGITS(&goodBase) = 0;
|
||||
} else {
|
||||
mp_init(&goodBase);
|
||||
MP_CHECKOK( mp_init(&goodBase) );
|
||||
base = &goodBase;
|
||||
MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) );
|
||||
}
|
||||
|
||||
mp_init_size(&square, 2 * MP_USED(modulus) + 2);
|
||||
mp_init_size(&accum, 3 * MP_USED(modulus) + 2);
|
||||
mp_init_size(&tmp, 3 * MP_USED(modulus) + 2);
|
||||
MP_CHECKOK( mp_init_size(&square, 2 * MP_USED(modulus) + 2) );
|
||||
MP_CHECKOK( mp_init_size(&accum1, 3 * MP_USED(modulus) + 2) );
|
||||
MP_CHECKOK( mp_init_size(&accum2, 3 * MP_USED(modulus) + 2) );
|
||||
|
||||
mmm.N = *modulus; /* a copy of the mp_int struct */
|
||||
i = mpl_significant_bits(modulus);
|
||||
|
@ -157,18 +227,25 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
|
|||
*/
|
||||
mmm.n0prime = 0 - s_mp_invmod_32b( MP_DIGIT(modulus, 0) );
|
||||
|
||||
MP_CHECKOK( mp_to_mont(base, &mmm, &square) );
|
||||
MP_CHECKOK( s_mp_to_mont(base, &mmm, &square) );
|
||||
|
||||
bits_in_exponent = mpl_significant_bits(exponent);
|
||||
i = bits_in_exponent % WINDOW_BITS;
|
||||
if (bits_in_exponent > 480)
|
||||
window_bits = 6;
|
||||
else if (bits_in_exponent > 160)
|
||||
window_bits = 5;
|
||||
else
|
||||
window_bits = 4;
|
||||
odd_ints = 1 << (window_bits - 1);
|
||||
i = bits_in_exponent % window_bits;
|
||||
if (i != 0) {
|
||||
bits_in_exponent += WINDOW_BITS - i;
|
||||
bits_in_exponent += window_bits - i;
|
||||
}
|
||||
{
|
||||
/* oddPowers[i] = base ** (2*i + 1); */
|
||||
/* power2 = base ** 2; */
|
||||
int expOff;
|
||||
mp_int power2, oddPowers[ODD_INTS];
|
||||
/* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
|
||||
mp_int power2, oddPowers[MAX_ODD_INTS];
|
||||
|
||||
MP_CHECKOK( mp_init_copy(oddPowers, &square) );
|
||||
|
||||
|
@ -176,97 +253,122 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
|
|||
MP_CHECKOK( mp_sqr(&square, &power2) ); /* square = square ** 2 */
|
||||
MP_CHECKOK( s_mp_redc(&power2, &mmm) );
|
||||
|
||||
for (i = 1; i < ODD_INTS; ++i) {
|
||||
for (i = 1; i < odd_ints; ++i) {
|
||||
mp_init_size(oddPowers + i, MP_USED(modulus) + 2 * MP_USED(&power2) + 2);
|
||||
MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) );
|
||||
MP_CHECKOK( s_mp_redc(oddPowers + i, &mmm) );
|
||||
}
|
||||
mp_set(&accum, 1);
|
||||
MP_CHECKOK( mp_to_mont(&accum, &mmm, &accum) );
|
||||
mp_set(&accum1, 1);
|
||||
MP_CHECKOK( s_mp_to_mont(&accum1, &mmm, &accum1) );
|
||||
pa1 = &accum1;
|
||||
pa2 = &accum2;
|
||||
|
||||
#define SQUARE \
|
||||
MP_CHECKOK( mp_sqr(&accum, &tmp) );\
|
||||
mp_exch(&accum, &tmp); \
|
||||
MP_CHECKOK( s_mp_redc(&accum, &mmm) )
|
||||
#define SQR(a,b) \
|
||||
MP_CHECKOK( mp_sqr(a, b) );\
|
||||
MP_CHECKOK( s_mp_redc(b, &mmm) )
|
||||
|
||||
#define MUL(x) \
|
||||
MP_CHECKOK( mp_mul(&accum, oddPowers + (x), &tmp) ); \
|
||||
mp_exch(&accum, &tmp); \
|
||||
MP_CHECKOK( s_mp_redc(&accum, &mmm))
|
||||
|
||||
for (expOff = bits_in_exponent - WINDOW_BITS; expOff >= 0; expOff -= WINDOW_BITS) {
|
||||
mp_size smallExp;
|
||||
MP_CHECKERR( mpl_get_bits(exponent, expOff, WINDOW_BITS) );
|
||||
smallExp = (mp_size)rv;
|
||||
|
||||
#if WINDOW_BITS == 4
|
||||
if (!smallExp) {
|
||||
SQUARE; SQUARE; SQUARE; SQUARE;
|
||||
} else if (smallExp & 1) {
|
||||
SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/2);
|
||||
} else if (smallExp & 2) {
|
||||
SQUARE; SQUARE; SQUARE; MUL(smallExp/4); SQUARE;
|
||||
} else if (smallExp & 4) {
|
||||
SQUARE; SQUARE; MUL(smallExp/8); SQUARE; SQUARE;
|
||||
} else if (smallExp & 8) {
|
||||
SQUARE; MUL(smallExp/16); SQUARE; SQUARE; SQUARE;
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
#elif WINDOW_BITS == 5
|
||||
if (!smallExp) {
|
||||
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE;
|
||||
} else if (smallExp & 1) {
|
||||
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/2);
|
||||
} else if (smallExp & 2) {
|
||||
SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/4); SQUARE;
|
||||
} else if (smallExp & 4) {
|
||||
SQUARE; SQUARE; SQUARE; MUL(smallExp/8); SQUARE; SQUARE;
|
||||
} else if (smallExp & 8) {
|
||||
SQUARE; SQUARE; MUL(smallExp/16); SQUARE; SQUARE; SQUARE;
|
||||
} else if (smallExp & 0x10) {
|
||||
SQUARE; MUL(smallExp/32); SQUARE; SQUARE; SQUARE; SQUARE;
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
#elif WINDOW_BITS == 6
|
||||
if (!smallExp) {
|
||||
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; SQUARE;
|
||||
} else if (smallExp & 1) {
|
||||
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/2);
|
||||
} else if (smallExp & 2) {
|
||||
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/4); SQUARE;
|
||||
} else if (smallExp & 4) {
|
||||
SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/8); SQUARE; SQUARE;
|
||||
} else if (smallExp & 8) {
|
||||
SQUARE; SQUARE; SQUARE; MUL(smallExp/16); SQUARE; SQUARE; SQUARE;
|
||||
} else if (smallExp & 0x10) {
|
||||
SQUARE; SQUARE; MUL(smallExp/32); SQUARE; SQUARE; SQUARE; SQUARE;
|
||||
} else if (smallExp & 0x20) {
|
||||
SQUARE; MUL(smallExp/64); SQUARE; SQUARE; SQUARE; SQUARE; SQUARE;
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
#if defined(MP_MONT_USE_MP_MUL)
|
||||
#define MUL(x,a,b) \
|
||||
MP_CHECKOK( mp_mul(a, oddPowers + (x), b) ); \
|
||||
MP_CHECKOK( s_mp_redc(b, &mmm) )
|
||||
#else
|
||||
#error "Unknown value for WINDOW_BITS"
|
||||
#define MUL(x,a,b) \
|
||||
MP_CHECKOK( s_mp_mul_mont(a, oddPowers + (x), b, &mmm) )
|
||||
#endif
|
||||
|
||||
#define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp
|
||||
|
||||
for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
|
||||
mp_size smallExp;
|
||||
MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
|
||||
smallExp = (mp_size)res;
|
||||
|
||||
if (window_bits == 4) {
|
||||
if (!smallExp) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
} else if (smallExp & 1) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
MUL(smallExp/2, pa1,pa2); SWAPPA;
|
||||
} else if (smallExp & 2) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
|
||||
MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
|
||||
} else if (smallExp & 4) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2);
|
||||
SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
|
||||
} else if (smallExp & 8) {
|
||||
SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2);
|
||||
SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
} else if (window_bits == 5) {
|
||||
if (!smallExp) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
SQR(pa1,pa2); SWAPPA;
|
||||
} else if (smallExp & 1) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
SQR(pa1,pa2); MUL(smallExp/2,pa2,pa1);
|
||||
} else if (smallExp & 2) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
MUL(smallExp/4,pa1,pa2); SQR(pa2,pa1);
|
||||
} else if (smallExp & 4) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
|
||||
MUL(smallExp/8,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
} else if (smallExp & 8) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/16,pa1,pa2);
|
||||
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
} else if (smallExp & 0x10) {
|
||||
SQR(pa1,pa2); MUL(smallExp/32,pa2,pa1); SQR(pa1,pa2);
|
||||
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
} else if (window_bits == 6) {
|
||||
if (!smallExp) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
} else if (smallExp & 1) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2,pa1,pa2); SWAPPA;
|
||||
} else if (smallExp & 2) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
|
||||
} else if (smallExp & 4) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
|
||||
} else if (smallExp & 8) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
|
||||
MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
|
||||
SQR(pa1,pa2); SWAPPA;
|
||||
} else if (smallExp & 0x10) {
|
||||
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/32,pa1,pa2);
|
||||
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
|
||||
} else if (smallExp & 0x20) {
|
||||
SQR(pa1,pa2); MUL(smallExp/64,pa2,pa1); SQR(pa1,pa2);
|
||||
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
mp_clear(&power2);
|
||||
for (i = 0; i < ODD_INTS; ++i) {
|
||||
for (i = 0; i < odd_ints; ++i) {
|
||||
mp_clear(oddPowers + i);
|
||||
}
|
||||
}
|
||||
rv = s_mp_redc(&accum, &mmm);
|
||||
mp_exch(&accum, result);
|
||||
loser:
|
||||
res = s_mp_redc(pa1, &mmm);
|
||||
mp_exch(pa1, result);
|
||||
CLEANUP:
|
||||
mp_clear(&square);
|
||||
mp_clear(&accum);
|
||||
mp_clear(&accum1);
|
||||
mp_clear(&accum2);
|
||||
mp_clear(&goodBase);
|
||||
mp_clear(&tmp);
|
||||
/* Don't mp_clear mmm.N because it is merely a copy of modulus.
|
||||
** Just zap it.
|
||||
*/
|
||||
memset(&mmm, 0, sizeof mmm);
|
||||
return rv;
|
||||
return res;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче