- In mpi-priv.h, declare new 3 argument versions of s_mp_add and s_mp_sub.
Also declare new set of s_mpv_ functions that operate on vectors (arrays)
of mp_digits instead of on mp_ints.  These functions are candidates for
implementation in assembler.
- In mpi.c reimplement mp_add and mp_sub using the new 3arg functions.
Implement 3 argument versions of s_mp_add and s_mp_sub.
This eliminates all need for temporary variables in mp_add and mp_sub.
Implement c language reference implementations of new s_mpv vector multiply
and multiply and add functions.  Change mp_mul and mp_sqr so they no longer
pre-zero the output variable.  It's no longer nececssary with the new s_mpv
functions.  s_mp_pad no longer zeros out the new padded space.
-In mpmontg.c, implement variable width exponetiation windows.  Implement
a new function to compute the multiply and Montgomery reduction in a
single pass.  This is "Improvement 2" from Dusse' and Kaliski's paper
"A Cryptographic Library for the Motorola DSP56000".  Performance impact
is negligible in this c implementation.  However, this function is another
target for assembly language optimization.
This commit is contained in:
nelsonb%netscape.com 2000-08-22 01:57:34 +00:00
Родитель a10342172b
Коммит f9eed96546
4 изменённых файлов: 456 добавлений и 235 удалений

Просмотреть файл

@ -1,8 +1,11 @@
/* /*
* mpi-priv.h * mpi-priv.h - Private header file for MPI
*
* Arbitrary precision integer arithmetic library * Arbitrary precision integer arithmetic library
* *
* NOTE WELL: the content of this header file is NOT part of the "public"
* API for the MPI library, and may change at any time.
* Application programs that use libmpi should NOT include this header file.
*
* The contents of this file are subject to the Mozilla Public * The contents of this file are subject to the Mozilla Public
* License Version 1.1 (the "License"); you may not use this file * License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of * except in compliance with the License. You may obtain a copy of
@ -35,7 +38,7 @@
* the GPL. If you do not delete the provisions above, a recipient * the GPL. If you do not delete the provisions above, a recipient
* may use your version of this file under either the MPL or the GPL. * may use your version of this file under either the MPL or the GPL.
* *
* $Id: mpi-priv.h,v 1.8 2000-08-09 20:42:18 nelsonb%netscape.com Exp $ * $Id: mpi-priv.h,v 1.9 2000-08-22 01:57:33 nelsonb%netscape.com Exp $
*/ */
#ifndef _MPI_PRIV_H_ #ifndef _MPI_PRIV_H_
#define _MPI_PRIV_H_ 1 #define _MPI_PRIV_H_ 1
@ -142,6 +145,7 @@ extern const float s_logv_2[];
void s_mp_free(void *ptr); /* general free function */ void s_mp_free(void *ptr); /* general free function */
extern unsigned long mp_allocs; extern unsigned long mp_allocs;
extern unsigned long mp_frees; extern unsigned long mp_frees;
extern unsigned long mp_copies;
#else #else
/* Even if these are defined as macros, we need to respect the settings /* Even if these are defined as macros, we need to respect the settings
@ -198,9 +202,11 @@ mp_err s_mp_mod_d(mp_int *mp, mp_digit d, mp_digit *r);
mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu); mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu);
/* Barrett reduction */ /* Barrett reduction */
mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */ mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */
mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c);
mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c);
mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset); mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset);
/* a += b * RADIX^offset */ /* a += b * RADIX^offset */
mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */ mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */
#if MP_SQUARE #if MP_SQUARE
mp_err s_mp_sqr(mp_int *a); /* magnitude square */ mp_err s_mp_sqr(mp_int *a); /* magnitude square */
@ -219,11 +225,17 @@ int s_mp_tovalue(char ch, int r); /* convert ch to value */
char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */ char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */
int s_mp_outlen(int bits, int r); /* output length in bytes */ int s_mp_outlen(int bits, int r); /* output length in bytes */
mp_digit s_mp_invmod_32b(mp_digit P); /* returns (P ** -1) mod (2 ** 32) */ mp_digit s_mp_invmod_32b(mp_digit P); /* returns (P ** -1) mod (2 ** 32) */
void s_mp_mul_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c);
/* c += a * b * (MP_RADIX ** offset); */
/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */
void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c);
void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
mp_digit *c);
void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
mp_digit *c);
/* c += a * b * (MP_RADIX ** offset); */
#define s_mp_mul_d_add_offset(a, b, c, off) \ #define s_mp_mul_d_add_offset(a, b, c, off) \
(s_mp_mul_add(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off), MP_OKAY) (s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off), MP_OKAY)
/* }}} */ /* }}} */
#endif #endif

Просмотреть файл

@ -35,7 +35,7 @@
* the GPL. If you do not delete the provisions above, a recipient * the GPL. If you do not delete the provisions above, a recipient
* may use your version of this file under either the MPL or the GPL. * may use your version of this file under either the MPL or the GPL.
* *
* $Id: mpi.c,v 1.18 2000-08-11 01:58:20 nelsonb%netscape.com Exp $ * $Id: mpi.c,v 1.19 2000-08-22 01:57:34 nelsonb%netscape.com Exp $
*/ */
#include "mpi-priv.h" #include "mpi-priv.h"
@ -81,8 +81,8 @@ static const char *s_dmap_1 =
unsigned long mp_allocs; unsigned long mp_allocs;
unsigned long mp_frees; unsigned long mp_frees;
unsigned long mp_copies;
#define MP_CHECKOK(x) if (MP_OKAY > (res = (x))) goto CLEANUP
/* {{{ Default precision manipulation */ /* {{{ Default precision manipulation */
@ -200,6 +200,7 @@ mp_err mp_copy(const mp_int *from, mp_int *to)
if(from == to) if(from == to)
return MP_OKAY; return MP_OKAY;
++mp_copies;
{ /* copy */ { /* copy */
mp_digit *tmp; mp_digit *tmp;
@ -702,38 +703,22 @@ mp_err mp_neg(const mp_int *a, mp_int *b)
mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c) mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
{ {
mp_int tmp;
mp_err res; mp_err res;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
MP_DIGITS(&tmp) = 0;
if (c == a) {
MP_CHECKOK( mp_init_copy(&tmp, a) );
a = &tmp;
if (c == b)
b = &tmp;
} else if (c == b) {
MP_CHECKOK( mp_init_copy(&tmp, b) );
b = &tmp;
}
if(SIGN(a) == SIGN(b)) { /* same sign: add values, keep sign */ if(SIGN(a) == SIGN(b)) { /* same sign: add values, keep sign */
MP_CHECKOK( mp_copy(a, c) ); MP_CHECKOK( s_mp_add_3arg(a, b, c) );
MP_CHECKOK( s_mp_add(c, b) );
} else if(s_mp_cmp(a, b) >= 0) { /* different sign: a >= b */ } else if(s_mp_cmp(a, b) >= 0) { /* different sign: a >= b */
MP_CHECKOK( mp_copy(a, c) ); MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
MP_CHECKOK( s_mp_sub(c, b) );
} else { /* different sign: a < b */ } else { /* different sign: a < b */
MP_CHECKOK( mp_copy(b, c) ); MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
MP_CHECKOK( s_mp_sub(c, a) );
} }
if (s_mp_cmp_d(c, 0) == MP_EQ) if (s_mp_cmp_d(c, 0) == MP_EQ)
SIGN(c) = ZPOS; SIGN(c) = ZPOS;
CLEANUP: CLEANUP:
mp_clear(&tmp);
return res; return res;
} /* end mp_add() */ } /* end mp_add() */
@ -750,37 +735,25 @@ CLEANUP:
mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c) mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
{ {
mp_int tmp;
mp_err res; mp_err res;
int diffSign; int magDiff;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
MP_DIGITS(&tmp) = 0;
if (a == b) { if (a == b) {
mp_zero(c); mp_zero(c);
return MP_OKAY; return MP_OKAY;
} }
if (c == a) {
MP_CHECKOK( mp_init_copy(&tmp, a) );
a = &tmp;
} else if (c == b) {
MP_CHECKOK( mp_init_copy(&tmp, b) );
b = &tmp;
}
if (MP_SIGN(a) != MP_SIGN(b)) { if (MP_SIGN(a) != MP_SIGN(b)) {
MP_CHECKOK( mp_copy(a, c) ); MP_CHECKOK( s_mp_add_3arg(a, b, c) );
MP_CHECKOK( s_mp_add(c, b) ); } else if (!(magDiff = s_mp_cmp(a, b))) {
} else if (!(diffSign = s_mp_cmp(a, b))) {
mp_zero(c); mp_zero(c);
res = MP_OKAY; res = MP_OKAY;
} else if (diffSign > 0) { } else if (magDiff > 0) {
MP_CHECKOK( mp_copy(a, c) ); MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
MP_CHECKOK( s_mp_sub(c, b) );
} else { } else {
MP_CHECKOK( mp_copy(b, c) ); MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
MP_CHECKOK( s_mp_sub(c, a) );
MP_SIGN(c) = !MP_SIGN(a); MP_SIGN(c) = !MP_SIGN(a);
} }
@ -788,7 +761,6 @@ mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
MP_SIGN(c) = MP_ZPOS; MP_SIGN(c) = MP_ZPOS;
CLEANUP: CLEANUP:
mp_clear(&tmp);
return res; return res;
} /* end mp_sub() */ } /* end mp_sub() */
@ -808,6 +780,7 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
mp_int tmp; mp_int tmp;
mp_err res; mp_err res;
mp_size ib; mp_size ib;
mp_size useda, usedb;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
@ -831,21 +804,24 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
a = xch; a = xch;
} }
/* This has the effect of left-padding with zeroes... */
MP_USED(c) = 1; MP_DIGIT(c, 0) = 0; MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY) if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
goto CLEANUP; goto CLEANUP;
pb = MP_DIGITS(b); pb = MP_DIGITS(b);
s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
/* Outer loop: Digits of b */ /* Outer loop: Digits of b */
for(ib = 0; ib < USED(b); ib++) { useda = MP_USED(a);
usedb = MP_USED(b);
for (ib = 1; ib < usedb; ib++) {
mp_digit b_i = *pb++; mp_digit b_i = *pb++;
if(b_i == 0)
continue;
/* Inner product: Digits of a */ /* Inner product: Digits of a */
s_mp_mul_d_add_offset(a, b_i, c, ib); if (b_i)
s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
else
MP_DIGIT(c, ib + useda) = b_i;
} }
s_mp_clamp(c); s_mp_clamp(c);
@ -876,12 +852,13 @@ CLEANUP:
/* sqr = a^2; Caller provides both a and tmp; */ /* sqr = a^2; Caller provides both a and tmp; */
mp_err mp_sqr(const mp_int *a, mp_int *sqr) mp_err mp_sqr(const mp_int *a, mp_int *sqr)
{ {
mp_digit *pa, *ps, *alim; mp_digit *pa, *ps;
mp_word w; mp_word w;
mp_digit d, k; mp_digit d;
mp_err res; mp_err res;
mp_size ix; mp_size ix;
mp_int tmp; mp_int tmp;
int count;
ARGCHK(a != NULL && sqr != NULL, MP_BADARG); ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
@ -893,56 +870,32 @@ mp_err mp_sqr(const mp_int *a, mp_int *sqr)
DIGITS(&tmp) = 0; DIGITS(&tmp) = 0;
} }
/* Left-pad with zeroes */ ix = 2 * MP_USED(a);
MP_USED(sqr) = 1; MP_DIGIT(sqr, 0) = 0; if (ix > MP_ALLOC(sqr)) {
if((res = s_mp_pad(sqr, 2 * USED(a))) != MP_OKAY) MP_USED(sqr) = 1;
goto CLEANUP; MP_CHECKOK( s_mp_grow(sqr, ix) );
}
/* MP_USED(sqr) = ix;
The inner product is computed as: MP_DIGIT(sqr, 0) = 0;
(C, S) = t[i,j] + 2 a[i] a[j] + C
*/
pa = MP_DIGITS(a); pa = MP_DIGITS(a);
alim = pa + MP_USED(a); count = MP_USED(a) - 1;
for (ix = 0; pa < alim; ++ix) { if (count > 0) {
d = *pa++; d = *pa++;
ps = MP_DIGITS(sqr) + 1 + (ix << 1); s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
s_mp_mul_add(pa, alim - pa, d, ps); for (ix = 3; --count > 0; ix += 2) {
} /* for(ix ...) */ d = *pa++;
s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
} /* for(ix ...) */
MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
/* now sqr *= 2 */ /* now sqr *= 2 */
ps = MP_DIGITS(sqr); s_mp_mul_2(sqr);
k = 0; } else {
#define SHIFT_1_BIT(n) \ MP_DIGIT(sqr, 1) = 0;
d = ps[n]; ps[n] = (d << 1) | k; k = d >> (DIGIT_BIT - 1)
for (ix = MP_USED(sqr); ix >= 8; ix -= 8) {
SHIFT_1_BIT(0);
SHIFT_1_BIT(1);
SHIFT_1_BIT(2);
SHIFT_1_BIT(3);
SHIFT_1_BIT(4);
SHIFT_1_BIT(5);
SHIFT_1_BIT(6);
SHIFT_1_BIT(7);
ps += 8;
}
if (ix) {
ps += ix;
switch (ix) { /* all fallthru */
case 7: SHIFT_1_BIT(-7); /* FALLTHRU */
case 6: SHIFT_1_BIT(-6); /* FALLTHRU */
case 5: SHIFT_1_BIT(-5); /* FALLTHRU */
case 4: SHIFT_1_BIT(-4); /* FALLTHRU */
case 3: SHIFT_1_BIT(-3); /* FALLTHRU */
case 2: SHIFT_1_BIT(-2); /* FALLTHRU */
case 1: SHIFT_1_BIT(-1); /* FALLTHRU */
case 0: break;
}
} }
pa = MP_DIGITS(a); pa = MP_DIGITS(a);
alim = pa + MP_USED(a);
ps = MP_DIGITS(sqr); ps = MP_DIGITS(sqr);
w = 0; w = 0;
#define ADD_SQUARE(n) \ #define ADD_SQUARE(n) \
@ -2544,7 +2497,7 @@ mp_err s_mp_pad(mp_int *mp, mp_size min)
if ((res = s_mp_grow(mp, min)) != MP_OKAY) if ((res = s_mp_grow(mp, min)) != MP_OKAY)
return res; return res;
} else { } else {
s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp)); /* s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp)); */
} }
/* Increase precision; should already be 0-filled */ /* Increase precision; should already be 0-filled */
@ -2770,12 +2723,15 @@ void s_mp_rshd(mp_int *mp, mp_size p)
for (ix = USED(mp) - p; ix > 0; ix--) for (ix = USED(mp) - p; ix > 0; ix--)
*dst++ = *src++; *dst++ = *src++;
MP_USED(mp) -= p;
/* Fill the top digits with zeroes */ /* Fill the top digits with zeroes */
while (p-- > 0) while (p-- > 0)
*dst++ = 0; *dst++ = 0;
#if 0
/* Strip off any leading zeroes */ /* Strip off any leading zeroes */
s_mp_clamp(mp); s_mp_clamp(mp);
#endif
} /* end s_mp_rshd() */ } /* end s_mp_rshd() */
@ -2796,21 +2752,23 @@ void s_mp_div_2(mp_int *mp)
mp_err s_mp_mul_2(mp_int *mp) mp_err s_mp_mul_2(mp_int *mp)
{ {
int ix; mp_digit *pd;
mp_digit kin = 0, kout; int ix, used;
mp_err res; mp_digit kin = 0;
/* Shift digits leftward by 1 bit */ /* Shift digits leftward by 1 bit */
for(ix = 0; ix < USED(mp); ix++) { used = MP_USED(mp);
kout = (DIGIT(mp, ix) >> (DIGIT_BIT - 1)) & 1; pd = MP_DIGITS(mp);
DIGIT(mp, ix) = (DIGIT(mp, ix) << 1) | kin; for (ix = 0; ix < used; ix++) {
mp_digit d = *pd;
kin = kout; *pd++ = (d << 1) | kin;
kin = (d >> (DIGIT_BIT - 1));
} }
/* Deal with rollover from last digit */ /* Deal with rollover from last digit */
if(kin) { if (kin) {
if(ix >= ALLOC(mp)) { if (ix >= ALLOC(mp)) {
mp_err res;
if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY) if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
return res; return res;
} }
@ -3139,8 +3097,10 @@ mp_err s_mp_mod_d(mp_int *mp, mp_digit d, mp_digit *r)
/* Compute a = |a| + |b| */ /* Compute a = |a| + |b| */
mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */ mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
{ {
mp_word w, k = 0; mp_digit *pa, *pb;
mp_word w = 0;
mp_size ix; mp_size ix;
mp_size used;
mp_err res; mp_err res;
/* Make sure a has enough precision for the output value */ /* Make sure a has enough precision for the output value */
@ -3154,19 +3114,23 @@ mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
less precision, we'll have to make sure the carry out is duly less precision, we'll have to make sure the carry out is duly
propagated upward among the higher-order digits of the sum. propagated upward among the higher-order digits of the sum.
*/ */
for(ix = 0; ix < USED(b); ix++) { pa = MP_DIGITS(a);
w = (mp_word)DIGIT(a, ix) + DIGIT(b, ix) + k; pb = MP_DIGITS(b);
DIGIT(a, ix) = ACCUM(w); used = MP_USED(b);
k = CARRYOUT(w); for(ix = 0; ix < used; ix++) {
w = w + *pa + *pb++;
*pa++ = ACCUM(w);
w = CARRYOUT(w);
} }
/* If we run out of 'b' digits before we're actually done, make /* If we run out of 'b' digits before we're actually done, make
sure the carries get propagated upward... sure the carries get propagated upward...
*/ */
while(k && ix < USED(a)) { used = MP_USED(a);
w = (mp_word)DIGIT(a, ix) + k; while (w && ix < used) {
DIGIT(a, ix) = ACCUM(w); w = w + *pa;
k = CARRYOUT(w); *pa++ = ACCUM(w);
w = CARRYOUT(w);
++ix; ++ix;
} }
@ -3174,11 +3138,11 @@ mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
it. We could have done this initially, but why touch the memory it. We could have done this initially, but why touch the memory
allocator unless we're sure we have to? allocator unless we're sure we have to?
*/ */
if(k) { if (w) {
if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY) if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
return res; return res;
DIGIT(a, ix) = (mp_digit)k; DIGIT(a, ix) = (mp_digit)w;
} }
return MP_OKAY; return MP_OKAY;
@ -3187,6 +3151,69 @@ mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
/* }}} */ /* }}} */
/* Compute c = |a| + |b| */ /* magnitude addition */
mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
{
mp_digit *pa, *pb, *pc;
mp_word w = 0;
mp_size ix;
mp_size used;
mp_err res;
MP_SIGN(c) = MP_SIGN(a);
if (MP_USED(a) < MP_USED(b)) {
const mp_int *xch = a;
a = b;
b = xch;
}
/* Make sure a has enough precision for the output value */
if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
return res;
/*
Add up all digits up to the precision of b. If b had initially
the same precision as a, or greater, we took care of it by the
exchange step above, so there is no problem. If b had initially
less precision, we'll have to make sure the carry out is duly
propagated upward among the higher-order digits of the sum.
*/
pa = MP_DIGITS(a);
pb = MP_DIGITS(b);
pc = MP_DIGITS(c);
used = MP_USED(b);
for (ix = 0; ix < used; ix++) {
w = w + *pa++ + *pb++;
*pc++ = ACCUM(w);
w = CARRYOUT(w);
}
/* If we run out of 'b' digits before we're actually done, make
sure the carries get propagated upward...
*/
used = MP_USED(a);
while (ix < used) {
w = w + *pa++;
*pc++ = ACCUM(w);
w = CARRYOUT(w);
++ix;
}
/* If there's an overall carry out, increase precision and include
it. We could have done this initially, but why touch the memory
allocator unless we're sure we have to?
*/
if (w) {
if((res = s_mp_pad(c, ix + 1)) != MP_OKAY)
return res;
DIGIT(c, ix) = (mp_digit)w;
++ix;
}
MP_USED(c) = ix;
return MP_OKAY;
}
/* {{{ s_mp_add_offset(a, b, offset) */ /* {{{ s_mp_add_offset(a, b, offset) */
/* Compute a = |a| + ( |b| * (RADIX ** offset) ) */ /* Compute a = |a| + ( |b| * (RADIX ** offset) ) */
@ -3288,6 +3315,52 @@ mp_err s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract */
/* }}} */ /* }}} */
/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract */
mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
{
mp_digit *pa, *pb, *pc;
mp_sword w = 0;
int ix, limit;
mp_err res;
MP_SIGN(c) = MP_SIGN(a);
/* Make sure a has enough precision for the output value */
if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
return res;
/*
Subtract and propagate borrow. Up to the precision of b, this
accounts for the digits of b; after that, we just make sure the
carries get to the right place. This saves having to pad b out to
the precision of a just to make the loops work right...
*/
pa = MP_DIGITS(a);
pb = MP_DIGITS(b);
pc = MP_DIGITS(c);
limit = MP_USED(b);
for (ix = 0; ix < limit; ++ix) {
w = w + *pa++ - *pb++;
*pc++ = ACCUM(w);
w >>= MP_DIGIT_BIT;
}
for (limit = MP_USED(a); ix < limit; ++ix) {
w = w + *pa++;
*pc++ = ACCUM(w);
w >>= MP_DIGIT_BIT;
}
/* Clobber any leading zeroes we created */
MP_USED(c) = ix;
s_mp_clamp(c);
/*
If there was a borrow out, then |b| > |a| in violation
of our input invariant. We've already done the work,
but we'll at least complain about it...
*/
return w ? MP_RANGE : MP_OKAY;
}
/* {{{ s_mp_mul(a, b) */ /* {{{ s_mp_mul(a, b) */
/* Compute a = |a| * |b| */ /* Compute a = |a| * |b| */
@ -3298,24 +3371,55 @@ mp_err s_mp_mul(mp_int *a, const mp_int *b)
/* }}} */ /* }}} */
/* c += a * b */ #if !defined(MP_ASSEMBLY_MULTIPLY)
void s_mp_mul_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) /* c = a * b */
void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{ {
mp_word w = 0; mp_digit d = 0;
/* Inner product: Digits of a */ /* Inner product: Digits of a */
while (a_len--) { while (a_len--) {
w += ((mp_word)b * *a++) + *c; mp_word w = ((mp_word)b * *a++) + d;
*c++ = ACCUM(w); *c++ = ACCUM(w);
w = CARRYOUT(w); d = CARRYOUT(w);
}
*c = d;
}
/* c += a * b */
void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
mp_digit *c)
{
mp_digit d = 0;
/* Inner product: Digits of a */
while (a_len--) {
mp_word w = ((mp_word)b * *a++) + *c + d;
*c++ = ACCUM(w);
d = CARRYOUT(w);
}
*c = d;
}
/* c += a * b */
void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
mp_digit d = 0;
/* Inner product: Digits of a */
while (a_len--) {
mp_word w = ((mp_word)b * *a++) + *c + d;
*c++ = ACCUM(w);
d = CARRYOUT(w);
} }
while (w) { while (d) {
w += *c; mp_word w = (mp_word)*c + d;
*c++ = ACCUM(w); *c++ = ACCUM(w);
w = CARRYOUT(w); d = CARRYOUT(w);
} }
} }
#endif
#if MP_SQUARE #if MP_SQUARE
/* {{{ s_mp_sqr(a) */ /* {{{ s_mp_sqr(a) */

Просмотреть файл

@ -36,7 +36,7 @@
* may use your version of this file under either the MPL or the * may use your version of this file under either the MPL or the
* GPL. * GPL.
* *
* $Id: mpi.h,v 1.7 2000-08-04 19:57:24 nelsonb%netscape.com Exp $ * $Id: mpi.h,v 1.8 2000-08-22 01:57:33 nelsonb%netscape.com Exp $
*/ */
#ifndef _H_MPI_ #ifndef _H_MPI_
@ -68,7 +68,7 @@
#define MP_UNDEF -5 /* answer is undefined */ #define MP_UNDEF -5 /* answer is undefined */
#define MP_LAST_CODE MP_UNDEF #define MP_LAST_CODE MP_UNDEF
typedef char mp_sign; typedef unsigned int mp_sign;
typedef unsigned int mp_size; typedef unsigned int mp_size;
typedef int mp_err; typedef int mp_err;
@ -245,6 +245,9 @@ mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxle
mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen); mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len); mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len);
#define MP_CHECKOK(x) if (MP_OKAY > (res = (x))) goto CLEANUP
#define MP_CHECKERR(x) if (MP_OKAY > (res = (x))) goto CLEANUP
#if defined(MP_API_COMPATIBLE) #if defined(MP_API_COMPATIBLE)
#define NEG MP_NEG #define NEG MP_NEG
#define ZPOS MP_ZPOS #define ZPOS MP_ZPOS

Просмотреть файл

@ -29,7 +29,7 @@
* the GPL. If you do not delete the provisions above, a recipient * the GPL. If you do not delete the provisions above, a recipient
* may use your version of this file under either the MPL or the * may use your version of this file under either the MPL or the
* GPL. * GPL.
* $Id: mpmontg.c,v 1.6 2000-08-08 03:20:35 nelsonb%netscape.com Exp $ * $Id: mpmontg.c,v 1.7 2000-08-22 01:57:34 nelsonb%netscape.com Exp $
*/ */
/* This file implements moduluar exponentiation using Montgomery's /* This file implements moduluar exponentiation using Montgomery's
@ -46,13 +46,11 @@
#include "mplogic.h" #include "mplogic.h"
#include "mpprime.h" #include "mpprime.h"
#define MP_CHECKOK(x) if (MP_OKAY != (rv = (x))) goto loser
#define MP_CHECKERR(x) if (0 > (rv = (x))) goto loser
#define STATIC #define STATIC
/* #define DEBUG 1 */ /* #define DEBUG 1 */
#define WINDOW_BITS 5 #define MAX_WINDOW_BITS 6
#define ODD_INTS 16 /* 2 ** (WINDOW_BITS - 1) */ #define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */
typedef struct { typedef struct {
mp_int N; /* modulus N */ mp_int N; /* modulus N */
@ -60,17 +58,22 @@ typedef struct {
mp_size b; /* R == 2 ** b, also b = # significant bits in N */ mp_size b; /* R == 2 ** b, also b = # significant bits in N */
} mp_mont_modulus; } mp_mont_modulus;
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
mp_mont_modulus *mmm);
/* computes T = REDC(T), 2^b == R */ /* computes T = REDC(T), 2^b == R */
STATIC STATIC
mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm) mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
{ {
mp_err rv; mp_err res;
int i; mp_size i;
#ifdef DEBUG #ifdef DEBUG
mp_int m; mp_int m;
MP_DIGITS(&m) = 0;
#endif #endif
MP_CHECKOK( s_mp_pad(T, MP_USED(T) + MP_USED(&mmm->N) + 2) ); i = MP_USED(T) + MP_USED(&mmm->N) + 2;
MP_CHECKOK( s_mp_pad(T, i) );
for (i = 0; i < MP_USED(&mmm->N); ++i ) { for (i = 0; i < MP_USED(&mmm->N); ++i ) {
mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime; mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime;
/* T += N * m_i * (MP_RADIX ** i); */ /* T += N * m_i * (MP_RADIX ** i); */
@ -84,39 +87,100 @@ mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
MP_CHECKOK( mp_div_2d(T, mmm->b, T, &m)); MP_CHECKOK( mp_div_2d(T, mmm->b, T, &m));
/* here, remainder m should be equal to zero */ /* here, remainder m should be equal to zero */
if (mp_cmp_z(&m) != 0) { if (mp_cmp_z(&m) != 0) {
rv = MP_UNDEF; res = MP_UNDEF;
goto loser; goto CLEANUP;
} }
#else #else
s_mp_div_2d(T, mmm->b); s_mp_div_2d(T, mmm->b);
#endif #endif
if ((rv = s_mp_cmp(T, &mmm->N)) >= 0) { if ((res = s_mp_cmp(T, &mmm->N)) >= 0) {
/* T = T - N */ /* T = T - N */
MP_CHECKOK( s_mp_sub(T, &mmm->N) ); MP_CHECKOK( s_mp_sub(T, &mmm->N) );
#ifdef DEBUG #ifdef DEBUG
if ((rv = mp_cmp(T, &mmm->N)) >= 0) { if ((res = mp_cmp(T, &mmm->N)) >= 0) {
rv = MP_UNDEF; res = MP_UNDEF;
goto CLEANUP;
} }
#endif #endif
} }
rv = MP_OKAY; res = MP_OKAY;
loser: CLEANUP:
#ifdef DEBUG #ifdef DEBUG
mp_clear(&m); mp_clear(&m);
#endif #endif
return rv; return res;
} }
mp_err mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont) #if !defined(MP_ASSEMBLY_MUL_MONT) && !defined(MP_MONT_USE_MP_MUL)
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
mp_mont_modulus *mmm)
{ {
mp_err rv; mp_digit *pb;
mp_digit m_i;
mp_err res;
mp_size ib;
mp_size useda, usedb;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
if (MP_USED(a) < MP_USED(b)) {
const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
b = a;
a = xch;
}
MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
ib = MP_USED(a) + MP_MAX(MP_USED(b), MP_USED(&mmm->N)) + 2;
if((res = s_mp_pad(c, ib)) != MP_OKAY)
goto CLEANUP;
useda = MP_USED(a);
pb = MP_DIGITS(b);
s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
m_i = MP_DIGIT(c, 0) * mmm->n0prime;
s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);
/* Outer loop: Digits of b */
usedb = MP_USED(b);
for (ib = 1; ib < usedb; ib++) {
mp_digit b_i = *pb++;
/* Inner product: Digits of a */
if (b_i)
s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
m_i = MP_DIGIT(c, ib) * mmm->n0prime;
s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
}
if (usedb < MP_USED(&mmm->N)) {
for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib ) {
m_i = MP_DIGIT(c, ib) * mmm->n0prime;
s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
}
}
s_mp_clamp(c);
s_mp_div_2d(c, mmm->b);
if (s_mp_cmp(c, &mmm->N) >= 0) {
MP_CHECKOK( s_mp_sub(c, &mmm->N) );
}
res = MP_OKAY;
CLEANUP:
return res;
}
#endif
STATIC
mp_err s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
{
mp_err res;
/* xMont = x * R mod N where N is modulus */ /* xMont = x * R mod N where N is modulus */
MP_CHECKOK( mpl_lsh(x, xMont, mmm->b) ); /* xMont = x << b */ MP_CHECKOK( mpl_lsh(x, xMont, mmm->b) ); /* xMont = x << b */
MP_CHECKOK( mp_div(xMont, &mmm->N, 0, xMont) ); /* mod N */ MP_CHECKOK( mp_div(xMont, &mmm->N, 0, xMont) ); /* mod N */
loser: CLEANUP:
return rv; return res;
} }
@ -124,28 +188,34 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
const mp_int *modulus, mp_int *result) const mp_int *modulus, mp_int *result)
{ {
const mp_int *base; const mp_int *base;
mp_int *pa1, *pa2, *ptmp;
mp_size bits_in_exponent; mp_size bits_in_exponent;
mp_size i; mp_size i;
mp_err rv; mp_size window_bits, odd_ints;
mp_int square, accum, goodBase, tmp; mp_err res;
mp_int square, accum1, accum2, goodBase;
mp_mont_modulus mmm; mp_mont_modulus mmm;
/* function for computing n0prime only works if n0 is odd */ /* function for computing n0prime only works if n0 is odd */
if (!mp_isodd(modulus)) if (!mp_isodd(modulus))
return s_mp_exptmod(inBase, exponent, modulus, result); return s_mp_exptmod(inBase, exponent, modulus, result);
MP_DIGITS(&square) = 0;
MP_DIGITS(&accum1) = 0;
MP_DIGITS(&accum2) = 0;
MP_DIGITS(&goodBase) = 0;
if (mp_cmp(inBase, modulus) < 0) { if (mp_cmp(inBase, modulus) < 0) {
base = inBase; base = inBase;
MP_DIGITS(&goodBase) = 0;
} else { } else {
mp_init(&goodBase); MP_CHECKOK( mp_init(&goodBase) );
base = &goodBase; base = &goodBase;
MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) ); MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) );
} }
mp_init_size(&square, 2 * MP_USED(modulus) + 2); MP_CHECKOK( mp_init_size(&square, 2 * MP_USED(modulus) + 2) );
mp_init_size(&accum, 3 * MP_USED(modulus) + 2); MP_CHECKOK( mp_init_size(&accum1, 3 * MP_USED(modulus) + 2) );
mp_init_size(&tmp, 3 * MP_USED(modulus) + 2); MP_CHECKOK( mp_init_size(&accum2, 3 * MP_USED(modulus) + 2) );
mmm.N = *modulus; /* a copy of the mp_int struct */ mmm.N = *modulus; /* a copy of the mp_int struct */
i = mpl_significant_bits(modulus); i = mpl_significant_bits(modulus);
@ -157,18 +227,25 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
*/ */
mmm.n0prime = 0 - s_mp_invmod_32b( MP_DIGIT(modulus, 0) ); mmm.n0prime = 0 - s_mp_invmod_32b( MP_DIGIT(modulus, 0) );
MP_CHECKOK( mp_to_mont(base, &mmm, &square) ); MP_CHECKOK( s_mp_to_mont(base, &mmm, &square) );
bits_in_exponent = mpl_significant_bits(exponent); bits_in_exponent = mpl_significant_bits(exponent);
i = bits_in_exponent % WINDOW_BITS; if (bits_in_exponent > 480)
window_bits = 6;
else if (bits_in_exponent > 160)
window_bits = 5;
else
window_bits = 4;
odd_ints = 1 << (window_bits - 1);
i = bits_in_exponent % window_bits;
if (i != 0) { if (i != 0) {
bits_in_exponent += WINDOW_BITS - i; bits_in_exponent += window_bits - i;
} }
{ {
/* oddPowers[i] = base ** (2*i + 1); */ /* oddPowers[i] = base ** (2*i + 1); */
/* power2 = base ** 2; */
int expOff; int expOff;
mp_int power2, oddPowers[ODD_INTS]; /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
mp_int power2, oddPowers[MAX_ODD_INTS];
MP_CHECKOK( mp_init_copy(oddPowers, &square) ); MP_CHECKOK( mp_init_copy(oddPowers, &square) );
@ -176,97 +253,122 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
MP_CHECKOK( mp_sqr(&square, &power2) ); /* square = square ** 2 */ MP_CHECKOK( mp_sqr(&square, &power2) ); /* square = square ** 2 */
MP_CHECKOK( s_mp_redc(&power2, &mmm) ); MP_CHECKOK( s_mp_redc(&power2, &mmm) );
for (i = 1; i < ODD_INTS; ++i) { for (i = 1; i < odd_ints; ++i) {
mp_init_size(oddPowers + i, MP_USED(modulus) + 2 * MP_USED(&power2) + 2); mp_init_size(oddPowers + i, MP_USED(modulus) + 2 * MP_USED(&power2) + 2);
MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) ); MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) );
MP_CHECKOK( s_mp_redc(oddPowers + i, &mmm) ); MP_CHECKOK( s_mp_redc(oddPowers + i, &mmm) );
} }
mp_set(&accum, 1); mp_set(&accum1, 1);
MP_CHECKOK( mp_to_mont(&accum, &mmm, &accum) ); MP_CHECKOK( s_mp_to_mont(&accum1, &mmm, &accum1) );
pa1 = &accum1;
pa2 = &accum2;
#define SQUARE \ #define SQR(a,b) \
MP_CHECKOK( mp_sqr(&accum, &tmp) );\ MP_CHECKOK( mp_sqr(a, b) );\
mp_exch(&accum, &tmp); \ MP_CHECKOK( s_mp_redc(b, &mmm) )
MP_CHECKOK( s_mp_redc(&accum, &mmm) )
#define MUL(x) \ #if defined(MP_MONT_USE_MP_MUL)
MP_CHECKOK( mp_mul(&accum, oddPowers + (x), &tmp) ); \ #define MUL(x,a,b) \
mp_exch(&accum, &tmp); \ MP_CHECKOK( mp_mul(a, oddPowers + (x), b) ); \
MP_CHECKOK( s_mp_redc(&accum, &mmm)) MP_CHECKOK( s_mp_redc(b, &mmm) )
for (expOff = bits_in_exponent - WINDOW_BITS; expOff >= 0; expOff -= WINDOW_BITS) {
mp_size smallExp;
MP_CHECKERR( mpl_get_bits(exponent, expOff, WINDOW_BITS) );
smallExp = (mp_size)rv;
#if WINDOW_BITS == 4
if (!smallExp) {
SQUARE; SQUARE; SQUARE; SQUARE;
} else if (smallExp & 1) {
SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/2);
} else if (smallExp & 2) {
SQUARE; SQUARE; SQUARE; MUL(smallExp/4); SQUARE;
} else if (smallExp & 4) {
SQUARE; SQUARE; MUL(smallExp/8); SQUARE; SQUARE;
} else if (smallExp & 8) {
SQUARE; MUL(smallExp/16); SQUARE; SQUARE; SQUARE;
} else {
abort();
}
#elif WINDOW_BITS == 5
if (!smallExp) {
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE;
} else if (smallExp & 1) {
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/2);
} else if (smallExp & 2) {
SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/4); SQUARE;
} else if (smallExp & 4) {
SQUARE; SQUARE; SQUARE; MUL(smallExp/8); SQUARE; SQUARE;
} else if (smallExp & 8) {
SQUARE; SQUARE; MUL(smallExp/16); SQUARE; SQUARE; SQUARE;
} else if (smallExp & 0x10) {
SQUARE; MUL(smallExp/32); SQUARE; SQUARE; SQUARE; SQUARE;
} else {
abort();
}
#elif WINDOW_BITS == 6
if (!smallExp) {
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; SQUARE;
} else if (smallExp & 1) {
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/2);
} else if (smallExp & 2) {
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/4); SQUARE;
} else if (smallExp & 4) {
SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/8); SQUARE; SQUARE;
} else if (smallExp & 8) {
SQUARE; SQUARE; SQUARE; MUL(smallExp/16); SQUARE; SQUARE; SQUARE;
} else if (smallExp & 0x10) {
SQUARE; SQUARE; MUL(smallExp/32); SQUARE; SQUARE; SQUARE; SQUARE;
} else if (smallExp & 0x20) {
SQUARE; MUL(smallExp/64); SQUARE; SQUARE; SQUARE; SQUARE; SQUARE;
} else {
abort();
}
#else #else
#error "Unknown value for WINDOW_BITS" #define MUL(x,a,b) \
MP_CHECKOK( s_mp_mul_mont(a, oddPowers + (x), b, &mmm) )
#endif #endif
#define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp
for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
mp_size smallExp;
MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
smallExp = (mp_size)res;
if (window_bits == 4) {
if (!smallExp) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
} else if (smallExp & 1) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
MUL(smallExp/2, pa1,pa2); SWAPPA;
} else if (smallExp & 2) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 4) {
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 8) {
SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else {
abort();
}
} else if (window_bits == 5) {
if (!smallExp) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 1) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); MUL(smallExp/2,pa2,pa1);
} else if (smallExp & 2) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
MUL(smallExp/4,pa1,pa2); SQR(pa2,pa1);
} else if (smallExp & 4) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
MUL(smallExp/8,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
} else if (smallExp & 8) {
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/16,pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
} else if (smallExp & 0x10) {
SQR(pa1,pa2); MUL(smallExp/32,pa2,pa1); SQR(pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
} else {
abort();
}
} else if (window_bits == 6) {
if (!smallExp) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); SQR(pa2,pa1);
} else if (smallExp & 1) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2,pa1,pa2); SWAPPA;
} else if (smallExp & 2) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 4) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 8) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 0x10) {
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/32,pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 0x20) {
SQR(pa1,pa2); MUL(smallExp/64,pa2,pa1); SQR(pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else {
abort();
}
} else {
abort();
}
} }
mp_clear(&power2); mp_clear(&power2);
for (i = 0; i < ODD_INTS; ++i) { for (i = 0; i < odd_ints; ++i) {
mp_clear(oddPowers + i); mp_clear(oddPowers + i);
} }
} }
rv = s_mp_redc(&accum, &mmm); res = s_mp_redc(pa1, &mmm);
mp_exch(&accum, result); mp_exch(pa1, result);
loser: CLEANUP:
mp_clear(&square); mp_clear(&square);
mp_clear(&accum); mp_clear(&accum1);
mp_clear(&accum2);
mp_clear(&goodBase); mp_clear(&goodBase);
mp_clear(&tmp);
/* Don't mp_clear mmm.N because it is merely a copy of modulus. /* Don't mp_clear mmm.N because it is merely a copy of modulus.
** Just zap it. ** Just zap it.
*/ */
memset(&mmm, 0, sizeof mmm); memset(&mmm, 0, sizeof mmm);
return rv; return res;
} }