- In mpi-priv.h, declare new 3 argument versions of s_mp_add and s_mp_sub.
Also declare new set of s_mpv_ functions that operate on vectors (arrays)
of mp_digits instead of on mp_ints.  These functions are candidates for
implementation in assembler.
- In mpi.c reimplement mp_add and mp_sub using the new 3arg functions.
Implement 3 argument versions of s_mp_add and s_mp_sub.
This eliminates all need for temporary variables in mp_add and mp_sub.
Implement c language reference implementations of new s_mpv vector multiply
and multiply and add functions.  Change mp_mul and mp_sqr so they no longer
pre-zero the output variable.  It's no longer nececssary with the new s_mpv
functions.  s_mp_pad no longer zeros out the new padded space.
-In mpmontg.c, implement variable width exponetiation windows.  Implement
a new function to compute the multiply and Montgomery reduction in a
single pass.  This is "Improvement 2" from Dusse' and Kaliski's paper
"A Cryptographic Library for the Motorola DSP56000".  Performance impact
is negligible in this c implementation.  However, this function is another
target for assembly language optimization.
This commit is contained in:
nelsonb%netscape.com 2000-08-22 01:57:34 +00:00
Родитель a10342172b
Коммит f9eed96546
4 изменённых файлов: 456 добавлений и 235 удалений

Просмотреть файл

@ -1,8 +1,11 @@
/*
* mpi-priv.h
*
* mpi-priv.h - Private header file for MPI
* Arbitrary precision integer arithmetic library
*
* NOTE WELL: the content of this header file is NOT part of the "public"
* API for the MPI library, and may change at any time.
* Application programs that use libmpi should NOT include this header file.
*
* The contents of this file are subject to the Mozilla Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
@ -35,7 +38,7 @@
* the GPL. If you do not delete the provisions above, a recipient
* may use your version of this file under either the MPL or the GPL.
*
* $Id: mpi-priv.h,v 1.8 2000-08-09 20:42:18 nelsonb%netscape.com Exp $
* $Id: mpi-priv.h,v 1.9 2000-08-22 01:57:33 nelsonb%netscape.com Exp $
*/
#ifndef _MPI_PRIV_H_
#define _MPI_PRIV_H_ 1
@ -142,6 +145,7 @@ extern const float s_logv_2[];
void s_mp_free(void *ptr); /* general free function */
extern unsigned long mp_allocs;
extern unsigned long mp_frees;
extern unsigned long mp_copies;
#else
/* Even if these are defined as macros, we need to respect the settings
@ -198,9 +202,11 @@ mp_err s_mp_mod_d(mp_int *mp, mp_digit d, mp_digit *r);
mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu);
/* Barrett reduction */
mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */
mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c);
mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c);
mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset);
/* a += b * RADIX^offset */
mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */
#if MP_SQUARE
mp_err s_mp_sqr(mp_int *a); /* magnitude square */
@ -219,11 +225,17 @@ int s_mp_tovalue(char ch, int r); /* convert ch to value */
char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */
int s_mp_outlen(int bits, int r); /* output length in bytes */
mp_digit s_mp_invmod_32b(mp_digit P); /* returns (P ** -1) mod (2 ** 32) */
void s_mp_mul_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c);
/* c += a * b * (MP_RADIX ** offset); */
/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */
void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c);
void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
mp_digit *c);
void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
mp_digit *c);
/* c += a * b * (MP_RADIX ** offset); */
#define s_mp_mul_d_add_offset(a, b, c, off) \
(s_mp_mul_add(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off), MP_OKAY)
(s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off), MP_OKAY)
/* }}} */
#endif

Просмотреть файл

@ -35,7 +35,7 @@
* the GPL. If you do not delete the provisions above, a recipient
* may use your version of this file under either the MPL or the GPL.
*
* $Id: mpi.c,v 1.18 2000-08-11 01:58:20 nelsonb%netscape.com Exp $
* $Id: mpi.c,v 1.19 2000-08-22 01:57:34 nelsonb%netscape.com Exp $
*/
#include "mpi-priv.h"
@ -81,8 +81,8 @@ static const char *s_dmap_1 =
unsigned long mp_allocs;
unsigned long mp_frees;
unsigned long mp_copies;
#define MP_CHECKOK(x) if (MP_OKAY > (res = (x))) goto CLEANUP
/* {{{ Default precision manipulation */
@ -200,6 +200,7 @@ mp_err mp_copy(const mp_int *from, mp_int *to)
if(from == to)
return MP_OKAY;
++mp_copies;
{ /* copy */
mp_digit *tmp;
@ -702,38 +703,22 @@ mp_err mp_neg(const mp_int *a, mp_int *b)
mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
{
mp_int tmp;
mp_err res;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
MP_DIGITS(&tmp) = 0;
if (c == a) {
MP_CHECKOK( mp_init_copy(&tmp, a) );
a = &tmp;
if (c == b)
b = &tmp;
} else if (c == b) {
MP_CHECKOK( mp_init_copy(&tmp, b) );
b = &tmp;
}
if(SIGN(a) == SIGN(b)) { /* same sign: add values, keep sign */
MP_CHECKOK( mp_copy(a, c) );
MP_CHECKOK( s_mp_add(c, b) );
MP_CHECKOK( s_mp_add_3arg(a, b, c) );
} else if(s_mp_cmp(a, b) >= 0) { /* different sign: a >= b */
MP_CHECKOK( mp_copy(a, c) );
MP_CHECKOK( s_mp_sub(c, b) );
MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
} else { /* different sign: a < b */
MP_CHECKOK( mp_copy(b, c) );
MP_CHECKOK( s_mp_sub(c, a) );
MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
}
if (s_mp_cmp_d(c, 0) == MP_EQ)
SIGN(c) = ZPOS;
CLEANUP:
mp_clear(&tmp);
return res;
} /* end mp_add() */
@ -750,37 +735,25 @@ CLEANUP:
mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
{
mp_int tmp;
mp_err res;
int diffSign;
int magDiff;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
MP_DIGITS(&tmp) = 0;
if (a == b) {
mp_zero(c);
return MP_OKAY;
}
if (c == a) {
MP_CHECKOK( mp_init_copy(&tmp, a) );
a = &tmp;
} else if (c == b) {
MP_CHECKOK( mp_init_copy(&tmp, b) );
b = &tmp;
}
if (MP_SIGN(a) != MP_SIGN(b)) {
MP_CHECKOK( mp_copy(a, c) );
MP_CHECKOK( s_mp_add(c, b) );
} else if (!(diffSign = s_mp_cmp(a, b))) {
MP_CHECKOK( s_mp_add_3arg(a, b, c) );
} else if (!(magDiff = s_mp_cmp(a, b))) {
mp_zero(c);
res = MP_OKAY;
} else if (diffSign > 0) {
MP_CHECKOK( mp_copy(a, c) );
MP_CHECKOK( s_mp_sub(c, b) );
} else if (magDiff > 0) {
MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
} else {
MP_CHECKOK( mp_copy(b, c) );
MP_CHECKOK( s_mp_sub(c, a) );
MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
MP_SIGN(c) = !MP_SIGN(a);
}
@ -788,7 +761,6 @@ mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
MP_SIGN(c) = MP_ZPOS;
CLEANUP:
mp_clear(&tmp);
return res;
} /* end mp_sub() */
@ -808,6 +780,7 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
mp_int tmp;
mp_err res;
mp_size ib;
mp_size useda, usedb;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
@ -831,21 +804,24 @@ mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
a = xch;
}
/* This has the effect of left-padding with zeroes... */
MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
goto CLEANUP;
pb = MP_DIGITS(b);
s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
/* Outer loop: Digits of b */
for(ib = 0; ib < USED(b); ib++) {
useda = MP_USED(a);
usedb = MP_USED(b);
for (ib = 1; ib < usedb; ib++) {
mp_digit b_i = *pb++;
if(b_i == 0)
continue;
/* Inner product: Digits of a */
s_mp_mul_d_add_offset(a, b_i, c, ib);
if (b_i)
s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
else
MP_DIGIT(c, ib + useda) = b_i;
}
s_mp_clamp(c);
@ -876,12 +852,13 @@ CLEANUP:
/* sqr = a^2; Caller provides both a and tmp; */
mp_err mp_sqr(const mp_int *a, mp_int *sqr)
{
mp_digit *pa, *ps, *alim;
mp_digit *pa, *ps;
mp_word w;
mp_digit d, k;
mp_digit d;
mp_err res;
mp_size ix;
mp_int tmp;
int count;
ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
@ -893,56 +870,32 @@ mp_err mp_sqr(const mp_int *a, mp_int *sqr)
DIGITS(&tmp) = 0;
}
/* Left-pad with zeroes */
MP_USED(sqr) = 1; MP_DIGIT(sqr, 0) = 0;
if((res = s_mp_pad(sqr, 2 * USED(a))) != MP_OKAY)
goto CLEANUP;
/*
The inner product is computed as:
(C, S) = t[i,j] + 2 a[i] a[j] + C
*/
ix = 2 * MP_USED(a);
if (ix > MP_ALLOC(sqr)) {
MP_USED(sqr) = 1;
MP_CHECKOK( s_mp_grow(sqr, ix) );
}
MP_USED(sqr) = ix;
MP_DIGIT(sqr, 0) = 0;
pa = MP_DIGITS(a);
alim = pa + MP_USED(a);
for (ix = 0; pa < alim; ++ix) {
count = MP_USED(a) - 1;
if (count > 0) {
d = *pa++;
ps = MP_DIGITS(sqr) + 1 + (ix << 1);
s_mp_mul_add(pa, alim - pa, d, ps);
} /* for(ix ...) */
s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
for (ix = 3; --count > 0; ix += 2) {
d = *pa++;
s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
} /* for(ix ...) */
MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
/* now sqr *= 2 */
ps = MP_DIGITS(sqr);
k = 0;
#define SHIFT_1_BIT(n) \
d = ps[n]; ps[n] = (d << 1) | k; k = d >> (DIGIT_BIT - 1)
for (ix = MP_USED(sqr); ix >= 8; ix -= 8) {
SHIFT_1_BIT(0);
SHIFT_1_BIT(1);
SHIFT_1_BIT(2);
SHIFT_1_BIT(3);
SHIFT_1_BIT(4);
SHIFT_1_BIT(5);
SHIFT_1_BIT(6);
SHIFT_1_BIT(7);
ps += 8;
}
if (ix) {
ps += ix;
switch (ix) { /* all fallthru */
case 7: SHIFT_1_BIT(-7); /* FALLTHRU */
case 6: SHIFT_1_BIT(-6); /* FALLTHRU */
case 5: SHIFT_1_BIT(-5); /* FALLTHRU */
case 4: SHIFT_1_BIT(-4); /* FALLTHRU */
case 3: SHIFT_1_BIT(-3); /* FALLTHRU */
case 2: SHIFT_1_BIT(-2); /* FALLTHRU */
case 1: SHIFT_1_BIT(-1); /* FALLTHRU */
case 0: break;
}
/* now sqr *= 2 */
s_mp_mul_2(sqr);
} else {
MP_DIGIT(sqr, 1) = 0;
}
pa = MP_DIGITS(a);
alim = pa + MP_USED(a);
ps = MP_DIGITS(sqr);
w = 0;
#define ADD_SQUARE(n) \
@ -2544,7 +2497,7 @@ mp_err s_mp_pad(mp_int *mp, mp_size min)
if ((res = s_mp_grow(mp, min)) != MP_OKAY)
return res;
} else {
s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
/* s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp)); */
}
/* Increase precision; should already be 0-filled */
@ -2770,12 +2723,15 @@ void s_mp_rshd(mp_int *mp, mp_size p)
for (ix = USED(mp) - p; ix > 0; ix--)
*dst++ = *src++;
MP_USED(mp) -= p;
/* Fill the top digits with zeroes */
while (p-- > 0)
*dst++ = 0;
#if 0
/* Strip off any leading zeroes */
s_mp_clamp(mp);
#endif
} /* end s_mp_rshd() */
@ -2796,21 +2752,23 @@ void s_mp_div_2(mp_int *mp)
mp_err s_mp_mul_2(mp_int *mp)
{
int ix;
mp_digit kin = 0, kout;
mp_err res;
mp_digit *pd;
int ix, used;
mp_digit kin = 0;
/* Shift digits leftward by 1 bit */
for(ix = 0; ix < USED(mp); ix++) {
kout = (DIGIT(mp, ix) >> (DIGIT_BIT - 1)) & 1;
DIGIT(mp, ix) = (DIGIT(mp, ix) << 1) | kin;
kin = kout;
used = MP_USED(mp);
pd = MP_DIGITS(mp);
for (ix = 0; ix < used; ix++) {
mp_digit d = *pd;
*pd++ = (d << 1) | kin;
kin = (d >> (DIGIT_BIT - 1));
}
/* Deal with rollover from last digit */
if(kin) {
if(ix >= ALLOC(mp)) {
if (kin) {
if (ix >= ALLOC(mp)) {
mp_err res;
if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
return res;
}
@ -3139,8 +3097,10 @@ mp_err s_mp_mod_d(mp_int *mp, mp_digit d, mp_digit *r)
/* Compute a = |a| + |b| */
mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
{
mp_word w, k = 0;
mp_digit *pa, *pb;
mp_word w = 0;
mp_size ix;
mp_size used;
mp_err res;
/* Make sure a has enough precision for the output value */
@ -3154,19 +3114,23 @@ mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
less precision, we'll have to make sure the carry out is duly
propagated upward among the higher-order digits of the sum.
*/
for(ix = 0; ix < USED(b); ix++) {
w = (mp_word)DIGIT(a, ix) + DIGIT(b, ix) + k;
DIGIT(a, ix) = ACCUM(w);
k = CARRYOUT(w);
pa = MP_DIGITS(a);
pb = MP_DIGITS(b);
used = MP_USED(b);
for(ix = 0; ix < used; ix++) {
w = w + *pa + *pb++;
*pa++ = ACCUM(w);
w = CARRYOUT(w);
}
/* If we run out of 'b' digits before we're actually done, make
sure the carries get propagated upward...
*/
while(k && ix < USED(a)) {
w = (mp_word)DIGIT(a, ix) + k;
DIGIT(a, ix) = ACCUM(w);
k = CARRYOUT(w);
used = MP_USED(a);
while (w && ix < used) {
w = w + *pa;
*pa++ = ACCUM(w);
w = CARRYOUT(w);
++ix;
}
@ -3174,11 +3138,11 @@ mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
it. We could have done this initially, but why touch the memory
allocator unless we're sure we have to?
*/
if(k) {
if (w) {
if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
return res;
DIGIT(a, ix) = (mp_digit)k;
DIGIT(a, ix) = (mp_digit)w;
}
return MP_OKAY;
@ -3187,6 +3151,69 @@ mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
/* }}} */
/* Compute c = |a| + |b| */ /* magnitude addition */
mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
{
mp_digit *pa, *pb, *pc;
mp_word w = 0;
mp_size ix;
mp_size used;
mp_err res;
MP_SIGN(c) = MP_SIGN(a);
if (MP_USED(a) < MP_USED(b)) {
const mp_int *xch = a;
a = b;
b = xch;
}
/* Make sure a has enough precision for the output value */
if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
return res;
/*
Add up all digits up to the precision of b. If b had initially
the same precision as a, or greater, we took care of it by the
exchange step above, so there is no problem. If b had initially
less precision, we'll have to make sure the carry out is duly
propagated upward among the higher-order digits of the sum.
*/
pa = MP_DIGITS(a);
pb = MP_DIGITS(b);
pc = MP_DIGITS(c);
used = MP_USED(b);
for (ix = 0; ix < used; ix++) {
w = w + *pa++ + *pb++;
*pc++ = ACCUM(w);
w = CARRYOUT(w);
}
/* If we run out of 'b' digits before we're actually done, make
sure the carries get propagated upward...
*/
used = MP_USED(a);
while (ix < used) {
w = w + *pa++;
*pc++ = ACCUM(w);
w = CARRYOUT(w);
++ix;
}
/* If there's an overall carry out, increase precision and include
it. We could have done this initially, but why touch the memory
allocator unless we're sure we have to?
*/
if (w) {
if((res = s_mp_pad(c, ix + 1)) != MP_OKAY)
return res;
DIGIT(c, ix) = (mp_digit)w;
++ix;
}
MP_USED(c) = ix;
return MP_OKAY;
}
/* {{{ s_mp_add_offset(a, b, offset) */
/* Compute a = |a| + ( |b| * (RADIX ** offset) ) */
@ -3288,6 +3315,52 @@ mp_err s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract */
/* }}} */
/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract */
mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
{
mp_digit *pa, *pb, *pc;
mp_sword w = 0;
int ix, limit;
mp_err res;
MP_SIGN(c) = MP_SIGN(a);
/* Make sure a has enough precision for the output value */
if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
return res;
/*
Subtract and propagate borrow. Up to the precision of b, this
accounts for the digits of b; after that, we just make sure the
carries get to the right place. This saves having to pad b out to
the precision of a just to make the loops work right...
*/
pa = MP_DIGITS(a);
pb = MP_DIGITS(b);
pc = MP_DIGITS(c);
limit = MP_USED(b);
for (ix = 0; ix < limit; ++ix) {
w = w + *pa++ - *pb++;
*pc++ = ACCUM(w);
w >>= MP_DIGIT_BIT;
}
for (limit = MP_USED(a); ix < limit; ++ix) {
w = w + *pa++;
*pc++ = ACCUM(w);
w >>= MP_DIGIT_BIT;
}
/* Clobber any leading zeroes we created */
MP_USED(c) = ix;
s_mp_clamp(c);
/*
If there was a borrow out, then |b| > |a| in violation
of our input invariant. We've already done the work,
but we'll at least complain about it...
*/
return w ? MP_RANGE : MP_OKAY;
}
/* {{{ s_mp_mul(a, b) */
/* Compute a = |a| * |b| */
@ -3298,24 +3371,55 @@ mp_err s_mp_mul(mp_int *a, const mp_int *b)
/* }}} */
/* c += a * b */
void s_mp_mul_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
#if !defined(MP_ASSEMBLY_MULTIPLY)
/* c = a * b */
void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
mp_word w = 0;
mp_digit d = 0;
/* Inner product: Digits of a */
while (a_len--) {
w += ((mp_word)b * *a++) + *c;
mp_word w = ((mp_word)b * *a++) + d;
*c++ = ACCUM(w);
w = CARRYOUT(w);
d = CARRYOUT(w);
}
*c = d;
}
/* c += a * b */
void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
mp_digit *c)
{
mp_digit d = 0;
/* Inner product: Digits of a */
while (a_len--) {
mp_word w = ((mp_word)b * *a++) + *c + d;
*c++ = ACCUM(w);
d = CARRYOUT(w);
}
*c = d;
}
/* c += a * b */
void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
mp_digit d = 0;
/* Inner product: Digits of a */
while (a_len--) {
mp_word w = ((mp_word)b * *a++) + *c + d;
*c++ = ACCUM(w);
d = CARRYOUT(w);
}
while (w) {
w += *c;
while (d) {
mp_word w = (mp_word)*c + d;
*c++ = ACCUM(w);
w = CARRYOUT(w);
d = CARRYOUT(w);
}
}
#endif
#if MP_SQUARE
/* {{{ s_mp_sqr(a) */

Просмотреть файл

@ -36,7 +36,7 @@
* may use your version of this file under either the MPL or the
* GPL.
*
* $Id: mpi.h,v 1.7 2000-08-04 19:57:24 nelsonb%netscape.com Exp $
* $Id: mpi.h,v 1.8 2000-08-22 01:57:33 nelsonb%netscape.com Exp $
*/
#ifndef _H_MPI_
@ -68,7 +68,7 @@
#define MP_UNDEF -5 /* answer is undefined */
#define MP_LAST_CODE MP_UNDEF
typedef char mp_sign;
typedef unsigned int mp_sign;
typedef unsigned int mp_size;
typedef int mp_err;
@ -245,6 +245,9 @@ mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxle
mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len);
#define MP_CHECKOK(x) if (MP_OKAY > (res = (x))) goto CLEANUP
#define MP_CHECKERR(x) if (MP_OKAY > (res = (x))) goto CLEANUP
#if defined(MP_API_COMPATIBLE)
#define NEG MP_NEG
#define ZPOS MP_ZPOS

Просмотреть файл

@ -29,7 +29,7 @@
* the GPL. If you do not delete the provisions above, a recipient
* may use your version of this file under either the MPL or the
* GPL.
* $Id: mpmontg.c,v 1.6 2000-08-08 03:20:35 nelsonb%netscape.com Exp $
* $Id: mpmontg.c,v 1.7 2000-08-22 01:57:34 nelsonb%netscape.com Exp $
*/
/* This file implements moduluar exponentiation using Montgomery's
@ -46,13 +46,11 @@
#include "mplogic.h"
#include "mpprime.h"
#define MP_CHECKOK(x) if (MP_OKAY != (rv = (x))) goto loser
#define MP_CHECKERR(x) if (0 > (rv = (x))) goto loser
#define STATIC
/* #define DEBUG 1 */
#define WINDOW_BITS 5
#define ODD_INTS 16 /* 2 ** (WINDOW_BITS - 1) */
#define MAX_WINDOW_BITS 6
#define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */
typedef struct {
mp_int N; /* modulus N */
@ -60,17 +58,22 @@ typedef struct {
mp_size b; /* R == 2 ** b, also b = # significant bits in N */
} mp_mont_modulus;
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
mp_mont_modulus *mmm);
/* computes T = REDC(T), 2^b == R */
STATIC
mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
{
mp_err rv;
int i;
mp_err res;
mp_size i;
#ifdef DEBUG
mp_int m;
MP_DIGITS(&m) = 0;
#endif
MP_CHECKOK( s_mp_pad(T, MP_USED(T) + MP_USED(&mmm->N) + 2) );
i = MP_USED(T) + MP_USED(&mmm->N) + 2;
MP_CHECKOK( s_mp_pad(T, i) );
for (i = 0; i < MP_USED(&mmm->N); ++i ) {
mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime;
/* T += N * m_i * (MP_RADIX ** i); */
@ -84,39 +87,100 @@ mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
MP_CHECKOK( mp_div_2d(T, mmm->b, T, &m));
/* here, remainder m should be equal to zero */
if (mp_cmp_z(&m) != 0) {
rv = MP_UNDEF;
goto loser;
res = MP_UNDEF;
goto CLEANUP;
}
#else
s_mp_div_2d(T, mmm->b);
#endif
if ((rv = s_mp_cmp(T, &mmm->N)) >= 0) {
if ((res = s_mp_cmp(T, &mmm->N)) >= 0) {
/* T = T - N */
MP_CHECKOK( s_mp_sub(T, &mmm->N) );
#ifdef DEBUG
if ((rv = mp_cmp(T, &mmm->N)) >= 0) {
rv = MP_UNDEF;
if ((res = mp_cmp(T, &mmm->N)) >= 0) {
res = MP_UNDEF;
goto CLEANUP;
}
#endif
}
rv = MP_OKAY;
loser:
res = MP_OKAY;
CLEANUP:
#ifdef DEBUG
mp_clear(&m);
#endif
return rv;
return res;
}
mp_err mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
#if !defined(MP_ASSEMBLY_MUL_MONT) && !defined(MP_MONT_USE_MP_MUL)
mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
mp_mont_modulus *mmm)
{
mp_err rv;
mp_digit *pb;
mp_digit m_i;
mp_err res;
mp_size ib;
mp_size useda, usedb;
ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
if (MP_USED(a) < MP_USED(b)) {
const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
b = a;
a = xch;
}
MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
ib = MP_USED(a) + MP_MAX(MP_USED(b), MP_USED(&mmm->N)) + 2;
if((res = s_mp_pad(c, ib)) != MP_OKAY)
goto CLEANUP;
useda = MP_USED(a);
pb = MP_DIGITS(b);
s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
m_i = MP_DIGIT(c, 0) * mmm->n0prime;
s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);
/* Outer loop: Digits of b */
usedb = MP_USED(b);
for (ib = 1; ib < usedb; ib++) {
mp_digit b_i = *pb++;
/* Inner product: Digits of a */
if (b_i)
s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
m_i = MP_DIGIT(c, ib) * mmm->n0prime;
s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
}
if (usedb < MP_USED(&mmm->N)) {
for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib ) {
m_i = MP_DIGIT(c, ib) * mmm->n0prime;
s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
}
}
s_mp_clamp(c);
s_mp_div_2d(c, mmm->b);
if (s_mp_cmp(c, &mmm->N) >= 0) {
MP_CHECKOK( s_mp_sub(c, &mmm->N) );
}
res = MP_OKAY;
CLEANUP:
return res;
}
#endif
STATIC
mp_err s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
{
mp_err res;
/* xMont = x * R mod N where N is modulus */
MP_CHECKOK( mpl_lsh(x, xMont, mmm->b) ); /* xMont = x << b */
MP_CHECKOK( mp_div(xMont, &mmm->N, 0, xMont) ); /* mod N */
loser:
return rv;
CLEANUP:
return res;
}
@ -124,28 +188,34 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
const mp_int *modulus, mp_int *result)
{
const mp_int *base;
mp_int *pa1, *pa2, *ptmp;
mp_size bits_in_exponent;
mp_size i;
mp_err rv;
mp_int square, accum, goodBase, tmp;
mp_size window_bits, odd_ints;
mp_err res;
mp_int square, accum1, accum2, goodBase;
mp_mont_modulus mmm;
/* function for computing n0prime only works if n0 is odd */
if (!mp_isodd(modulus))
return s_mp_exptmod(inBase, exponent, modulus, result);
MP_DIGITS(&square) = 0;
MP_DIGITS(&accum1) = 0;
MP_DIGITS(&accum2) = 0;
MP_DIGITS(&goodBase) = 0;
if (mp_cmp(inBase, modulus) < 0) {
base = inBase;
MP_DIGITS(&goodBase) = 0;
} else {
mp_init(&goodBase);
MP_CHECKOK( mp_init(&goodBase) );
base = &goodBase;
MP_CHECKOK( mp_mod(inBase, modulus, &goodBase) );
}
mp_init_size(&square, 2 * MP_USED(modulus) + 2);
mp_init_size(&accum, 3 * MP_USED(modulus) + 2);
mp_init_size(&tmp, 3 * MP_USED(modulus) + 2);
MP_CHECKOK( mp_init_size(&square, 2 * MP_USED(modulus) + 2) );
MP_CHECKOK( mp_init_size(&accum1, 3 * MP_USED(modulus) + 2) );
MP_CHECKOK( mp_init_size(&accum2, 3 * MP_USED(modulus) + 2) );
mmm.N = *modulus; /* a copy of the mp_int struct */
i = mpl_significant_bits(modulus);
@ -157,18 +227,25 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
*/
mmm.n0prime = 0 - s_mp_invmod_32b( MP_DIGIT(modulus, 0) );
MP_CHECKOK( mp_to_mont(base, &mmm, &square) );
MP_CHECKOK( s_mp_to_mont(base, &mmm, &square) );
bits_in_exponent = mpl_significant_bits(exponent);
i = bits_in_exponent % WINDOW_BITS;
if (bits_in_exponent > 480)
window_bits = 6;
else if (bits_in_exponent > 160)
window_bits = 5;
else
window_bits = 4;
odd_ints = 1 << (window_bits - 1);
i = bits_in_exponent % window_bits;
if (i != 0) {
bits_in_exponent += WINDOW_BITS - i;
bits_in_exponent += window_bits - i;
}
{
/* oddPowers[i] = base ** (2*i + 1); */
/* power2 = base ** 2; */
int expOff;
mp_int power2, oddPowers[ODD_INTS];
/* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
mp_int power2, oddPowers[MAX_ODD_INTS];
MP_CHECKOK( mp_init_copy(oddPowers, &square) );
@ -176,97 +253,122 @@ mp_err mp_exptmod(const mp_int *inBase, const mp_int *exponent,
MP_CHECKOK( mp_sqr(&square, &power2) ); /* square = square ** 2 */
MP_CHECKOK( s_mp_redc(&power2, &mmm) );
for (i = 1; i < ODD_INTS; ++i) {
for (i = 1; i < odd_ints; ++i) {
mp_init_size(oddPowers + i, MP_USED(modulus) + 2 * MP_USED(&power2) + 2);
MP_CHECKOK( mp_mul(oddPowers + (i - 1), &power2, oddPowers + i) );
MP_CHECKOK( s_mp_redc(oddPowers + i, &mmm) );
}
mp_set(&accum, 1);
MP_CHECKOK( mp_to_mont(&accum, &mmm, &accum) );
mp_set(&accum1, 1);
MP_CHECKOK( s_mp_to_mont(&accum1, &mmm, &accum1) );
pa1 = &accum1;
pa2 = &accum2;
#define SQUARE \
MP_CHECKOK( mp_sqr(&accum, &tmp) );\
mp_exch(&accum, &tmp); \
MP_CHECKOK( s_mp_redc(&accum, &mmm) )
#define SQR(a,b) \
MP_CHECKOK( mp_sqr(a, b) );\
MP_CHECKOK( s_mp_redc(b, &mmm) )
#define MUL(x) \
MP_CHECKOK( mp_mul(&accum, oddPowers + (x), &tmp) ); \
mp_exch(&accum, &tmp); \
MP_CHECKOK( s_mp_redc(&accum, &mmm))
for (expOff = bits_in_exponent - WINDOW_BITS; expOff >= 0; expOff -= WINDOW_BITS) {
mp_size smallExp;
MP_CHECKERR( mpl_get_bits(exponent, expOff, WINDOW_BITS) );
smallExp = (mp_size)rv;
#if WINDOW_BITS == 4
if (!smallExp) {
SQUARE; SQUARE; SQUARE; SQUARE;
} else if (smallExp & 1) {
SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/2);
} else if (smallExp & 2) {
SQUARE; SQUARE; SQUARE; MUL(smallExp/4); SQUARE;
} else if (smallExp & 4) {
SQUARE; SQUARE; MUL(smallExp/8); SQUARE; SQUARE;
} else if (smallExp & 8) {
SQUARE; MUL(smallExp/16); SQUARE; SQUARE; SQUARE;
} else {
abort();
}
#elif WINDOW_BITS == 5
if (!smallExp) {
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE;
} else if (smallExp & 1) {
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/2);
} else if (smallExp & 2) {
SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/4); SQUARE;
} else if (smallExp & 4) {
SQUARE; SQUARE; SQUARE; MUL(smallExp/8); SQUARE; SQUARE;
} else if (smallExp & 8) {
SQUARE; SQUARE; MUL(smallExp/16); SQUARE; SQUARE; SQUARE;
} else if (smallExp & 0x10) {
SQUARE; MUL(smallExp/32); SQUARE; SQUARE; SQUARE; SQUARE;
} else {
abort();
}
#elif WINDOW_BITS == 6
if (!smallExp) {
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; SQUARE;
} else if (smallExp & 1) {
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/2);
} else if (smallExp & 2) {
SQUARE; SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/4); SQUARE;
} else if (smallExp & 4) {
SQUARE; SQUARE; SQUARE; SQUARE; MUL(smallExp/8); SQUARE; SQUARE;
} else if (smallExp & 8) {
SQUARE; SQUARE; SQUARE; MUL(smallExp/16); SQUARE; SQUARE; SQUARE;
} else if (smallExp & 0x10) {
SQUARE; SQUARE; MUL(smallExp/32); SQUARE; SQUARE; SQUARE; SQUARE;
} else if (smallExp & 0x20) {
SQUARE; MUL(smallExp/64); SQUARE; SQUARE; SQUARE; SQUARE; SQUARE;
} else {
abort();
}
#if defined(MP_MONT_USE_MP_MUL)
#define MUL(x,a,b) \
MP_CHECKOK( mp_mul(a, oddPowers + (x), b) ); \
MP_CHECKOK( s_mp_redc(b, &mmm) )
#else
#error "Unknown value for WINDOW_BITS"
#define MUL(x,a,b) \
MP_CHECKOK( s_mp_mul_mont(a, oddPowers + (x), b, &mmm) )
#endif
#define SWAPPA ptmp = pa1; pa1 = pa2; pa2 = ptmp
for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
mp_size smallExp;
MP_CHECKOK( mpl_get_bits(exponent, expOff, window_bits) );
smallExp = (mp_size)res;
if (window_bits == 4) {
if (!smallExp) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
} else if (smallExp & 1) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
MUL(smallExp/2, pa1,pa2); SWAPPA;
} else if (smallExp & 2) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 4) {
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/8,pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 8) {
SQR(pa1,pa2); MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else {
abort();
}
} else if (window_bits == 5) {
if (!smallExp) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 1) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); MUL(smallExp/2,pa2,pa1);
} else if (smallExp & 2) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
MUL(smallExp/4,pa1,pa2); SQR(pa2,pa1);
} else if (smallExp & 4) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
MUL(smallExp/8,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
} else if (smallExp & 8) {
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/16,pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
} else if (smallExp & 0x10) {
SQR(pa1,pa2); MUL(smallExp/32,pa2,pa1); SQR(pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
} else {
abort();
}
} else if (window_bits == 6) {
if (!smallExp) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); SQR(pa2,pa1);
} else if (smallExp & 1) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/2,pa1,pa2); SWAPPA;
} else if (smallExp & 2) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); MUL(smallExp/4,pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 4) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
MUL(smallExp/8,pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 8) {
SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2);
MUL(smallExp/16,pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1);
SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 0x10) {
SQR(pa1,pa2); SQR(pa2,pa1); MUL(smallExp/32,pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else if (smallExp & 0x20) {
SQR(pa1,pa2); MUL(smallExp/64,pa2,pa1); SQR(pa1,pa2);
SQR(pa2,pa1); SQR(pa1,pa2); SQR(pa2,pa1); SQR(pa1,pa2); SWAPPA;
} else {
abort();
}
} else {
abort();
}
}
mp_clear(&power2);
for (i = 0; i < ODD_INTS; ++i) {
for (i = 0; i < odd_ints; ++i) {
mp_clear(oddPowers + i);
}
}
rv = s_mp_redc(&accum, &mmm);
mp_exch(&accum, result);
loser:
res = s_mp_redc(pa1, &mmm);
mp_exch(pa1, result);
CLEANUP:
mp_clear(&square);
mp_clear(&accum);
mp_clear(&accum1);
mp_clear(&accum2);
mp_clear(&goodBase);
mp_clear(&tmp);
/* Don't mp_clear mmm.N because it is merely a copy of modulus.
** Just zap it.
*/
memset(&mmm, 0, sizeof mmm);
return rv;
return res;
}