321350 Implement optimized code for NIST Suite B elliptic curves

r=douglas.
This commit is contained in:
rrelyea%redhat.com 2006-03-23 19:55:37 +00:00
Родитель 701b146652
Коммит e72ce470d4
11 изменённых файлов: 2297 добавлений и 298 удалений

Просмотреть файл

@ -132,6 +132,7 @@ ifeq ($(CPU_ARCH),x86_64)
DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
DEFINES += -DNSS_USE_COMBA
DEFINES += -DMP_CHAR_STORE_SLOW -DMP_IS_LITTLE_ENDIAN
DEFINES += -DMPI_AMD64_ADD
MPI_SRCS += mpi_amd64.c mp_comba.c
endif
ifeq ($(CPU_ARCH),x86)

Просмотреть файл

@ -113,7 +113,7 @@ LIBOBJS = ecl.o ecl_curve.o ecl_mult.o ecl_gf.o \
ec2_163.o ec2_193.o ec2_233.o \
ecp_aff.o ecp_jac.o ecp_mont.o \
ec_naf.o ecp_jm.o \
ecp_192.o ecp_224.o
ecp_192.o ecp_224.o ecp_256.o ecp_384.o ecp_521.o
ifeq ($(ECL_USE_FP),1)
LIBOBJS+= ecp_fp160.o ecp_fp192.o ecp_fp224.o ecp_fp.o
endif
@ -162,6 +162,9 @@ ecp_jm.o: ecp_jm.c $(LIBHDRS)
ecp_mont.o: ecp_mont.c $(LIBHDRS)
ecp_192.o: ecp_192.c $(LIBHDRS)
ecp_224.o: ecp_224.c $(LIBHDRS)
ecp_256.o: ecp_256.c $(LIBHDRS)
ecp_384.o: ecp_384.c $(LIBHDRS)
ecp_521.o: ecp_521.c $(LIBHDRS)
ecp_fp.o: ecp_fp.c $(LIBHDRS)
ifeq ($(ECL_USE_FP),1)
ecp_fp160.o: ecp_fp160.c ecp_fpinc.c $(LIBHDRS)

Просмотреть файл

@ -45,22 +45,62 @@
#include "mplogic.h"
/* MAX_FIELD_SIZE_DIGITS is the maximum size of field element supported */
/* the following needs to go away... */
#if defined(MP_USE_LONG_LONG_DIGIT) || defined(MP_USE_LONG_DIGIT)
#define ECL_SIXTY_FOUR_BIT
#define ECL_BITS 64
#define ECL_MAX_FIELD_SIZE_DIGITS 10
#else
#define ECL_THIRTY_TWO_BIT
#define ECL_BITS 32
#define ECL_MAX_FIELD_SIZE_DIGITS 20
#endif
#define ECL_CURVE_DIGITS(curve_size_in_bits) \
(((curve_size_in_bits)+(sizeof(mp_digit)*8-1))/(sizeof(mp_digit)*8))
#define ECL_BITS (sizeof(mp_digit)*8)
#define ECL_MAX_FIELD_SIZE_DIGITS (80/sizeof(mp_digit))
/* Gets the i'th bit in the binary representation of a. If i >= length(a),
 * then return 0. (This behaviour differs from mpl_get_bit, which
 * causes an error if i >= length(a).)
 *
 * The whole expansion is parenthesized so that the conditional expression
 * cannot be split by a neighbouring operator when the macro is used inside
 * a larger expression. Both a and i are evaluated more than once, so do
 * not pass arguments with side effects. */
#define MP_GET_BIT(a, i) \
    (((i) >= mpl_significant_bits((a))) ? 0 : mpl_get_bit((a), (i)))
#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
/* Double-width variants: the arithmetic is done in an mp_word and the
 * result is split with ACCUM / CARRYOUT (presumably the low and high
 * digit of the mp_word -- see their definitions in the MPI headers).
 *
 * MP_ADD_CARRY: s = a1 + a2 + cin, cout = carry out of that sum.
 * MP_SUB_BORROW: s = a1 - a2 - bin, bout = 1 if the subtraction borrowed
 * (bit MP_DIGIT_BIT of the wide difference), else 0.
 *
 * Both expand to a brace-enclosed block, so they are statements, not
 * expressions. */
#define MP_ADD_CARRY(a1, a2, s, cin, cout) \
    { mp_word w; \
    w = ((mp_word)(cin)) + (a1) + (a2); \
    s = ACCUM(w); \
    cout = CARRYOUT(w); }
#define MP_SUB_BORROW(a1, a2, s, bin, bout) \
    { mp_word w; \
    w = ((mp_word)(a1)) - (a2) - (bin); \
    s = ACCUM(w); \
    bout = (w >> MP_DIGIT_BIT) & 1; }
#else
/* Single-width variants for builds without a usable mp_word: carry and
 * borrow are detected by comparing the wrapped result with an operand.
 *
 * NOTE,
 * cin and cout could be the same variable.
 * bin and bout could be the same variable.
 * a1 or a2 and s could be the same variable.
 * don't trash those outputs until their respective inputs have
 * been read. */
#define MP_ADD_CARRY(a1, a2, s, cin, cout) \
    { mp_digit tmp,sum; \
    tmp = (a1); \
    sum = tmp + (a2); \
    tmp = (sum < tmp); /* detect overflow */ \
    s = sum += (cin); \
    cout = tmp + (sum < (cin)); }
#define MP_SUB_BORROW(a1, a2, s, bin, bout) \
    { mp_digit tmp; \
    tmp = (a1); \
    s = tmp - (a2); \
    tmp = (s > tmp); /* detect borrow */ \
    if ((bin) && !s--) tmp++; \
    bout = tmp; }
#endif
struct GFMethodStr;
typedef struct GFMethodStr GFMethod;
struct GFMethodStr {
@ -158,6 +198,25 @@ mp_err ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
mp_err ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth);
mp_err ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth);
/* Fixed-length in-line field additions and subtractions. The numeric
 * suffix of each function is the operand length in words (mp_digits).
 * All of them share the signature of ec_GFp_add / ec_GFp_sub: a and b
 * are the operands, r receives the result, and meth supplies the field
 * modulus. */
mp_err ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth);
mp_err ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth);
mp_err ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth);
mp_err ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth);
mp_err ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth);
mp_err ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth);
mp_err ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth);
mp_err ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth);
mp_err ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth);
mp_err ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth);
@ -205,6 +264,9 @@ mp_err ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in,
/* Optimized field arithmetic */
mp_err ec_group_set_gfp192(ECGroup *group, ECCurveName);
mp_err ec_group_set_gfp224(ECGroup *group, ECCurveName);
mp_err ec_group_set_gfp256(ECGroup *group, ECCurveName);
mp_err ec_group_set_gfp384(ECGroup *group, ECCurveName);
mp_err ec_group_set_gfp521(ECGroup *group, ECCurveName);
mp_err ec_group_set_gf2m163(ECGroup *group, ECCurveName name);
mp_err ec_group_set_gf2m193(ECGroup *group, ECCurveName name);
mp_err ec_group_set_gf2m233(ECGroup *group, ECCurveName name);

Просмотреть файл

@ -246,29 +246,17 @@ ecgroup_fromNameAndHex(const ECCurveName name,
/* determine which optimizations (if any) to use */
if (params->field == ECField_GFp) {
if ((name == ECCurve_SECG_PRIME_160R1)) {
switch (name) {
#ifdef ECL_USE_FP
case ECCurve_SECG_PRIME_160R1:
group =
ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
&order, params->cofactor);
if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
MP_CHECKOK(ec_group_set_secp160r1_fp(group));
#else
group =
ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx, &geny,
&order, params->cofactor);
if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
/* no optimized version of secp160r1 arithmetic for non-floating
* point systems
*/
break;
#endif
} else if ((name == ECCurve_SECG_PRIME_192K1)) {
group =
ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx, &geny,
&order, params->cofactor);
if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
MP_CHECKOK(ec_group_set_gfp192(group, name));
} else if ((name == ECCurve_SECG_PRIME_192R1)) {
case ECCurve_SECG_PRIME_192R1:
#ifdef ECL_USE_FP
group =
ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
@ -282,13 +270,8 @@ ecgroup_fromNameAndHex(const ECCurveName name,
if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
MP_CHECKOK(ec_group_set_gfp192(group, name));
#endif
} else if ((name == ECCurve_SECG_PRIME_224K1)) {
group =
ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx, &geny,
&order, params->cofactor);
if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
MP_CHECKOK(ec_group_set_gfp224(group, name));
} else if ((name == ECCurve_SECG_PRIME_224R1)) {
break;
case ECCurve_SECG_PRIME_224R1:
#ifdef ECL_USE_FP
group =
ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
@ -302,17 +285,28 @@ ecgroup_fromNameAndHex(const ECCurveName name,
if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
MP_CHECKOK(ec_group_set_gfp224(group, name));
#endif
} else {
break;
case ECCurve_SECG_PRIME_256R1:
group =
ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
&order, params->cofactor);
if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
MP_CHECKOK(ec_group_set_gfp256(group, name));
break;
case ECCurve_SECG_PRIME_521R1:
group =
ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
&order, params->cofactor);
if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
MP_CHECKOK(ec_group_set_gfp521(group, name));
break;
default:
/* use generic arithmetic */
group =
ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx, &geny,
&order, params->cofactor);
if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
}
/* XXX secp521r1 fails ecp_test with &ec_GFp_pts_mul_jac */
if (name == ECCurve_SECG_PRIME_521R1) {
group->points_mul = &ec_pts_mul_simul_w2;
}
} else if (params->field == ECField_GF2m) {
group = ECGroup_consGF2m(&irr, NULL, &curvea, &curveb, &genx, &geny, &order, params->cofactor);
if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }

Просмотреть файл

@ -40,6 +40,7 @@
#include "mpi.h"
#include "mp_gf2m.h"
#include "ecl-priv.h"
#include "mpi-priv.h"
#include <stdlib.h>
/* Allocate memory for a new GFMethod object. */
@ -79,9 +80,29 @@ GFMethod_consGFp(const mp_int *irr)
meth->irr_arr[0] = mpl_significant_bits(irr);
meth->irr_arr[1] = meth->irr_arr[2] = meth->irr_arr[3] =
meth->irr_arr[4] = 0;
meth->field_add = &ec_GFp_add;
switch(MP_USED(&meth->irr)) {
/* maybe we need 1 and 2 words here as well?*/
case 3:
meth->field_add = &ec_GFp_add_3;
meth->field_sub = &ec_GFp_sub_3;
break;
case 4:
meth->field_add = &ec_GFp_add_4;
meth->field_sub = &ec_GFp_sub_4;
break;
case 5:
meth->field_add = &ec_GFp_add_5;
meth->field_sub = &ec_GFp_sub_5;
break;
case 6:
meth->field_add = &ec_GFp_add_6;
meth->field_sub = &ec_GFp_sub_6;
break;
default:
meth->field_add = &ec_GFp_add;
meth->field_sub = &ec_GFp_sub;
}
meth->field_neg = &ec_GFp_neg;
meth->field_sub = &ec_GFp_sub;
meth->field_mod = &ec_GFp_mod;
meth->field_mul = &ec_GFp_mul;
meth->field_sqr = &ec_GFp_sqr;
@ -223,6 +244,676 @@ ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
CLEANUP:
return res;
}
/*
* Inline adds for small curve lengths.
*/
/* 3 words */
/* Field addition specialised for moduli that occupy 3 digits:
 * r = (a + b) mod meth->irr, with the digit arithmetic written in-line.
 *
 * a, b  operands; only their low 1..3 used digits are read. An operand
 *       whose used count is 0 or greater than 3 matches no case below and
 *       is read as zero (presumably callers always pass reduced field
 *       elements -- confirm against callers). The operands' signs are
 *       not examined.
 * r     receives the 3-digit result, clamped, with sign MP_ZPOS.
 * meth  supplies the modulus in meth->irr.
 *
 * Returns MP_OKAY, or the error reported by s_mp_pad. */
mp_err
ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit a0 = 0, a1 = 0, a2 = 0;
    mp_digit r0 = 0, r1 = 0, r2 = 0;
    mp_digit carry;

    /* Copy the operand digits into locals before r is touched; digits
     * beyond an operand's used count stay 0. */
    switch(MP_USED(a)) {
    case 3:
        a2 = MP_DIGIT(a,2);
        /* fallthrough */
    case 2:
        a1 = MP_DIGIT(a,1);
        /* fallthrough */
    case 1:
        a0 = MP_DIGIT(a,0);
    }
    switch(MP_USED(b)) {
    case 3:
        r2 = MP_DIGIT(b,2);
        /* fallthrough */
    case 2:
        r1 = MP_DIGIT(b,1);
        /* fallthrough */
    case 1:
        r0 = MP_DIGIT(b,0);
    }

#ifndef MPI_AMD64_ADD
    MP_ADD_CARRY(a0, r0, r0, 0, carry);
    MP_ADD_CARRY(a1, r1, r1, carry, carry);
    MP_ADD_CARRY(a2, r2, r2, carry, carry);
#else
    /* carry is zeroed by the first instruction, before any input has been
     * consumed, so it must be early-clobber ("=&r"); otherwise the
     * compiler is free to give it the same register as a0, a1 or a2. */
    __asm__ (
        "xorq %3,%3 \n\t"
        "addq %4,%0 \n\t"
        "adcq %5,%1 \n\t"
        "adcq %6,%2 \n\t"
        "adcq $0,%3 \n\t"
        : "=r"(r0), "=r"(r1), "=r"(r2), "=&r"(carry)
        : "r" (a0), "r" (a1), "r" (a2),
          "0" (r0), "1" (r1), "2" (r2)
        : "%cc" );
#endif

    /* Store the raw sum so that mp_cmp below can compare it to the
     * modulus. */
    MP_CHECKOK(s_mp_pad(r, 3));
    MP_DIGIT(r, 2) = r2;
    MP_DIGIT(r, 1) = r1;
    MP_DIGIT(r, 0) = r0;
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 3;

    /* Do a quick subtract of the modulus if the sum carried out of the
     * top digit or is not below the modulus. The top digits are compared
     * first so that mp_cmp is only called when they are equal. */
    a2 = MP_DIGIT(&meth->irr,2);
    if (carry || r2 > a2 ||
        ((r2 == a2) && mp_cmp(r,&meth->irr) != MP_LT)) {
        a1 = MP_DIGIT(&meth->irr,1);
        a0 = MP_DIGIT(&meth->irr,0);
#ifndef MPI_AMD64_ADD
        MP_SUB_BORROW(r0, a0, r0, 0, carry);
        MP_SUB_BORROW(r1, a1, r1, carry, carry);
        MP_SUB_BORROW(r2, a2, r2, carry, carry);
#else
        __asm__ (
            "subq %3,%0 \n\t"
            "sbbq %4,%1 \n\t"
            "sbbq %5,%2 \n\t"
            : "=r"(r0), "=r"(r1), "=r"(r2)
            : "r" (a0), "r" (a1), "r" (a2),
              "0" (r0), "1" (r1), "2" (r2)
            : "%cc" );
#endif
        MP_DIGIT(r, 2) = r2;
        MP_DIGIT(r, 1) = r1;
        MP_DIGIT(r, 0) = r0;
    }

    s_mp_clamp(r);

  CLEANUP:
    return res;
}
/* 4 words */
/* Field addition specialised for moduli that occupy 4 digits:
 * r = (a + b) mod meth->irr, with the digit arithmetic written in-line.
 *
 * a, b  operands; only their low 1..4 used digits are read. An operand
 *       whose used count is 0 or greater than 4 matches no case below and
 *       is read as zero (presumably callers always pass reduced field
 *       elements -- confirm against callers). The operands' signs are
 *       not examined.
 * r     receives the 4-digit result, clamped, with sign MP_ZPOS.
 * meth  supplies the modulus in meth->irr.
 *
 * Returns MP_OKAY, or the error reported by s_mp_pad. */
mp_err
ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
    mp_digit carry;

    /* Copy the operand digits into locals before r is touched; digits
     * beyond an operand's used count stay 0. */
    switch(MP_USED(a)) {
    case 4:
        a3 = MP_DIGIT(a,3);
        /* fallthrough */
    case 3:
        a2 = MP_DIGIT(a,2);
        /* fallthrough */
    case 2:
        a1 = MP_DIGIT(a,1);
        /* fallthrough */
    case 1:
        a0 = MP_DIGIT(a,0);
    }
    switch(MP_USED(b)) {
    case 4:
        r3 = MP_DIGIT(b,3);
        /* fallthrough */
    case 3:
        r2 = MP_DIGIT(b,2);
        /* fallthrough */
    case 2:
        r1 = MP_DIGIT(b,1);
        /* fallthrough */
    case 1:
        r0 = MP_DIGIT(b,0);
    }

#ifndef MPI_AMD64_ADD
    MP_ADD_CARRY(a0, r0, r0, 0, carry);
    MP_ADD_CARRY(a1, r1, r1, carry, carry);
    MP_ADD_CARRY(a2, r2, r2, carry, carry);
    MP_ADD_CARRY(a3, r3, r3, carry, carry);
#else
    /* carry is zeroed by the first instruction, before any input has been
     * consumed, so it must be early-clobber ("=&r"); otherwise the
     * compiler is free to give it the same register as one of a0..a3. */
    __asm__ (
        "xorq %4,%4 \n\t"
        "addq %5,%0 \n\t"
        "adcq %6,%1 \n\t"
        "adcq %7,%2 \n\t"
        "adcq %8,%3 \n\t"
        "adcq $0,%4 \n\t"
        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=&r"(carry)
        : "r" (a0), "r" (a1), "r" (a2), "r" (a3),
          "0" (r0), "1" (r1), "2" (r2), "3" (r3)
        : "%cc" );
#endif

    /* Store the raw sum so that mp_cmp below can compare it to the
     * modulus. */
    MP_CHECKOK(s_mp_pad(r, 4));
    MP_DIGIT(r, 3) = r3;
    MP_DIGIT(r, 2) = r2;
    MP_DIGIT(r, 1) = r1;
    MP_DIGIT(r, 0) = r0;
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 4;

    /* Do a quick subtract of the modulus if the sum carried out of the
     * top digit or is not below the modulus. The top digits are compared
     * first so that mp_cmp is only called when they are equal. */
    a3 = MP_DIGIT(&meth->irr,3);
    if (carry || r3 > a3 ||
        ((r3 == a3) && mp_cmp(r,&meth->irr) != MP_LT)) {
        a2 = MP_DIGIT(&meth->irr,2);
        a1 = MP_DIGIT(&meth->irr,1);
        a0 = MP_DIGIT(&meth->irr,0);
#ifndef MPI_AMD64_ADD
        MP_SUB_BORROW(r0, a0, r0, 0, carry);
        MP_SUB_BORROW(r1, a1, r1, carry, carry);
        MP_SUB_BORROW(r2, a2, r2, carry, carry);
        MP_SUB_BORROW(r3, a3, r3, carry, carry);
#else
        __asm__ (
            "subq %4,%0 \n\t"
            "sbbq %5,%1 \n\t"
            "sbbq %6,%2 \n\t"
            "sbbq %7,%3 \n\t"
            : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
            : "r" (a0), "r" (a1), "r" (a2), "r" (a3),
              "0" (r0), "1" (r1), "2" (r2), "3" (r3)
            : "%cc" );
#endif
        MP_DIGIT(r, 3) = r3;
        MP_DIGIT(r, 2) = r2;
        MP_DIGIT(r, 1) = r1;
        MP_DIGIT(r, 0) = r0;
    }

    s_mp_clamp(r);

  CLEANUP:
    return res;
}
/* 5 words */
/* Modular addition for 5-digit fields: r = a + b, followed by a single
 * subtraction of meth->irr when the sum carried out of the top digit or
 * is not below meth->irr. Operand digits are copied to locals first;
 * an operand whose used count is 0 or above 5 is read as zero. Returns
 * MP_OKAY or the error reported by s_mp_pad. */
mp_err
ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    /* x*: digits of a, reused later for the digits of the modulus */
    mp_digit x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0;
    /* s*: digits of b, then of the running result */
    mp_digit s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0;
    mp_digit carry;
    int reduce;

    /* Gather a's digits, least significant first. */
    if (MP_USED(a) >= 1 && MP_USED(a) <= 5) {
        x0 = MP_DIGIT(a, 0);
        if (MP_USED(a) >= 2) {
            x1 = MP_DIGIT(a, 1);
        }
        if (MP_USED(a) >= 3) {
            x2 = MP_DIGIT(a, 2);
        }
        if (MP_USED(a) >= 4) {
            x3 = MP_DIGIT(a, 3);
        }
        if (MP_USED(a) >= 5) {
            x4 = MP_DIGIT(a, 4);
        }
    }

    /* Gather b's digits the same way. */
    if (MP_USED(b) >= 1 && MP_USED(b) <= 5) {
        s0 = MP_DIGIT(b, 0);
        if (MP_USED(b) >= 2) {
            s1 = MP_DIGIT(b, 1);
        }
        if (MP_USED(b) >= 3) {
            s2 = MP_DIGIT(b, 2);
        }
        if (MP_USED(b) >= 4) {
            s3 = MP_DIGIT(b, 3);
        }
        if (MP_USED(b) >= 5) {
            s4 = MP_DIGIT(b, 4);
        }
    }

    /* Ripple-carry addition, digit by digit. */
    MP_ADD_CARRY(x0, s0, s0, 0, carry);
    MP_ADD_CARRY(x1, s1, s1, carry, carry);
    MP_ADD_CARRY(x2, s2, s2, carry, carry);
    MP_ADD_CARRY(x3, s3, s3, carry, carry);
    MP_ADD_CARRY(x4, s4, s4, carry, carry);

    /* Publish the raw sum in r; mp_cmp below reads it from there. */
    MP_CHECKOK(s_mp_pad(r, 5));
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 5;
    MP_DIGIT(r, 0) = s0;
    MP_DIGIT(r, 1) = s1;
    MP_DIGIT(r, 2) = s2;
    MP_DIGIT(r, 3) = s3;
    MP_DIGIT(r, 4) = s4;

    /* Decide whether the modulus has to come off once. The full
     * comparison is only made when the top digits tie. */
    x4 = MP_DIGIT(&meth->irr, 4);
    reduce = (carry != 0) || (s4 > x4);
    if (!reduce && s4 == x4) {
        reduce = (mp_cmp(r, &meth->irr) != MP_LT);
    }

    if (reduce) {
        x0 = MP_DIGIT(&meth->irr, 0);
        x1 = MP_DIGIT(&meth->irr, 1);
        x2 = MP_DIGIT(&meth->irr, 2);
        x3 = MP_DIGIT(&meth->irr, 3);
        MP_SUB_BORROW(s0, x0, s0, 0, carry);
        MP_SUB_BORROW(s1, x1, s1, carry, carry);
        MP_SUB_BORROW(s2, x2, s2, carry, carry);
        MP_SUB_BORROW(s3, x3, s3, carry, carry);
        MP_SUB_BORROW(s4, x4, s4, carry, carry);
        MP_DIGIT(r, 0) = s0;
        MP_DIGIT(r, 1) = s1;
        MP_DIGIT(r, 2) = s2;
        MP_DIGIT(r, 3) = s3;
        MP_DIGIT(r, 4) = s4;
    }

    s_mp_clamp(r);

  CLEANUP:
    return res;
}
/* 6 words */
/* Modular addition for 6-digit fields: r = a + b, followed by a single
 * subtraction of meth->irr when the sum carried out of the top digit or
 * is not below meth->irr. Operand digits are copied to locals first;
 * an operand whose used count is 0 or above 6 is read as zero. Returns
 * MP_OKAY or the error reported by s_mp_pad. */
mp_err
ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    /* x*: digits of a, reused later for the digits of the modulus */
    mp_digit x0 = 0, x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0;
    /* s*: digits of b, then of the running result */
    mp_digit s0 = 0, s1 = 0, s2 = 0, s3 = 0, s4 = 0, s5 = 0;
    mp_digit carry;
    int reduce;

    /* Gather a's digits, least significant first. */
    if (MP_USED(a) >= 1 && MP_USED(a) <= 6) {
        x0 = MP_DIGIT(a, 0);
        if (MP_USED(a) >= 2) {
            x1 = MP_DIGIT(a, 1);
        }
        if (MP_USED(a) >= 3) {
            x2 = MP_DIGIT(a, 2);
        }
        if (MP_USED(a) >= 4) {
            x3 = MP_DIGIT(a, 3);
        }
        if (MP_USED(a) >= 5) {
            x4 = MP_DIGIT(a, 4);
        }
        if (MP_USED(a) >= 6) {
            x5 = MP_DIGIT(a, 5);
        }
    }

    /* Gather b's digits the same way. */
    if (MP_USED(b) >= 1 && MP_USED(b) <= 6) {
        s0 = MP_DIGIT(b, 0);
        if (MP_USED(b) >= 2) {
            s1 = MP_DIGIT(b, 1);
        }
        if (MP_USED(b) >= 3) {
            s2 = MP_DIGIT(b, 2);
        }
        if (MP_USED(b) >= 4) {
            s3 = MP_DIGIT(b, 3);
        }
        if (MP_USED(b) >= 5) {
            s4 = MP_DIGIT(b, 4);
        }
        if (MP_USED(b) >= 6) {
            s5 = MP_DIGIT(b, 5);
        }
    }

    /* Ripple-carry addition, digit by digit. */
    MP_ADD_CARRY(x0, s0, s0, 0, carry);
    MP_ADD_CARRY(x1, s1, s1, carry, carry);
    MP_ADD_CARRY(x2, s2, s2, carry, carry);
    MP_ADD_CARRY(x3, s3, s3, carry, carry);
    MP_ADD_CARRY(x4, s4, s4, carry, carry);
    MP_ADD_CARRY(x5, s5, s5, carry, carry);

    /* Publish the raw sum in r; mp_cmp below reads it from there. */
    MP_CHECKOK(s_mp_pad(r, 6));
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 6;
    MP_DIGIT(r, 0) = s0;
    MP_DIGIT(r, 1) = s1;
    MP_DIGIT(r, 2) = s2;
    MP_DIGIT(r, 3) = s3;
    MP_DIGIT(r, 4) = s4;
    MP_DIGIT(r, 5) = s5;

    /* Decide whether the modulus has to come off once. The full
     * comparison is only made when the top digits tie. */
    x5 = MP_DIGIT(&meth->irr, 5);
    reduce = (carry != 0) || (s5 > x5);
    if (!reduce && s5 == x5) {
        reduce = (mp_cmp(r, &meth->irr) != MP_LT);
    }

    if (reduce) {
        x0 = MP_DIGIT(&meth->irr, 0);
        x1 = MP_DIGIT(&meth->irr, 1);
        x2 = MP_DIGIT(&meth->irr, 2);
        x3 = MP_DIGIT(&meth->irr, 3);
        x4 = MP_DIGIT(&meth->irr, 4);
        MP_SUB_BORROW(s0, x0, s0, 0, carry);
        MP_SUB_BORROW(s1, x1, s1, carry, carry);
        MP_SUB_BORROW(s2, x2, s2, carry, carry);
        MP_SUB_BORROW(s3, x3, s3, carry, carry);
        MP_SUB_BORROW(s4, x4, s4, carry, carry);
        MP_SUB_BORROW(s5, x5, s5, carry, carry);
        MP_DIGIT(r, 0) = s0;
        MP_DIGIT(r, 1) = s1;
        MP_DIGIT(r, 2) = s2;
        MP_DIGIT(r, 3) = s3;
        MP_DIGIT(r, 4) = s4;
        MP_DIGIT(r, 5) = s5;
    }

    s_mp_clamp(r);

  CLEANUP:
    return res;
}
/*
 * The following subtraction functions do in-line subtractions based
 * on our curve size.
 *
 * ... 3 words
 */
/* Field subtraction specialised for moduli that occupy 3 digits:
 * r = (a - b) mod meth->irr, with the digit arithmetic written in-line.
 *
 * a, b  operands; only their low 1..3 used digits are read. An operand
 *       whose used count is 0 or greater than 3 matches no case below and
 *       is read as zero (presumably callers always pass reduced field
 *       elements -- confirm against callers). The operands' signs are
 *       not examined.
 * r     receives the 3-digit result, clamped, with sign MP_ZPOS.
 * meth  supplies the modulus in meth->irr.
 *
 * Returns MP_OKAY, or the error reported by s_mp_pad. */
mp_err
ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit b0 = 0, b1 = 0, b2 = 0;
    mp_digit r0 = 0, r1 = 0, r2 = 0;
    mp_digit borrow;

    /* Copy the operand digits into locals before r is touched; digits
     * beyond an operand's used count stay 0. */
    switch(MP_USED(a)) {
    case 3:
        r2 = MP_DIGIT(a,2);
        /* fallthrough */
    case 2:
        r1 = MP_DIGIT(a,1);
        /* fallthrough */
    case 1:
        r0 = MP_DIGIT(a,0);
    }
    switch(MP_USED(b)) {
    case 3:
        b2 = MP_DIGIT(b,2);
        /* fallthrough */
    case 2:
        b1 = MP_DIGIT(b,1);
        /* fallthrough */
    case 1:
        b0 = MP_DIGIT(b,0);
    }

#ifndef MPI_AMD64_ADD
    MP_SUB_BORROW(r0, b0, r0, 0, borrow);
    MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
    MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
#else
    /* borrow is zeroed by the first instruction, before any input has
     * been consumed, so it must be early-clobber ("=&r"); otherwise the
     * compiler is free to give it the same register as b0, b1 or b2.
     * The final adcq copies the carry flag left by the sbbq chain (the
     * borrow) into it. */
    __asm__ (
        "xorq %3,%3 \n\t"
        "subq %4,%0 \n\t"
        "sbbq %5,%1 \n\t"
        "sbbq %6,%2 \n\t"
        "adcq $0,%3 \n\t"
        : "=r"(r0), "=r"(r1), "=r"(r2), "=&r" (borrow)
        : "r" (b0), "r" (b1), "r" (b2),
          "0" (r0), "1" (r1), "2" (r2)
        : "%cc" );
#endif

    /* Do a quick add of the modulus if the difference went below 0. The
     * carry out of the top digit is discarded; it cancels the borrow. */
    if (borrow) {
        b2 = MP_DIGIT(&meth->irr,2);
        b1 = MP_DIGIT(&meth->irr,1);
        b0 = MP_DIGIT(&meth->irr,0);
#ifndef MPI_AMD64_ADD
        MP_ADD_CARRY(b0, r0, r0, 0, borrow);
        MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
        MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
#else
        __asm__ (
            "addq %3,%0 \n\t"
            "adcq %4,%1 \n\t"
            "adcq %5,%2 \n\t"
            : "=r"(r0), "=r"(r1), "=r"(r2)
            : "r" (b0), "r" (b1), "r" (b2),
              "0" (r0), "1" (r1), "2" (r2)
            : "%cc" );
#endif
    }

#ifdef MPI_AMD64_ADD
    /* compiler fakeout? */
    /* NOTE(review): the purpose of this branch is not evident from the
     * code; it only grows r to 4 digits when all result digits equal b0.
     * It looks like a workaround for a code-generation problem around
     * the inline assembly -- confirm before removing. */
    if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
        MP_CHECKOK(s_mp_pad(r, 4));
    }
#endif

    MP_CHECKOK(s_mp_pad(r, 3));
    MP_DIGIT(r, 2) = r2;
    MP_DIGIT(r, 1) = r1;
    MP_DIGIT(r, 0) = r0;
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 3;
    s_mp_clamp(r);

  CLEANUP:
    return res;
}
/* 4 words */
/* Field subtraction specialised for moduli that occupy 4 digits:
 * r = (a - b) mod meth->irr, with the digit arithmetic written in-line.
 *
 * a, b  operands; only their low 1..4 used digits are read. An operand
 *       whose used count is 0 or greater than 4 matches no case below and
 *       is read as zero (presumably callers always pass reduced field
 *       elements -- confirm against callers). The operands' signs are
 *       not examined.
 * r     receives the 4-digit result, clamped, with sign MP_ZPOS.
 * meth  supplies the modulus in meth->irr.
 *
 * Returns MP_OKAY, or the error reported by s_mp_pad. */
mp_err
ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
    mp_digit borrow;

    /* Copy the operand digits into locals before r is touched; digits
     * beyond an operand's used count stay 0. */
    switch(MP_USED(a)) {
    case 4:
        r3 = MP_DIGIT(a,3);
        /* fallthrough */
    case 3:
        r2 = MP_DIGIT(a,2);
        /* fallthrough */
    case 2:
        r1 = MP_DIGIT(a,1);
        /* fallthrough */
    case 1:
        r0 = MP_DIGIT(a,0);
    }
    switch(MP_USED(b)) {
    case 4:
        b3 = MP_DIGIT(b,3);
        /* fallthrough */
    case 3:
        b2 = MP_DIGIT(b,2);
        /* fallthrough */
    case 2:
        b1 = MP_DIGIT(b,1);
        /* fallthrough */
    case 1:
        b0 = MP_DIGIT(b,0);
    }

#ifndef MPI_AMD64_ADD
    MP_SUB_BORROW(r0, b0, r0, 0, borrow);
    MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
    MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
    MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
#else
    /* borrow is zeroed by the first instruction, before any input has
     * been consumed, so it must be early-clobber ("=&r"); otherwise the
     * compiler is free to give it the same register as one of b0..b3.
     * The final adcq copies the carry flag left by the sbbq chain (the
     * borrow) into it. */
    __asm__ (
        "xorq %4,%4 \n\t"
        "subq %5,%0 \n\t"
        "sbbq %6,%1 \n\t"
        "sbbq %7,%2 \n\t"
        "sbbq %8,%3 \n\t"
        "adcq $0,%4 \n\t"
        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=&r" (borrow)
        : "r" (b0), "r" (b1), "r" (b2), "r" (b3),
          "0" (r0), "1" (r1), "2" (r2), "3" (r3)
        : "%cc" );
#endif

    /* Do a quick add of the modulus if the difference went below 0. The
     * carry out of the top digit is discarded; it cancels the borrow. */
    if (borrow) {
        b3 = MP_DIGIT(&meth->irr,3);
        b2 = MP_DIGIT(&meth->irr,2);
        b1 = MP_DIGIT(&meth->irr,1);
        b0 = MP_DIGIT(&meth->irr,0);
#ifndef MPI_AMD64_ADD
        MP_ADD_CARRY(b0, r0, r0, 0, borrow);
        MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
        MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
        MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
#else
        __asm__ (
            "addq %4,%0 \n\t"
            "adcq %5,%1 \n\t"
            "adcq %6,%2 \n\t"
            "adcq %7,%3 \n\t"
            : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
            : "r" (b0), "r" (b1), "r" (b2), "r" (b3),
              "0" (r0), "1" (r1), "2" (r2), "3" (r3)
            : "%cc" );
#endif
    }

#ifdef MPI_AMD64_ADD
    /* compiler fakeout? */
    /* NOTE(review): the purpose of this branch is not evident from the
     * code; the pad it performs is repeated unconditionally just below.
     * It looks like a workaround for a code-generation problem around
     * the inline assembly -- confirm before removing. */
    if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
        MP_CHECKOK(s_mp_pad(r, 4));
    }
#endif

    MP_CHECKOK(s_mp_pad(r, 4));
    MP_DIGIT(r, 3) = r3;
    MP_DIGIT(r, 2) = r2;
    MP_DIGIT(r, 1) = r1;
    MP_DIGIT(r, 0) = r0;
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 4;
    s_mp_clamp(r);

  CLEANUP:
    return res;
}
/* 5 words */
/* Field subtraction specialised for moduli that occupy 5 digits:
 * r = (a - b) mod meth->irr, with the digit arithmetic written in-line.
 *
 * a, b  operands; only their low 1..5 used digits are read. An operand
 *       whose used count is 0 or greater than 5 matches no case below and
 *       is read as zero (presumably callers always pass reduced field
 *       elements -- confirm against callers). The operands' signs are
 *       not examined.
 * r     receives the 5-digit result, clamped, with sign MP_ZPOS.
 * meth  supplies the modulus in meth->irr.
 *
 * Returns MP_OKAY, or the error reported by s_mp_pad. */
mp_err
ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
    mp_digit borrow;

    /* Copy the operand digits into locals before r is touched; digits
     * beyond an operand's used count stay 0. */
    switch(MP_USED(a)) {
    case 5:
        r4 = MP_DIGIT(a,4);
        /* fallthrough */
    case 4:
        r3 = MP_DIGIT(a,3);
        /* fallthrough */
    case 3:
        r2 = MP_DIGIT(a,2);
        /* fallthrough */
    case 2:
        r1 = MP_DIGIT(a,1);
        /* fallthrough */
    case 1:
        r0 = MP_DIGIT(a,0);
    }
    switch(MP_USED(b)) {
    case 5:
        b4 = MP_DIGIT(b,4);
        /* fallthrough */
    case 4:
        b3 = MP_DIGIT(b,3);
        /* fallthrough */
    case 3:
        b2 = MP_DIGIT(b,2);
        /* fallthrough */
    case 2:
        b1 = MP_DIGIT(b,1);
        /* fallthrough */
    case 1:
        b0 = MP_DIGIT(b,0);
    }

    MP_SUB_BORROW(r0, b0, r0, 0, borrow);
    MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
    MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
    MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
    MP_SUB_BORROW(r4, b4, r4, borrow, borrow);

    /* Do a quick add of the modulus if the difference went below 0. */
    if (borrow) {
        b4 = MP_DIGIT(&meth->irr,4);
        b3 = MP_DIGIT(&meth->irr,3);
        b2 = MP_DIGIT(&meth->irr,2);
        b1 = MP_DIGIT(&meth->irr,1);
        b0 = MP_DIGIT(&meth->irr,0);
        MP_ADD_CARRY(b0, r0, r0, 0, borrow);
        MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
        MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
        MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
        /* The top digit must take part in the add-back as well, as it
         * does in the 3- and 4-word variants; without it r4 is left
         * without the modulus' top digit and the carry from digit 3.
         * The carry out of this last step is discarded; it cancels the
         * borrow. */
        MP_ADD_CARRY(b4, r4, r4, borrow, borrow);
    }

    MP_CHECKOK(s_mp_pad(r, 5));
    MP_DIGIT(r, 4) = r4;
    MP_DIGIT(r, 3) = r3;
    MP_DIGIT(r, 2) = r2;
    MP_DIGIT(r, 1) = r1;
    MP_DIGIT(r, 0) = r0;
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 5;
    s_mp_clamp(r);

  CLEANUP:
    return res;
}
/* 6 words */
/* Field subtraction specialised for moduli that occupy 6 digits:
 * r = (a - b) mod meth->irr, with the digit arithmetic written in-line.
 *
 * a, b  operands; only their low 1..6 used digits are read. An operand
 *       whose used count is 0 or greater than 6 matches no case below and
 *       is read as zero (presumably callers always pass reduced field
 *       elements -- confirm against callers). The operands' signs are
 *       not examined.
 * r     receives the 6-digit result, clamped, with sign MP_ZPOS.
 * meth  supplies the modulus in meth->irr.
 *
 * Returns MP_OKAY, or the error reported by s_mp_pad. */
mp_err
ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
             const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
    mp_digit borrow;

    /* Copy the operand digits into locals before r is touched; digits
     * beyond an operand's used count stay 0. */
    switch(MP_USED(a)) {
    case 6:
        r5 = MP_DIGIT(a,5);
        /* fallthrough */
    case 5:
        r4 = MP_DIGIT(a,4);
        /* fallthrough */
    case 4:
        r3 = MP_DIGIT(a,3);
        /* fallthrough */
    case 3:
        r2 = MP_DIGIT(a,2);
        /* fallthrough */
    case 2:
        r1 = MP_DIGIT(a,1);
        /* fallthrough */
    case 1:
        r0 = MP_DIGIT(a,0);
    }
    switch(MP_USED(b)) {
    case 6:
        b5 = MP_DIGIT(b,5);
        /* fallthrough */
    case 5:
        b4 = MP_DIGIT(b,4);
        /* fallthrough */
    case 4:
        b3 = MP_DIGIT(b,3);
        /* fallthrough */
    case 3:
        b2 = MP_DIGIT(b,2);
        /* fallthrough */
    case 2:
        b1 = MP_DIGIT(b,1);
        /* fallthrough */
    case 1:
        b0 = MP_DIGIT(b,0);
    }

    MP_SUB_BORROW(r0, b0, r0, 0, borrow);
    MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
    MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
    MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
    MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
    MP_SUB_BORROW(r5, b5, r5, borrow, borrow);

    /* Do a quick add of the modulus if the difference went below 0. */
    if (borrow) {
        b5 = MP_DIGIT(&meth->irr,5);
        b4 = MP_DIGIT(&meth->irr,4);
        b3 = MP_DIGIT(&meth->irr,3);
        b2 = MP_DIGIT(&meth->irr,2);
        b1 = MP_DIGIT(&meth->irr,1);
        b0 = MP_DIGIT(&meth->irr,0);
        MP_ADD_CARRY(b0, r0, r0, 0, borrow);
        MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
        MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
        MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
        MP_ADD_CARRY(b4, r4, r4, borrow, borrow);
        /* The top digit must take part in the add-back as well, as it
         * does in the 3- and 4-word variants; without it r5 is left
         * without the modulus' top digit and the carry from digit 4.
         * The carry out of this last step is discarded; it cancels the
         * borrow. */
        MP_ADD_CARRY(b5, r5, r5, borrow, borrow);
    }

    MP_CHECKOK(s_mp_pad(r, 6));
    MP_DIGIT(r, 5) = r5;
    MP_DIGIT(r, 4) = r4;
    MP_DIGIT(r, 3) = r3;
    MP_DIGIT(r, 2) = r2;
    MP_DIGIT(r, 1) = r1;
    MP_DIGIT(r, 0) = r0;
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 6;
    s_mp_clamp(r);

  CLEANUP:
    return res;
}
/* Reduces an integer to a field element. */
mp_err

Просмотреть файл

@ -42,6 +42,8 @@
#include "mpi-priv.h"
#include <stdlib.h>
#define ECP192_DIGITS ECL_CURVE_DIGITS(192)
/* Fast modular reduction for p192 = 2^192 - 2^64 - 1. a can be r. Uses
* algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
* Implementation of the NIST Elliptic Curves over Prime Fields. */
@ -50,101 +52,127 @@ ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
mp_err res = MP_OKAY;
mp_size a_used = MP_USED(a);
/* s is a statically-allocated mp_int of exactly the size we need */
mp_int s;
mp_digit r3;
#ifndef MPI_AMD64_ADD
mp_digit carry;
#endif
#ifdef ECL_THIRTY_TWO_BIT
mp_digit sa[6];
mp_digit a11 = 0, a10, a9 = 0, a8, a7 = 0, a6;
MP_SIGN(&s) = MP_ZPOS;
MP_ALLOC(&s) = 6;
MP_USED(&s) = 6;
MP_DIGITS(&s) = sa;
mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
#else
mp_digit sa[3];
mp_digit a5 = 0, a4 = 0, a3 = 0;
MP_SIGN(&s) = MP_ZPOS;
MP_ALLOC(&s) = 3;
MP_USED(&s) = 3;
MP_DIGITS(&s) = sa;
mp_digit r0, r1, r2;
#endif
/* reduction not needed if a is not larger than field size */
#ifdef ECL_THIRTY_TWO_BIT
if (a_used < 6) {
#else
if (a_used < 3) {
#endif
if (a_used < ECP192_DIGITS) {
if (a == r) {
return MP_OKAY;
}
return mp_copy(a, r);
}
#ifdef ECL_THIRTY_TWO_BIT
/* for polynomials larger than twice the field size, use regular
* reduction */
if (a_used > 12) {
if (a_used > ECP192_DIGITS*2) {
MP_CHECKOK(mp_mod(a, &meth->irr, r));
} else {
/* copy out upper words of a */
#ifdef ECL_THIRTY_TWO_BIT
/* in all the math below,
* nXb is most signifiant, nXa is least significant */
switch (a_used) {
case 12:
a11 = MP_DIGIT(a, 11);
a5b = MP_DIGIT(a, 11);
case 11:
a10 = MP_DIGIT(a, 10);
a5a = MP_DIGIT(a, 10);
case 10:
a9 = MP_DIGIT(a, 9);
a4b = MP_DIGIT(a, 9);
case 9:
a8 = MP_DIGIT(a, 8);
a4a = MP_DIGIT(a, 8);
case 8:
a7 = MP_DIGIT(a, 7);
a3b = MP_DIGIT(a, 7);
case 7:
a6 = MP_DIGIT(a, 6);
a3a = MP_DIGIT(a, 6);
}
r2b= MP_DIGIT(a, 5);
r2a= MP_DIGIT(a, 4);
r1b = MP_DIGIT(a, 3);
r1a = MP_DIGIT(a, 2);
r0b = MP_DIGIT(a, 1);
r0a = MP_DIGIT(a, 0);
/* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
MP_ADD_CARRY(r0a, a3a, r0a, 0, carry);
MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
r3 = carry; carry = 0;
MP_ADD_CARRY(r0a, a5a, r0a, 0, carry);
MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
r3 += carry;
MP_ADD_CARRY(r1a, a4a, r1a, 0, carry);
MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
MP_ADD_CARRY(r2a, 0, r2a, carry, carry);
MP_ADD_CARRY(r2b, 0, r2b, carry, carry);
r3 += carry;
/* reduce out the carry */
while (r3) {
MP_ADD_CARRY(r0a, r3, r0a, 0, carry);
MP_ADD_CARRY(r0b, 0, r0b, carry, carry);
MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
MP_ADD_CARRY(r1b, 0, r1b, carry, carry);
MP_ADD_CARRY(r2a, 0, r2a, carry, carry);
MP_ADD_CARRY(r2b, 0, r2b, carry, carry);
r3 = carry;
}
/* check for final reduction */
/*
* our field is 0xffffffffffffffff, 0xfffffffffffffffe,
* 0xffffffffffffffff. That means we can only be over and need
* one more reduction
* if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
* and
* r1 == 0xffffffffffffffffff or
* r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
* In all cases, we subtract the field (or add the 2's
* complement value (1,1,0)). (r0, r1, r2)
*/
if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
&& (r1b == 0xffffffff) ) &&
((r1a == 0xffffffff) ||
(r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
(r0b == 0xffffffff)) ) {
/* do a quick subtract */
MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
r0b += carry;
r1a = r1b = r2a = r2b = 0;
}
/* set the lower words of r */
if (a != r) {
MP_CHECKOK(s_mp_pad(r, 7));
MP_DIGIT(r, 5) = MP_DIGIT(a, 5);
MP_DIGIT(r, 4) = MP_DIGIT(a, 4);
MP_DIGIT(r, 3) = MP_DIGIT(a, 3);
MP_DIGIT(r, 2) = MP_DIGIT(a, 2);
MP_DIGIT(r, 1) = MP_DIGIT(a, 1);
MP_DIGIT(r, 0) = MP_DIGIT(a, 0);
MP_CHECKOK(s_mp_pad(r, 6));
}
MP_DIGIT(r, 5) = r2b;
MP_DIGIT(r, 4) = r2a;
MP_DIGIT(r, 3) = r1b;
MP_DIGIT(r, 2) = r1a;
MP_DIGIT(r, 1) = r0b;
MP_DIGIT(r, 0) = r0a;
MP_USED(r) = 6;
/* compute r = s1 + s2 + s3 + s4, where s1 = (a2,a1,a0), s2 =
* (0,a3,a3), s3 = (a4,a4,0), and s4 = (a5,a5,a5), for
* sixty-four-bit words */
switch (a_used) {
case 12:
case 11:
sa[5] = sa[3] = sa[1] = a11;
sa[4] = sa[2] = sa[0] = a10;
MP_CHECKOK(mp_add(r, &s, r));
case 10:
case 9:
sa[5] = sa[3] = a9;
sa[4] = sa[2] = a8;
sa[1] = sa[0] = 0;
MP_CHECKOK(mp_add(r, &s, r));
case 8:
case 7:
sa[5] = sa[4] = 0;
sa[3] = sa[1] = a7;
sa[2] = sa[0] = a6;
MP_CHECKOK(mp_add(r, &s, r));
}
/* there might be 1 or 2 bits left to reduce; use regular
* reduction for this */
MP_CHECKOK(mp_mod(r, &meth->irr, r));
}
#else
/* for polynomials larger than twice the field size, use regular
* reduction */
if (a_used > 6) {
MP_CHECKOK(mp_mod(a, &meth->irr, r));
} else {
/* copy out upper words of a */
switch (a_used) {
case 6:
a5 = MP_DIGIT(a, 5);
@ -153,39 +181,268 @@ ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
case 4:
a3 = MP_DIGIT(a, 3);
}
r2 = MP_DIGIT(a, 2);
r1 = MP_DIGIT(a, 1);
r0 = MP_DIGIT(a, 0);
/* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
#ifndef MPI_AMD64_ADD
MP_ADD_CARRY(r0, a3, r0, 0, carry);
MP_ADD_CARRY(r1, a3, r1, carry, carry);
MP_ADD_CARRY(r2, a4, r2, carry, carry);
r3 = carry;
MP_ADD_CARRY(r0, a5, r0, 0, carry);
MP_ADD_CARRY(r1, a5, r1, carry, carry);
MP_ADD_CARRY(r2, a5, r2, carry, carry);
r3 += carry;
MP_ADD_CARRY(r1, a4, r1, 0, carry);
MP_ADD_CARRY(r2, 0, r2, carry, carry);
r3 += carry;
#else
r2 = MP_DIGIT(a, 2);
r1 = MP_DIGIT(a, 1);
r0 = MP_DIGIT(a, 0);
/* set the lower words of r */
__asm__ (
"xorq %3,%3 \n\t"
"addq %4,%0 \n\t"
"adcq %4,%1 \n\t"
"adcq %5,%2 \n\t"
"adcq $0,%3 \n\t"
"addq %6,%0 \n\t"
"adcq %6,%1 \n\t"
"adcq %6,%2 \n\t"
"adcq $0,%3 \n\t"
"addq %5,%1 \n\t"
"adcq $0,%2 \n\t"
"adcq $0,%3 \n\t"
: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
"=r"(a4), "=r"(a5)
: "0" (r0), "1" (r1), "2" (r2), "3" (r3),
"4" (a3), "5" (a4), "6"(a5)
: "%cc" );
#endif
/* reduce out the carry */
while (r3) {
#ifndef MPI_AMD64_ADD
MP_ADD_CARRY(r0, r3, r0, 0, carry);
MP_ADD_CARRY(r1, r3, r1, carry, carry);
MP_ADD_CARRY(r2, 0, r2, carry, carry);
r3 = carry;
#else
a3=r3;
__asm__ (
"xorq %3,%3 \n\t"
"addq %4,%0 \n\t"
"adcq %4,%1 \n\t"
"adcq $0,%2 \n\t"
"adcq $0,%3 \n\t"
: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
: "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
: "%cc" );
#endif
}
/* check for final reduction */
/*
* our field is 0xffffffffffffffff, 0xfffffffffffffffe,
* 0xffffffffffffffff. That means we can only be over and need
* one more reduction
* if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
* and
* r1 == 0xffffffffffffffffff or
* r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
* In all cases, we subtract the field (or add the 2's
* complement value (1,1,0)). (r0, r1, r2)
*/
if (r3 || ((r2 == MP_DIGIT_MAX) &&
((r1 == MP_DIGIT_MAX) ||
((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
/* do a quick subtract */
r0++;
r1 = r2 = 0;
}
/* set the lower words of r */
if (a != r) {
MP_CHECKOK(s_mp_pad(r, 4));
MP_DIGIT(r, 2) = MP_DIGIT(a, 2);
MP_DIGIT(r, 1) = MP_DIGIT(a, 1);
MP_DIGIT(r, 0) = MP_DIGIT(a, 0);
MP_CHECKOK(s_mp_pad(r, 3));
}
MP_DIGIT(r, 2) = r2;
MP_DIGIT(r, 1) = r1;
MP_DIGIT(r, 0) = r0;
MP_USED(r) = 3;
/* compute r = s1 + s2 + s3 + s4, where s1 = (a2,a1,a0), s2 =
* (0,a3,a3), s3 = (a4,a4,0), and s4 = (a5,a5,a5) */
switch (a_used) {
case 6:
sa[2] = sa[1] = sa[0] = a5;
MP_CHECKOK(mp_add(r, &s, r));
case 5:
sa[2] = sa[1] = a4;
sa[0] = 0;
MP_CHECKOK(mp_add(r, &s, r));
case 4:
sa[2] = 0;
sa[1] = sa[0] = a3;
MP_CHECKOK(mp_add(r, &s, r));
}
/* there might be 1 or 2 bits left to reduce; use regular
* reduction for this */
MP_CHECKOK(mp_mod(r, &meth->irr, r));
}
#endif
}
CLEANUP:
return res;
}
#ifndef ECL_THIRTY_TWO_BIT
/* Compute the sum of two p192 field elements: r = (a + b) mod p192. The
 * work is done in-line since the number of words is so small that we
 * don't want the overhead of mp function calls. Uses optimized modular
 * reduction for p192.
 *
 * This function is only compiled when ECL_THIRTY_TWO_BIT is not defined,
 * so a field element is 3 digits. Only the low 1..3 used digits of a and
 * b are read; an operand whose used count is 0 or greater than 3 matches
 * no case below and is read as zero. meth is not used. r receives the
 * clamped 3-digit result with sign MP_ZPOS.
 *
 * Returns MP_OKAY, or the error reported by s_mp_pad. */
mp_err
ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
                    const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_digit a0 = 0, a1 = 0, a2 = 0;
    mp_digit r0 = 0, r1 = 0, r2 = 0;
    mp_digit carry;

    /* Copy the operand digits into locals before r is touched; digits
     * beyond an operand's used count stay 0. */
    switch(MP_USED(a)) {
    case 3:
        a2 = MP_DIGIT(a,2);
        /* fallthrough */
    case 2:
        a1 = MP_DIGIT(a,1);
        /* fallthrough */
    case 1:
        a0 = MP_DIGIT(a,0);
    }
    switch(MP_USED(b)) {
    case 3:
        r2 = MP_DIGIT(b,2);
        /* fallthrough */
    case 2:
        r1 = MP_DIGIT(b,1);
        /* fallthrough */
    case 1:
        r0 = MP_DIGIT(b,0);
    }

#ifndef MPI_AMD64_ADD
    MP_ADD_CARRY(a0, r0, r0, 0, carry);
    MP_ADD_CARRY(a1, r1, r1, carry, carry);
    MP_ADD_CARRY(a2, r2, r2, carry, carry);
#else
    /* carry is zeroed by the first instruction, before any input has been
     * consumed, so it must be early-clobber ("=&r"); otherwise the
     * compiler is free to give it the same register as a0, a1 or a2. */
    __asm__ (
        "xorq %3,%3 \n\t"
        "addq %4,%0 \n\t"
        "adcq %5,%1 \n\t"
        "adcq %6,%2 \n\t"
        "adcq $0,%3 \n\t"
        : "=r"(r0), "=r"(r1), "=r"(r2), "=&r"(carry)
        : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
          "1" (r1), "2" (r2)
        : "%cc" );
#endif

    /* Do a quick 'subtract' if we've gone over: the sum carried out, or
     * it is >= p192 = (MAX, MAX-1, MAX) as (r2, r1, r0). Subtracting
     * p192 modulo 2^192 is done by adding its 2's complement, which is
     * 1 in each of the two low digits. */
    if (carry || ((r2 == MP_DIGIT_MAX) &&
                  ((r1 == MP_DIGIT_MAX) ||
                   ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
#ifndef MPI_AMD64_ADD
        MP_ADD_CARRY(r0, 1, r0, 0, carry);
        MP_ADD_CARRY(r1, 1, r1, carry, carry);
        MP_ADD_CARRY(r2, 0, r2, carry, carry);
#else
        __asm__ (
            "addq $1,%0 \n\t"
            "adcq $1,%1 \n\t"
            "adcq $0,%2 \n\t"
            : "=r"(r0), "=r"(r1), "=r"(r2)
            : "0" (r0), "1" (r1), "2" (r2)
            : "%cc" );
#endif
    }

    MP_CHECKOK(s_mp_pad(r, 3));
    MP_DIGIT(r, 2) = r2;
    MP_DIGIT(r, 1) = r1;
    MP_DIGIT(r, 0) = r0;
    MP_SIGN(r) = MP_ZPOS;
    MP_USED(r) = 3;
    s_mp_clamp(r);

  CLEANUP:
    return res;
}
/* Compute the difference of two p192 field elements: r = (a - b) mod p192.
 * The work is done in-line on three digits, since the number of words is
 * so small that we don't want the overhead of mp function calls.
 *
 * a, b - operands; only their low three digits are read. Neither switch
 *        has a case for more than three used digits, so such an operand
 *        is silently treated as zero. Callers presumably pass values
 *        already reduced below p192 -- the single conditional correction
 *        below is only sufficient in that case.
 * r    - receives the difference; padded to three digits, then clamped.
 *        r may be a or b because every digit is copied to a local first.
 * meth - not used here; present to match the field_sub signature.
 *
 * Returns res: MP_OKAY unless s_mp_pad() fails (MP_CHECKOK is presumably
 * what stores the error in res and jumps to CLEANUP).
 */
mp_err
ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
			const GFMethod *meth)
{
	mp_err res = MP_OKAY;
	mp_digit b0 = 0, b1 = 0, b2 = 0;
	mp_digit r0 = 0, r1 = 0, r2 = 0;
	mp_digit borrow;

	/* load a straight into the result digits */
	switch(MP_USED(a)) {
	case 3:
		r2 = MP_DIGIT(a,2);
		/* fall through */
	case 2:
		r1 = MP_DIGIT(a,1);
		/* fall through */
	case 1:
		r0 = MP_DIGIT(a,0);
	}
	/* load b; digits above MP_USED(b) stay zero */
	switch(MP_USED(b)) {
	case 3:
		b2 = MP_DIGIT(b,2);
		/* fall through */
	case 2:
		b1 = MP_DIGIT(b,1);
		/* fall through */
	case 1:
		b0 = MP_DIGIT(b,0);
	}

#ifndef MPI_AMD64_ADD
	MP_SUB_BORROW(r0, b0, r0, 0,      borrow);
	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
#else
	/* borrow (%3) is zeroed, and r0/r1 (%0/%1) are written, before the
	 * remaining plain inputs (%4-%6) have been read.  Those outputs are
	 * therefore marked early-clobber ('&') so the compiler cannot place
	 * them in a register that still holds an unread input. */
	__asm__ (
		"xorq   %3,%3           \n\t"
		"subq   %4,%0           \n\t"
		"sbbq   %5,%1           \n\t"
		"sbbq   %6,%2           \n\t"
		"adcq   $0,%3           \n\t"
		: "=&r"(r0), "=&r"(r1), "=r"(r2), "=&r"(borrow)
		: "r" (b0), "r" (b1), "r" (b2), "0" (r0),
		  "1" (r1), "2" (r2)
		: "cc" );
#endif

	/* Do quick 'add' if we've gone under 0
	 * (subtract the 2's complement of the curve field):
	 * take 1 from r0 and 1 from r1 and let the borrow ripple into r2. */
	if (borrow) {
#ifndef MPI_AMD64_ADD
		MP_SUB_BORROW(r0, 1, r0, 0,      borrow);
		MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
		MP_SUB_BORROW(r2, 0, r2, borrow, borrow);
#else
		__asm__ (
			"subq   $1,%0           \n\t"
			"sbbq   $1,%1           \n\t"
			"sbbq   $0,%2           \n\t"
			: "=r"(r0), "=r"(r1), "=r"(r2)
			: "0" (r0), "1" (r1), "2" (r2)
			: "cc" );
#endif
	}

	/* store the three result digits and drop leading zero digits */
	MP_CHECKOK(s_mp_pad(r, 3));
	MP_DIGIT(r, 2) = r2;
	MP_DIGIT(r, 1) = r1;
	MP_DIGIT(r, 0) = r0;
	MP_SIGN(r) = MP_ZPOS;
	MP_USED(r) = 3;
	s_mp_clamp(r);

  CLEANUP:
	return res;
}
#endif
/* Compute the square of polynomial a, reduce modulo p192. Store the
* result in r. r could be a. Uses optimized modular reduction for p192.
*/
@ -215,6 +472,31 @@ ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
return res;
}
/* Divides two field elements. If a is NULL, then returns the inverse of
 * b.
 *
 * a    - numerator, or NULL to request only the inverse of b
 * b    - denominator
 * r    - receives b^-1 mod meth->irr (a == NULL) or a * b^-1 reduced
 *        with ec_GFp_nistp192_mod
 * meth - supplies the modulus meth->irr; also handed to the reduction
 *
 * Returns the status of mp_invmod() on the inverse-only path, otherwise
 * res (MP_OKAY unless one of the checked calls fails).
 */
mp_err
ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
		   const GFMethod *meth)
{
	mp_err res = MP_OKAY;
	mp_int t;

	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
	if (a == NULL) {
		return mp_invmod(b, &meth->irr, r);
	}

	/* Mark t as holding no storage before the first checked call: if
	 * mp_init() itself fails we still reach mp_clear(&t) at CLEANUP, and
	 * it must not see an indeterminate digits pointer. */
	MP_DIGITS(&t) = 0;

	/* MPI doesn't support divmod, so we implement it using invmod and
	 * mulmod. */
	MP_CHECKOK(mp_init(&t));
	MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
	MP_CHECKOK(mp_mul(a, &t, r));
	MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
  CLEANUP:
	mp_clear(&t);
	return res;
}
/* Wire in fast field arithmetic and precomputation of base point for
* named curves. */
mp_err
@ -224,6 +506,11 @@ ec_group_set_gfp192(ECGroup *group, ECCurveName name)
group->meth->field_mod = &ec_GFp_nistp192_mod;
group->meth->field_mul = &ec_GFp_nistp192_mul;
group->meth->field_sqr = &ec_GFp_nistp192_sqr;
group->meth->field_div = &ec_GFp_nistp192_div;
#ifndef ECL_THIRTY_TWO_BIT
group->meth->field_add = &ec_GFp_nistp192_add;
group->meth->field_sub = &ec_GFp_nistp192_sub;
#endif
}
return MP_OKAY;
}

Просмотреть файл

@ -42,6 +42,8 @@
#include "mpi-priv.h"
#include <stdlib.h>
/* Number of mp_digit words needed to hold a 224-bit value
 * (ECL_CURVE_DIGITS rounds the bit count up to whole digits). */
#define ECP224_DIGITS ECL_CURVE_DIGITS(224)
/* Fast modular reduction for p224 = 2^224 - 2^96 + 1. a can be r. Uses
* algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
* Implementation of the NIST Elliptic Curves over Prime Fields. */
@ -51,213 +53,254 @@ ec_GFp_nistp224_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
mp_err res = MP_OKAY;
mp_size a_used = MP_USED(a);
/* s is a statically-allocated mp_int of exactly the size we need */
mp_int s;
int r3b;
mp_digit carry;
#ifdef ECL_THIRTY_TWO_BIT
mp_digit sa[8];
mp_digit a13 = 0, a12 = 0, a11 = 0, a10, a9 = 0, a8, a7;
MP_SIGN(&s) = MP_ZPOS;
MP_ALLOC(&s) = 8;
MP_USED(&s) = 7;
MP_DIGITS(&s) = sa;
mp_digit a6a = 0, a6b = 0,
a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
mp_digit r0a, r0b, r1a, r1b, r2a, r2b, r3a;
#else
mp_digit sa[4];
mp_digit a6 = 0, a5 = 0, a4 = 0, a3 = 0;
MP_SIGN(&s) = MP_ZPOS;
MP_ALLOC(&s) = 4;
MP_USED(&s) = 4;
MP_DIGITS(&s) = sa;
mp_digit a6 = 0, a5 = 0, a4 = 0, a3b = 0, a5a = 0;
mp_digit a6b = 0, a6a_a5b = 0, a5b = 0, a5a_a4b = 0, a4a_a3b = 0;
mp_digit r0, r1, r2, r3;
#endif
/* reduction not needed if a is not larger than field size */
#ifdef ECL_THIRTY_TWO_BIT
if (a_used < 8) {
#else
if (a_used < 4) {
#endif
if (a_used < ECP224_DIGITS) {
if (a == r) return MP_OKAY;
return mp_copy(a, r);
}
#ifdef ECL_THIRTY_TWO_BIT
/* for polynomials larger than twice the field size, use regular
* reduction */
if (a_used > 14) {
if (a_used > ECL_CURVE_DIGITS(224*2)) {
MP_CHECKOK(mp_mod(a, &meth->irr, r));
} else {
#ifdef ECL_THIRTY_TWO_BIT
/* copy out upper words of a */
switch (a_used) {
case 14:
a13 = MP_DIGIT(a, 13);
a6b = MP_DIGIT(a, 13);
case 13:
a12 = MP_DIGIT(a, 12);
a6a = MP_DIGIT(a, 12);
case 12:
a11 = MP_DIGIT(a, 11);
a5b = MP_DIGIT(a, 11);
case 11:
a10 = MP_DIGIT(a, 10);
a5a = MP_DIGIT(a, 10);
case 10:
a9 = MP_DIGIT(a, 9);
a4b = MP_DIGIT(a, 9);
case 9:
a8 = MP_DIGIT(a, 8);
a4a = MP_DIGIT(a, 8);
case 8:
a7 = MP_DIGIT(a, 7);
a3b = MP_DIGIT(a, 7);
}
r3a = MP_DIGIT(a, 6);
r2b= MP_DIGIT(a, 5);
r2a= MP_DIGIT(a, 4);
r1b = MP_DIGIT(a, 3);
r1a = MP_DIGIT(a, 2);
r0b = MP_DIGIT(a, 1);
r0a = MP_DIGIT(a, 0);
/* implement r = (a3a,a2,a1,a0)
+(a5a, a4,a3b, 0)
+( 0, a6,a5b, 0)
-( 0 0, 0|a6b, a6a|a5b )
-( a6b, a6a|a5b, a5a|a4b, a4a|a3b ) */
MP_ADD_CARRY (r1b, a3b, r1b, 0, carry);
MP_ADD_CARRY (r2a, a4a, r2a, carry, carry);
MP_ADD_CARRY (r2b, a4b, r2b, carry, carry);
MP_ADD_CARRY (r3a, a5a, r3a, carry, carry);
r3b = carry;
MP_ADD_CARRY (r1b, a5b, r1b, 0, carry);
MP_ADD_CARRY (r2a, a6a, r2a, carry, carry);
MP_ADD_CARRY (r2b, a6b, r2b, carry, carry);
MP_ADD_CARRY (r3a, 0, r3a, carry, carry);
r3b += carry;
MP_SUB_BORROW(r0a, a3b, r0a, 0, carry);
MP_SUB_BORROW(r0b, a4a, r0b, carry, carry);
MP_SUB_BORROW(r1a, a4b, r1a, carry, carry);
MP_SUB_BORROW(r1b, a5a, r1b, carry, carry);
MP_SUB_BORROW(r2a, a5b, r2a, carry, carry);
MP_SUB_BORROW(r2b, a6a, r2b, carry, carry);
MP_SUB_BORROW(r3a, a6b, r3a, carry, carry);
r3b -= carry;
MP_SUB_BORROW(r0a, a5b, r0a, 0, carry);
MP_SUB_BORROW(r0b, a6a, r0b, carry, carry);
MP_SUB_BORROW(r1a, a6b, r1a, carry, carry);
if (carry) {
MP_SUB_BORROW(r1b, 0, r1b, carry, carry);
MP_SUB_BORROW(r2a, 0, r2a, carry, carry);
MP_SUB_BORROW(r2b, 0, r2b, carry, carry);
MP_SUB_BORROW(r3a, 0, r3a, carry, carry);
r3b -= carry;
}
while (r3b > 0) {
int tmp;
MP_ADD_CARRY(r1b, r3b, r1b, 0, carry);
if (carry) {
MP_ADD_CARRY(r2a, 0, r2a, carry, carry);
MP_ADD_CARRY(r2b, 0, r2b, carry, carry);
MP_ADD_CARRY(r3a, 0, r3a, carry, carry);
}
tmp = carry;
MP_SUB_BORROW(r0a, r3b, r0a, 0, carry);
if (carry) {
MP_SUB_BORROW(r0b, 0, r0b, carry, carry);
MP_SUB_BORROW(r1a, 0, r1a, carry, carry);
MP_SUB_BORROW(r1b, 0, r1b, carry, carry);
MP_SUB_BORROW(r2a, 0, r2a, carry, carry);
MP_SUB_BORROW(r2b, 0, r2b, carry, carry);
MP_SUB_BORROW(r3a, 0, r3a, carry, carry);
tmp -= carry;
}
if (tmp < 0) {
printf("gone from pos to neg\n");
}
r3b = tmp;
}
while (r3b < 0) {
mp_digit maxInt = MP_DIGIT_MAX;
MP_ADD_CARRY (r0a, 1, r0a, 0, carry);
MP_ADD_CARRY (r0b, 0, r0b, carry, carry);
MP_ADD_CARRY (r1a, 0, r1a, carry, carry);
MP_ADD_CARRY (r1b, maxInt, r1b, carry, carry);
MP_ADD_CARRY (r2a, maxInt, r2a, carry, carry);
MP_ADD_CARRY (r2b, maxInt, r2b, carry, carry);
MP_ADD_CARRY (r3a, maxInt, r3a, carry, carry);
r3b += carry;
}
/* check for final reduction */
/* now the only way we are over is if the top 4 words are all ones */
if ((r3a == MP_DIGIT_MAX) && (r2b == MP_DIGIT_MAX)
&& (r2a == MP_DIGIT_MAX) && (r1b == MP_DIGIT_MAX) &&
((r1a != 0) || (r0b != 0) || (r0a != 0)) ) {
/* one last subraction */
MP_SUB_BORROW(r0a, 1, r0a, 0, carry);
MP_SUB_BORROW(r0b, 0, r0b, carry, carry);
MP_SUB_BORROW(r1a, 0, r1a, carry, carry);
r1b = r2a = r2b = r3a = 0;
}
if (a != r) {
MP_CHECKOK(s_mp_pad(r, 7));
}
/* set the lower words of r */
if (a != r) {
MP_CHECKOK(s_mp_pad(r, 8));
MP_DIGIT(r, 6) = MP_DIGIT(a, 6);
MP_DIGIT(r, 5) = MP_DIGIT(a, 5);
MP_DIGIT(r, 4) = MP_DIGIT(a, 4);
MP_DIGIT(r, 3) = MP_DIGIT(a, 3);
MP_DIGIT(r, 2) = MP_DIGIT(a, 2);
MP_DIGIT(r, 1) = MP_DIGIT(a, 1);
MP_DIGIT(r, 0) = MP_DIGIT(a, 0);
}
MP_SIGN(r) = MP_ZPOS;
MP_USED(r) = 7;
switch (a_used) {
case 14:
case 13:
case 12:
case 11:
sa[6] = a10;
case 10:
sa[5] = a9;
case 9:
sa[4] = a8;
case 8:
sa[3] = a7;
sa[2] = sa[1] = sa[0] = 0;
MP_USED(&s) = a_used - 4;
if (MP_USED(&s) > 7)
MP_USED(&s) = 7;
MP_CHECKOK(mp_add(r, &s, r));
}
switch (a_used) {
case 14:
sa[5] = a13;
case 13:
sa[4] = a12;
case 12:
sa[3] = a11;
sa[2] = sa[1] = sa[0] = 0;
MP_USED(&s) = a_used - 8;
MP_CHECKOK(mp_add(r, &s, r));
}
switch (a_used) {
case 14:
sa[6] = a13;
case 13:
sa[5] = a12;
case 12:
sa[4] = a11;
case 11:
sa[3] = a10;
case 10:
sa[2] = a9;
case 9:
sa[1] = a8;
case 8:
sa[0] = a7;
MP_USED(&s) = a_used - 7;
MP_CHECKOK(mp_sub(r, &s, r));
}
switch (a_used) {
case 14:
sa[2] = a13;
case 13:
sa[1] = a12;
case 12:
sa[0] = a11;
MP_USED(&s) = a_used - 11;
MP_CHECKOK(mp_sub(r, &s, r));
}
/* there might be 1 or 2 bits left to reduce; use regular
* reduction for this */
MP_CHECKOK(mp_mod(r, &meth->irr, r));
}
MP_DIGIT(r, 6) = r3a;
MP_DIGIT(r, 5) = r2b;
MP_DIGIT(r, 4) = r2a;
MP_DIGIT(r, 3) = r1b;
MP_DIGIT(r, 2) = r1a;
MP_DIGIT(r, 1) = r0b;
MP_DIGIT(r, 0) = r0a;
#else
/* for polynomials larger than twice the field size, use regular
* reduction */
if (a_used > 7) {
MP_CHECKOK(mp_mod(a, &meth->irr, r));
} else {
/* copy out upper words of a */
switch (a_used) {
case 7:
a6 = MP_DIGIT(a, 6);
a6b = a6 >> 32;
a6a_a5b = a6 << 32;
case 6:
a5 = MP_DIGIT(a, 5);
a5b = a5 >> 32;
a6a_a5b |= a5b;
a5b = a5b << 32;
a5a_a4b = a5 << 32;
a5a = a5 & 0xffffffff;
case 5:
a4 = MP_DIGIT(a, 4);
a5a_a4b |= a4 >> 32;
a4a_a3b = a4 << 32;
case 4:
a3 = MP_DIGIT(a, 3) >> 32;
a3b = MP_DIGIT(a, 3) >> 32;
a4a_a3b |= a3b;
a3b = a3b << 32;
}
r3 = MP_DIGIT(a, 3) & 0xffffffff;
r2 = MP_DIGIT(a, 2);
r1 = MP_DIGIT(a, 1);
r0 = MP_DIGIT(a, 0);
/* implement r = (a3a,a2,a1,a0)
+(a5a, a4,a3b, 0)
+( 0, a6,a5b, 0)
-( 0 0, 0|a6b, a6a|a5b )
-( a6b, a6a|a5b, a5a|a4b, a4a|a3b ) */
MP_ADD_CARRY (r1, a3b, r1, 0, carry);
MP_ADD_CARRY (r2, a4 , r2, carry, carry);
MP_ADD_CARRY (r3, a5a, r3, carry, carry);
MP_ADD_CARRY (r1, a5b, r1, 0, carry);
MP_ADD_CARRY (r2, a6 , r2, carry, carry);
MP_ADD_CARRY (r3, 0, r3, carry, carry);
MP_SUB_BORROW(r0, a4a_a3b, r0, 0, carry);
MP_SUB_BORROW(r1, a5a_a4b, r1, carry, carry);
MP_SUB_BORROW(r2, a6a_a5b, r2, carry, carry);
MP_SUB_BORROW(r3, a6b , r3, carry, carry);
MP_SUB_BORROW(r0, a6a_a5b, r0, 0, carry);
MP_SUB_BORROW(r1, a6b , r1, carry, carry);
if (carry) {
MP_SUB_BORROW(r2, 0, r2, carry, carry);
MP_SUB_BORROW(r3, 0, r3, carry, carry);
}
/* if the value is negative, r3 has a 2's complement
* high value */
r3b = (int)(r3 >>32);
while (r3b > 0) {
r3 &= 0xffffffff;
MP_ADD_CARRY(r1,((mp_digit)r3b) << 32, r1, 0, carry);
if (carry) {
MP_ADD_CARRY(r2, 0, r2, carry, carry);
MP_ADD_CARRY(r3, 0, r3, carry, carry);
}
MP_SUB_BORROW(r0, r3b, r0, 0, carry);
if (carry) {
MP_SUB_BORROW(r1, 0, r1, carry, carry);
MP_SUB_BORROW(r2, 0, r2, carry, carry);
MP_SUB_BORROW(r3, 0, r3, carry, carry);
}
r3b = (int)(r3 >>32);
}
while (r3b < 0) {
MP_ADD_CARRY (r0, 1, r0, 0, carry);
MP_ADD_CARRY (r1, MP_DIGIT_MAX <<32, r1, carry, carry);
MP_ADD_CARRY (r2, MP_DIGIT_MAX, r2, carry, carry);
MP_ADD_CARRY (r3, MP_DIGIT_MAX >> 32, r3, carry, carry);
r3b = (int)(r3 >>32);
}
/* check for final reduction */
/* now the only way we are over is if the top 4 words are all ones */
if ((r3 == (MP_DIGIT_MAX >> 32)) && (r2 == MP_DIGIT_MAX)
&& ((r1 & MP_DIGIT_MAX << 32)== MP_DIGIT_MAX << 32) &&
((r1 != MP_DIGIT_MAX << 32 ) || (r0 != 0)) ) {
/* one last subraction */
MP_SUB_BORROW(r0, 1, r0, 0, carry);
MP_SUB_BORROW(r1, 0, r1, carry, carry);
r2 = r3 = 0;
}
if (a != r) {
MP_CHECKOK(s_mp_pad(r, 4));
}
/* set the lower words of r */
if (a != r) {
MP_CHECKOK(s_mp_pad(r, 5));
MP_DIGIT(r, 3) = MP_DIGIT(a, 3) & 0xFFFFFFFF;
MP_DIGIT(r, 2) = MP_DIGIT(a, 2);
MP_DIGIT(r, 1) = MP_DIGIT(a, 1);
MP_DIGIT(r, 0) = MP_DIGIT(a, 0);
} else {
MP_DIGIT(r, 3) &= 0xFFFFFFFF;
}
MP_SIGN(r) = MP_ZPOS;
MP_USED(r) = 4;
switch (a_used) {
case 7:
case 6:
sa[3] = a5 & 0xFFFFFFFF;
case 5:
sa[2] = a4;
case 4:
sa[1] = a3 << 32;
sa[0] = 0;
MP_USED(&s) = a_used - 2;
if (MP_USED(&s) == 5)
MP_USED(&s) = 4;
MP_CHECKOK(mp_add(r, &s, r));
}
switch (a_used) {
case 7:
sa[2] = a6;
case 6:
sa[1] = (a5 >> 32) << 32;
sa[0] = 0;
MP_USED(&s) = a_used - 4;
MP_CHECKOK(mp_add(r, &s, r));
}
sa[2] = sa[1] = sa[0] = 0;
switch (a_used) {
case 7:
sa[3] = a6 >> 32;
sa[2] = a6 << 32;
case 6:
sa[2] |= a5 >> 32;
sa[1] = a5 << 32;
case 5:
sa[1] |= a4 >> 32;
sa[0] = a4 << 32;
case 4:
sa[0] |= a3;
MP_USED(&s) = a_used - 3;
MP_CHECKOK(mp_sub(r, &s, r));
}
sa[0] = 0;
switch (a_used) {
case 7:
sa[1] = a6 >> 32;
sa[0] = a6 << 32;
case 6:
sa[0] |= a5 >> 32;
MP_USED(&s) = a_used - 5;
MP_CHECKOK(mp_sub(r, &s, r));
}
/* there might be 1 or 2 bits left to reduce; use regular
* reduction for this */
MP_CHECKOK(mp_mod(r, &meth->irr, r));
}
MP_DIGIT(r, 3) = r3;
MP_DIGIT(r, 2) = r2;
MP_DIGIT(r, 1) = r1;
MP_DIGIT(r, 0) = r0;
#endif
}
CLEANUP:
return res;
@ -292,6 +335,31 @@ ec_GFp_nistp224_mul(const mp_int *a, const mp_int *b, mp_int *r,
return res;
}
/* Divides two field elements. If a is NULL, then returns the inverse of
 * b.
 *
 * a    - numerator, or NULL to request only the inverse of b
 * b    - denominator
 * r    - receives b^-1 mod meth->irr (a == NULL) or a * b^-1 reduced
 *        with ec_GFp_nistp224_mod
 * meth - supplies the modulus meth->irr; also handed to the reduction
 *
 * Returns the status of mp_invmod() on the inverse-only path, otherwise
 * res (MP_OKAY unless one of the checked calls fails).
 */
mp_err
ec_GFp_nistp224_div(const mp_int *a, const mp_int *b, mp_int *r,
		   const GFMethod *meth)
{
	mp_err res = MP_OKAY;
	mp_int t;

	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
	if (a == NULL) {
		return mp_invmod(b, &meth->irr, r);
	}

	/* Mark t as holding no storage before the first checked call: if
	 * mp_init() itself fails we still reach mp_clear(&t) at CLEANUP, and
	 * it must not see an indeterminate digits pointer. */
	MP_DIGITS(&t) = 0;

	/* MPI doesn't support divmod, so we implement it using invmod and
	 * mulmod. */
	MP_CHECKOK(mp_init(&t));
	MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
	MP_CHECKOK(mp_mul(a, &t, r));
	MP_CHECKOK(ec_GFp_nistp224_mod(r, r, meth));
  CLEANUP:
	mp_clear(&t);
	return res;
}
/* Wire in fast field arithmetic and precomputation of base point for
* named curves. */
mp_err
@ -301,6 +369,7 @@ ec_group_set_gfp224(ECGroup *group, ECCurveName name)
group->meth->field_mod = &ec_GFp_nistp224_mod;
group->meth->field_mul = &ec_GFp_nistp224_mul;
group->meth->field_sqr = &ec_GFp_nistp224_sqr;
group->meth->field_div = &ec_GFp_nistp224_div;
}
return MP_OKAY;
}

Просмотреть файл

@ -0,0 +1,429 @@
/*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the elliptic curve math library for prime field curves.
*
* The Initial Developer of the Original Code is
* Sun Microsystems, Inc.
* Portions created by the Initial Developer are Copyright (C) 2003
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Douglas Stebila <douglas@stebila.ca>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "ecp.h"
#include "mpi.h"
#include "mplogic.h"
#include "mpi-priv.h"
#include <stdlib.h>
/* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192 + 2^96 - 1. a can be r.
 * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to
 * Elliptic Curve Cryptography.
 *
 * a    - value to reduce
 * r    - receives the reduced value
 * meth - only meth->irr is read (as the modulus for mp_mod / mp_sub)
 *
 * One of three paths is taken, chosen by mpl_significant_bits(a):
 *   < 256 bits : a is copied to r unchanged (nothing is done when a == r)
 *   > 512 bits : generic mp_mod
 *   otherwise  : the word-wise fast reduction below
 *
 * Returns res (MP_OKAY unless a checked call fails -- MP_CHECKOK is
 * presumably what records the error and jumps to CLEANUP), or the status
 * of mp_copy on the short-input path. */
mp_err
ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
mp_err res = MP_OKAY;
mp_size a_used = MP_USED(a);
int a_bits = mpl_significant_bits(a);
mp_digit carry;
#ifdef ECL_THIRTY_TWO_BIT
/* a8..a15: upper digits of a (zero when a is shorter);
 * r0..r7: working copy of the low eight digits;
 * r8: running total of carries minus borrows out of r7 */
mp_digit a8=0, a9=0, a10=0, a11=0, a12=0, a13=0, a14=0, a15=0;
mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
int r8; /* must be a signed value ! */
#else
/* a4..a7: upper digits of a (zero when a is shorter);
 * aNl / aNh: the two 32-bit halves of aN, see where they are assigned;
 * r0..r3: working copy of the low four digits;
 * r4: running total of carries minus borrows out of r3 */
mp_digit a4=0, a5=0, a6=0, a7=0;
mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
mp_digit r0, r1, r2, r3;
int r4; /* must be a signed value ! */
#endif
/* no reduction is performed when a has fewer than 256 significant bits */
if (a_bits < 256) {
if (a == r) return MP_OKAY;
return mp_copy(a,r);
}
/* for inputs wider than twice the field size
 * use regular reduction */
if (a_bits > 512) {
MP_CHECKOK(mp_mod(a, &meth->irr, r));
} else {
#ifdef ECL_THIRTY_TWO_BIT
/* copy out the upper words of a; the cases fall through on purpose so
 * that every digit below a_used is loaded */
switch (a_used) {
case 16:
a15 = MP_DIGIT(a,15);
case 15:
a14 = MP_DIGIT(a,14);
case 14:
a13 = MP_DIGIT(a,13);
case 13:
a12 = MP_DIGIT(a,12);
case 12:
a11 = MP_DIGIT(a,11);
case 11:
a10 = MP_DIGIT(a,10);
case 10:
a9 = MP_DIGIT(a,9);
case 9:
a8 = MP_DIGIT(a,8);
}
r0 = MP_DIGIT(a,0);
r1 = MP_DIGIT(a,1);
r2 = MP_DIGIT(a,2);
r3 = MP_DIGIT(a,3);
r4 = MP_DIGIT(a,4);
r5 = MP_DIGIT(a,5);
r6 = MP_DIGIT(a,6);
r7 = MP_DIGIT(a,7);
/* sum 1: (a15,a14,a13,a12,a11,0,0,0), added twice */
MP_ADD_CARRY(r3, a11, r3, 0, carry);
MP_ADD_CARRY(r4, a12, r4, carry, carry);
MP_ADD_CARRY(r5, a13, r5, carry, carry);
MP_ADD_CARRY(r6, a14, r6, carry, carry);
MP_ADD_CARRY(r7, a15, r7, carry, carry);
r8 = carry;
MP_ADD_CARRY(r3, a11, r3, 0, carry);
MP_ADD_CARRY(r4, a12, r4, carry, carry);
MP_ADD_CARRY(r5, a13, r5, carry, carry);
MP_ADD_CARRY(r6, a14, r6, carry, carry);
MP_ADD_CARRY(r7, a15, r7, carry, carry);
r8 += carry;
/* sum 2: (0,a15,a14,a13,a12,0,0,0), first of two additions */
MP_ADD_CARRY(r3, a12, r3, 0, carry);
MP_ADD_CARRY(r4, a13, r4, carry, carry);
MP_ADD_CARRY(r5, a14, r5, carry, carry);
MP_ADD_CARRY(r6, a15, r6, carry, carry);
MP_ADD_CARRY(r7, 0, r7, carry, carry);
r8 += carry;
/* combine last bottom of sum 3 with second sum 2 */
MP_ADD_CARRY(r0, a8, r0, 0, carry);
MP_ADD_CARRY(r1, a9, r1, carry, carry);
MP_ADD_CARRY(r2, a10, r2, carry, carry);
MP_ADD_CARRY(r3, a12, r3, carry, carry);
MP_ADD_CARRY(r4, a13, r4, carry, carry);
MP_ADD_CARRY(r5, a14, r5, carry, carry);
MP_ADD_CARRY(r6, a15, r6, carry, carry);
MP_ADD_CARRY(r7, a15, r7, carry, carry); /* from sum 3 */
r8 += carry;
/* sum 3 (rest of it)*/
MP_ADD_CARRY(r6, a14, r6, 0, carry);
MP_ADD_CARRY(r7, 0, r7, carry, carry);
r8 += carry;
/* sum 4: (a8,a13,a15,a14,a13,a11,a10,a9) */
MP_ADD_CARRY(r0, a9, r0, 0, carry);
MP_ADD_CARRY(r1, a10, r1, carry, carry);
MP_ADD_CARRY(r2, a11, r2, carry, carry);
MP_ADD_CARRY(r3, a13, r3, carry, carry);
MP_ADD_CARRY(r4, a14, r4, carry, carry);
MP_ADD_CARRY(r5, a15, r5, carry, carry);
MP_ADD_CARRY(r6, a13, r6, carry, carry);
MP_ADD_CARRY(r7, a8, r7, carry, carry);
r8 += carry;
/* diff 5: subtract (a10,a8,0,0,0,a13,a12,a11) */
MP_SUB_BORROW(r0, a11, r0, 0, carry);
MP_SUB_BORROW(r1, a12, r1, carry, carry);
MP_SUB_BORROW(r2, a13, r2, carry, carry);
MP_SUB_BORROW(r3, 0, r3, carry, carry);
MP_SUB_BORROW(r4, 0, r4, carry, carry);
MP_SUB_BORROW(r5, 0, r5, carry, carry);
MP_SUB_BORROW(r6, a8, r6, carry, carry);
MP_SUB_BORROW(r7, a10, r7, carry, carry);
r8 -= carry;
/* diff 6: subtract (a11,a9,0,0,a15,a14,a13,a12) */
MP_SUB_BORROW(r0, a12, r0, 0, carry);
MP_SUB_BORROW(r1, a13, r1, carry, carry);
MP_SUB_BORROW(r2, a14, r2, carry, carry);
MP_SUB_BORROW(r3, a15, r3, carry, carry);
MP_SUB_BORROW(r4, 0, r4, carry, carry);
MP_SUB_BORROW(r5, 0, r5, carry, carry);
MP_SUB_BORROW(r6, a9, r6, carry, carry);
MP_SUB_BORROW(r7, a11, r7, carry, carry);
r8 -= carry;
/* diff 7: subtract (a12,0,a10,a9,a8,a15,a14,a13) */
MP_SUB_BORROW(r0, a13, r0, 0, carry);
MP_SUB_BORROW(r1, a14, r1, carry, carry);
MP_SUB_BORROW(r2, a15, r2, carry, carry);
MP_SUB_BORROW(r3, a8, r3, carry, carry);
MP_SUB_BORROW(r4, a9, r4, carry, carry);
MP_SUB_BORROW(r5, a10, r5, carry, carry);
MP_SUB_BORROW(r6, 0, r6, carry, carry);
MP_SUB_BORROW(r7, a12, r7, carry, carry);
r8 -= carry;
/* diff 8: subtract (a13,0,a11,a10,a9,0,a15,a14) */
MP_SUB_BORROW(r0, a14, r0, 0, carry);
MP_SUB_BORROW(r1, a15, r1, carry, carry);
MP_SUB_BORROW(r2, 0, r2, carry, carry);
MP_SUB_BORROW(r3, a9, r3, carry, carry);
MP_SUB_BORROW(r4, a10, r4, carry, carry);
MP_SUB_BORROW(r5, a11, r5, carry, carry);
MP_SUB_BORROW(r6, 0, r6, carry, carry);
MP_SUB_BORROW(r7, a13, r7, carry, carry);
r8 -= carry;
/* reduce the overflows: r8 counts multiples of 2^256 still owed, and
 * 2^256 = 2^224 - 2^192 - 2^96 + 1 (mod p256); the constants below add
 * r8 times that value in two's-complement form, and any carry out of
 * r7 becomes the next r8 */
while (r8 > 0) {
mp_digit r8_d = r8;
MP_ADD_CARRY(r0, r8_d, r0, 0, carry);
MP_ADD_CARRY(r1, 0, r1, carry, carry);
MP_ADD_CARRY(r2, 0, r2, carry, carry);
MP_ADD_CARRY(r3, -r8_d, r3, carry, carry);
MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry, carry);
MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry, carry);
MP_ADD_CARRY(r6, -(r8_d+1), r6, carry, carry);
MP_ADD_CARRY(r7, (r8_d-1), r7, carry, carry);
r8 = carry;
}
/* reduce the underflows: mirror image of the loop above, subtracting
 * the same digit pattern |r8| times; a borrow out of r7 re-enters the
 * loop as r8 = -1 */
while (r8 < 0) {
mp_digit r8_d = -r8;
MP_SUB_BORROW(r0, r8_d, r0, 0, carry);
MP_SUB_BORROW(r1, 0, r1, carry, carry);
MP_SUB_BORROW(r2, 0, r2, carry, carry);
MP_SUB_BORROW(r3, -r8_d, r3, carry, carry);
MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry, carry);
MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry, carry);
MP_SUB_BORROW(r6, -(r8_d+1), r6, carry, carry);
MP_SUB_BORROW(r7, (r8_d-1), r7, carry, carry);
r8 = -carry;
}
/* when a == r the digits are overwritten in place and no padding call
 * is made */
if (a != r) {
MP_CHECKOK(s_mp_pad(r,8));
}
MP_SIGN(r) = MP_ZPOS;
MP_USED(r) = 8;
MP_DIGIT(r,7) = r7;
MP_DIGIT(r,6) = r6;
MP_DIGIT(r,5) = r5;
MP_DIGIT(r,4) = r4;
MP_DIGIT(r,3) = r3;
MP_DIGIT(r,2) = r2;
MP_DIGIT(r,1) = r1;
MP_DIGIT(r,0) = r0;
/* final reduction if necessary: digit-wise test for r >= p256, whose
 * 32-bit digits are (MAX,1,0,0,0,MAX,MAX,MAX) from most to least
 * significant */
if ((r7 == MP_DIGIT_MAX) &&
((r6 > 1) || ((r6 == 1) &&
(r5 || r4 || r3 ||
((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX)
&& (r0 == MP_DIGIT_MAX)))))) {
MP_CHECKOK(mp_sub(r, &meth->irr, r));
}
/* the section below is compiled only if `notdef` is defined; it is an
 * mp_int based version of the final adjustment */
#ifdef notdef
/* smooth the negatives */
while (MP_SIGN(r) != MP_ZPOS) {
MP_CHECKOK(mp_add(r, &meth->irr, r));
}
while (MP_USED(r) > 8) {
MP_CHECKOK(mp_sub(r, &meth->irr, r));
}
/* final reduction if necessary */
if (MP_DIGIT(r,7) >= MP_DIGIT(&meth->irr,7)) {
if (mp_cmp(r,&meth->irr) != MP_LT) {
MP_CHECKOK(mp_sub(r, &meth->irr, r));
}
}
#endif
s_mp_clamp(r);
#else
/* copy out the upper words of a; the cases fall through on purpose so
 * that every digit below a_used is loaded */
switch (a_used) {
case 8:
a7 = MP_DIGIT(a,7);
case 7:
a6 = MP_DIGIT(a,6);
case 6:
a5 = MP_DIGIT(a,5);
case 5:
a4 = MP_DIGIT(a,4);
}
/* aNl = low 32 bits of aN moved into the upper half of a digit;
 * aNh = high 32 bits of aN moved into the lower half.  This lets the
 * 32-bit terms of the algorithm be assembled from 64-bit digits. */
a7l = a7 << 32;
a7h = a7 >> 32;
a6l = a6 << 32;
a6h = a6 >> 32;
a5l = a5 << 32;
a5h = a5 >> 32;
a4l = a4 << 32;
a4h = a4 >> 32;
r3 = MP_DIGIT(a,3);
r2 = MP_DIGIT(a,2);
r1 = MP_DIGIT(a,1);
r0 = MP_DIGIT(a,0);
/* sum 1 (added twice) */
MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry);
MP_ADD_CARRY(r2, a6, r2, carry, carry);
MP_ADD_CARRY(r3, a7, r3, carry, carry);
r4 = carry;
MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry);
MP_ADD_CARRY(r2, a6, r2, carry, carry);
MP_ADD_CARRY(r3, a7, r3, carry, carry);
r4 += carry;
/* sum 2 (added twice) */
MP_ADD_CARRY(r1, a6l, r1, 0, carry);
MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
MP_ADD_CARRY(r3, a7h, r3, carry, carry);
r4 += carry;
MP_ADD_CARRY(r1, a6l, r1, 0, carry);
MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
MP_ADD_CARRY(r3, a7h, r3, carry, carry);
r4 += carry;
/* sum 3 */
MP_ADD_CARRY(r0, a4, r0, 0, carry);
MP_ADD_CARRY(r1, a5l >> 32, r1, carry, carry);
MP_ADD_CARRY(r2, 0, r2, carry, carry);
MP_ADD_CARRY(r3, a7, r3, carry, carry);
r4 += carry;
/* sum 4 */
MP_ADD_CARRY(r0, a4h | a5l, r0, 0, carry);
MP_ADD_CARRY(r1, a5h|(a6h<<32), r1, carry, carry);
MP_ADD_CARRY(r2, a7, r2, carry, carry);
MP_ADD_CARRY(r3, a6h | a4l, r3, carry, carry);
r4 += carry;
/* diff 5 */
MP_SUB_BORROW(r0, a5h | a6l, r0, 0, carry);
MP_SUB_BORROW(r1, a6h, r1, carry, carry);
MP_SUB_BORROW(r2, 0, r2, carry, carry);
MP_SUB_BORROW(r3, (a4l>>32)|a5l,r3, carry, carry);
r4 -= carry;
/* diff 6 */
MP_SUB_BORROW(r0, a6, r0, 0, carry);
MP_SUB_BORROW(r1, a7, r1, carry, carry);
MP_SUB_BORROW(r2, 0, r2, carry, carry);
MP_SUB_BORROW(r3, a4h|(a5h<<32),r3, carry, carry);
r4 -= carry;
/* diff 7 */
MP_SUB_BORROW(r0, a6h|a7l, r0, 0, carry);
MP_SUB_BORROW(r1, a7h|a4l, r1, carry, carry);
MP_SUB_BORROW(r2, a4h|a5l, r2, carry, carry);
MP_SUB_BORROW(r3, a6l, r3, carry, carry);
r4 -= carry;
/* diff 8 */
MP_SUB_BORROW(r0, a7, r0, 0, carry);
MP_SUB_BORROW(r1, a4h<<32, r1, carry, carry);
MP_SUB_BORROW(r2, a5, r2, carry, carry);
MP_SUB_BORROW(r3, a6h<<32, r3, carry, carry);
r4 -= carry;
/* reduce the overflows: r4 counts multiples of 2^256 still owed, and
 * 2^256 = 2^224 - 2^192 - 2^96 + 1 (mod p256); the constants below add
 * r4 times that value in two's-complement form, and any carry out of
 * r3 becomes the next r4 */
while (r4 > 0) {
mp_digit r4_long = r4;
mp_digit r4l = (r4_long << 32);
MP_ADD_CARRY(r0, r4_long, r0, 0, carry);
MP_ADD_CARRY(r1, -r4l, r1, carry, carry);
MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry, carry);
MP_ADD_CARRY(r3, r4l-r4_long-1,r3, carry, carry);
r4 = carry;
}
/* reduce the underflows: mirror image of the loop above, subtracting
 * the same digit pattern |r4| times; a borrow out of r3 re-enters the
 * loop as r4 = -1 */
while (r4 < 0) {
mp_digit r4_long = -r4;
mp_digit r4l = (r4_long << 32);
MP_SUB_BORROW(r0, r4_long, r0, 0, carry);
MP_SUB_BORROW(r1, -r4l, r1, carry, carry);
MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry, carry);
MP_SUB_BORROW(r3, r4l-r4_long-1,r3, carry, carry);
r4 = -carry;
}
/* when a == r the digits are overwritten in place and no padding call
 * is made */
if (a != r) {
MP_CHECKOK(s_mp_pad(r,4));
}
MP_SIGN(r) = MP_ZPOS;
MP_USED(r) = 4;
MP_DIGIT(r,3) = r3;
MP_DIGIT(r,2) = r2;
MP_DIGIT(r,1) = r1;
MP_DIGIT(r,0) = r0;
/* final reduction if necessary: digit-wise test for r >= p256, whose
 * 64-bit digits are (0xFFFFFFFF00000001, 0, 0xFFFFFFFF, MAX) from most
 * to least significant.
 * NOTE(review): the second constant carries a UL suffix while the
 * first uses ULL -- confirm both have the intended width on every
 * supported compiler. */
if ((r3 > 0xFFFFFFFF00000001ULL) ||
((r3 == 0xFFFFFFFF00000001UL) &&
(r2 || (r1 >> 32)||
(r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
/* very rare, just use mp_sub */
MP_CHECKOK(mp_sub(r, &meth->irr, r));
}
s_mp_clamp(r);
#endif
}
CLEANUP:
return res;
}
/* Compute the square of field element a, reduced modulo p256. Store the
 * result in r. r could be a. Uses optimized modular reduction for p256.
 *
 * a    - value to square
 * r    - receives mp_sqr(a), then is reduced in place by
 *        ec_GFp_nistp256_mod
 * meth - handed unchanged to ec_GFp_nistp256_mod
 *
 * Returns res, which starts as MP_OKAY; MP_CHECKOK presumably stores a
 * failing status there and jumps to CLEANUP (the macro is defined
 * elsewhere).
 */
mp_err
ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
{
mp_err res = MP_OKAY;
/* full-width square first, then the fast reduction */
MP_CHECKOK(mp_sqr(a, r));
MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
CLEANUP:
return res;
}
/* Compute the product of two field elements a and b, reduced modulo p256.
 * Store the result in r. r could be a or b; a could be b. Uses
 * optimized modular reduction for p256.
 *
 * a, b - factors
 * r    - receives mp_mul(a, b), then is reduced in place by
 *        ec_GFp_nistp256_mod
 * meth - handed unchanged to ec_GFp_nistp256_mod
 *
 * Returns res, which starts as MP_OKAY; MP_CHECKOK presumably stores a
 * failing status there and jumps to CLEANUP (the macro is defined
 * elsewhere). */
mp_err
ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r,
const GFMethod *meth)
{
mp_err res = MP_OKAY;
/* full-width product first, then the fast reduction */
MP_CHECKOK(mp_mul(a, b, r));
MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
CLEANUP:
return res;
}
/* Install the P-256 specific field routines on a group.
 *
 * group - group whose method table (group->meth) is updated
 * name  - curve being configured; only ECCurve_NIST_P256 causes a change
 *
 * For ECCurve_NIST_P256 the field_mod, field_mul and field_sqr entries
 * are pointed at the nistp256 implementations in this file. For any other
 * name the group is left untouched. Always returns MP_OKAY.
 */
mp_err
ec_group_set_gfp256(ECGroup *group, ECCurveName name)
{
	/* nothing to wire in for other curves */
	if (name != ECCurve_NIST_P256) {
		return MP_OKAY;
	}

	group->meth->field_sqr = ec_GFp_nistp256_sqr;
	group->meth->field_mul = ec_GFp_nistp256_mul;
	group->meth->field_mod = ec_GFp_nistp256_mod;

	return MP_OKAY;
}

Просмотреть файл

@ -0,0 +1,293 @@
/*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the elliptic curve math library for prime field curves.
*
* The Initial Developer of the Original Code is
* Sun Microsystems, Inc.
* Portions created by the Initial Developer are Copyright (C) 2003
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Douglas Stebila <douglas@stebila.ca>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "ecp.h"
#include "mpi.h"
#include "mplogic.h"
#include "mpi-priv.h"
#include <stdlib.h>
/* Fast modular reduction for p384 = 2^384 - 2^128 - 2^96 + 2^32 - 1. a can be r.
 * Uses algorithm 2.30 from Hankerson, Menezes, Vanstone. Guide to
 * Elliptic Curve Cryptography.
 *
 * a    - value to reduce
 * r    - receives the reduced value
 * meth - only meth->irr is read (as the modulus for mp_mod / mp_submod)
 *
 * The fast path is taken only when a has between 737 and 768 significant
 * bits; every other input goes through generic mp_mod.  On the fast path
 * ten field-sized terms are built from the digits of a and combined as
 *   m[0] + 2*m[1] + m[2] + m[3] + m[4] + m[5] + m[6] - m[7] - m[8] - m[9]
 * with the last subtraction done modulo meth->irr.
 *
 * Returns res (MP_OKAY unless a checked call fails -- MP_CHECKOK is
 * presumably what records the error and jumps to CLEANUP). */
mp_err
ec_GFp_nistp384_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
mp_err res = MP_OKAY;
int a_bits = mpl_significant_bits(a);
int i;
/* m[0..9] are stack mp_int headers; each one is pointed at the matching
 * row of s below, so no heap allocation is made for them */
mp_int m[10];
#ifdef ECL_THIRTY_TWO_BIT
/* twelve digits per term in this configuration */
mp_digit s[10][12];
for (i = 0; i < 10; i++) {
MP_SIGN(&m[i]) = MP_ZPOS;
MP_ALLOC(&m[i]) = 12;
MP_USED(&m[i]) = 12;
MP_DIGITS(&m[i]) = s[i];
}
#else
/* six digits per term in this configuration */
mp_digit s[10][6];
for (i = 0; i < 10; i++) {
MP_SIGN(&m[i]) = MP_ZPOS;
MP_ALLOC(&m[i]) = 6;
MP_USED(&m[i]) = 6;
MP_DIGITS(&m[i]) = s[i];
}
#endif
#ifdef ECL_THIRTY_TWO_BIT
/* for polynomials larger than twice the field size or polynomials
 * not using all words, use regular reduction */
if ((a_bits > 768) || (a_bits <= 736)) {
MP_CHECKOK(mp_mod(a, &meth->irr, r));
} else {
/* term 0: the low twelve digits of a */
for (i = 0; i < 12; i++) {
s[0][i] = MP_DIGIT(a, i);
}
/* term 1: a21..a23 placed at digits 4..6 (added twice below) */
s[1][0] = 0;
s[1][1] = 0;
s[1][2] = 0;
s[1][3] = 0;
s[1][4] = MP_DIGIT(a, 21);
s[1][5] = MP_DIGIT(a, 22);
s[1][6] = MP_DIGIT(a, 23);
s[1][7] = 0;
s[1][8] = 0;
s[1][9] = 0;
s[1][10] = 0;
s[1][11] = 0;
/* term 2: the high twelve digits of a */
for (i = 0; i < 12; i++) {
s[2][i] = MP_DIGIT(a, i+12);
}
/* term 3: a21..a23 in digits 0..2, then a12..a20 */
s[3][0] = MP_DIGIT(a, 21);
s[3][1] = MP_DIGIT(a, 22);
s[3][2] = MP_DIGIT(a, 23);
for (i = 3; i < 12; i++) {
s[3][i] = MP_DIGIT(a, i+9);
}
/* term 4: (0, a23, 0, a20) in digits 0..3, then a12..a19 */
s[4][0] = 0;
s[4][1] = MP_DIGIT(a, 23);
s[4][2] = 0;
s[4][3] = MP_DIGIT(a, 20);
for (i = 4; i < 12; i++) {
s[4][i] = MP_DIGIT(a, i+8);
}
/* term 5: a20..a23 placed at digits 4..7 */
s[5][0] = 0;
s[5][1] = 0;
s[5][2] = 0;
s[5][3] = 0;
s[5][4] = MP_DIGIT(a, 20);
s[5][5] = MP_DIGIT(a, 21);
s[5][6] = MP_DIGIT(a, 22);
s[5][7] = MP_DIGIT(a, 23);
s[5][8] = 0;
s[5][9] = 0;
s[5][10] = 0;
s[5][11] = 0;
/* term 6: a20 at digit 0, a21..a23 at digits 3..5 */
s[6][0] = MP_DIGIT(a, 20);
s[6][1] = 0;
s[6][2] = 0;
s[6][3] = MP_DIGIT(a, 21);
s[6][4] = MP_DIGIT(a, 22);
s[6][5] = MP_DIGIT(a, 23);
s[6][6] = 0;
s[6][7] = 0;
s[6][8] = 0;
s[6][9] = 0;
s[6][10] = 0;
s[6][11] = 0;
/* term 7 (subtracted): a23 at digit 0, then a12..a22 */
s[7][0] = MP_DIGIT(a, 23);
for (i = 1; i < 12; i++) {
s[7][i] = MP_DIGIT(a, i+11);
}
/* term 8 (subtracted): a20..a23 placed at digits 1..4 */
s[8][0] = 0;
s[8][1] = MP_DIGIT(a, 20);
s[8][2] = MP_DIGIT(a, 21);
s[8][3] = MP_DIGIT(a, 22);
s[8][4] = MP_DIGIT(a, 23);
s[8][5] = 0;
s[8][6] = 0;
s[8][7] = 0;
s[8][8] = 0;
s[8][9] = 0;
s[8][10] = 0;
s[8][11] = 0;
/* term 9 (subtracted): a23 at digits 3 and 4 */
s[9][0] = 0;
s[9][1] = 0;
s[9][2] = 0;
s[9][3] = MP_DIGIT(a, 23);
s[9][4] = MP_DIGIT(a, 23);
s[9][5] = 0;
s[9][6] = 0;
s[9][7] = 0;
s[9][8] = 0;
s[9][9] = 0;
s[9][10] = 0;
s[9][11] = 0;
/* combine the terms; m[1] is deliberately added twice, and the last
 * step uses mp_submod so the result is reduced by meth->irr */
MP_CHECKOK(mp_add(&m[0], &m[1], r));
MP_CHECKOK(mp_add(r, &m[1], r));
MP_CHECKOK(mp_add(r, &m[2], r));
MP_CHECKOK(mp_add(r, &m[3], r));
MP_CHECKOK(mp_add(r, &m[4], r));
MP_CHECKOK(mp_add(r, &m[5], r));
MP_CHECKOK(mp_add(r, &m[6], r));
MP_CHECKOK(mp_sub(r, &m[7], r));
MP_CHECKOK(mp_sub(r, &m[8], r));
MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r));
s_mp_clamp(r);
}
#else
/* for polynomials larger than twice the field size or polynomials
 * not using all words, use regular reduction */
if ((a_bits > 768) || (a_bits <= 736)) {
MP_CHECKOK(mp_mod(a, &meth->irr, r));
} else {
/* Same ten terms as the 32-bit branch, assembled from digits that are
 * split and recombined with 32-bit shifts. */
/* term 0: the low six digits of a */
for (i = 0; i < 6; i++) {
s[0][i] = MP_DIGIT(a, i);
}
/* term 1 (added twice below) */
s[1][0] = 0;
s[1][1] = 0;
s[1][2] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
s[1][3] = MP_DIGIT(a, 11) >> 32;
s[1][4] = 0;
s[1][5] = 0;
/* term 2: the high six digits of a */
for (i = 0; i < 6; i++) {
s[2][i] = MP_DIGIT(a, i+6);
}
/* term 3 */
s[3][0] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
s[3][1] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
for (i = 2; i < 6; i++) {
s[3][i] = (MP_DIGIT(a, i+4) >> 32) | (MP_DIGIT(a, i+5) << 32);
}
/* term 4 */
s[4][0] = (MP_DIGIT(a, 11) >> 32) << 32;
s[4][1] = MP_DIGIT(a, 10) << 32;
for (i = 2; i < 6; i++) {
s[4][i] = MP_DIGIT(a, i+4);
}
/* term 5 */
s[5][0] = 0;
s[5][1] = 0;
s[5][2] = MP_DIGIT(a, 10);
s[5][3] = MP_DIGIT(a, 11);
s[5][4] = 0;
s[5][5] = 0;
/* term 6 */
s[6][0] = (MP_DIGIT(a, 10) << 32) >> 32;
s[6][1] = (MP_DIGIT(a, 10) >> 32) << 32;
s[6][2] = MP_DIGIT(a, 11);
s[6][3] = 0;
s[6][4] = 0;
s[6][5] = 0;
/* term 7 (subtracted) */
s[7][0] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
for (i = 1; i < 6; i++) {
s[7][i] = (MP_DIGIT(a, i+5) >> 32) | (MP_DIGIT(a, i+6) << 32);
}
/* term 8 (subtracted) */
s[8][0] = MP_DIGIT(a, 10) << 32;
s[8][1] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
s[8][2] = MP_DIGIT(a, 11) >> 32;
s[8][3] = 0;
s[8][4] = 0;
s[8][5] = 0;
/* term 9 (subtracted) */
s[9][0] = 0;
s[9][1] = (MP_DIGIT(a, 11) >> 32) << 32;
s[9][2] = MP_DIGIT(a, 11) >> 32;
s[9][3] = 0;
s[9][4] = 0;
s[9][5] = 0;
/* combine the terms; m[1] is deliberately added twice, and the last
 * step uses mp_submod so the result is reduced by meth->irr */
MP_CHECKOK(mp_add(&m[0], &m[1], r));
MP_CHECKOK(mp_add(r, &m[1], r));
MP_CHECKOK(mp_add(r, &m[2], r));
MP_CHECKOK(mp_add(r, &m[3], r));
MP_CHECKOK(mp_add(r, &m[4], r));
MP_CHECKOK(mp_add(r, &m[5], r));
MP_CHECKOK(mp_add(r, &m[6], r));
MP_CHECKOK(mp_sub(r, &m[7], r));
MP_CHECKOK(mp_sub(r, &m[8], r));
MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r));
s_mp_clamp(r);
}
#endif
CLEANUP:
return res;
}
/* Field squaring for NIST P-384: r = a^2 mod p384.
 *
 * The full-width square is written to r first and then reduced in place
 * by ec_GFp_nistp384_mod, so r may be the same object as a.
 *
 * Returns the failing call's status if either step reports an error,
 * otherwise the status of the reduction. */
mp_err
ec_GFp_nistp384_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
{
    mp_err res = mp_sqr(a, r);

    if (res < MP_OKAY) {
        return res;
    }
    return ec_GFp_nistp384_mod(r, r, meth);
}
/* Field multiplication for NIST P-384: r = a * b mod p384.
 *
 * The full-width product is written to r first and then reduced in
 * place by ec_GFp_nistp384_mod.  r may be a or b, and a may be b.
 *
 * Returns the failing call's status if either step reports an error,
 * otherwise the status of the reduction. */
mp_err
ec_GFp_nistp384_mul(const mp_int *a, const mp_int *b, mp_int *r,
                    const GFMethod *meth)
{
    mp_err res = mp_mul(a, b, r);

    if (res < MP_OKAY) {
        return res;
    }
    return ec_GFp_nistp384_mod(r, r, meth);
}
/* Install the P-384-specific field routines (reduce, multiply, square)
 * into group->meth when the named curve is NIST P-384.  Any other curve
 * name leaves the group untouched.  Always returns MP_OKAY. */
mp_err
ec_group_set_gfp384(ECGroup *group, ECCurveName name)
{
    if (name != ECCurve_NIST_P384) {
        return MP_OKAY;
    }

    group->meth->field_sqr = ec_GFp_nistp384_sqr;
    group->meth->field_mul = ec_GFp_nistp384_mul;
    group->meth->field_mod = ec_GFp_nistp384_mod;
    return MP_OKAY;
}

Просмотреть файл

@ -0,0 +1,170 @@
/*
* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is the elliptic curve math library for prime field curves.
*
* The Initial Developer of the Original Code is
* Sun Microsystems, Inc.
* Portions created by the Initial Developer are Copyright (C) 2003
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Douglas Stebila <douglas@stebila.ca>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "ecp.h"
#include "mpi.h"
#include "mplogic.h"
#include "mpi-priv.h"
#include <stdlib.h>
#define ECP521_DIGITS ECL_CURVE_DIGITS(521)
/* Fast modular reduction for p521 = 2^521 - 1.  a can be r.  Uses
 * algorithm 2.31 from Hankerson, Menezes, Vanstone. Guide to
 * Elliptic Curve Cryptography.
 *
 * Since 2^521 == 1 (mod p521), a value a = hi * 2^521 + lo is congruent
 * to hi + lo, so the reduction is a shift, a mask and one addition,
 * followed by folding a possible carry out of bit 521 back in.
 *
 *   a    - value to reduce
 *   r    - receives the result; may be the same object as a
 *   meth - field method; only meth->irr (the modulus) is read, and only
 *          on the generic fallback path
 *
 * Returns MP_OKAY on success, otherwise the error reported by the
 * failing MPI call. */
mp_err
ec_GFp_nistp521_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    int a_bits = mpl_significant_bits(a);
    unsigned int i;

    /* m1 is a stack-backed mp_int wrapping s1, sized to hold exactly one
     * field element; it carries the high part of a (a >> 521). */
    mp_int m1;
    mp_digit s1[ECP521_DIGITS] = { 0 };

    MP_SIGN(&m1) = MP_ZPOS;
    MP_ALLOC(&m1) = ECP521_DIGITS;
    MP_USED(&m1) = ECP521_DIGITS;
    MP_DIGITS(&m1) = s1;

    /* Fewer than 521 significant bits means a < 2^520 < p521: nothing
     * to reduce. */
    if (a_bits < 521) {
        if (a == r) {
            return MP_OKAY;
        }
        return mp_copy(a, r);
    }

    /* Values wider than twice the field size would overflow s1, so they
     * take the generic reduction instead. */
    if (a_bits > (521 * 2)) {
        MP_CHECKOK(mp_mod(a, &meth->irr, r));
    } else {
#define FIRST_DIGIT (ECP521_DIGITS-1)
        /* s1 = a >> 521.  The shift by 9 relies on bit 521 being bit 9
         * of digit FIRST_DIGIT, which holds for 32- and 64-bit digits.
         * This must be computed before r is written, as r may be a. */
        for (i = FIRST_DIGIT; i < MP_USED(a) - 1; i++) {
            s1[i - FIRST_DIGIT] = (MP_DIGIT(a, i) >> 9)
                | (MP_DIGIT(a, 1 + i) << (MP_DIGIT_BIT - 9));
        }
        s1[i - FIRST_DIGIT] = MP_DIGIT(a, i) >> 9;

        /* r = a mod 2^521 (the low 521 bits). */
        if (a != r) {
            MP_CHECKOK(s_mp_pad(r, ECP521_DIGITS));
            for (i = 0; i < ECP521_DIGITS; i++) {
                MP_DIGIT(r, i) = MP_DIGIT(a, i);
            }
        }
        MP_USED(r) = ECP521_DIGITS;
        MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF;

        /* r = lo + hi; a carry into bit 521 is worth 1 modulo p521. */
        MP_CHECKOK(s_mp_add(r, &m1));
        if (MP_DIGIT(r, FIRST_DIGIT) & 0x200) {
            MP_CHECKOK(s_mp_add_d(r, 1));
            MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF;
        }

        /* r is now below 2^521, but it may still equal p521 itself
         * (all 521 bits set), e.g. for a == p521 or a == 2^1042 - 1.
         * That value is congruent to 0 and must be returned as 0 so
         * callers always get a fully reduced element. */
        if (MP_DIGIT(r, FIRST_DIGIT) == 0x1FF) {
            for (i = 0; i < FIRST_DIGIT; i++) {
                if (MP_DIGIT(r, i) != (mp_digit)~(mp_digit)0) {
                    break;
                }
            }
            if (i == FIRST_DIGIT) {
                for (i = 0; i < ECP521_DIGITS; i++) {
                    MP_DIGIT(r, i) = 0;
                }
                MP_SIGN(r) = MP_ZPOS;
            }
        }
        /* Drop leading zero digits (down to a single digit for 0). */
        s_mp_clamp(r);
    }

CLEANUP:
    return res;
}
/* Field squaring for NIST P-521: r = a^2 mod p521.
 *
 * The full-width square is written to r first and then reduced in place
 * by ec_GFp_nistp521_mod, so r may be the same object as a.
 *
 * Returns the failing call's status if either step reports an error,
 * otherwise the status of the reduction. */
mp_err
ec_GFp_nistp521_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
{
    mp_err res = mp_sqr(a, r);

    if (res < MP_OKAY) {
        return res;
    }
    return ec_GFp_nistp521_mod(r, r, meth);
}
/* Field multiplication for NIST P-521: r = a * b mod p521.
 *
 * The full-width product is written to r first and then reduced in
 * place by ec_GFp_nistp521_mod.  r may be a or b, and a may be b.
 *
 * Returns the failing call's status if either step reports an error,
 * otherwise the status of the reduction. */
mp_err
ec_GFp_nistp521_mul(const mp_int *a, const mp_int *b, mp_int *r,
                    const GFMethod *meth)
{
    mp_err res = mp_mul(a, b, r);

    if (res < MP_OKAY) {
        return res;
    }
    return ec_GFp_nistp521_mod(r, r, meth);
}
/* Field division for NIST P-521: r = a / b mod p521.
 *
 * When a is NULL the function returns the modular inverse of b in r
 * instead.  Otherwise the quotient is formed as a * b^-1 (MPI has no
 * divmod primitive) and reduced with ec_GFp_nistp521_mod.
 *
 * Returns the status of the first step that reports an error, otherwise
 * the status of the final reduction. */
mp_err
ec_GFp_nistp521_div(const mp_int *a, const mp_int *b, mp_int *r,
                    const GFMethod *meth)
{
    mp_int b_inv;
    mp_err res;

    /* Inverse-only request: no temporary is needed. */
    if (a == NULL) {
        return mp_invmod(b, &meth->irr, r);
    }

    /* Each step runs only if everything before it succeeded. */
    res = mp_init(&b_inv);
    if (res >= MP_OKAY) {
        res = mp_invmod(b, &meth->irr, &b_inv);
    }
    if (res >= MP_OKAY) {
        res = mp_mul(a, &b_inv, r);
    }
    if (res >= MP_OKAY) {
        res = ec_GFp_nistp521_mod(r, r, meth);
    }

    /* The temporary is released on every path, including after a failed
     * mp_init. */
    mp_clear(&b_inv);
    return res;
}
/* Install the P-521-specific field routines (reduce, multiply, square,
 * divide) into group->meth when the named curve is NIST P-521.  Any
 * other curve name leaves the group untouched.  Always returns MP_OKAY. */
mp_err
ec_group_set_gfp521(ECGroup *group, ECCurveName name)
{
    if (name != ECCurve_NIST_P521) {
        return MP_OKAY;
    }

    group->meth->field_div = ec_GFp_nistp521_div;
    group->meth->field_sqr = ec_GFp_nistp521_sqr;
    group->meth->field_mul = ec_GFp_nistp521_mul;
    group->meth->field_mod = ec_GFp_nistp521_mod;
    return MP_OKAY;
}

Просмотреть файл

@ -107,7 +107,7 @@ ECL_SRCS = ecl.c ecl_curve.c ecl_mult.c ecl_gf.c \
ec2_aff.c ec2_mont.c ec2_proj.c \
ec2_163.c ec2_193.c ec2_233.c \
ecp_aff.c ecp_jac.c ecp_mont.c \
ecp_192.c ecp_224.c \
ecp_192.c ecp_224.c ecp_256.c ecp_384.c ecp_521.c \
ec_naf.c ecp_jm.c
else
ECL_SRCS = $(NULL)