/* Source: gecko-dev/security/nss/lib/freebl/rijndael.c (1123 lines, 34 KiB, C) */

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifdef FREEBL_NO_DEPEND
#include "stubs.h"
#endif
#include "prinit.h"
#include "prenv.h"
#include "prerr.h"
#include "secerr.h"
#include "prtypes.h"
#include "blapi.h"
#include "rijndael.h"
#include "cts.h"
#include "ctr.h"
#include "gcm.h"
#include "mpi.h"
#ifdef USE_HW_AES
#include "intel-aes.h"
#endif
#ifdef INTEL_GCM
#include "intel-gcm.h"
#endif /* INTEL_GCM */
/* Forward declarations */
void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
unsigned int Nk);
void rijndael_native_encryptBlock(AESContext *cx,
unsigned char *output,
const unsigned char *input);
/* Stub definitions for the above rijndael_native_* functions, which
* shouldn't be used unless NSS_X86_OR_X64 is defined */
#ifndef NSS_X86_OR_X64
void
rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
                              unsigned int Nk)
{
    /* Stub used when NSS_X86_OR_X64 is not defined: no native key schedule
     * exists in this build, so report a library failure and trip the
     * assertion. None of the arguments are used. */
    (void)cx;
    (void)key;
    (void)Nk;

    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
    PORT_Assert(0);
}
void
rijndael_native_encryptBlock(AESContext *cx,
                             unsigned char *output,
                             const unsigned char *input)
{
    /* Stub used when NSS_X86_OR_X64 is not defined: no native block
     * encryptor exists in this build, so report a library failure and trip
     * the assertion. The output buffer is left untouched. */
    (void)cx;
    (void)output;
    (void)input;

    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
    PORT_Assert(0);
}
#endif /* NSS_X86_OR_X64 */
/*
* There are currently three ways to build this code, varying in performance
* and code size.
*
* RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab
* RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table
* values "on-the-fly", using gfm
* RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros
*
* The default is RIJNDAEL_INCLUDE_TABLES.
*/
/*
* When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..3],
* T**-1[0..3], IMXC[0..3]
* When building anything else, includes S, S**-1, Rcon
*/
#include "rijndael32.tab"
#if defined(RIJNDAEL_INCLUDE_TABLES)
/*
* RIJNDAEL_INCLUDE_TABLES
*
* In this configuration every accessor below is a plain array lookup into a
* precomputed table (_T0.._T3, _TInv0.._TInv3, _IMXC0.._IMXC3, _SInv).
*/
/* Forward-cipher T-tables */
#define T0(i) _T0[i]
#define T1(i) _T1[i]
#define T2(i) _T2[i]
#define T3(i) _T3[i]
/* Inverse-cipher T-tables */
#define TInv0(i) _TInv0[i]
#define TInv1(i) _TInv1[i]
#define TInv2(i) _TInv2[i]
#define TInv3(i) _TInv3[i]
/* Inverse MixColumn tables, used when converting the key schedule for
* decryption */
#define IMXC0(b) _IMXC0[b]
#define IMXC1(b) _IMXC1[b]
#define IMXC2(b) _IMXC2[b]
#define IMXC3(b) _IMXC3[b]
/* The S-box can be recovered from the T-tables: the cast to PRUint8 keeps
* only the low-order byte of the table word, and which table carries the
* plain S-box value in that byte depends on the host byte order. */
#ifdef IS_LITTLE_ENDIAN
#define SBOX(b) ((PRUint8)_T3[b])
#else
#define SBOX(b) ((PRUint8)_T1[b])
#endif
/* The inverse S-box is always stored as its own table */
#define SINV(b) (_SInv[b])
#else /* not RIJNDAEL_INCLUDE_TABLES */
/*
* Code for generating T-table values.
*/
/*
* WORD4 packs four byte values into one 32-bit word. The shift amounts are
* swapped between the two byte orders so that b0 always ends up in the byte
* at the lowest address of the word and b3 in the byte at the highest.
*/
#ifdef IS_LITTLE_ENDIAN
#define WORD4(b0, b1, b2, b3) \
((((PRUint32)b3) << 24) | \
(((PRUint32)b2) << 16) | \
(((PRUint32)b1) << 8) | \
((PRUint32)b0))
#else
#define WORD4(b0, b1, b2, b3) \
((((PRUint32)b0) << 24) | \
(((PRUint32)b1) << 16) | \
(((PRUint32)b2) << 8) | \
((PRUint32)b3))
#endif
/*
* Define the S and S**-1 tables (both have been stored)
*/
#define SBOX(b) (_S[b])
#define SINV(b) (_SInv[b])
/*
* The function xtime, used for Galois field multiplication
*
* Doubles a byte in GF(2**8): shift left by one and, when the top bit was
* set, fold the overflow back in with 0x1b. The result is NOT masked to
* 8 bits here; users either mask with 0xff (GFM02) or store into a PRUint8.
* The argument is expanded several times and is not parenthesized, so pass
* only simple, side-effect-free operands.
*/
#define XTIME(a) \
((a & 0x80) ? ((a << 1) ^ 0x1b) : (a << 1))
/* Choose GFM method (macros or function) */
#if defined(RIJNDAEL_GENERATE_VALUES_MACRO)
/*
* Galois field GF(2**8) multipliers, in macro form
*
* Multiplication by a constant is built purely from XTIME doublings
* (GFM02/GFM04/GFM08) XORed together; GFM02 masks each doubling back to
* 8 bits. Each macro expands its argument multiple times.
*/
#define GFM01(a) \
(a) /* a * 01 = a, the identity */
#define GFM02(a) \
(XTIME(a) & 0xff) /* a * 02 = xtime(a) */
#define GFM04(a) \
(GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */
#define GFM08(a) \
(GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */
#define GFM03(a) \
(GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */
#define GFM09(a) \
(GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */
#define GFM0B(a) \
(GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */
#define GFM0D(a) \
(GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */
#define GFM0E(a) \
(GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */
#else /* RIJNDAEL_GENERATE_VALUES */
/* GF_MULTIPLY
 *
 * Multiply two bytes represented in GF(2**8). Reduction is performed by
 * XTIME, whose 0x1b constant corresponds to the polynomial
 * x**8 + x**4 + x**3 + x + 1.
 *
 * The product is accumulated shift-and-add style: for every set bit of b
 * (lowest first) the current multiple of a is XORed into the result, and a
 * is doubled once per bit position.
 */
PRUint8
gfm(PRUint8 a, PRUint8 b)
{
    PRUint8 product = 0;

    for (; b != 0; b >>= 1) {
        if (b & 0x01) {
            product ^= a;
        }
        /* storing into a PRUint8 drops the bit shifted out by XTIME */
        a = XTIME(a);
    }
    return product;
}
/*
* Galois field GF(2**8) multipliers for the RIJNDAEL_GENERATE_VALUES build:
* the identity and doubling stay as macros, every other constant multiplier
* is delegated to the gfm() function.
*/
#define GFM01(a) \
(a) /* a * 01 = a, the identity */
#define GFM02(a) \
(XTIME(a) & 0xff) /* a * 02 = xtime(a) */
#define GFM03(a) \
(gfm(a, 0x03)) /* a * 03 */
#define GFM09(a) \
(gfm(a, 0x09)) /* a * 09 */
#define GFM0B(a) \
(gfm(a, 0x0B)) /* a * 0B */
#define GFM0D(a) \
(gfm(a, 0x0D)) /* a * 0D */
#define GFM0E(a) \
(gfm(a, 0x0E)) /* a * 0E */
#endif /* choosing GFM function */
/*
* The T-tables
*
* Each entry is the S-box output of i multiplied by the coefficients
* (02, 01, 01, 03), packed with WORD4; G_T1..G_T3 use the same coefficients
* rotated by one byte position each.
*/
#define G_T0(i) \
(WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i))))
#define G_T1(i) \
(WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i))))
#define G_T2(i) \
(WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i))))
#define G_T3(i) \
(WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i))))
/*
* The inverse T-tables
*
* Same construction using the inverse S-box and the coefficients
* (0E, 09, 0D, 0B), rotated by one byte position per table.
*/
#define G_TInv0(i) \
(WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i))))
#define G_TInv1(i) \
(WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i))))
#define G_TInv2(i) \
(WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i))))
#define G_TInv3(i) \
(WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i))))
/*
* The inverse mix column tables
*
* Like the inverse T-tables but applied to the raw byte i, with no S-box
* lookup.
*/
#define G_IMXC0(i) \
(WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i)))
#define G_IMXC1(i) \
(WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i)))
#define G_IMXC2(i) \
(WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i)))
#define G_IMXC3(i) \
(WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i)))
/* Now choose the T-table indexing method */
#if defined(RIJNDAEL_GENERATE_VALUES)
/* Generate one inverse T-table entry with a function.
 *
 * tx selects the table (0..3) and i is the table index. The inverse S-box
 * value of i is multiplied by 0E, 09, 0D and 0B using XTIME doublings, and
 * the four products are packed with WORD4, rotated right by tx byte
 * positions. Any tx outside 0..3 yields (PRUint32)-1.
 */
static PRUint32
gen_TInvXi(PRUint8 tx, PRUint8 i)
{
    PRUint8 x1, x2, x4, x8;
    PRUint8 lane[4];

    /* x1, x2, x4, x8 = SINV(i) times 01, 02, 04, 08 */
    x1 = SINV(i);
    x2 = XTIME(x1);
    x4 = XTIME(x2);
    x8 = XTIME(x4);

    lane[0] = x8 ^ x4 ^ x2; /* * 0E */
    lane[1] = x8 ^ x1;      /* * 09 */
    lane[2] = x8 ^ x4 ^ x1; /* * 0D */
    lane[3] = x8 ^ x2 ^ x1; /* * 0B */

    if (tx > 3) {
        return (PRUint32)-1;
    }
    /* table tx holds the (0E, 09, 0D, 0B) column rotated by tx positions */
    return WORD4(lane[(4 - tx) & 3], lane[(5 - tx) & 3],
                 lane[(6 - tx) & 3], lane[(7 - tx) & 3]);
}
/* RIJNDAEL_GENERATE_VALUES: forward and InvMixColumn entries are computed by
* the G_* macros, inverse T-table entries by the gen_TInvXi() function. */
#define T0(i) G_T0(i)
#define T1(i) G_T1(i)
#define T2(i) G_T2(i)
#define T3(i) G_T3(i)
#define TInv0(i) gen_TInvXi(0, i)
#define TInv1(i) gen_TInvXi(1, i)
#define TInv2(i) gen_TInvXi(2, i)
#define TInv3(i) gen_TInvXi(3, i)
#define IMXC0(b) G_IMXC0(b)
#define IMXC1(b) G_IMXC1(b)
#define IMXC2(b) G_IMXC2(b)
#define IMXC3(b) G_IMXC3(b)
#else /* RIJNDAEL_GENERATE_VALUES_MACRO */
/* generate values for the tables with macros: every accessor, including the
* inverse T-tables, maps straight onto the corresponding G_* macro */
#define T0(i) G_T0(i)
#define T1(i) G_T1(i)
#define T2(i) G_T2(i)
#define T3(i) G_T3(i)
#define TInv0(i) G_TInv0(i)
#define TInv1(i) G_TInv1(i)
#define TInv2(i) G_TInv2(i)
#define TInv3(i) G_TInv3(i)
#define IMXC0(b) G_IMXC0(b)
#define IMXC1(b) G_IMXC1(b)
#define IMXC2(b) G_IMXC2(b)
#define IMXC3(b) G_IMXC3(b)
#endif /* choose T-table indexing method */
#endif /* not RIJNDAEL_INCLUDE_TABLES */
/**************************************************************************
*
* Stuff related to the Rijndael key schedule
*
*************************************************************************/
/* SUBBYTE applies the S-box to each of the four bytes of a 32-bit word,
* keeping every byte in its original position. The argument is expanded
* four times, so it must be free of side effects. */
#define SUBBYTE(w) \
((((PRUint32)SBOX((w >> 24) & 0xff)) << 24) | \
(((PRUint32)SBOX((w >> 16) & 0xff)) << 16) | \
(((PRUint32)SBOX((w >> 8) & 0xff)) << 8) | \
(((PRUint32)SBOX((w)&0xff))))
/* ROTBYTE rotates a 32-bit word by one byte. The direction differs per byte
* order so that, viewed as bytes in memory, [b0 b1 b2 b3] becomes
* [b1 b2 b3 b0] on both. */
#ifdef IS_LITTLE_ENDIAN
#define ROTBYTE(b) \
((b >> 8) | (b << 24))
#else
#define ROTBYTE(b) \
((b << 8) | (b >> 24))
#endif
/* rijndael_key_expansion7
 *
 * Straightforward (non-unrolled) key expansion, used for Nk == 7
 * (224 key bits). Because 7 is not a multiple of 4, the extra SubByte step
 * lands in a different position of each round key, so the generic
 * word-by-word recurrence is used instead of an unrolled loop.
 *
 * cx   context whose expandedKey is filled in; Nb and Nr must be set
 * key  the cipher key, Nk * 4 bytes
 * Nk   key length in 32-bit words
 */
static void
rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk)
{
    PRUint32 *schedule = cx->expandedKey;
    const unsigned int total_words = cx->Nb * (cx->Nr + 1);
    unsigned int idx;

    /* 1. the first Nk words are the cipher key itself */
    memcpy(schedule, key, Nk * 4);

    /* 2. every later word is W[idx - Nk] XOR a transform of W[idx - 1] */
    for (idx = Nk; idx < total_words; ++idx) {
        PRUint32 prev = schedule[idx - 1];
        const unsigned int pos = idx % Nk;

        if (pos == 0) {
            /* first word of each Nk-word group: rotate, substitute, and
             * mix in the round constant */
            prev = SUBBYTE(ROTBYTE(prev)) ^ Rcon[idx / Nk - 1];
        } else if (pos == 4) {
            /* additional substitution applied because Nk > 6 */
            prev = SUBBYTE(prev);
        }
        schedule[idx] = schedule[idx - Nk] ^ prev;
    }
}
/* rijndael_key_expansion
*
* Generate the expanded key from the key input by the user.
*
* cx   context whose expandedKey receives Nb * (Nr + 1) words; Nb and Nr
*      must already be set
* key  the cipher key, Nk * 4 bytes
* Nk   key length in 32-bit words
*
* Throughout, pW trails i by one word: "tmp = *pW++" reads W[i - 1] and
* leaves pW pointing at W[i], which is then written.
*/
static void
rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
{
unsigned int i;
PRUint32 *W;
PRUint32 *pW;
PRUint32 tmp;
unsigned int round_key_words = cx->Nb * (cx->Nr + 1);
/* Nk == 7 is handled entirely by the non-unrolled variant */
if (Nk == 7) {
rijndael_key_expansion7(cx, key, Nk);
return;
}
W = cx->expandedKey;
/* The first Nk words contain the input cipher key */
memcpy(W, key, Nk * 4);
i = Nk;
pW = W + i - 1;
/* Loop over all sets of Nk words, except the last */
while (i < round_key_words - Nk) {
/* first word of the set: RotByte + SubByte + round constant */
tmp = *pW++;
tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
*pW = W[i++ - Nk] ^ tmp;
/* words 1..3 of the set: plain XOR recurrence */
tmp = *pW++;
*pW = W[i++ - Nk] ^ tmp;
tmp = *pW++;
*pW = W[i++ - Nk] ^ tmp;
tmp = *pW++;
*pW = W[i++ - Nk] ^ tmp;
if (Nk == 4)
continue;
/* Remaining Nk - 4 words of the set. Each case deliberately falls
* through to the next. Case 7 cannot be reached because Nk == 7
* returned early above. */
switch (Nk) {
case 8:
/* word 4 of an 8-word set gets the extra SubByte */
tmp = *pW++;
tmp = SUBBYTE(tmp);
*pW = W[i++ - Nk] ^ tmp;
/* fall through */
case 7:
tmp = *pW++;
*pW = W[i++ - Nk] ^ tmp;
/* fall through */
case 6:
tmp = *pW++;
*pW = W[i++ - Nk] ^ tmp;
/* fall through */
case 5:
tmp = *pW++;
*pW = W[i++ - Nk] ^ tmp;
}
}
/* Generate the last word */
tmp = *pW++;
tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
*pW = W[i++ - Nk] ^ tmp;
/* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However,
* since the above loop generated all but the last Nk key words, there
* is no more need for the SubByte transformation.
*/
if (Nk < 8) {
for (; i < round_key_words; ++i) {
tmp = *pW++;
*pW = W[i - Nk] ^ tmp;
}
} else {
/* except in the case when Nk == 8. Then one more SubByte may have
* to be performed, at i % Nk == 4.
*/
for (; i < round_key_words; ++i) {
tmp = *pW++;
if (i % Nk == 4)
tmp = SUBBYTE(tmp);
*pW = W[i - Nk] ^ tmp;
}
}
}
/* rijndael_invkey_expansion
 *
 * Build the expanded key used by the inverse cipher. The schedule starts
 * out as the ordinary expanded key; afterwards every word of round keys
 * 1 .. Nr-1 (the first and last round keys are left alone) is replaced by
 * its InvMixColumn image:
 *
 *      [ 0E 0B 0D 09 ]   [ b0 ]
 *      [ 09 0E 0B 0D ] * [ b1 ]
 *      [ 0D 09 0E 0B ]   [ b2 ]
 *      [ 0B 0D 09 0E ]   [ b3 ]
 *
 * where b0..b3 are the bytes of the key word in memory order.
 */
static void
rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
{
    PRUint32 *word;
    unsigned int round;
    unsigned int col;
    unsigned int words_per_round;
    int Nb = cx->Nb;

    /* begin with the regular key schedule */
    rijndael_key_expansion(cx, key, Nk);

    /* Four words are always transformed per round; block widths of 5..8
     * words transform all Nb of them. */
    words_per_round = (Nb >= 5 && Nb <= 8) ? (unsigned int)Nb : 4;

    /* skip round key 0 */
    word = cx->expandedKey + cx->Nb;
    for (round = 1; round < cx->Nr; ++round) {
        for (col = 0; col < words_per_round; ++col, ++word) {
            /* the four bytes are read before the word is overwritten */
            const PRUint8 *bytes = (const PRUint8 *)word;
            *word = IMXC0(bytes[0]) ^ IMXC1(bytes[1]) ^
                    IMXC2(bytes[2]) ^ IMXC3(bytes[3]);
        }
    }
}
/**************************************************************************
*
* Stuff related to Rijndael encryption/decryption.
*
*************************************************************************/
/* BYTEnWORD(w) masks out everything except the byte of w that sits at
* memory offset n within the word; the masks are mirrored between the two
* byte orders. The selected byte is left in place, not shifted down. */
#ifdef IS_LITTLE_ENDIAN
#define BYTE0WORD(w) ((w)&0x000000ff)
#define BYTE1WORD(w) ((w)&0x0000ff00)
#define BYTE2WORD(w) ((w)&0x00ff0000)
#define BYTE3WORD(w) ((w)&0xff000000)
#else
#define BYTE0WORD(w) ((w)&0xff000000)
#define BYTE1WORD(w) ((w)&0x00ff0000)
#define BYTE2WORD(w) ((w)&0x0000ff00)
#define BYTE3WORD(w) ((w)&0x000000ff)
#endif
/* The 128-bit cipher state, viewable either as four 32-bit column words or
* as sixteen individual bytes. */
typedef union {
PRUint32 w[4];
PRUint8 b[16];
} rijndael_state;
/* Column accessors take the state object explicitly ... */
#define COLUMN_0(state) state.w[0]
#define COLUMN_1(state) state.w[1]
#define COLUMN_2(state) state.w[2]
#define COLUMN_3(state) state.w[3]
/* ... whereas STATE_BYTE implicitly refers to a local variable that must be
* named "state" at the point of use. */
#define STATE_BYTE(i) state.b[i]
/* rijndael_encryptBlock128
*
* Encrypt a single 16-byte block from input into output using the
* table-driven software implementation, the round keys in cx->expandedKey
* and cx->Nr rounds.
*
* The block is processed as four 32-bit words. When NSS_X86_OR_X64 is
* defined the buffers are accessed through word pointers directly; otherwise
* a buffer whose address is not a multiple of 4 is staged through an aligned
* local copy.
*/
static void NO_SANITIZE_ALIGNMENT
rijndael_encryptBlock128(AESContext *cx,
unsigned char *output,
const unsigned char *input)
{
unsigned int r;
PRUint32 *roundkeyw;
rijndael_state state;
PRUint32 C0, C1, C2, C3;
#if defined(NSS_X86_OR_X64)
/* pIn/pOut are simple aliases for the caller's buffers here */
#define pIn input
#define pOut output
#else
unsigned char *pIn, *pOut;
PRUint32 inBuf[4], outBuf[4];
/* misaligned input: read from an aligned copy instead */
if ((ptrdiff_t)input & 0x3) {
memcpy(inBuf, input, sizeof inBuf);
pIn = (unsigned char *)inBuf;
} else {
pIn = (unsigned char *)input;
}
/* misaligned output: write into outBuf and copy out at the end */
if ((ptrdiff_t)output & 0x3) {
pOut = (unsigned char *)outBuf;
} else {
pOut = (unsigned char *)output;
}
#endif
roundkeyw = cx->expandedKey;
/* Step 1: Add Round Key 0 to initial state */
COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++;
COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++;
COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++;
COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++;
/* Step 2: Loop over rounds [1..NR-1] */
for (r = 1; r < cx->Nr; ++r) {
/* Do ShiftRow, ByteSub, and MixColumn all at once */
/* The byte indices select the state diagonals; all four columns are
* computed into C0..C3 before the state is overwritten. */
C0 = T0(STATE_BYTE(0)) ^
T1(STATE_BYTE(5)) ^
T2(STATE_BYTE(10)) ^
T3(STATE_BYTE(15));
C1 = T0(STATE_BYTE(4)) ^
T1(STATE_BYTE(9)) ^
T2(STATE_BYTE(14)) ^
T3(STATE_BYTE(3));
C2 = T0(STATE_BYTE(8)) ^
T1(STATE_BYTE(13)) ^
T2(STATE_BYTE(2)) ^
T3(STATE_BYTE(7));
C3 = T0(STATE_BYTE(12)) ^
T1(STATE_BYTE(1)) ^
T2(STATE_BYTE(6)) ^
T3(STATE_BYTE(11));
/* Round key addition */
COLUMN_0(state) = C0 ^ *roundkeyw++;
COLUMN_1(state) = C1 ^ *roundkeyw++;
COLUMN_2(state) = C2 ^ *roundkeyw++;
COLUMN_3(state) = C3 ^ *roundkeyw++;
}
/* Step 3: Do the last round */
/* Final round does not employ MixColumn */
/* Each BYTEnWORD/Tm pairing below picks a byte lane whose T-table
* coefficient is 01, i.e. the plain S-box value of the state byte. */
C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) |
(BYTE1WORD(T3(STATE_BYTE(5)))) |
(BYTE2WORD(T0(STATE_BYTE(10)))) |
(BYTE3WORD(T1(STATE_BYTE(15))))) ^
*roundkeyw++;
C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) |
(BYTE1WORD(T3(STATE_BYTE(9)))) |
(BYTE2WORD(T0(STATE_BYTE(14)))) |
(BYTE3WORD(T1(STATE_BYTE(3))))) ^
*roundkeyw++;
C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) |
(BYTE1WORD(T3(STATE_BYTE(13)))) |
(BYTE2WORD(T0(STATE_BYTE(2)))) |
(BYTE3WORD(T1(STATE_BYTE(7))))) ^
*roundkeyw++;
C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) |
(BYTE1WORD(T3(STATE_BYTE(1)))) |
(BYTE2WORD(T0(STATE_BYTE(6)))) |
(BYTE3WORD(T1(STATE_BYTE(11))))) ^
*roundkeyw++;
*((PRUint32 *)pOut) = C0;
*((PRUint32 *)(pOut + 4)) = C1;
*((PRUint32 *)(pOut + 8)) = C2;
*((PRUint32 *)(pOut + 12)) = C3;
#if defined(NSS_X86_OR_X64)
#undef pIn
#undef pOut
#else
/* copy the staged result to the caller's misaligned buffer */
if ((ptrdiff_t)output & 0x3) {
memcpy(output, outBuf, sizeof outBuf);
}
#endif
}
/* rijndael_decryptBlock128
*
* Decrypt a single 16-byte block from input into output using the
* table-driven software implementation. The round keys in cx->expandedKey
* are consumed from the end towards the start, one word at a time.
*
* Buffer alignment is handled as in rijndael_encryptBlock128. Always returns
* SECSuccess.
*/
static SECStatus NO_SANITIZE_ALIGNMENT
rijndael_decryptBlock128(AESContext *cx,
unsigned char *output,
const unsigned char *input)
{
int r;
PRUint32 *roundkeyw;
rijndael_state state;
PRUint32 C0, C1, C2, C3;
#if defined(NSS_X86_OR_X64)
/* pIn/pOut are simple aliases for the caller's buffers here */
#define pIn input
#define pOut output
#else
unsigned char *pIn, *pOut;
PRUint32 inBuf[4], outBuf[4];
/* misaligned input: read from an aligned copy instead */
if ((ptrdiff_t)input & 0x3) {
memcpy(inBuf, input, sizeof inBuf);
pIn = (unsigned char *)inBuf;
} else {
pIn = (unsigned char *)input;
}
/* misaligned output: write into outBuf and copy out at the end */
if ((ptrdiff_t)output & 0x3) {
pOut = (unsigned char *)outBuf;
} else {
pOut = (unsigned char *)output;
}
#endif
/* start at the last word of the last round key */
roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3;
/* reverse the final key addition */
COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--;
COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--;
COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--;
COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--;
/* Loop over rounds in reverse [NR..1] */
/* (the body runs Nr - 1 times; the remaining round is done byte-wise
* after the loop) */
for (r = cx->Nr; r > 1; --r) {
/* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
C0 = TInv0(STATE_BYTE(0)) ^
TInv1(STATE_BYTE(13)) ^
TInv2(STATE_BYTE(10)) ^
TInv3(STATE_BYTE(7));
C1 = TInv0(STATE_BYTE(4)) ^
TInv1(STATE_BYTE(1)) ^
TInv2(STATE_BYTE(14)) ^
TInv3(STATE_BYTE(11));
C2 = TInv0(STATE_BYTE(8)) ^
TInv1(STATE_BYTE(5)) ^
TInv2(STATE_BYTE(2)) ^
TInv3(STATE_BYTE(15));
C3 = TInv0(STATE_BYTE(12)) ^
TInv1(STATE_BYTE(9)) ^
TInv2(STATE_BYTE(6)) ^
TInv3(STATE_BYTE(3));
/* Invert the key addition step */
COLUMN_3(state) = C3 ^ *roundkeyw--;
COLUMN_2(state) = C2 ^ *roundkeyw--;
COLUMN_1(state) = C1 ^ *roundkeyw--;
COLUMN_0(state) = C0 ^ *roundkeyw--;
}
/* inverse sub */
/* The source indices also apply the inverse row shift while the bytes
* are written straight into the output buffer. */
pOut[0] = SINV(STATE_BYTE(0));
pOut[1] = SINV(STATE_BYTE(13));
pOut[2] = SINV(STATE_BYTE(10));
pOut[3] = SINV(STATE_BYTE(7));
pOut[4] = SINV(STATE_BYTE(4));
pOut[5] = SINV(STATE_BYTE(1));
pOut[6] = SINV(STATE_BYTE(14));
pOut[7] = SINV(STATE_BYTE(11));
pOut[8] = SINV(STATE_BYTE(8));
pOut[9] = SINV(STATE_BYTE(5));
pOut[10] = SINV(STATE_BYTE(2));
pOut[11] = SINV(STATE_BYTE(15));
pOut[12] = SINV(STATE_BYTE(12));
pOut[13] = SINV(STATE_BYTE(9));
pOut[14] = SINV(STATE_BYTE(6));
pOut[15] = SINV(STATE_BYTE(3));
/* final key addition */
/* applied in place on the output buffer, ending with round key 0 */
*((PRUint32 *)(pOut + 12)) ^= *roundkeyw--;
*((PRUint32 *)(pOut + 8)) ^= *roundkeyw--;
*((PRUint32 *)(pOut + 4)) ^= *roundkeyw--;
*((PRUint32 *)pOut) ^= *roundkeyw--;
#if defined(NSS_X86_OR_X64)
#undef pIn
#undef pOut
#else
/* copy the staged result to the caller's misaligned buffer */
if ((ptrdiff_t)output & 0x3) {
memcpy(output, outBuf, sizeof outBuf);
}
#endif
return SECSuccess;
}
/**************************************************************************
*
* Rijndael modes of operation (ECB and CBC)
*
*************************************************************************/
/* ECB encryption worker: encrypts inputLen bytes, one AES block at a time,
 * each block independently of the others. inputLen is expected to be a
 * multiple of AES_BLOCK_SIZE. outputLen and maxOutputLen are not used here.
 * Always returns SECSuccess.
 */
static SECStatus
rijndael_encryptECB(AESContext *cx, unsigned char *output,
                    unsigned int *outputLen, unsigned int maxOutputLen,
                    const unsigned char *input, unsigned int inputLen)
{
    /* Use hardware acceleration for normal AES parameters when available,
     * otherwise the table-driven software block function. */
    AESBlockFunc *blockFn = aesni_support() ? &rijndael_native_encryptBlock
                                            : &rijndael_encryptBlock128;
    unsigned int remaining;

    for (remaining = inputLen; remaining > 0; remaining -= AES_BLOCK_SIZE) {
        (*blockFn)(cx, output, input);
        input += AES_BLOCK_SIZE;
        output += AES_BLOCK_SIZE;
    }
    return SECSuccess;
}
/* CBC encryption worker: each plaintext block is XORed with the previous
 * ciphertext block (cx->iv for the first one) and then encrypted. On return
 * cx->iv holds the last ciphertext block so that a later call continues the
 * chain. inputLen is expected to be a multiple of AES_BLOCK_SIZE; a zero
 * length leaves cx->iv untouched. Always returns SECSuccess.
 */
static SECStatus
rijndael_encryptCBC(AESContext *cx, unsigned char *output,
                    unsigned int *outputLen, unsigned int maxOutputLen,
                    const unsigned char *input, unsigned int inputLen)
{
    /* only the first AES_BLOCK_SIZE bytes of this scratch buffer are used */
    unsigned char xored[AES_BLOCK_SIZE * 8];
    const unsigned char *chain;
    unsigned int k;

    if (inputLen == 0) {
        return SECSuccess;
    }

    chain = cx->iv;
    do {
        /* XOR with the last block (IV if first block) */
        for (k = 0; k < AES_BLOCK_SIZE; ++k) {
            xored[k] = input[k] ^ chain[k];
        }
        rijndael_encryptBlock128(cx, output, xored);

        /* the block just written becomes the next chaining value */
        chain = output;
        input += AES_BLOCK_SIZE;
        output += AES_BLOCK_SIZE;
        inputLen -= AES_BLOCK_SIZE;
    } while (inputLen > 0);

    memcpy(cx->iv, chain, AES_BLOCK_SIZE);
    return SECSuccess;
}
/* ECB decryption worker: decrypts inputLen bytes, one AES block at a time,
 * each block independently of the others. inputLen is expected to be a
 * multiple of AES_BLOCK_SIZE. outputLen and maxOutputLen are not used here.
 * Returns SECFailure as soon as a block fails to decrypt, else SECSuccess.
 */
static SECStatus
rijndael_decryptECB(AESContext *cx, unsigned char *output,
                    unsigned int *outputLen, unsigned int maxOutputLen,
                    const unsigned char *input, unsigned int inputLen)
{
    for (; inputLen > 0; inputLen -= AES_BLOCK_SIZE) {
        SECStatus rv = rijndael_decryptBlock128(cx, output, input);
        if (rv != SECSuccess) {
            return SECFailure;
        }
        input += AES_BLOCK_SIZE;
        output += AES_BLOCK_SIZE;
    }
    return SECSuccess;
}
/* CBC decryption worker. Blocks are processed from the LAST to the first so
 * that the ciphertext block needed for chaining has not yet been overwritten
 * when output and input are the same buffer. The final ciphertext block is
 * saved up front and becomes the new cx->iv once everything has been
 * decrypted. inputLen is expected to be a multiple of AES_BLOCK_SIZE.
 * Returns SECFailure if a block fails to decrypt (cx->iv is then left
 * unchanged), else SECSuccess.
 */
static SECStatus
rijndael_decryptCBC(AESContext *cx, unsigned char *output,
                    unsigned int *outputLen, unsigned int maxOutputLen,
                    const unsigned char *input, unsigned int inputLen)
{
    const unsigned char *cipher;
    unsigned char *plain;
    unsigned char nextIV[AES_BLOCK_SIZE];
    unsigned int lastOffset;
    unsigned int k;

    if (inputLen == 0) {
        return SECSuccess;
    }
    PORT_Assert(output - input >= 0 || input - output >= (int)inputLen);

    lastOffset = inputLen - AES_BLOCK_SIZE;
    cipher = input + lastOffset;
    plain = output + lastOffset;
    memcpy(nextIV, cipher, AES_BLOCK_SIZE);

    /* every block except the first chains with the ciphertext block that
     * precedes it */
    for (; inputLen > AES_BLOCK_SIZE; inputLen -= AES_BLOCK_SIZE) {
        const unsigned char *prev = cipher - AES_BLOCK_SIZE;

        if (rijndael_decryptBlock128(cx, plain, cipher) != SECSuccess) {
            return SECFailure;
        }
        for (k = 0; k < AES_BLOCK_SIZE; ++k) {
            plain[k] ^= prev[k];
        }
        plain -= AES_BLOCK_SIZE;
        cipher = prev;
    }

    /* the first block chains with the IV held in the context */
    if (cipher == input) {
        if (rijndael_decryptBlock128(cx, plain, cipher) != SECSuccess) {
            return SECFailure;
        }
        for (k = 0; k < AES_BLOCK_SIZE; ++k) {
            plain[k] ^= cx->iv[k];
        }
    }

    memcpy(cx->iv, nextIV, AES_BLOCK_SIZE);
    return SECSuccess;
}
/************************************************************************
*
* BLAPI Interface functions
*
* The following functions implement the encryption routines defined in
* BLAPI for the AES cipher, Rijndael.
*
***********************************************************************/
AESContext *
AES_AllocateContext(void)
{
    /* Allocation is delegated to PORT_ZNewAligned with an alignment of 16
     * and the context's "mem" member; the result is returned as-is, so a
     * failed allocation is passed straight through to the caller. */
    AESContext *ctx = PORT_ZNewAligned(AESContext, 16, mem);

    return ctx;
}
/*
** Initialize a new AES context suitable for AES encryption/decryption in
** the ECB or CBC mode.
** "cx" the context to initialize; must not be NULL
** "key" raw key bytes; must not be NULL
** "keysize" key length in bytes: 16..32 and a multiple of 4
** "iv" initialization vector, AES_BLOCK_SIZE bytes; required for NSS_AES_CBC
** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC
** "encrypt" non-zero to set up for encryption, zero for decryption
**
** On any invalid argument SEC_ERROR_INVALID_ARGS is set and SECFailure is
** returned. On success the worker function, the expanded key and the
** worker_cx/destroy/isBlock fields of the context have been set.
*/
static SECStatus
aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
const unsigned char *iv, int mode, unsigned int encrypt)
{
unsigned int Nk;
PRBool use_hw_aes;
/* According to AES, block lengths are 128 and key lengths are 128, 192, or
* 256 bits. We support other key sizes as well, anywhere in the range
* [128, 256] bits, as long as the length in bytes is divisible by 4.
*/
if (key == NULL ||
keysize < AES_BLOCK_SIZE ||
keysize > 32 ||
keysize % 4 != 0) {
PORT_SetError(SEC_ERROR_INVALID_ARGS);
return SECFailure;
}
if (mode != NSS_AES && mode != NSS_AES_CBC) {
PORT_SetError(SEC_ERROR_INVALID_ARGS);
return SECFailure;
}
if (mode == NSS_AES_CBC && iv == NULL) {
PORT_SetError(SEC_ERROR_INVALID_ARGS);
return SECFailure;
}
if (!cx) {
PORT_SetError(SEC_ERROR_INVALID_ARGS);
return SECFailure;
}
/* hardware AES is only considered for key sizes that are a multiple of
* 8 bytes (16, 24 or 32 given the range check above) */
use_hw_aes = aesni_support() && (keysize % 8) == 0;
/* Nb = (block size in bits) / 32 */
cx->Nb = AES_BLOCK_SIZE / 4;
/* Nk = (key size in bits) / 32 */
Nk = keysize / 4;
/* Obtain number of rounds from "table" */
cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);
/* copy in the iv, if necessary */
if (mode == NSS_AES_CBC) {
memcpy(cx->iv, iv, AES_BLOCK_SIZE);
/* NOTE: when USE_HW_AES is defined, the "else" below binds to the braced
* block that follows the #endif; when it is not defined, that block
* simply runs unconditionally. */
#ifdef USE_HW_AES
if (use_hw_aes) {
cx->worker = (freeblCipherFunc)
intel_aes_cbc_worker(encrypt, keysize);
} else
#endif
{
cx->worker = (freeblCipherFunc)(encrypt
? &rijndael_encryptCBC
: &rijndael_decryptCBC);
}
} else {
#ifdef USE_HW_AES
if (use_hw_aes) {
cx->worker = (freeblCipherFunc)
intel_aes_ecb_worker(encrypt, keysize);
} else
#endif
{
cx->worker = (freeblCipherFunc)(encrypt
? &rijndael_encryptECB
: &rijndael_decryptECB);
}
}
/* Guard against writing past the expanded key storage: asserted first,
* then checked again at run time so the failure is still reported if the
* assertion does not stop execution. */
PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);
if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
return SECFailure;
}
#ifdef USE_HW_AES
if (use_hw_aes) {
intel_aes_init(encrypt, keysize);
} else
#endif
{
/* Generate expanded key */
if (encrypt) {
/* With USE_HW_AES defined this block is only reached when use_hw_aes
* is false, so the native key expansion below is taken only in builds
* without USE_HW_AES. */
if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES ||
cx->mode == NSS_AES_CTR)) {
PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32);
/* Prepare hardware key for normal AES parameters. */
rijndael_native_key_expansion(cx, key, Nk);
} else {
rijndael_key_expansion(cx, key, Nk);
}
} else {
rijndael_invkey_expansion(cx, key, Nk);
}
}
/* plain ECB/CBC: the context is its own worker context and nothing extra
* needs destroying */
cx->worker_cx = cx;
cx->destroy = NULL;
cx->isBlock = PR_TRUE;
return SECSuccess;
}
/*
 * AES_InitContext
 *
 * Initialize an already allocated AES context for the requested mode.
 *
 *   cx         context to initialize; must not be NULL
 *   key        raw key bytes (validated by aes_InitContext)
 *   keysize    key length in bytes (validated by aes_InitContext)
 *   iv         for NSS_AES_CBC and NSS_AES_CTS the initialization vector;
 *              for the other modes it is forwarded unchanged to the
 *              mode-specific CreateContext function
 *   mode       NSS_AES, NSS_AES_CBC, NSS_AES_CTS, NSS_AES_GCM or NSS_AES_CTR
 *   encrypt    non-zero to encrypt, zero to decrypt
 *   blocksize  must equal AES_BLOCK_SIZE
 *
 * CTS is layered on top of the CBC worker; GCM and CTR are layered on top
 * of the ECB *encryption* worker regardless of direction.
 *
 * Returns SECSuccess, or SECFailure (with SEC_ERROR_INVALID_ARGS set for a
 * NULL context or a wrong block size). On failure after the context has
 * been touched, AES_DestroyContext(cx, PR_FALSE) has already been called.
 */
SECStatus
AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
                const unsigned char *iv, int mode, unsigned int encrypt,
                unsigned int blocksize)
{
    int basemode = mode;
    PRBool baseencrypt = encrypt;
    SECStatus rv;

    /* cx is written to below, before aes_InitContext gets a chance to
     * validate it, so it has to be rejected here. */
    if (cx == NULL || blocksize != AES_BLOCK_SIZE) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }

    /* map the requested mode onto the underlying ECB/CBC primitive */
    switch (mode) {
        case NSS_AES_CTS:
            basemode = NSS_AES_CBC;
            break;
        case NSS_AES_GCM:
        case NSS_AES_CTR:
            basemode = NSS_AES;
            baseencrypt = PR_TRUE;
            break;
        default:
            /* NSS_AES, NSS_AES_CBC and anything unknown are passed through;
             * aes_InitContext rejects unsupported modes */
            break;
    }

    /* Make sure enough is initialized so we can safely call Destroy. */
    cx->worker_cx = NULL;
    cx->destroy = NULL;
    cx->mode = mode;
    rv = aes_InitContext(cx, key, keysize, iv, basemode, baseencrypt);
    if (rv != SECSuccess) {
        AES_DestroyContext(cx, PR_FALSE);
        return rv;
    }

    /* finally, set up any mode specific contexts */
    switch (mode) {
        case NSS_AES_CTS:
            cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv);
            cx->worker = (freeblCipherFunc)(encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate);
            cx->destroy = (freeblDestroyFunc)CTS_DestroyContext;
            cx->isBlock = PR_FALSE;
            break;
        case NSS_AES_GCM:
#if defined(INTEL_GCM) && defined(USE_HW_AES)
            if (aesni_support() && (keysize % 8) == 0 && avx_support() &&
                clmul_support()) {
                cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv);
                cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate
                                                        : intel_AES_GCM_DecryptUpdate);
                cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext;
                cx->isBlock = PR_FALSE;
            } else
#endif
            {
                cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv);
                cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate
                                                        : GCM_DecryptUpdate);
                cx->destroy = (freeblDestroyFunc)GCM_DestroyContext;
                cx->isBlock = PR_FALSE;
            }
            break;
        case NSS_AES_CTR:
            cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv);
#if defined(USE_HW_AES) && defined(_MSC_VER)
            if (aesni_support() && (keysize % 8) == 0) {
                cx->worker = (freeblCipherFunc)CTR_Update_HW_AES;
            } else
#endif
            {
                cx->worker = (freeblCipherFunc)CTR_Update;
            }
            cx->destroy = (freeblDestroyFunc)CTR_DestroyContext;
            cx->isBlock = PR_FALSE;
            break;
        default:
            /* everything has already been set up by aes_InitContext, just
             * return */
            return SECSuccess;
    }

    /* check to see if we succeeded in getting the worker context */
    if (cx->worker_cx == NULL) {
        /* no, just destroy the existing context; clearing destroy makes it
         * explicit that there is no sub-context to tear down */
        cx->destroy = NULL;
        AES_DestroyContext(cx, PR_FALSE);
        return SECFailure;
    }
    return SECSuccess;
}
/* AES_CreateContext
 *
 * Allocate a new context for Rijndael operations and initialize it with the
 * given key, iv, mode, direction, key size and block size.
 *
 * Returns the ready-to-use context, or NULL if either the allocation or the
 * initialization failed (in the latter case the context has already been
 * destroyed and freed).
 */
AESContext *
AES_CreateContext(const unsigned char *key, const unsigned char *iv,
                  int mode, int encrypt,
                  unsigned int keysize, unsigned int blocksize)
{
    AESContext *ctx = AES_AllocateContext();

    if (ctx == NULL) {
        return NULL;
    }
    if (AES_InitContext(ctx, key, keysize, iv, mode, encrypt,
                        blocksize) != SECSuccess) {
        AES_DestroyContext(ctx, PR_TRUE);
        return NULL;
    }
    return ctx;
}
/*
 * AES_DestroyContext
 *
 * Tear down and zero an AES cipher context. Any mode-specific sub-context
 * is destroyed through cx->destroy first; then the whole context, including
 * the expanded key and IV, is overwritten with zeros so no key material is
 * left behind.
 *
 *   cx      the context to destroy; NULL is accepted and ignored
 *   freeit  if true, also free the memory backing the context; if false,
 *           the allocation pointer (cx->mem) is preserved so a later call
 *           with freeit set can still release it
 */
void
AES_DestroyContext(AESContext *cx, PRBool freeit)
{
    void *mem;

    if (cx == NULL) {
        return;
    }

    /* remember the allocation before the context is wiped */
    mem = cx->mem;

    if (cx->worker_cx && cx->destroy) {
        (*cx->destroy)(cx->worker_cx, PR_TRUE);
        cx->worker_cx = NULL;
        cx->destroy = NULL;
    }

    /* scrub key schedule, IV and all other state */
    memset(cx, 0, sizeof(*cx));

    if (freeit) {
        PORT_Free(mem);
    } else {
        /* we may get called again to actually free the context */
        cx->mem = mem;
    }
}
/*
* AES_Encrypt
*
* Encrypt an arbitrary-length buffer. The output buffer must already be
* allocated to at least inputLen.
*
* Fails with SEC_ERROR_INVALID_ARGS if cx or output is NULL, or input is
* NULL while inputLen is non-zero; with SEC_ERROR_INPUT_LEN if the context
* is in a block mode and inputLen is not a multiple of AES_BLOCK_SIZE; and
* with SEC_ERROR_OUTPUT_LEN if maxOutputLen is smaller than inputLen.
* Otherwise *outputLen is set to inputLen and the work is handed to the
* context's worker function, whose status is returned.
* outputLen itself is dereferenced without a NULL check.
*/
SECStatus
AES_Encrypt(AESContext *cx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
const unsigned char *input, unsigned int inputLen)
{
/* Check args */
if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) {
PORT_SetError(SEC_ERROR_INVALID_ARGS);
return SECFailure;
}
if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) {
PORT_SetError(SEC_ERROR_INPUT_LEN);
return SECFailure;
}
if (maxOutputLen < inputLen) {
PORT_SetError(SEC_ERROR_OUTPUT_LEN);
return SECFailure;
}
*outputLen = inputLen;
#if UINT_MAX > MP_32BIT_MAX
/*
* we can guarantee that GCM won't overflow if we limit the input to
* 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.
*
* We do it here to cover both hardware and software GCM operations.
*/
{
PR_STATIC_ASSERT(sizeof(unsigned int) > 4);
}
/* note: an over-long GCM input is reported as SEC_ERROR_OUTPUT_LEN */
if ((cx->mode == NSS_AES_GCM) && (inputLen > MP_32BIT_MAX)) {
PORT_SetError(SEC_ERROR_OUTPUT_LEN);
return SECFailure;
}
#else
/* if we can't pass in a number wider than 32 bits, then no such check is
* needed */
{
PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);
}
#endif
return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
input, inputLen, AES_BLOCK_SIZE);
}
/*
 * AES_Decrypt
 *
 * Decrypt an arbitrary-length buffer. The output buffer must already be
 * allocated to at least inputLen.
 *
 * Fails with SEC_ERROR_INVALID_ARGS if cx or output is NULL, or input is
 * NULL while inputLen is non-zero; with SEC_ERROR_INPUT_LEN if the context
 * is in a block mode and inputLen is not a multiple of AES_BLOCK_SIZE; and
 * with SEC_ERROR_OUTPUT_LEN if maxOutputLen is smaller than inputLen.
 * Otherwise *outputLen is set to inputLen and the work is handed to the
 * context's worker function, whose status is returned.
 */
SECStatus
AES_Decrypt(AESContext *cx, unsigned char *output,
            unsigned int *outputLen, unsigned int maxOutputLen,
            const unsigned char *input, unsigned int inputLen)
{
    /* argument sanity */
    if (!cx || !output || (!input && inputLen != 0)) {
        PORT_SetError(SEC_ERROR_INVALID_ARGS);
        return SECFailure;
    }

    /* block modes only accept whole blocks */
    if (cx->isBlock && (inputLen % AES_BLOCK_SIZE) != 0) {
        PORT_SetError(SEC_ERROR_INPUT_LEN);
        return SECFailure;
    }

    /* the output is exactly as long as the input */
    if (inputLen > maxOutputLen) {
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
    *outputLen = inputLen;

    return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
                         input, inputLen, AES_BLOCK_SIZE);
}