Bug 1606927 - land NSS cd55a3a90502 UPGRADE_NSS_RELEASE, r=kjacobs

2020-01-22  Kai Engert  <kaie@kuix.de>

	* lib/softoken/lowpbe.c:
	Bug 1606992 - Follow-up to also cache most recent PBKDF1 hash (in
	addition to PBKDF2 hash). r=kjacobs

	[cd55a3a90502] [tip]

2020-01-22  Kevin Jacobs  <kjacobs@mozilla.com>

	* lib/freebl/aes-x86.c, lib/freebl/rijndael.c, lib/freebl/rijndael.h:
	Bug 1608493 - Use AES-NI intrinsics for CBC and ECB decrypt when no
	assembly implementation is available. r=mt

	AES-NI is currently not used for //CBC// or //ECB decrypt// when an
	assembly implementation (`intel-aes.s` or
	`intel-aes-x86/64-masm.asm`) is not available. Concretely, this is the
	case on macOS, Linux32, and other non-Linux OSes such as BSD. This
	patch adds the plumbing to use AES-NI intrinsics when available.

	Before:
	```
	mode       in    symmkey  opreps  cxreps  context  op       time(sec)  thrgput
	aes_ecb_d  78Mb  256      10T     0       0.000    395.000  0.395      197Mb
	aes_cbc_e  78Mb  256      10T     0       0.000    392.000  0.393      198Mb
	aes_cbc_d  78Mb  256      10T     0       0.000    425.000  0.425      183Mb
	```

	After:
	```
	mode       in    symmkey  opreps  cxreps  context  op      time(sec)  thrgput
	aes_ecb_d  78Mb  256      10T     0       0.000    39.000  0.039      1Gb
	aes_cbc_e  78Mb  256      10T     0       0.000    94.000  0.094      831Mb
	aes_cbc_d  78Mb  256      10T     0       0.000    74.000  0.075      1Gb
	```

	[9804c76e76f3]

Differential Revision: https://phabricator.services.mozilla.com/D60763

--HG--
extra : moz-landing-system : lando
This commit is contained in:
J.C. Jones 2020-01-22 23:13:52 +00:00
Родитель 3ba4b04d5c
Коммит c70fa24ea8
6 изменённых файлов: 212 добавлений и 75 удалений

Просмотреть файл

@ -1 +1 @@
124c43a9f768
cd55a3a90502

Просмотреть файл

@ -10,3 +10,4 @@
*/
#error "Do not include this header file."

Просмотреть файл

@ -155,3 +155,30 @@ rijndael_native_encryptBlock(AESContext *cx,
m = _mm_aesenclast_si128(m, cx->k.keySchedule[cx->Nr]);
_mm_storeu_si128((__m128i *)output, m);
}
/*
 * Decrypt a single 128-bit block using the AES-NI decryption intrinsics.
 *
 * cx     - AES context; reads cx->Nr and cx->k.keySchedule[0 .. cx->Nr].
 * output - destination for the 128-bit result (written with an unaligned
 *          store, so no alignment is required).
 * input  - the 128-bit block to decrypt (read with an unaligned load).
 *
 * Round keys are consumed from index cx->Nr down to index 0.
 * NOTE(review): _mm_aesdec_si128 performs an Equivalent-Inverse-Cipher
 * round, so keySchedule presumably has to hold the decryption key schedule
 * here - confirm against the key-expansion code.
 */
void
rijndael_native_decryptBlock(AESContext *cx,
                             unsigned char *output,
                             const unsigned char *input)
{
    int round = cx->Nr - 1;
    pre_align __m128i state post_align = _mm_loadu_si128((__m128i *)input);

    /* Start by mixing in the round key at index Nr. */
    state = _mm_xor_si128(state, cx->k.keySchedule[cx->Nr]);

    /* Full decryption rounds with keys Nr-1 down to 1. */
    while (round > 0) {
        state = _mm_aesdec_si128(state, cx->k.keySchedule[round]);
        --round;
    }

    /* Final round uses the key at index 0. */
    state = _mm_aesdeclast_si128(state, cx->k.keySchedule[0]);
    _mm_storeu_si128((__m128i *)output, state);
}
// out = a ^ b
void
native_xorBlock(unsigned char *out,
const unsigned char *a,
const unsigned char *b)
{
pre_align __m128i post_align in1 = _mm_loadu_si128((__m128i *)(a));
pre_align __m128i post_align in2 = _mm_loadu_si128((__m128i *)(b));
in1 = _mm_xor_si128(in1, in2);
_mm_storeu_si128((__m128i *)(out), in1);
}

Просмотреть файл

@ -42,6 +42,12 @@ void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
void rijndael_native_encryptBlock(AESContext *cx,
unsigned char *output,
const unsigned char *input);
void rijndael_native_decryptBlock(AESContext *cx,
unsigned char *output,
const unsigned char *input);
void native_xorBlock(unsigned char *out,
const unsigned char *a,
const unsigned char *b);
/* Stub definitions for the above rijndael_native_* functions, which
* shouldn't be used unless NSS_X86_OR_X64 is defined */
@ -62,6 +68,23 @@ rijndael_native_encryptBlock(AESContext *cx,
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
PORT_Assert(0);
}
/*
 * Stub for rijndael_native_decryptBlock. It sits just before the
 * "#endif (NSS_X86_OR_X64)" line, in the section the nearby comment
 * describes as stubs that shouldn't be used unless NSS_X86_OR_X64 is
 * defined.
 *
 * It performs no decryption and never touches `output`: it only records
 * SEC_ERROR_LIBRARY_FAILURE and trips PORT_Assert(0). The function returns
 * void, so callers cannot detect the failure from a return value.
 */
void
rijndael_native_decryptBlock(AESContext *cx,
unsigned char *output,
const unsigned char *input)
{
/* Reaching this stub is treated as an internal library error. */
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
PORT_Assert(0);
}
/*
 * Stub for native_xorBlock. It sits just before the
 * "#endif (NSS_X86_OR_X64)" line, alongside the other stub definitions
 * that shouldn't be used unless NSS_X86_OR_X64 is defined.
 *
 * It does not compute anything and leaves `out` untouched: it only records
 * SEC_ERROR_LIBRARY_FAILURE and trips PORT_Assert(0).
 */
void
native_xorBlock(unsigned char *out, const unsigned char *a,
const unsigned char *b)
{
/* Reaching this stub is treated as an internal library error. */
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
PORT_Assert(0);
}
#endif /* NSS_X86_OR_X64 */
/*
@ -509,6 +532,15 @@ typedef union {
#define STATE_BYTE(i) state.b[i]
/*
 * Portable byte-wise XOR of two blocks: out[i] = a[i] ^ b[i] for each of
 * the AES_BLOCK_SIZE bytes.
 *
 * Each output byte depends only on the input bytes at the same index, so
 * `out` may be the same buffer as `a` or `b`.
 */
inline static void
xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b)
{
    unsigned int idx = 0;

    while (idx < AES_BLOCK_SIZE) {
        out[idx] = a[idx] ^ b[idx];
        ++idx;
    }
}
static void NO_SANITIZE_ALIGNMENT
rijndael_encryptBlock128(AESContext *cx,
unsigned char *output,
@ -604,7 +636,7 @@ rijndael_encryptBlock128(AESContext *cx,
#endif
}
static SECStatus NO_SANITIZE_ALIGNMENT
static void NO_SANITIZE_ALIGNMENT
rijndael_decryptBlock128(AESContext *cx,
unsigned char *output,
const unsigned char *input)
@ -693,7 +725,6 @@ rijndael_decryptBlock128(AESContext *cx,
memcpy(output, outBuf, sizeof outBuf);
}
#endif
return SECSuccess;
}
/**************************************************************************
@ -707,16 +738,13 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
const unsigned char *input, unsigned int inputLen)
{
AESBlockFunc *encryptor;
if (aesni_support()) {
/* Use hardware acceleration for normal AES parameters. */
encryptor = &rijndael_native_encryptBlock;
} else {
encryptor = &rijndael_encryptBlock128;
}
PRBool aesni = aesni_support();
while (inputLen > 0) {
(*encryptor)(cx, output, input);
if (aesni) {
rijndael_native_encryptBlock(cx, output, input);
} else {
rijndael_encryptBlock128(cx, output, input);
}
output += AES_BLOCK_SIZE;
input += AES_BLOCK_SIZE;
inputLen -= AES_BLOCK_SIZE;
@ -729,20 +757,23 @@ rijndael_encryptCBC(AESContext *cx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
const unsigned char *input, unsigned int inputLen)
{
unsigned int j;
unsigned char *lastblock;
unsigned char *lastblock = cx->iv;
unsigned char inblock[AES_BLOCK_SIZE * 8];
PRBool aesni = aesni_support();
if (!inputLen)
return SECSuccess;
lastblock = cx->iv;
while (inputLen > 0) {
/* XOR with the last block (IV if first block) */
for (j = 0; j < AES_BLOCK_SIZE; ++j) {
inblock[j] = input[j] ^ lastblock[j];
if (aesni) {
/* XOR with the last block (IV if first block) */
native_xorBlock(inblock, input, lastblock);
/* encrypt */
rijndael_native_encryptBlock(cx, output, inblock);
} else {
xorBlock(inblock, input, lastblock);
rijndael_encryptBlock128(cx, output, inblock);
}
/* encrypt */
rijndael_encryptBlock128(cx, output, inblock);
/* move to the next block */
lastblock = output;
output += AES_BLOCK_SIZE;
@ -758,9 +789,12 @@ rijndael_decryptECB(AESContext *cx, unsigned char *output,
unsigned int *outputLen, unsigned int maxOutputLen,
const unsigned char *input, unsigned int inputLen)
{
PRBool aesni = aesni_support();
while (inputLen > 0) {
if (rijndael_decryptBlock128(cx, output, input) != SECSuccess) {
return SECFailure;
if (aesni) {
rijndael_native_decryptBlock(cx, output, input);
} else {
rijndael_decryptBlock128(cx, output, input);
}
output += AES_BLOCK_SIZE;
input += AES_BLOCK_SIZE;
@ -776,8 +810,8 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output,
{
const unsigned char *in;
unsigned char *out;
unsigned int j;
unsigned char newIV[AES_BLOCK_SIZE];
PRBool aesni = aesni_support();
if (!inputLen)
return SECSuccess;
@ -786,21 +820,26 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output,
memcpy(newIV, in, AES_BLOCK_SIZE);
out = output + (inputLen - AES_BLOCK_SIZE);
while (inputLen > AES_BLOCK_SIZE) {
if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) {
return SECFailure;
if (aesni) {
// Use hardware acceleration for normal AES parameters.
rijndael_native_decryptBlock(cx, out, in);
native_xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
} else {
rijndael_decryptBlock128(cx, out, in);
xorBlock(out, out, &in[-AES_BLOCK_SIZE]);
}
for (j = 0; j < AES_BLOCK_SIZE; ++j)
out[j] ^= in[(int)(j - AES_BLOCK_SIZE)];
out -= AES_BLOCK_SIZE;
in -= AES_BLOCK_SIZE;
inputLen -= AES_BLOCK_SIZE;
}
if (in == input) {
if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) {
return SECFailure;
if (aesni) {
rijndael_native_decryptBlock(cx, out, in);
native_xorBlock(out, out, cx->iv);
} else {
rijndael_decryptBlock128(cx, out, in);
xorBlock(out, out, cx->iv);
}
for (j = 0; j < AES_BLOCK_SIZE; ++j)
out[j] ^= cx->iv[j];
}
memcpy(cx->iv, newIV, AES_BLOCK_SIZE);
return SECSuccess;

Просмотреть файл

@ -26,10 +26,6 @@
#endif /* NSS_DISABLE_SSE2 */
#endif
typedef void AESBlockFunc(AESContext *cx,
unsigned char *output,
const unsigned char *input);
/* RIJNDAEL_NUM_ROUNDS
*
* Number of rounds per execution

Просмотреть файл

@ -557,18 +557,30 @@ loser:
return A;
}
/* Bug 1606992 - Cache the hash result for the common case that we're
* asked to repeatedly compute the key for the same password item,
* hash, iterations and salt. */
static struct {
PZLock *lock;
SECItem *hashPBKDF2;
struct KDFCacheItemStr {
SECItem *hash;
SECItem *salt;
SECItem *pwItem;
HASH_HashType hashType;
int iterations;
int keyLen;
} PBECache = { NULL, NULL, NULL, NULL };
};
typedef struct KDFCacheItemStr KDFCacheItem;
/* Bug 1606992 - Cache the hash result for the common case that we're
* asked to repeatedly compute the key for the same password item,
* hash, iterations and salt. */
static struct {
PZLock *lock;
struct {
KDFCacheItem common;
int ivLen;
PRBool faulty3DES;
} cacheKDF1;
struct {
KDFCacheItem common;
} cacheKDF2;
} PBECache;
void
sftk_PBELockInit(void)
@ -579,56 +591,113 @@ sftk_PBELockInit(void)
}
static void
sftk_clearPBECacheItemsLocked(void)
sftk_clearPBECommonCacheItemsLocked(KDFCacheItem *item)
{
if (PBECache.hashPBKDF2) {
SECITEM_ZfreeItem(PBECache.hashPBKDF2, PR_TRUE);
PBECache.hashPBKDF2 = NULL;
if (item->hash) {
SECITEM_ZfreeItem(item->hash, PR_TRUE);
item->hash = NULL;
}
if (PBECache.salt) {
SECITEM_FreeItem(PBECache.salt, PR_TRUE);
PBECache.salt = NULL;
if (item->salt) {
SECITEM_FreeItem(item->salt, PR_TRUE);
item->salt = NULL;
}
if (PBECache.pwItem) {
SECITEM_ZfreeItem(PBECache.pwItem, PR_TRUE);
PBECache.pwItem = NULL;
if (item->pwItem) {
SECITEM_ZfreeItem(item->pwItem, PR_TRUE);
item->pwItem = NULL;
}
}
static void
sftk_setPBECache(const SECItem *hash,
const NSSPKCS5PBEParameter *pbe_param,
const SECItem *pwItem)
sftk_setPBECommonCacheItemsKDFLocked(KDFCacheItem *cacheItem,
const SECItem *hash,
const NSSPKCS5PBEParameter *pbe_param,
const SECItem *pwItem)
{
cacheItem->hash = SECITEM_DupItem(hash);
cacheItem->hashType = pbe_param->hashType;
cacheItem->iterations = pbe_param->iter;
cacheItem->keyLen = pbe_param->keyLen;
cacheItem->salt = SECITEM_DupItem(&pbe_param->salt);
cacheItem->pwItem = SECITEM_DupItem(pwItem);
}
static void
sftk_setPBECacheKDF2(const SECItem *hash,
const NSSPKCS5PBEParameter *pbe_param,
const SECItem *pwItem)
{
PZ_Lock(PBECache.lock);
sftk_clearPBECacheItemsLocked();
sftk_clearPBECommonCacheItemsLocked(&PBECache.cacheKDF2.common);
PBECache.hashPBKDF2 = SECITEM_DupItem(hash);
PBECache.hashType = pbe_param->hashType;
PBECache.iterations = pbe_param->iter;
PBECache.keyLen = pbe_param->keyLen;
PBECache.salt = SECITEM_DupItem(&pbe_param->salt);
PBECache.pwItem = SECITEM_DupItem(pwItem);
sftk_setPBECommonCacheItemsKDFLocked(&PBECache.cacheKDF2.common,
hash, pbe_param, pwItem);
PZ_Unlock(PBECache.lock);
}
/*
 * Replace the cached PBKDF1 entry with the given result.
 *
 * hash       - derived value to remember.
 * pbe_param  - parameters the hash was derived with; hashType, iter,
 *              keyLen, salt and ivLen are recorded.
 * pwItem     - password item the hash was derived from.
 * faulty3DES - recorded alongside the entry so that a later lookup can
 *              match on it.
 *
 * The whole update runs while holding PBECache.lock. The previous entry is
 * released before the new values are stored by the common helper.
 */
static void
sftk_setPBECacheKDF1(const SECItem *hash,
                     const NSSPKCS5PBEParameter *pbe_param,
                     const SECItem *pwItem,
                     PRBool faulty3DES)
{
    KDFCacheItem *common = &PBECache.cacheKDF1.common;

    PZ_Lock(PBECache.lock);

    /* PBKDF1-only fields that are kept next to the common item. */
    PBECache.cacheKDF1.ivLen = pbe_param->ivLen;
    PBECache.cacheKDF1.faulty3DES = faulty3DES;

    /* Drop the old entry, then store the new one. */
    sftk_clearPBECommonCacheItemsLocked(common);
    sftk_setPBECommonCacheItemsKDFLocked(common, hash, pbe_param, pwItem);

    PZ_Unlock(PBECache.lock);
}
static PRBool
sftk_comparePBECommonCacheItemLocked(const KDFCacheItem *cacheItem,
const NSSPKCS5PBEParameter *pbe_param,
const SECItem *pwItem)
{
return (cacheItem->hash &&
cacheItem->salt &&
cacheItem->pwItem &&
pbe_param->hashType == cacheItem->hashType &&
pbe_param->iter == cacheItem->iterations &&
pbe_param->keyLen == cacheItem->keyLen &&
SECITEM_ItemsAreEqual(&pbe_param->salt, cacheItem->salt) &&
SECITEM_ItemsAreEqual(pwItem, cacheItem->pwItem));
}
static SECItem *
sftk_getPBECache(const NSSPKCS5PBEParameter *pbe_param,
const SECItem *pwItem)
sftk_getPBECacheKDF2(const NSSPKCS5PBEParameter *pbe_param,
const SECItem *pwItem)
{
SECItem *result = NULL;
const KDFCacheItem *cacheItem = &PBECache.cacheKDF2.common;
PZ_Lock(PBECache.lock);
if (PBECache.hashPBKDF2 && PBECache.salt && PBECache.pwItem &&
pbe_param->hashType == PBECache.hashType &&
pbe_param->iter == PBECache.iterations &&
pbe_param->keyLen == PBECache.keyLen &&
SECITEM_ItemsAreEqual(&pbe_param->salt, PBECache.salt) &&
SECITEM_ItemsAreEqual(pwItem, PBECache.pwItem)) {
if (sftk_comparePBECommonCacheItemLocked(cacheItem, pbe_param, pwItem)) {
result = SECITEM_DupItem(cacheItem->hash);
}
PZ_Unlock(PBECache.lock);
result = SECITEM_DupItem(PBECache.hashPBKDF2);
return result;
}
static SECItem *
sftk_getPBECacheKDF1(const NSSPKCS5PBEParameter *pbe_param,
const SECItem *pwItem,
PRBool faulty3DES)
{
SECItem *result = NULL;
const KDFCacheItem *cacheItem = &PBECache.cacheKDF1.common;
PZ_Lock(PBECache.lock);
if (sftk_comparePBECommonCacheItemLocked(cacheItem, pbe_param, pwItem) &&
PBECache.cacheKDF1.faulty3DES == faulty3DES &&
PBECache.cacheKDF1.ivLen == pbe_param->ivLen) {
result = SECITEM_DupItem(cacheItem->hash);
}
PZ_Unlock(PBECache.lock);
@ -642,7 +711,8 @@ sftk_PBELockShutdown(void)
PZ_DestroyLock(PBECache.lock);
PBECache.lock = 0;
}
sftk_clearPBECacheItemsLocked();
sftk_clearPBECommonCacheItemsLocked(&PBECache.cacheKDF1.common);
sftk_clearPBECommonCacheItemsLocked(&PBECache.cacheKDF2.common);
}
/*
@ -677,7 +747,11 @@ nsspkcs5_ComputeKeyAndIV(NSSPKCS5PBEParameter *pbe_param, SECItem *pwitem,
hashObj = HASH_GetRawHashObject(pbe_param->hashType);
switch (pbe_param->pbeType) {
case NSSPKCS5_PBKDF1:
hash = nsspkcs5_PBKDF1Extended(hashObj, pbe_param, pwitem, faulty3DES);
hash = sftk_getPBECacheKDF1(pbe_param, pwitem, faulty3DES);
if (!hash) {
hash = nsspkcs5_PBKDF1Extended(hashObj, pbe_param, pwitem, faulty3DES);
sftk_setPBECacheKDF1(hash, pbe_param, pwitem, faulty3DES);
}
if (hash == NULL) {
goto loser;
}
@ -688,10 +762,10 @@ nsspkcs5_ComputeKeyAndIV(NSSPKCS5PBEParameter *pbe_param, SECItem *pwitem,
break;
case NSSPKCS5_PBKDF2:
hash = sftk_getPBECache(pbe_param, pwitem);
hash = sftk_getPBECacheKDF2(pbe_param, pwitem);
if (!hash) {
hash = nsspkcs5_PBKDF2(hashObj, pbe_param, pwitem);
sftk_setPBECache(hash, pbe_param, pwitem);
sftk_setPBECacheKDF2(hash, pbe_param, pwitem);
}
if (getIV) {
PORT_Memcpy(iv->data, pbe_param->ivData, iv->len);