From c70fa24ea8d55927f8b5fef9c8ba607b7053bf0b Mon Sep 17 00:00:00 2001 From: "J.C. Jones" Date: Wed, 22 Jan 2020 23:13:52 +0000 Subject: [PATCH] Bug 1606927 - land NSS cd55a3a90502 UPGRADE_NSS_RELEASE, r=kjacobs 2020-01-22 Kai Engert * lib/softoken/lowpbe.c: Bug 1606992 - Follow-up to also cache most recent PBKDF1 hash (in addition to PBKDF2 hash). r=kjacobs [cd55a3a90502] [tip] 2020-01-22 Kevin Jacobs * lib/freebl/aes-x86.c, lib/freebl/rijndael.c, lib/freebl/rijndael.h: Bug 1608493 - Use AES-NI intrinsics for CBC and ECB decrypt when no assembly implementation is available. r=mt AES-NI is currently not used for //CBC// or //ECB decrypt// when an assembly implementation (`intel-aes.s` or `intel-aes-x86/64-masm.asm`) is not available. Concretely, this is the case on MacOS, Linux32, and other non-Linux OSes such as BSD. This patch adds the plumbing to use AES-NI intrinsics when available. Before: ``` mode in symmkey opreps cxreps context op time(sec) thrgput aes_ecb_d 78Mb 256 10T 0 0.000 395.000 0.395 197Mb aes_cbc_e 78Mb 256 10T 0 0.000 392.000 0.393 198Mb aes_cbc_d 78Mb 256 10T 0 0.000 425.000 0.425 183Mb ``` After: ``` mode in symmkey opreps cxreps context op time(sec) thrgput aes_ecb_d 78Mb 256 10T 0 0.000 39.000 0.039 1Gb aes_cbc_e 78Mb 256 10T 0 0.000 94.000 0.094 831Mb aes_cbc_d 78Mb 256 10T 0 0.000 74.000 0.075 1Gb ``` [9804c76e76f3] Differential Revision: https://phabricator.services.mozilla.com/D60763 --HG-- extra : moz-landing-system : lando --- security/nss/TAG-INFO | 2 +- security/nss/coreconf/coreconf.dep | 1 + security/nss/lib/freebl/aes-x86.c | 27 +++++ security/nss/lib/freebl/rijndael.c | 99 +++++++++++++------ security/nss/lib/freebl/rijndael.h | 4 - security/nss/lib/softoken/lowpbe.c | 154 +++++++++++++++++++++-------- 6 files changed, 212 insertions(+), 75 deletions(-) diff --git a/security/nss/TAG-INFO b/security/nss/TAG-INFO index c7714f41f7d2..024f5389f641 100644 --- a/security/nss/TAG-INFO +++ b/security/nss/TAG-INFO @@ -1 +1 @@ 
-124c43a9f768 \ No newline at end of file +cd55a3a90502 \ No newline at end of file diff --git a/security/nss/coreconf/coreconf.dep b/security/nss/coreconf/coreconf.dep index 5182f75552c8..590d1bfaeee3 100644 --- a/security/nss/coreconf/coreconf.dep +++ b/security/nss/coreconf/coreconf.dep @@ -10,3 +10,4 @@ */ #error "Do not include this header file." + diff --git a/security/nss/lib/freebl/aes-x86.c b/security/nss/lib/freebl/aes-x86.c index edd0b512513f..508fcc6502a0 100644 --- a/security/nss/lib/freebl/aes-x86.c +++ b/security/nss/lib/freebl/aes-x86.c @@ -155,3 +155,30 @@ rijndael_native_encryptBlock(AESContext *cx, m = _mm_aesenclast_si128(m, cx->k.keySchedule[cx->Nr]); _mm_storeu_si128((__m128i *)output, m); } + +void +rijndael_native_decryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + int i; + pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); + m = _mm_xor_si128(m, cx->k.keySchedule[cx->Nr]); + for (i = cx->Nr - 1; i > 0; --i) { + m = _mm_aesdec_si128(m, cx->k.keySchedule[i]); + } + m = _mm_aesdeclast_si128(m, cx->k.keySchedule[0]); + _mm_storeu_si128((__m128i *)output, m); +} + +// out = a ^ b +void +native_xorBlock(unsigned char *out, + const unsigned char *a, + const unsigned char *b) +{ + pre_align __m128i post_align in1 = _mm_loadu_si128((__m128i *)(a)); + pre_align __m128i post_align in2 = _mm_loadu_si128((__m128i *)(b)); + in1 = _mm_xor_si128(in1, in2); + _mm_storeu_si128((__m128i *)(out), in1); +} diff --git a/security/nss/lib/freebl/rijndael.c b/security/nss/lib/freebl/rijndael.c index 2e8bab87ffd7..247a9419bd0a 100644 --- a/security/nss/lib/freebl/rijndael.c +++ b/security/nss/lib/freebl/rijndael.c @@ -42,6 +42,12 @@ void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, void rijndael_native_encryptBlock(AESContext *cx, unsigned char *output, const unsigned char *input); +void rijndael_native_decryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input); 
+void native_xorBlock(unsigned char *out, + const unsigned char *a, + const unsigned char *b); /* Stub definitions for the above rijndael_native_* functions, which * shouldn't be used unless NSS_X86_OR_X64 is defined */ @@ -62,6 +68,23 @@ rijndael_native_encryptBlock(AESContext *cx, PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); PORT_Assert(0); } + +void +rijndael_native_decryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} + +void +native_xorBlock(unsigned char *out, const unsigned char *a, + const unsigned char *b) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} #endif /* NSS_X86_OR_X64 */ /* @@ -509,6 +532,15 @@ typedef union { #define STATE_BYTE(i) state.b[i] +// out = a ^ b +inline static void +xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b) +{ + for (unsigned int j = 0; j < AES_BLOCK_SIZE; ++j) { + (out)[j] = (a)[j] ^ (b)[j]; + } +} + static void NO_SANITIZE_ALIGNMENT rijndael_encryptBlock128(AESContext *cx, unsigned char *output, @@ -604,7 +636,7 @@ rijndael_encryptBlock128(AESContext *cx, #endif } -static SECStatus NO_SANITIZE_ALIGNMENT +static void NO_SANITIZE_ALIGNMENT rijndael_decryptBlock128(AESContext *cx, unsigned char *output, const unsigned char *input) @@ -693,7 +725,6 @@ rijndael_decryptBlock128(AESContext *cx, memcpy(output, outBuf, sizeof outBuf); } #endif - return SECSuccess; } /************************************************************************** @@ -707,16 +738,13 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen) { - AESBlockFunc *encryptor; - - if (aesni_support()) { - /* Use hardware acceleration for normal AES parameters. 
*/ - encryptor = &rijndael_native_encryptBlock; - } else { - encryptor = &rijndael_encryptBlock128; - } + PRBool aesni = aesni_support(); while (inputLen > 0) { - (*encryptor)(cx, output, input); + if (aesni) { + rijndael_native_encryptBlock(cx, output, input); + } else { + rijndael_encryptBlock128(cx, output, input); + } output += AES_BLOCK_SIZE; input += AES_BLOCK_SIZE; inputLen -= AES_BLOCK_SIZE; @@ -729,20 +757,23 @@ rijndael_encryptCBC(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen) { - unsigned int j; - unsigned char *lastblock; + unsigned char *lastblock = cx->iv; unsigned char inblock[AES_BLOCK_SIZE * 8]; + PRBool aesni = aesni_support(); if (!inputLen) return SECSuccess; - lastblock = cx->iv; while (inputLen > 0) { - /* XOR with the last block (IV if first block) */ - for (j = 0; j < AES_BLOCK_SIZE; ++j) { - inblock[j] = input[j] ^ lastblock[j]; + if (aesni) { + /* XOR with the last block (IV if first block) */ + native_xorBlock(inblock, input, lastblock); + /* encrypt */ + rijndael_native_encryptBlock(cx, output, inblock); + } else { + xorBlock(inblock, input, lastblock); + rijndael_encryptBlock128(cx, output, inblock); } - /* encrypt */ - rijndael_encryptBlock128(cx, output, inblock); + /* move to the next block */ lastblock = output; output += AES_BLOCK_SIZE; @@ -758,9 +789,12 @@ rijndael_decryptECB(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen) { + PRBool aesni = aesni_support(); while (inputLen > 0) { - if (rijndael_decryptBlock128(cx, output, input) != SECSuccess) { - return SECFailure; + if (aesni) { + rijndael_native_decryptBlock(cx, output, input); + } else { + rijndael_decryptBlock128(cx, output, input); } output += AES_BLOCK_SIZE; input += AES_BLOCK_SIZE; @@ -776,8 +810,8 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output, { const unsigned char *in; 
unsigned char *out; - unsigned int j; unsigned char newIV[AES_BLOCK_SIZE]; + PRBool aesni = aesni_support(); if (!inputLen) return SECSuccess; @@ -786,21 +820,26 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output, memcpy(newIV, in, AES_BLOCK_SIZE); out = output + (inputLen - AES_BLOCK_SIZE); while (inputLen > AES_BLOCK_SIZE) { - if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) { - return SECFailure; + if (aesni) { + // Use hardware acceleration for normal AES parameters. + rijndael_native_decryptBlock(cx, out, in); + native_xorBlock(out, out, &in[-AES_BLOCK_SIZE]); + } else { + rijndael_decryptBlock128(cx, out, in); + xorBlock(out, out, &in[-AES_BLOCK_SIZE]); } - for (j = 0; j < AES_BLOCK_SIZE; ++j) - out[j] ^= in[(int)(j - AES_BLOCK_SIZE)]; out -= AES_BLOCK_SIZE; in -= AES_BLOCK_SIZE; inputLen -= AES_BLOCK_SIZE; } if (in == input) { - if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) { - return SECFailure; + if (aesni) { + rijndael_native_decryptBlock(cx, out, in); + native_xorBlock(out, out, cx->iv); + } else { + rijndael_decryptBlock128(cx, out, in); + xorBlock(out, out, cx->iv); } - for (j = 0; j < AES_BLOCK_SIZE; ++j) - out[j] ^= cx->iv[j]; } memcpy(cx->iv, newIV, AES_BLOCK_SIZE); return SECSuccess; diff --git a/security/nss/lib/freebl/rijndael.h b/security/nss/lib/freebl/rijndael.h index 61060e016e70..16357c8c6624 100644 --- a/security/nss/lib/freebl/rijndael.h +++ b/security/nss/lib/freebl/rijndael.h @@ -26,10 +26,6 @@ #endif /* NSS_DISABLE_SSE2 */ #endif -typedef void AESBlockFunc(AESContext *cx, - unsigned char *output, - const unsigned char *input); - /* RIJNDAEL_NUM_ROUNDS * * Number of rounds per execution diff --git a/security/nss/lib/softoken/lowpbe.c b/security/nss/lib/softoken/lowpbe.c index 12c67f1ed465..1bed0b351299 100644 --- a/security/nss/lib/softoken/lowpbe.c +++ b/security/nss/lib/softoken/lowpbe.c @@ -557,18 +557,30 @@ loser: return A; } -/* Bug 1606992 - Cache the hash result for the common case that we're - * asked 
to repeatedly compute the key for the same password item, - * hash, iterations and salt. */ -static struct { - PZLock *lock; - SECItem *hashPBKDF2; +struct KDFCacheItemStr { + SECItem *hash; SECItem *salt; SECItem *pwItem; HASH_HashType hashType; int iterations; int keyLen; -} PBECache = { NULL, NULL, NULL, NULL }; +}; +typedef struct KDFCacheItemStr KDFCacheItem; + +/* Bug 1606992 - Cache the hash result for the common case that we're + * asked to repeatedly compute the key for the same password item, + * hash, iterations and salt. */ +static struct { + PZLock *lock; + struct { + KDFCacheItem common; + int ivLen; + PRBool faulty3DES; + } cacheKDF1; + struct { + KDFCacheItem common; + } cacheKDF2; +} PBECache; void sftk_PBELockInit(void) @@ -579,56 +591,113 @@ sftk_PBELockInit(void) } static void -sftk_clearPBECacheItemsLocked(void) +sftk_clearPBECommonCacheItemsLocked(KDFCacheItem *item) { - if (PBECache.hashPBKDF2) { - SECITEM_ZfreeItem(PBECache.hashPBKDF2, PR_TRUE); - PBECache.hashPBKDF2 = NULL; + if (item->hash) { + SECITEM_ZfreeItem(item->hash, PR_TRUE); + item->hash = NULL; } - if (PBECache.salt) { - SECITEM_FreeItem(PBECache.salt, PR_TRUE); - PBECache.salt = NULL; + if (item->salt) { + SECITEM_FreeItem(item->salt, PR_TRUE); + item->salt = NULL; } - if (PBECache.pwItem) { - SECITEM_ZfreeItem(PBECache.pwItem, PR_TRUE); - PBECache.pwItem = NULL; + if (item->pwItem) { + SECITEM_ZfreeItem(item->pwItem, PR_TRUE); + item->pwItem = NULL; } } static void -sftk_setPBECache(const SECItem *hash, - const NSSPKCS5PBEParameter *pbe_param, - const SECItem *pwItem) +sftk_setPBECommonCacheItemsKDFLocked(KDFCacheItem *cacheItem, + const SECItem *hash, + const NSSPKCS5PBEParameter *pbe_param, + const SECItem *pwItem) +{ + cacheItem->hash = SECITEM_DupItem(hash); + cacheItem->hashType = pbe_param->hashType; + cacheItem->iterations = pbe_param->iter; + cacheItem->keyLen = pbe_param->keyLen; + cacheItem->salt = SECITEM_DupItem(&pbe_param->salt); + cacheItem->pwItem = 
SECITEM_DupItem(pwItem); +} + +static void +sftk_setPBECacheKDF2(const SECItem *hash, + const NSSPKCS5PBEParameter *pbe_param, + const SECItem *pwItem) { PZ_Lock(PBECache.lock); - sftk_clearPBECacheItemsLocked(); + sftk_clearPBECommonCacheItemsLocked(&PBECache.cacheKDF2.common); - PBECache.hashPBKDF2 = SECITEM_DupItem(hash); - PBECache.hashType = pbe_param->hashType; - PBECache.iterations = pbe_param->iter; - PBECache.keyLen = pbe_param->keyLen; - PBECache.salt = SECITEM_DupItem(&pbe_param->salt); - PBECache.pwItem = SECITEM_DupItem(pwItem); + sftk_setPBECommonCacheItemsKDFLocked(&PBECache.cacheKDF2.common, + hash, pbe_param, pwItem); PZ_Unlock(PBECache.lock); } +static void +sftk_setPBECacheKDF1(const SECItem *hash, + const NSSPKCS5PBEParameter *pbe_param, + const SECItem *pwItem, + PRBool faulty3DES) +{ + PZ_Lock(PBECache.lock); + + sftk_clearPBECommonCacheItemsLocked(&PBECache.cacheKDF1.common); + + sftk_setPBECommonCacheItemsKDFLocked(&PBECache.cacheKDF1.common, + hash, pbe_param, pwItem); + PBECache.cacheKDF1.faulty3DES = faulty3DES; + PBECache.cacheKDF1.ivLen = pbe_param->ivLen; + + PZ_Unlock(PBECache.lock); +} + +static PRBool +sftk_comparePBECommonCacheItemLocked(const KDFCacheItem *cacheItem, + const NSSPKCS5PBEParameter *pbe_param, + const SECItem *pwItem) +{ + return (cacheItem->hash && + cacheItem->salt && + cacheItem->pwItem && + pbe_param->hashType == cacheItem->hashType && + pbe_param->iter == cacheItem->iterations && + pbe_param->keyLen == cacheItem->keyLen && + SECITEM_ItemsAreEqual(&pbe_param->salt, cacheItem->salt) && + SECITEM_ItemsAreEqual(pwItem, cacheItem->pwItem)); +} + static SECItem * -sftk_getPBECache(const NSSPKCS5PBEParameter *pbe_param, - const SECItem *pwItem) +sftk_getPBECacheKDF2(const NSSPKCS5PBEParameter *pbe_param, + const SECItem *pwItem) { SECItem *result = NULL; + const KDFCacheItem *cacheItem = &PBECache.cacheKDF2.common; PZ_Lock(PBECache.lock); - if (PBECache.hashPBKDF2 && PBECache.salt && PBECache.pwItem && - 
pbe_param->hashType == PBECache.hashType && - pbe_param->iter == PBECache.iterations && - pbe_param->keyLen == PBECache.keyLen && - SECITEM_ItemsAreEqual(&pbe_param->salt, PBECache.salt) && - SECITEM_ItemsAreEqual(pwItem, PBECache.pwItem)) { + if (sftk_comparePBECommonCacheItemLocked(cacheItem, pbe_param, pwItem)) { + result = SECITEM_DupItem(cacheItem->hash); + } + PZ_Unlock(PBECache.lock); - result = SECITEM_DupItem(PBECache.hashPBKDF2); + return result; +} + +static SECItem * +sftk_getPBECacheKDF1(const NSSPKCS5PBEParameter *pbe_param, + const SECItem *pwItem, + PRBool faulty3DES) +{ + SECItem *result = NULL; + const KDFCacheItem *cacheItem = &PBECache.cacheKDF1.common; + + PZ_Lock(PBECache.lock); + if (sftk_comparePBECommonCacheItemLocked(cacheItem, pbe_param, pwItem) && + PBECache.cacheKDF1.faulty3DES == faulty3DES && + PBECache.cacheKDF1.ivLen == pbe_param->ivLen) { + result = SECITEM_DupItem(cacheItem->hash); } PZ_Unlock(PBECache.lock); @@ -642,7 +711,8 @@ sftk_PBELockShutdown(void) PZ_DestroyLock(PBECache.lock); PBECache.lock = 0; } - sftk_clearPBECacheItemsLocked(); + sftk_clearPBECommonCacheItemsLocked(&PBECache.cacheKDF1.common); + sftk_clearPBECommonCacheItemsLocked(&PBECache.cacheKDF2.common); } /* @@ -677,7 +747,11 @@ nsspkcs5_ComputeKeyAndIV(NSSPKCS5PBEParameter *pbe_param, SECItem *pwitem, hashObj = HASH_GetRawHashObject(pbe_param->hashType); switch (pbe_param->pbeType) { case NSSPKCS5_PBKDF1: - hash = nsspkcs5_PBKDF1Extended(hashObj, pbe_param, pwitem, faulty3DES); + hash = sftk_getPBECacheKDF1(pbe_param, pwitem, faulty3DES); + if (!hash) { + hash = nsspkcs5_PBKDF1Extended(hashObj, pbe_param, pwitem, faulty3DES); + sftk_setPBECacheKDF1(hash, pbe_param, pwitem, faulty3DES); + } if (hash == NULL) { goto loser; } @@ -688,10 +762,10 @@ nsspkcs5_ComputeKeyAndIV(NSSPKCS5PBEParameter *pbe_param, SECItem *pwitem, break; case NSSPKCS5_PBKDF2: - hash = sftk_getPBECache(pbe_param, pwitem); + hash = sftk_getPBECacheKDF2(pbe_param, pwitem); if (!hash) { hash = 
nsspkcs5_PBKDF2(hashObj, pbe_param, pwitem); - sftk_setPBECache(hash, pbe_param, pwitem); + sftk_setPBECacheKDF2(hash, pbe_param, pwitem); } if (getIV) { PORT_Memcpy(iv->data, pbe_param->ivData, iv->len);