Mirror of https://github.com/microsoft/SymCrypt.git
Merged PR 9746161: Support cbNonce != 12 for AES-GCM

This change adds support for nonces larger or smaller than 12 bytes for AES-GCM (and GCM generally, if it is ever used with other block ciphers). It adds 32-bit CTR functions, as required by the GCM spec. Previously we used a 64-bit CTR function, which worked for 12-byte nonces because the counter block always started at 1 and the message length limit for GCM is 2^36 - 32 bytes, so the 32-bit counter would never overflow. Using a 64-bit counter does not work for non-12-byte nonces because the counter block starts at an arbitrary value computed by GHASHing the nonce. This change also updates the "stitched" implementations of AES-GCM to use 32-bit addition intrinsics instead of 64-bit addition.

Tested with unit tests:
- AMD64 with all features enabled
- AMD64 without aesni
- AMD64 without pclmulqdq
- AMD64 with everything disabled except for rdrand, rdseed, savexmmnofail
- ARM64 hardware (Galaxy Book 2) + qemu (via pipeline)

Related work items: #33824154
Parent: 4359d75a43
Commit: d6933e03f6
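To make the user-visible effect concrete, here is a minimal sketch of one-shot AES-GCM with a 16-byte nonce, which parameter validation rejected before this change. It assumes the public SymCryptGcmValidateParameters, SymCryptGcmExpandKey and SymCryptGcmEncrypt signatures and the exported SymCryptAesBlockCipher description from symcrypt.h; the key, nonce and data values are placeholders and the helper function is hypothetical, not part of the library.

    SYMCRYPT_ERROR
    EncryptWithLongNonce( void )                    // hypothetical demo helper
    {
        SYMCRYPT_GCM_EXPANDED_KEY key;
        BYTE rbKey[32]   = { 0 };                   // placeholder key material
        BYTE rbNonce[16] = { 0 };                   // 16-byte nonce: allowed after this change
        BYTE rbPlain[64] = { 0 };
        BYTE rbCipher[64];
        BYTE rbTag[16];
        SYMCRYPT_ERROR scError;

        // Nonce sizes other than 12 bytes now pass validation as long as cbNonce >= 1 byte.
        scError = SymCryptGcmValidateParameters( SymCryptAesBlockCipher,
                        sizeof(rbNonce), 0, sizeof(rbPlain), sizeof(rbTag) );
        if( scError != SYMCRYPT_NO_ERROR ) { return scError; }

        scError = SymCryptGcmExpandKey( &key, SymCryptAesBlockCipher, rbKey, sizeof(rbKey) );
        if( scError != SYMCRYPT_NO_ERROR ) { return scError; }

        SymCryptGcmEncrypt( &key, rbNonce, sizeof(rbNonce), NULL, 0,
                            rbPlain, rbCipher, sizeof(rbPlain), rbTag, sizeof(rbTag) );
        return SYMCRYPT_NO_ERROR;
    }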
@@ -6,6 +6,7 @@ prior to the creation of a new release, based on the changes contained in that r
- Extended SymCrypt support for XTS-AES adding support for 128-bit tweak and ciphertext-stealing
- Added support for salt length detection in RSA-PSS verification
- Export various constant time operations from SymCrypt Linux modules
+- Added support for nonce sizes other than 12 bytes for AES-GCM

# Version 103.3.2
@@ -3495,7 +3495,7 @@ SymCryptMarvin32Selftest(void);
// for different block cipher computations as the expanded key is not modified once initialized.
//
// SymCryptXxxBlockCipher
-// A SYMCRYPT_BLOCK_CIPHER structure that provides a description
+// A SYMCRYPT_BLOCKCIPHER structure that provides a description
// of the block cipher and its primary functions. This is used by cipher modes to pass
// all the block-cipher specific information in a single structure.
//
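The SYMCRYPT_BLOCKCIPHER description mentioned above is what lets generic mode code, including the SymCryptCtrMsb32 routine added by this change, run over any block cipher. As a rough sketch of how a mode consumes it, using only the two fields this change exercises (blockSize and encryptFunc); the helper below is hypothetical and the field usage is taken from the new code, not from the structure definition itself:

    // Hypothetical illustration: generate one keystream block with an arbitrary block cipher.
    VOID
    GenerateOneKeystreamBlock(
        _In_    PCSYMCRYPT_BLOCKCIPHER  pBlockCipher,   // block size + per-cipher function pointers
        _In_    PCVOID                  pExpandedKey,
        _In_    PCBYTE                  pbCounterBlock,
        _Out_   PBYTE                   pbKeystream )
    {
        SYMCRYPT_ASSERT( pBlockCipher->blockSize <= SYMCRYPT_MAX_BLOCK_SIZE );

        // Mode code never calls SymCryptAesEncrypt and friends directly; it goes through the
        // description so the same mode implementation works for every block cipher.
        (*pBlockCipher->encryptFunc)( pExpandedKey, pbCounterBlock, pbKeystream );
    }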
@@ -4152,8 +4152,7 @@ SymCryptCtrMsb64(
                                            SIZE_T  cbData );
//
// This function implements the CTR cipher mode.
-// It is not intended to be used as-is, rather it is a building block
-// for modes like CCM and GCM.
+// It is not intended to be used as-is, rather it is a building block for modes like CCM.
// On some platforms we have optimized code for AES-CTR, on other platforms
// we use this generic construction to achieve the same effect.
//

@@ -4174,7 +4173,6 @@ SymCryptCtrMsb64(
// buffers may be the same or non-overlapping, but may not partially overlap.
//

VOID
SYMCRYPT_CALL
SymCryptCfbEncrypt(
@@ -361,6 +361,51 @@ SymCryptAesCbcMac(
#endif
}

VOID
SYMCRYPT_CALL
SymCryptAesCtrMsb32(
    _In_                                        PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE )  PBYTE                       pbChainingValue,
    _In_reads_( cbData )                        PCBYTE                      pbSrc,
    _Out_writes_( cbData )                      PBYTE                       pbDst,
                                                SIZE_T                      cbData )
{
#if SYMCRYPT_CPU_AMD64
    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) )
    {
        SymCryptAesCtrMsb32Xmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData );
    } else {
        SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE );    // keep Prefast happy
        SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData );
    }

#elif SYMCRYPT_CPU_X86
    SYMCRYPT_EXTENDED_SAVE_DATA SaveData;

    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) &&
        SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR )
    {
        SymCryptAesCtrMsb32Xmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData );
        SymCryptRestoreXmm( &SaveData );
    } else {
        SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE );    // keep Prefast happy
        SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData );
    }

#elif SYMCRYPT_CPU_ARM64
    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) )
    {
        SymCryptAesCtrMsb32Neon( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData );
    } else {
        SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData );
    }

#else
    SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE );        // keep Prefast happy
    SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData );
#endif
}

VOID
SYMCRYPT_CALL
SymCryptAesCtrMsb64(
@@ -529,7 +574,7 @@ SymCryptAesGcmEncryptPartOnePass(
            bytesToProcess );

#else
-       SymCryptAesCtrMsb64(&pState->pKey->blockcipherKey.aes,
+       SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes,
                            &pState->counterBlock[0],
                            pbSrc,
                            pbDst,

@@ -563,7 +608,7 @@ SymCryptAesGcmEncryptPartOnePass(
        SymCryptWipeKnownSize( &pState->keystreamBlock[0], SYMCRYPT_GCM_BLOCK_SIZE );

        SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE );
-       SymCryptAesCtrMsb64(&pState->pKey->blockcipherKey.aes,
+       SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes,
                            &pState->counterBlock[0],
                            &pState->keystreamBlock[0],
                            &pState->keystreamBlock[0],

@@ -712,7 +757,7 @@ SymCryptAesGcmDecryptPartOnePass(
        // This violates the read-once rule, but it is safe for the same reasons as above
        // in the encryption case.
        //
-       SymCryptAesCtrMsb64(&pState->pKey->blockcipherKey.aes,
+       SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes,
                            &pState->counterBlock[0],
                            pbSrc,
                            pbDst,

@@ -729,7 +774,7 @@ SymCryptAesGcmDecryptPartOnePass(
        SymCryptWipeKnownSize( &pState->keystreamBlock[0], SYMCRYPT_GCM_BLOCK_SIZE );

        SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE );
-       SymCryptAesCtrMsb64(&pState->pKey->blockcipherKey.aes,
+       SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes,
                            &pState->counterBlock[0],
                            &pState->keystreamBlock[0],
                            &pState->keystreamBlock[0],
lib/aes-neon.c (258 lines changed)
@ -769,172 +769,26 @@ SymCryptAesEcbEncryptNeon(
|
|||
#pragma warning( disable:4701 ) // "Use of uninitialized variable"
|
||||
#pragma runtime_checks( "u", off )
|
||||
|
||||
VOID
|
||||
SYMCRYPT_CALL
|
||||
SymCryptAesCtrMsb64Neon(
|
||||
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
|
||||
_Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue,
|
||||
_In_reads_( cbData ) PCBYTE pbSrc,
|
||||
_Out_writes_( cbData ) PBYTE pbDst,
|
||||
SIZE_T cbData )
|
||||
{
|
||||
__n128 chain = *(__n128 *)pbChainingValue;
|
||||
const __n128 * pSrc = (const __n128 *) pbSrc;
|
||||
__n128 * pDst = (__n128 *) pbDst;
|
||||
#define SYMCRYPT_AesCtrMsbXxNeon SymCryptAesCtrMsb64Neon
|
||||
#define VADDQ_UXX vaddq_u64
|
||||
#define VSUBQ_UXX vsubq_u64
|
||||
|
||||
const __n128 chainIncrement1 = SYMCRYPT_SET_N128_U64( 0, 1 );
|
||||
const __n128 chainIncrement2 = SYMCRYPT_SET_N128_U64( 0, 2 );
|
||||
const __n128 chainIncrement8 = SYMCRYPT_SET_N128_U64( 0, 8 );
|
||||
#include "aes-pattern.c"
|
||||
|
||||
__n128 ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7;
|
||||
__n128 c0, c1, c2, c3, c4, c5, c6, c7;
|
||||
#undef VSUBQ_UXX
|
||||
#undef VADDQ_UXX
|
||||
#undef SYMCRYPT_AesCtrMsbXxNeon
|
||||
|
||||
cbData &= ~(SYMCRYPT_AES_BLOCK_SIZE - 1);
|
||||
#define SYMCRYPT_AesCtrMsbXxNeon SymCryptAesCtrMsb32Neon
|
||||
#define VADDQ_UXX vaddq_u32
|
||||
#define VSUBQ_UXX vsubq_u32
|
||||
|
||||
// Our chain variable is in integer format, not the MSBfirst format loaded from memory.
|
||||
ctr0 = vrev64q_u8( chain );
|
||||
ctr1 = vaddq_u64( ctr0, chainIncrement1 );
|
||||
ctr2 = vaddq_u64( ctr0, chainIncrement2 );
|
||||
ctr3 = vaddq_u64( ctr1, chainIncrement2 );
|
||||
ctr4 = vaddq_u64( ctr2, chainIncrement2 );
|
||||
ctr5 = vaddq_u64( ctr3, chainIncrement2 );
|
||||
ctr6 = vaddq_u64( ctr4, chainIncrement2 );
|
||||
ctr7 = vaddq_u64( ctr5, chainIncrement2 );
|
||||
#include "aes-pattern.c"
|
||||
|
||||
/*
|
||||
while cbData >= 5 * block
|
||||
generate 8 blocks of key stream
|
||||
if cbData < 8 * block
|
||||
break;
|
||||
process 8 blocks
|
||||
if cbData >= 5 * block
|
||||
process 5-7 blocks
|
||||
done
|
||||
if cbData >= 2 * block
|
||||
generate 4 blocks of key stream
|
||||
process 2-4 blocks
|
||||
done
|
||||
if cbData == 1 block
|
||||
generate 1 block of key stream
|
||||
process block
|
||||
*/
|
||||
while( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
c0 = vrev64q_u8( ctr0 );
|
||||
c1 = vrev64q_u8( ctr1 );
|
||||
c2 = vrev64q_u8( ctr2 );
|
||||
c3 = vrev64q_u8( ctr3 );
|
||||
c4 = vrev64q_u8( ctr4 );
|
||||
c5 = vrev64q_u8( ctr5 );
|
||||
c6 = vrev64q_u8( ctr6 );
|
||||
c7 = vrev64q_u8( ctr7 );
|
||||
#undef VSUBQ_UXX
|
||||
#undef VADDQ_UXX
|
||||
#undef SYMCRYPT_AesCtrMsbXxNeon
|
||||
|
||||
ctr0 = vaddq_u64( ctr0, chainIncrement8 );
|
||||
ctr1 = vaddq_u64( ctr1, chainIncrement8 );
|
||||
ctr2 = vaddq_u64( ctr2, chainIncrement8 );
|
||||
ctr3 = vaddq_u64( ctr3, chainIncrement8 );
|
||||
ctr4 = vaddq_u64( ctr4, chainIncrement8 );
|
||||
ctr5 = vaddq_u64( ctr5, chainIncrement8 );
|
||||
ctr6 = vaddq_u64( ctr6, chainIncrement8 );
|
||||
ctr7 = vaddq_u64( ctr7, chainIncrement8 );
|
||||
|
||||
AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );
|
||||
|
||||
if( cbData < 8 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
pDst[1] = veorq_u64( pSrc[1], c1 );
|
||||
pDst[2] = veorq_u64( pSrc[2], c2 );
|
||||
pDst[3] = veorq_u64( pSrc[3], c3 );
|
||||
pDst[4] = veorq_u64( pSrc[4], c4 );
|
||||
pDst[5] = veorq_u64( pSrc[5], c5 );
|
||||
pDst[6] = veorq_u64( pSrc[6], c6 );
|
||||
pDst[7] = veorq_u64( pSrc[7], c7 );
|
||||
|
||||
pDst += 8;
|
||||
pSrc += 8;
|
||||
cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
//
|
||||
// At this point we have one of the two following cases:
|
||||
// - cbData >= 5 * 16 and we have 8 blocks of key stream in c0-c7. ctr0-ctr7 is set to (c0+8)-(c7+8)
|
||||
// - cbData < 5 * 16 and we have no blocks of key stream, and ctr0-ctr7 set to the next 8 counters to use
|
||||
//
|
||||
|
||||
if( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) // quick exit of function if the request was a multiple of 8 blocks
|
||||
{
|
||||
if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
//
|
||||
// We already have the key stream
|
||||
//
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
pDst[1] = veorq_u64( pSrc[1], c1 );
|
||||
pDst[2] = veorq_u64( pSrc[2], c2 );
|
||||
pDst[3] = veorq_u64( pSrc[3], c3 );
|
||||
pDst[4] = veorq_u64( pSrc[4], c4 );
|
||||
chain = vsubq_u64( ctr5, chainIncrement8 );
|
||||
|
||||
if( cbData >= 96 )
|
||||
{
|
||||
chain = vsubq_u64( ctr6, chainIncrement8 );
|
||||
pDst[5] = veorq_u64( pSrc[5], c5 );
|
||||
if( cbData >= 112 )
|
||||
{
|
||||
chain = vsubq_u64( ctr7, chainIncrement8 );
|
||||
pDst[6] = veorq_u64( pSrc[6], c6 );
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
// Produce 4 blocks of key stream
|
||||
|
||||
chain = ctr2; // chain is only incremented by 2 for now
|
||||
|
||||
c0 = vrev64q_u8( ctr0 );
|
||||
c1 = vrev64q_u8( ctr1 );
|
||||
c2 = vrev64q_u8( ctr2 );
|
||||
c3 = vrev64q_u8( ctr3 );
|
||||
|
||||
AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 );
|
||||
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
pDst[1] = veorq_u64( pSrc[1], c1 );
|
||||
if( cbData >= 48 )
|
||||
{
|
||||
chain = ctr3;
|
||||
pDst[2] = veorq_u64( pSrc[2], c2 );
|
||||
if( cbData >= 64 )
|
||||
{
|
||||
chain = ctr4;
|
||||
pDst[3] = veorq_u64( pSrc[3], c3 );
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Exactly 1 block to process
|
||||
chain = ctr1;
|
||||
|
||||
c0 = vrev64q_u8( ctr0 );
|
||||
|
||||
AES_ENCRYPT_1( pExpandedKey, c0 );
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
chain = ctr0;
|
||||
}
|
||||
|
||||
chain = vrev64q_u8( chain );
|
||||
*(__n128 *)pbChainingValue = chain;
|
||||
}
|
||||
#pragma runtime_checks( "u", restore )
|
||||
#pragma warning(pop)
|
||||
|
||||
|
@ -1662,13 +1516,13 @@ SymCryptAesGcmEncryptStitchedNeon(
|
|||
|
||||
// Our chain variable is in integer format, not the MSBfirst format loaded from memory.
|
||||
ctr0 = vrev64q_u8( chain );
|
||||
ctr1 = vaddq_u64( ctr0, chainIncrement1 );
|
||||
ctr2 = vaddq_u64( ctr0, chainIncrement2 );
|
||||
ctr3 = vaddq_u64( ctr1, chainIncrement2 );
|
||||
ctr4 = vaddq_u64( ctr2, chainIncrement2 );
|
||||
ctr5 = vaddq_u64( ctr3, chainIncrement2 );
|
||||
ctr6 = vaddq_u64( ctr4, chainIncrement2 );
|
||||
ctr7 = vaddq_u64( ctr5, chainIncrement2 );
|
||||
ctr1 = vaddq_u32( ctr0, chainIncrement1 );
|
||||
ctr2 = vaddq_u32( ctr0, chainIncrement2 );
|
||||
ctr3 = vaddq_u32( ctr1, chainIncrement2 );
|
||||
ctr4 = vaddq_u32( ctr2, chainIncrement2 );
|
||||
ctr5 = vaddq_u32( ctr3, chainIncrement2 );
|
||||
ctr6 = vaddq_u32( ctr4, chainIncrement2 );
|
||||
ctr7 = vaddq_u32( ctr5, chainIncrement2 );
|
||||
|
||||
state = *(__n128 *) pState;
|
||||
|
||||
|
@ -1689,14 +1543,14 @@ SymCryptAesGcmEncryptStitchedNeon(
|
|||
|
||||
if ( cbData >= 8 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
ctr0 = vaddq_u64( ctr0, chainIncrement8 );
|
||||
ctr1 = vaddq_u64( ctr1, chainIncrement8 );
|
||||
ctr2 = vaddq_u64( ctr2, chainIncrement8 );
|
||||
ctr3 = vaddq_u64( ctr3, chainIncrement8 );
|
||||
ctr4 = vaddq_u64( ctr4, chainIncrement8 );
|
||||
ctr5 = vaddq_u64( ctr5, chainIncrement8 );
|
||||
ctr6 = vaddq_u64( ctr6, chainIncrement8 );
|
||||
ctr7 = vaddq_u64( ctr7, chainIncrement8 );
|
||||
ctr0 = vaddq_u32( ctr0, chainIncrement8 );
|
||||
ctr1 = vaddq_u32( ctr1, chainIncrement8 );
|
||||
ctr2 = vaddq_u32( ctr2, chainIncrement8 );
|
||||
ctr3 = vaddq_u32( ctr3, chainIncrement8 );
|
||||
ctr4 = vaddq_u32( ctr4, chainIncrement8 );
|
||||
ctr5 = vaddq_u32( ctr5, chainIncrement8 );
|
||||
ctr6 = vaddq_u32( ctr6, chainIncrement8 );
|
||||
ctr7 = vaddq_u32( ctr7, chainIncrement8 );
|
||||
|
||||
// Encrypt first 8 blocks
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
|
@ -1723,14 +1577,14 @@ SymCryptAesGcmEncryptStitchedNeon(
|
|||
c6 = vrev64q_u8( ctr6 );
|
||||
c7 = vrev64q_u8( ctr7 );
|
||||
|
||||
ctr0 = vaddq_u64( ctr0, chainIncrement8 );
|
||||
ctr1 = vaddq_u64( ctr1, chainIncrement8 );
|
||||
ctr2 = vaddq_u64( ctr2, chainIncrement8 );
|
||||
ctr3 = vaddq_u64( ctr3, chainIncrement8 );
|
||||
ctr4 = vaddq_u64( ctr4, chainIncrement8 );
|
||||
ctr5 = vaddq_u64( ctr5, chainIncrement8 );
|
||||
ctr6 = vaddq_u64( ctr6, chainIncrement8 );
|
||||
ctr7 = vaddq_u64( ctr7, chainIncrement8 );
|
||||
ctr0 = vaddq_u32( ctr0, chainIncrement8 );
|
||||
ctr1 = vaddq_u32( ctr1, chainIncrement8 );
|
||||
ctr2 = vaddq_u32( ctr2, chainIncrement8 );
|
||||
ctr3 = vaddq_u32( ctr3, chainIncrement8 );
|
||||
ctr4 = vaddq_u32( ctr4, chainIncrement8 );
|
||||
ctr5 = vaddq_u32( ctr5, chainIncrement8 );
|
||||
ctr6 = vaddq_u32( ctr6, chainIncrement8 );
|
||||
ctr7 = vaddq_u32( ctr7, chainIncrement8 );
|
||||
|
||||
AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pGhashSrc, 8, expandedKeyTable, todo, a0, a1, a2 );
|
||||
|
||||
|
@ -1814,7 +1668,7 @@ SymCryptAesGcmEncryptStitchedNeon(
|
|||
// Encrypt 1-7 blocks with pre-generated AES-CTR blocks and GHASH the results
|
||||
while( nBlocks >= 2 )
|
||||
{
|
||||
ctr0 = vaddq_u64( ctr0, chainIncrement2 );
|
||||
ctr0 = vaddq_u32( ctr0, chainIncrement2 );
|
||||
|
||||
r0 = veorq_u64( pSrc[0], c0 );
|
||||
r1 = veorq_u64( pSrc[1], c1 );
|
||||
|
@ -1845,7 +1699,7 @@ SymCryptAesGcmEncryptStitchedNeon(
|
|||
|
||||
if( nBlocks > 0 )
|
||||
{
|
||||
ctr0 = vaddq_u64( ctr0, chainIncrement1 );
|
||||
ctr0 = vaddq_u32( ctr0, chainIncrement1 );
|
||||
|
||||
r0 = veorq_u64( pSrc[0], c0 );
|
||||
pDst[0] = r0;
|
||||
|
@ -1911,13 +1765,13 @@ SymCryptAesGcmDecryptStitchedNeon(
|
|||
|
||||
// Our chain variable is in integer format, not the MSBfirst format loaded from memory.
|
||||
ctr0 = vrev64q_u8( chain );
|
||||
ctr1 = vaddq_u64( ctr0, chainIncrement1 );
|
||||
ctr2 = vaddq_u64( ctr0, chainIncrement2 );
|
||||
ctr3 = vaddq_u64( ctr1, chainIncrement2 );
|
||||
ctr4 = vaddq_u64( ctr2, chainIncrement2 );
|
||||
ctr5 = vaddq_u64( ctr3, chainIncrement2 );
|
||||
ctr6 = vaddq_u64( ctr4, chainIncrement2 );
|
||||
ctr7 = vaddq_u64( ctr5, chainIncrement2 );
|
||||
ctr1 = vaddq_u32( ctr0, chainIncrement1 );
|
||||
ctr2 = vaddq_u32( ctr0, chainIncrement2 );
|
||||
ctr3 = vaddq_u32( ctr1, chainIncrement2 );
|
||||
ctr4 = vaddq_u32( ctr2, chainIncrement2 );
|
||||
ctr5 = vaddq_u32( ctr3, chainIncrement2 );
|
||||
ctr6 = vaddq_u32( ctr4, chainIncrement2 );
|
||||
ctr7 = vaddq_u32( ctr5, chainIncrement2 );
|
||||
|
||||
state = *(__n128 *) pState;
|
||||
|
||||
|
@ -1937,14 +1791,14 @@ SymCryptAesGcmDecryptStitchedNeon(
|
|||
c6 = vrev64q_u8( ctr6 );
|
||||
c7 = vrev64q_u8( ctr7 );
|
||||
|
||||
ctr0 = vaddq_u64( ctr0, chainIncrement8 );
|
||||
ctr1 = vaddq_u64( ctr1, chainIncrement8 );
|
||||
ctr2 = vaddq_u64( ctr2, chainIncrement8 );
|
||||
ctr3 = vaddq_u64( ctr3, chainIncrement8 );
|
||||
ctr4 = vaddq_u64( ctr4, chainIncrement8 );
|
||||
ctr5 = vaddq_u64( ctr5, chainIncrement8 );
|
||||
ctr6 = vaddq_u64( ctr6, chainIncrement8 );
|
||||
ctr7 = vaddq_u64( ctr7, chainIncrement8 );
|
||||
ctr0 = vaddq_u32( ctr0, chainIncrement8 );
|
||||
ctr1 = vaddq_u32( ctr1, chainIncrement8 );
|
||||
ctr2 = vaddq_u32( ctr2, chainIncrement8 );
|
||||
ctr3 = vaddq_u32( ctr3, chainIncrement8 );
|
||||
ctr4 = vaddq_u32( ctr4, chainIncrement8 );
|
||||
ctr5 = vaddq_u32( ctr5, chainIncrement8 );
|
||||
ctr6 = vaddq_u32( ctr6, chainIncrement8 );
|
||||
ctr7 = vaddq_u32( ctr7, chainIncrement8 );
|
||||
|
||||
AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pGhashSrc, 8, expandedKeyTable, todo, a0, a1, a2 );
|
||||
|
||||
|
@ -1999,7 +1853,7 @@ SymCryptAesGcmDecryptStitchedNeon(
|
|||
// Decrypt 1-7 blocks with pre-generated AES-CTR blocks
|
||||
while( nBlocks >= 2 )
|
||||
{
|
||||
ctr0 = vaddq_u64( ctr0, chainIncrement2 );
|
||||
ctr0 = vaddq_u32( ctr0, chainIncrement2 );
|
||||
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
pDst[1] = veorq_u64( pSrc[1], c1 );
|
||||
|
@ -2016,7 +1870,7 @@ SymCryptAesGcmDecryptStitchedNeon(
|
|||
|
||||
if( nBlocks > 0 )
|
||||
{
|
||||
ctr0 = vaddq_u64( ctr0, chainIncrement1 );
|
||||
ctr0 = vaddq_u32( ctr0, chainIncrement1 );
|
||||
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
}
|
||||
|
|
|
@@ -0,0 +1,344 @@
//
// aes-pattern.c
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//
// This file contains "pattern" code for AES-related functions. It's not intended to be compiled
// directly; rather it is included by other aes-*.c files which define the macros used here.
//
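The include mechanism is the same one visible in the aes-neon.c changes above: the including file defines the function name and the width-specific intrinsics, includes this pattern file, then undefines the macros and repeats for the other width. Roughly, for the NEON case (a condensed sketch of the includer, not a verbatim copy):

    #define SYMCRYPT_AesCtrMsbXxNeon    SymCryptAesCtrMsb64Neon
    #define VADDQ_UXX                   vaddq_u64       // 64-bit lane addition
    #define VSUBQ_UXX                   vsubq_u64
    #include "aes-pattern.c"                            // emits SymCryptAesCtrMsb64Neon
    #undef VSUBQ_UXX
    #undef VADDQ_UXX
    #undef SYMCRYPT_AesCtrMsbXxNeon

    #define SYMCRYPT_AesCtrMsbXxNeon    SymCryptAesCtrMsb32Neon
    #define VADDQ_UXX                   vaddq_u32       // 32-bit lane addition, as GCM requires
    #define VSUBQ_UXX                   vsubq_u32
    #include "aes-pattern.c"                            // emits SymCryptAesCtrMsb32Neon
    #undef VSUBQ_UXX
    #undef VADDQ_UXX
    #undef SYMCRYPT_AesCtrMsbXxNeon

The aes-xmm.c includer does the same with SYMCRYPT_AesCtrMsbXxXmm, MM_ADD_EPIXX (_mm_add_epi64 / _mm_add_epi32) and MM_SUB_EPIXX.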
#if SYMCRYPT_CPU_ARM64
|
||||
|
||||
VOID
|
||||
SYMCRYPT_CALL
|
||||
SYMCRYPT_AesCtrMsbXxNeon(
|
||||
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
|
||||
_Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue,
|
||||
_In_reads_( cbData ) PCBYTE pbSrc,
|
||||
_Out_writes_( cbData ) PBYTE pbDst,
|
||||
SIZE_T cbData )
|
||||
{
|
||||
__n128 chain = *(__n128 *)pbChainingValue;
|
||||
const __n128 * pSrc = (const __n128 *) pbSrc;
|
||||
__n128 * pDst = (__n128 *) pbDst;
|
||||
|
||||
const __n128 chainIncrement1 = SYMCRYPT_SET_N128_U64( 0, 1 );
|
||||
const __n128 chainIncrement2 = SYMCRYPT_SET_N128_U64( 0, 2 );
|
||||
const __n128 chainIncrement8 = SYMCRYPT_SET_N128_U64( 0, 8 );
|
||||
|
||||
__n128 ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7;
|
||||
__n128 c0, c1, c2, c3, c4, c5, c6, c7;
|
||||
|
||||
cbData &= ~(SYMCRYPT_AES_BLOCK_SIZE - 1);
|
||||
|
||||
// Our chain variable is in integer format, not the MSBfirst format loaded from memory.
|
||||
ctr0 = vrev64q_u8( chain );
|
||||
ctr1 = VADDQ_UXX( ctr0, chainIncrement1 );
|
||||
ctr2 = VADDQ_UXX( ctr0, chainIncrement2 );
|
||||
ctr3 = VADDQ_UXX( ctr1, chainIncrement2 );
|
||||
ctr4 = VADDQ_UXX( ctr2, chainIncrement2 );
|
||||
ctr5 = VADDQ_UXX( ctr3, chainIncrement2 );
|
||||
ctr6 = VADDQ_UXX( ctr4, chainIncrement2 );
|
||||
ctr7 = VADDQ_UXX( ctr5, chainIncrement2 );
|
||||
|
||||
/*
|
||||
while cbData >= 5 * block
|
||||
generate 8 blocks of key stream
|
||||
if cbData < 8 * block
|
||||
break;
|
||||
process 8 blocks
|
||||
if cbData >= 5 * block
|
||||
process 5-7 blocks
|
||||
done
|
||||
if cbData >= 2 * block
|
||||
generate 4 blocks of key stream
|
||||
process 2-4 blocks
|
||||
done
|
||||
if cbData == 1 block
|
||||
generate 1 block of key stream
|
||||
process block
|
||||
*/
|
||||
while( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
c0 = vrev64q_u8( ctr0 );
|
||||
c1 = vrev64q_u8( ctr1 );
|
||||
c2 = vrev64q_u8( ctr2 );
|
||||
c3 = vrev64q_u8( ctr3 );
|
||||
c4 = vrev64q_u8( ctr4 );
|
||||
c5 = vrev64q_u8( ctr5 );
|
||||
c6 = vrev64q_u8( ctr6 );
|
||||
c7 = vrev64q_u8( ctr7 );
|
||||
|
||||
ctr0 = VADDQ_UXX( ctr0, chainIncrement8 );
|
||||
ctr1 = VADDQ_UXX( ctr1, chainIncrement8 );
|
||||
ctr2 = VADDQ_UXX( ctr2, chainIncrement8 );
|
||||
ctr3 = VADDQ_UXX( ctr3, chainIncrement8 );
|
||||
ctr4 = VADDQ_UXX( ctr4, chainIncrement8 );
|
||||
ctr5 = VADDQ_UXX( ctr5, chainIncrement8 );
|
||||
ctr6 = VADDQ_UXX( ctr6, chainIncrement8 );
|
||||
ctr7 = VADDQ_UXX( ctr7, chainIncrement8 );
|
||||
|
||||
AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );
|
||||
|
||||
if( cbData < 8 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
pDst[1] = veorq_u64( pSrc[1], c1 );
|
||||
pDst[2] = veorq_u64( pSrc[2], c2 );
|
||||
pDst[3] = veorq_u64( pSrc[3], c3 );
|
||||
pDst[4] = veorq_u64( pSrc[4], c4 );
|
||||
pDst[5] = veorq_u64( pSrc[5], c5 );
|
||||
pDst[6] = veorq_u64( pSrc[6], c6 );
|
||||
pDst[7] = veorq_u64( pSrc[7], c7 );
|
||||
|
||||
pDst += 8;
|
||||
pSrc += 8;
|
||||
cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
//
|
||||
// At this point we have one of the two following cases:
|
||||
// - cbData >= 5 * 16 and we have 8 blocks of key stream in c0-c7. ctr0-ctr7 is set to (c0+8)-(c7+8)
|
||||
// - cbData < 5 * 16 and we have no blocks of key stream, and ctr0-ctr7 set to the next 8 counters to use
|
||||
//
|
||||
|
||||
if( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) // quick exit of function if the request was a multiple of 8 blocks
|
||||
{
|
||||
if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
//
|
||||
// We already have the key stream
|
||||
//
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
pDst[1] = veorq_u64( pSrc[1], c1 );
|
||||
pDst[2] = veorq_u64( pSrc[2], c2 );
|
||||
pDst[3] = veorq_u64( pSrc[3], c3 );
|
||||
pDst[4] = veorq_u64( pSrc[4], c4 );
|
||||
chain = VSUBQ_UXX( ctr5, chainIncrement8 );
|
||||
|
||||
if( cbData >= 96 )
|
||||
{
|
||||
chain = VSUBQ_UXX( ctr6, chainIncrement8 );
|
||||
pDst[5] = veorq_u64( pSrc[5], c5 );
|
||||
if( cbData >= 112 )
|
||||
{
|
||||
chain = VSUBQ_UXX( ctr7, chainIncrement8 );
|
||||
pDst[6] = veorq_u64( pSrc[6], c6 );
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
// Produce 4 blocks of key stream
|
||||
|
||||
chain = ctr2; // chain is only incremented by 2 for now
|
||||
|
||||
c0 = vrev64q_u8( ctr0 );
|
||||
c1 = vrev64q_u8( ctr1 );
|
||||
c2 = vrev64q_u8( ctr2 );
|
||||
c3 = vrev64q_u8( ctr3 );
|
||||
|
||||
AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 );
|
||||
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
pDst[1] = veorq_u64( pSrc[1], c1 );
|
||||
if( cbData >= 48 )
|
||||
{
|
||||
chain = ctr3;
|
||||
pDst[2] = veorq_u64( pSrc[2], c2 );
|
||||
if( cbData >= 64 )
|
||||
{
|
||||
chain = ctr4;
|
||||
pDst[3] = veorq_u64( pSrc[3], c3 );
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Exactly 1 block to process
|
||||
chain = ctr1;
|
||||
|
||||
c0 = vrev64q_u8( ctr0 );
|
||||
|
||||
AES_ENCRYPT_1( pExpandedKey, c0 );
|
||||
pDst[0] = veorq_u64( pSrc[0], c0 );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
chain = ctr0;
|
||||
}
|
||||
|
||||
chain = vrev64q_u8( chain );
|
||||
*(__n128 *)pbChainingValue = chain;
|
||||
}
|
||||
|
||||
#endif // SYMCRYPT_CPU_ARM64
|
||||
|
||||
#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
|
||||
|
||||
VOID
|
||||
SYMCRYPT_CALL
|
||||
SYMCRYPT_AesCtrMsbXxXmm(
|
||||
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
|
||||
_Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue,
|
||||
_In_reads_( cbData ) PCBYTE pbSrc,
|
||||
_Out_writes_( cbData ) PBYTE pbDst,
|
||||
SIZE_T cbData )
|
||||
{
|
||||
__m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue );
|
||||
|
||||
__m128i BYTE_REVERSE_ORDER = _mm_set_epi8(
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 );
|
||||
|
||||
__m128i chainIncrement1 = _mm_set_epi32( 0, 0, 0, 1 );
|
||||
__m128i chainIncrement2 = _mm_set_epi32( 0, 0, 0, 2 );
|
||||
__m128i chainIncrement3 = _mm_set_epi32( 0, 0, 0, 3 );
|
||||
//__m128i chainIncrement8 = _mm_set_epi32( 0, 0, 0, 8 );
|
||||
|
||||
__m128i c0, c1, c2, c3, c4, c5, c6, c7;
|
||||
|
||||
cbData &= ~(SYMCRYPT_AES_BLOCK_SIZE - 1);
|
||||
|
||||
chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER );
|
||||
|
||||
/*
|
||||
while cbData >= 5 * block
|
||||
generate 8 blocks of key stream
|
||||
if cbData < 8 * block
|
||||
break;
|
||||
process 8 blocks
|
||||
if cbData >= 5 * block
|
||||
process 5-7 blocks
|
||||
done
|
||||
if cbData > 1 block
|
||||
generate 4 blocks of key stream
|
||||
process 2-4 blocks
|
||||
done
|
||||
if cbData == 1 block
|
||||
generate 1 block of key stream
|
||||
process block
|
||||
*/
|
||||
while( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
c0 = chain;
|
||||
c1 = MM_ADD_EPIXX( chain, chainIncrement1 );
|
||||
c2 = MM_ADD_EPIXX( chain, chainIncrement2 );
|
||||
c3 = MM_ADD_EPIXX( c1, chainIncrement2 );
|
||||
c4 = MM_ADD_EPIXX( c2, chainIncrement2 );
|
||||
c5 = MM_ADD_EPIXX( c3, chainIncrement2 );
|
||||
c6 = MM_ADD_EPIXX( c4, chainIncrement2 );
|
||||
c7 = MM_ADD_EPIXX( c5, chainIncrement2 );
|
||||
chain = MM_ADD_EPIXX( c6, chainIncrement2 );
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
|
||||
c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER );
|
||||
c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER );
|
||||
c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER );
|
||||
c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER );
|
||||
c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER );
|
||||
c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER );
|
||||
|
||||
AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );
|
||||
|
||||
if( cbData < 8 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112 ) ) ) );
|
||||
pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE;
|
||||
pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE;
|
||||
cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
//
|
||||
// At this point we have one of the two following cases:
|
||||
// - cbData >= 5 * 16 and we have 8 blocks of key stream in c0-c7. chain is set to c7 + 1
|
||||
// - cbData < 5 * 16 and we have no blocks of key stream, with chain the next value to use
|
||||
//
|
||||
|
||||
if( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) // quick exit of function if the request was a multiple of 8 blocks
|
||||
{
|
||||
if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
//
|
||||
// We already have the key stream
|
||||
//
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ) ) ) );
|
||||
chain = MM_SUB_EPIXX( chain, chainIncrement3 );
|
||||
|
||||
if( cbData >= 96 )
|
||||
{
|
||||
chain = MM_ADD_EPIXX( chain, chainIncrement1 );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ) ) ) );
|
||||
if( cbData >= 112 )
|
||||
{
|
||||
chain = MM_ADD_EPIXX( chain, chainIncrement1 );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ) ) ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
// Produce 4 blocks of key stream
|
||||
|
||||
c0 = chain;
|
||||
c1 = MM_ADD_EPIXX( chain, chainIncrement1 );
|
||||
c2 = MM_ADD_EPIXX( chain, chainIncrement2 );
|
||||
c3 = MM_ADD_EPIXX( c1, chainIncrement2 );
|
||||
chain = c2; // chain is only incremented by 2 for now
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
|
||||
c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER );
|
||||
c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER );
|
||||
|
||||
AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 );
|
||||
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) );
|
||||
if( cbData >= 48 )
|
||||
{
|
||||
chain = MM_ADD_EPIXX( chain, chainIncrement1 );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) );
|
||||
if( cbData >= 64 )
|
||||
{
|
||||
chain = MM_ADD_EPIXX( chain, chainIncrement1 );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Exactly 1 block to process
|
||||
c0 = chain;
|
||||
chain = MM_ADD_EPIXX( chain, chainIncrement1 );
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
|
||||
AES_ENCRYPT_1( pExpandedKey, c0 );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) );
|
||||
}
|
||||
}
|
||||
|
||||
chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER );
|
||||
_mm_storeu_si128( (__m128i *) pbChainingValue, chain );
|
||||
}
|
||||
|
||||
#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
|
lib/aes-xmm.c (248 lines changed)
@ -727,166 +727,26 @@ SymCryptAesCbcMacXmm(
|
|||
#pragma warning( disable:4701 ) // "Use of uninitialized variable"
|
||||
#pragma runtime_checks( "u", off )
|
||||
|
||||
VOID
|
||||
SYMCRYPT_CALL
|
||||
SymCryptAesCtrMsb64Xmm(
|
||||
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
|
||||
_Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue,
|
||||
_In_reads_( cbData ) PCBYTE pbSrc,
|
||||
_Out_writes_( cbData ) PBYTE pbDst,
|
||||
SIZE_T cbData )
|
||||
{
|
||||
__m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue );
|
||||
#define SYMCRYPT_AesCtrMsbXxXmm SymCryptAesCtrMsb64Xmm
|
||||
#define MM_ADD_EPIXX _mm_add_epi64
|
||||
#define MM_SUB_EPIXX _mm_sub_epi64
|
||||
|
||||
__m128i BYTE_REVERSE_ORDER = _mm_set_epi8(
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 );
|
||||
#include "aes-pattern.c"
|
||||
|
||||
__m128i chainIncrement1 = _mm_set_epi32( 0, 0, 0, 1 );
|
||||
__m128i chainIncrement2 = _mm_set_epi32( 0, 0, 0, 2 );
|
||||
__m128i chainIncrement3 = _mm_set_epi32( 0, 0, 0, 3 );
|
||||
//__m128i chainIncrement8 = _mm_set_epi32( 0, 0, 0, 8 );
|
||||
#undef MM_SUB_EPIXX
|
||||
#undef MM_ADD_EPIXX
|
||||
#undef SYMCRYPT_AesCtrMsbXxXmm
|
||||
|
||||
__m128i c0, c1, c2, c3, c4, c5, c6, c7;
|
||||
#define SYMCRYPT_AesCtrMsbXxXmm SymCryptAesCtrMsb32Xmm
|
||||
#define MM_ADD_EPIXX _mm_add_epi32
|
||||
#define MM_SUB_EPIXX _mm_sub_epi32
|
||||
|
||||
cbData &= ~(SYMCRYPT_AES_BLOCK_SIZE - 1);
|
||||
#include "aes-pattern.c"
|
||||
|
||||
chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER );
|
||||
#undef MM_SUB_EPIXX
|
||||
#undef MM_ADD_EPIXX
|
||||
#undef SYMCRYPT_AesCtrMsbXxXmm
|
||||
|
||||
/*
|
||||
while cbData >= 5 * block
|
||||
generate 8 blocks of key stream
|
||||
if cbData < 8 * block
|
||||
break;
|
||||
process 8 blocks
|
||||
if cbData >= 5 * block
|
||||
process 5-7 blocks
|
||||
done
|
||||
if cbData > 1 block
|
||||
generate 4 blocks of key stream
|
||||
process 2-4 blocks
|
||||
done
|
||||
if cbData == 1 block
|
||||
generate 1 block of key stream
|
||||
process block
|
||||
*/
|
||||
while( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
c0 = chain;
|
||||
c1 = _mm_add_epi64( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi64( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi64( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi64( c2, chainIncrement2 );
|
||||
c5 = _mm_add_epi64( c3, chainIncrement2 );
|
||||
c6 = _mm_add_epi64( c4, chainIncrement2 );
|
||||
c7 = _mm_add_epi64( c5, chainIncrement2 );
|
||||
chain = _mm_add_epi64( c6, chainIncrement2 );
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
|
||||
c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER );
|
||||
c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER );
|
||||
c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER );
|
||||
c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER );
|
||||
c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER );
|
||||
c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER );
|
||||
|
||||
AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );
|
||||
|
||||
if( cbData < 8 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112 ) ) ) );
|
||||
pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE;
|
||||
pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE;
|
||||
cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
//
|
||||
// At this point we have one of the two following cases:
|
||||
// - cbData >= 5 * 16 and we have 8 blocks of key stream in c0-c7. chain is set to c7 + 1
|
||||
// - cbData < 5 * 16 and we have no blocks of key stream, with chain the next value to use
|
||||
//
|
||||
|
||||
if( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) // quick exit of function if the request was a multiple of 8 blocks
|
||||
{
|
||||
if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
//
|
||||
// We already have the key stream
|
||||
//
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ) ) ) );
|
||||
chain = _mm_sub_epi64( chain, chainIncrement3 );
|
||||
|
||||
if( cbData >= 96 )
|
||||
{
|
||||
chain = _mm_add_epi64( chain, chainIncrement1 );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ) ) ) );
|
||||
if( cbData >= 112 )
|
||||
{
|
||||
chain = _mm_add_epi64( chain, chainIncrement1 );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ) ) ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE )
|
||||
{
|
||||
// Produce 4 blocks of key stream
|
||||
|
||||
c0 = chain;
|
||||
c1 = _mm_add_epi64( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi64( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi64( c1, chainIncrement2 );
|
||||
chain = c2; // chain is only incremented by 2 for now
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
|
||||
c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER );
|
||||
c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER );
|
||||
|
||||
AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 );
|
||||
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) );
|
||||
if( cbData >= 48 )
|
||||
{
|
||||
chain = _mm_add_epi64( chain, chainIncrement1 );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) );
|
||||
if( cbData >= 64 )
|
||||
{
|
||||
chain = _mm_add_epi64( chain, chainIncrement1 );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Exactly 1 block to process
|
||||
c0 = chain;
|
||||
chain = _mm_add_epi64( chain, chainIncrement1 );
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
|
||||
AES_ENCRYPT_1( pExpandedKey, c0 );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) );
|
||||
}
|
||||
}
|
||||
|
||||
chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER );
|
||||
_mm_storeu_si128( (__m128i *) pbChainingValue, chain );
|
||||
}
|
||||
#pragma runtime_checks( "u", restore )
|
||||
#pragma warning(pop)
|
||||
|
||||
|
@ -1558,13 +1418,13 @@ SymCryptAesGcmEncryptStitchedXmm(
|
|||
|
||||
// Do 8 blocks of CTR either for tail (if total blocks <8) or for encryption of first 8 blocks
|
||||
c0 = chain;
|
||||
c1 = _mm_add_epi64( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi64( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi64( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi64( c2, chainIncrement2 );
|
||||
c5 = _mm_add_epi64( c3, chainIncrement2 );
|
||||
c6 = _mm_add_epi64( c4, chainIncrement2 );
|
||||
c7 = _mm_add_epi64( c5, chainIncrement2 );
|
||||
c1 = _mm_add_epi32( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi32( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi32( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi32( c2, chainIncrement2 );
|
||||
c5 = _mm_add_epi32( c3, chainIncrement2 );
|
||||
c6 = _mm_add_epi32( c4, chainIncrement2 );
|
||||
c7 = _mm_add_epi32( c5, chainIncrement2 );
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
|
||||
|
@ -1580,7 +1440,7 @@ SymCryptAesGcmEncryptStitchedXmm(
|
|||
if( nBlocks >= 8 )
|
||||
{
|
||||
// Encrypt first 8 blocks - update chain
|
||||
chain = _mm_add_epi64( chain, chainIncrement8 );
|
||||
chain = _mm_add_epi32( chain, chainIncrement8 );
|
||||
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ) );
|
||||
|
@ -1598,14 +1458,14 @@ SymCryptAesGcmEncryptStitchedXmm(
|
|||
{
|
||||
// In this loop we always have 8 blocks to encrypt and we have already encrypted the previous 8 blocks ready for GHASH
|
||||
c0 = chain;
|
||||
c1 = _mm_add_epi64( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi64( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi64( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi64( c2, chainIncrement2 );
|
||||
c5 = _mm_add_epi64( c3, chainIncrement2 );
|
||||
c6 = _mm_add_epi64( c4, chainIncrement2 );
|
||||
c7 = _mm_add_epi64( c5, chainIncrement2 );
|
||||
chain = _mm_add_epi64( c6, chainIncrement2 );
|
||||
c1 = _mm_add_epi32( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi32( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi32( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi32( c2, chainIncrement2 );
|
||||
c5 = _mm_add_epi32( c3, chainIncrement2 );
|
||||
c6 = _mm_add_epi32( c4, chainIncrement2 );
|
||||
c7 = _mm_add_epi32( c5, chainIncrement2 );
|
||||
chain = _mm_add_epi32( c6, chainIncrement2 );
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
|
||||
|
@ -1647,10 +1507,10 @@ SymCryptAesGcmEncryptStitchedXmm(
|
|||
if (nBlocks > 0)
|
||||
{
|
||||
c0 = chain;
|
||||
c1 = _mm_add_epi64( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi64( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi64( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi64( c2, chainIncrement2 );
|
||||
c1 = _mm_add_epi32( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi32( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi32( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi32( c2, chainIncrement2 );
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
|
||||
|
@ -1660,8 +1520,8 @@ SymCryptAesGcmEncryptStitchedXmm(
|
|||
if (nBlocks > 4)
|
||||
{
|
||||
// Do 8 rounds of AES-CTR for tail in parallel with 8 rounds of GHASH
|
||||
c5 = _mm_add_epi64( c4, chainIncrement1 );
|
||||
c6 = _mm_add_epi64( c4, chainIncrement2 );
|
||||
c5 = _mm_add_epi32( c4, chainIncrement1 );
|
||||
c6 = _mm_add_epi32( c4, chainIncrement2 );
|
||||
|
||||
c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER );
|
||||
c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER );
|
||||
|
@ -1705,7 +1565,7 @@ SymCryptAesGcmEncryptStitchedXmm(
|
|||
// Encrypt 1-7 blocks with pre-generated AES-CTR blocks and GHASH the results
|
||||
while( nBlocks >= 2 )
|
||||
{
|
||||
chain = _mm_add_epi64( chain, chainIncrement2 );
|
||||
chain = _mm_add_epi32( chain, chainIncrement2 );
|
||||
|
||||
r0 = _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) );
|
||||
r1 = _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) );
|
||||
|
@ -1732,7 +1592,7 @@ SymCryptAesGcmEncryptStitchedXmm(
|
|||
|
||||
if( nBlocks > 0 )
|
||||
{
|
||||
chain = _mm_add_epi64( chain, chainIncrement1 );
|
||||
chain = _mm_add_epi32( chain, chainIncrement1 );
|
||||
|
||||
r0 = _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) );
|
||||
|
||||
|
@ -1805,14 +1665,14 @@ SymCryptAesGcmDecryptStitchedXmm(
|
|||
{
|
||||
// In this loop we always have 8 blocks to decrypt and GHASH
|
||||
c0 = chain;
|
||||
c1 = _mm_add_epi64( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi64( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi64( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi64( c2, chainIncrement2 );
|
||||
c5 = _mm_add_epi64( c3, chainIncrement2 );
|
||||
c6 = _mm_add_epi64( c4, chainIncrement2 );
|
||||
c7 = _mm_add_epi64( c5, chainIncrement2 );
|
||||
chain = _mm_add_epi64( c6, chainIncrement2 );
|
||||
c1 = _mm_add_epi32( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi32( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi32( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi32( c2, chainIncrement2 );
|
||||
c5 = _mm_add_epi32( c3, chainIncrement2 );
|
||||
c6 = _mm_add_epi32( c4, chainIncrement2 );
|
||||
c7 = _mm_add_epi32( c5, chainIncrement2 );
|
||||
chain = _mm_add_epi32( c6, chainIncrement2 );
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
|
||||
|
@ -1856,10 +1716,10 @@ SymCryptAesGcmDecryptStitchedXmm(
|
|||
// We have 1-7 blocks to GHASH and decrypt
|
||||
// Do the exact number of GHASH blocks we need in parallel with generating either 4 or 8 blocks of AES-CTR
|
||||
c0 = chain;
|
||||
c1 = _mm_add_epi64( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi64( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi64( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi64( c2, chainIncrement2 );
|
||||
c1 = _mm_add_epi32( chain, chainIncrement1 );
|
||||
c2 = _mm_add_epi32( chain, chainIncrement2 );
|
||||
c3 = _mm_add_epi32( c1, chainIncrement2 );
|
||||
c4 = _mm_add_epi32( c2, chainIncrement2 );
|
||||
|
||||
c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER );
|
||||
c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER );
|
||||
|
@ -1868,8 +1728,8 @@ SymCryptAesGcmDecryptStitchedXmm(
|
|||
|
||||
if( nBlocks > 4 )
|
||||
{
|
||||
c5 = _mm_add_epi64( c4, chainIncrement1 );
|
||||
c6 = _mm_add_epi64( c4, chainIncrement2 );
|
||||
c5 = _mm_add_epi32( c4, chainIncrement1 );
|
||||
c6 = _mm_add_epi32( c4, chainIncrement2 );
|
||||
|
||||
c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER );
|
||||
c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER );
|
||||
|
@ -1886,7 +1746,7 @@ SymCryptAesGcmDecryptStitchedXmm(
|
|||
// Decrypt 1-7 blocks with pre-generated AES-CTR blocks
|
||||
while( nBlocks >= 2 )
|
||||
{
|
||||
chain = _mm_add_epi64( chain, chainIncrement2 );
|
||||
chain = _mm_add_epi32( chain, chainIncrement2 );
|
||||
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) );
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ) );
|
||||
|
@ -1903,7 +1763,7 @@ SymCryptAesGcmDecryptStitchedXmm(
|
|||
|
||||
if( nBlocks > 0 )
|
||||
{
|
||||
chain = _mm_add_epi64( chain, chainIncrement1 );
|
||||
chain = _mm_add_epi32( chain, chainIncrement1 );
|
||||
|
||||
_mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) );
|
||||
}
|
||||
|
|
|
@ -497,14 +497,14 @@ SymCryptAesGcmEncryptStitchedYmm_2048(
|
|||
|
||||
state = _mm_loadu_si128( (__m128i *) pState );
|
||||
ctr0 = _mm256_insertf128_si256( _mm256_castsi128_si256( chain ), chain, 1); // AVX
|
||||
ctr0 = _mm256_add_epi64( ctr0, chainIncrementUpper1 );
|
||||
ctr1 = _mm256_add_epi64( ctr0, chainIncrement2 );
|
||||
ctr2 = _mm256_add_epi64( ctr0, chainIncrement4 );
|
||||
ctr3 = _mm256_add_epi64( ctr1, chainIncrement4 );
|
||||
ctr4 = _mm256_add_epi64( ctr2, chainIncrement4 );
|
||||
ctr5 = _mm256_add_epi64( ctr3, chainIncrement4 );
|
||||
ctr6 = _mm256_add_epi64( ctr4, chainIncrement4 );
|
||||
ctr7 = _mm256_add_epi64( ctr5, chainIncrement4 );
|
||||
ctr0 = _mm256_add_epi32( ctr0, chainIncrementUpper1 );
|
||||
ctr1 = _mm256_add_epi32( ctr0, chainIncrement2 );
|
||||
ctr2 = _mm256_add_epi32( ctr0, chainIncrement4 );
|
||||
ctr3 = _mm256_add_epi32( ctr1, chainIncrement4 );
|
||||
ctr4 = _mm256_add_epi32( ctr2, chainIncrement4 );
|
||||
ctr5 = _mm256_add_epi32( ctr3, chainIncrement4 );
|
||||
ctr6 = _mm256_add_epi32( ctr4, chainIncrement4 );
|
||||
ctr7 = _mm256_add_epi32( ctr5, chainIncrement4 );
|
||||
|
||||
CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0_xmm, a1_xmm, a2_xmm );
|
||||
a0 = a1 = a2 = _mm256_setzero_si256();
|
||||
|
@ -518,14 +518,14 @@ SymCryptAesGcmEncryptStitchedYmm_2048(
|
|||
c6 = _mm256_shuffle_epi8( ctr6, BYTE_REVERSE_ORDER );
|
||||
c7 = _mm256_shuffle_epi8( ctr7, BYTE_REVERSE_ORDER );
|
||||
|
||||
ctr0 = _mm256_add_epi64( ctr0, chainIncrement16 );
|
||||
ctr1 = _mm256_add_epi64( ctr1, chainIncrement16 );
|
||||
ctr2 = _mm256_add_epi64( ctr2, chainIncrement16 );
|
||||
ctr3 = _mm256_add_epi64( ctr3, chainIncrement16 );
|
||||
ctr4 = _mm256_add_epi64( ctr4, chainIncrement16 );
|
||||
ctr5 = _mm256_add_epi64( ctr5, chainIncrement16 );
|
||||
ctr6 = _mm256_add_epi64( ctr6, chainIncrement16 );
|
||||
ctr7 = _mm256_add_epi64( ctr7, chainIncrement16 );
|
||||
ctr0 = _mm256_add_epi32( ctr0, chainIncrement16 );
|
||||
ctr1 = _mm256_add_epi32( ctr1, chainIncrement16 );
|
||||
ctr2 = _mm256_add_epi32( ctr2, chainIncrement16 );
|
||||
ctr3 = _mm256_add_epi32( ctr3, chainIncrement16 );
|
||||
ctr4 = _mm256_add_epi32( ctr4, chainIncrement16 );
|
||||
ctr5 = _mm256_add_epi32( ctr5, chainIncrement16 );
|
||||
ctr6 = _mm256_add_epi32( ctr6, chainIncrement16 );
|
||||
ctr7 = _mm256_add_epi32( ctr7, chainIncrement16 );
|
||||
|
||||
AES_ENCRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );
|
||||
|
||||
|
@ -552,14 +552,14 @@ SymCryptAesGcmEncryptStitchedYmm_2048(
|
|||
c6 = _mm256_shuffle_epi8( ctr6, BYTE_REVERSE_ORDER );
|
||||
c7 = _mm256_shuffle_epi8( ctr7, BYTE_REVERSE_ORDER );
|
||||
|
||||
ctr0 = _mm256_add_epi64( ctr0, chainIncrement16 );
|
||||
ctr1 = _mm256_add_epi64( ctr1, chainIncrement16 );
|
||||
ctr2 = _mm256_add_epi64( ctr2, chainIncrement16 );
|
||||
ctr3 = _mm256_add_epi64( ctr3, chainIncrement16 );
|
||||
ctr4 = _mm256_add_epi64( ctr4, chainIncrement16 );
|
||||
ctr5 = _mm256_add_epi64( ctr5, chainIncrement16 );
|
||||
ctr6 = _mm256_add_epi64( ctr6, chainIncrement16 );
|
||||
ctr7 = _mm256_add_epi64( ctr7, chainIncrement16 );
|
||||
ctr0 = _mm256_add_epi32( ctr0, chainIncrement16 );
|
||||
ctr1 = _mm256_add_epi32( ctr1, chainIncrement16 );
|
||||
ctr2 = _mm256_add_epi32( ctr2, chainIncrement16 );
|
||||
ctr3 = _mm256_add_epi32( ctr3, chainIncrement16 );
|
||||
ctr4 = _mm256_add_epi32( ctr4, chainIncrement16 );
|
||||
ctr5 = _mm256_add_epi32( ctr5, chainIncrement16 );
|
||||
ctr6 = _mm256_add_epi32( ctr6, chainIncrement16 );
|
||||
ctr7 = _mm256_add_epi32( ctr7, chainIncrement16 );
|
||||
|
||||
AES_GCM_ENCRYPT_16_Ymm( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 );
|
||||
|
||||
|
@ -694,14 +694,14 @@ SymCryptAesGcmDecryptStitchedYmm_2048(
|
|||
|
||||
state = _mm_loadu_si128( (__m128i *) pState );
|
||||
ctr0 = _mm256_insertf128_si256( _mm256_castsi128_si256( chain ), chain, 1); // AVX
|
||||
ctr0 = _mm256_add_epi64( ctr0, chainIncrementUpper1 );
|
||||
ctr1 = _mm256_add_epi64( ctr0, chainIncrement2 );
|
||||
ctr2 = _mm256_add_epi64( ctr0, chainIncrement4 );
|
||||
ctr3 = _mm256_add_epi64( ctr1, chainIncrement4 );
|
||||
ctr4 = _mm256_add_epi64( ctr2, chainIncrement4 );
|
||||
ctr5 = _mm256_add_epi64( ctr3, chainIncrement4 );
|
||||
ctr6 = _mm256_add_epi64( ctr4, chainIncrement4 );
|
||||
ctr7 = _mm256_add_epi64( ctr5, chainIncrement4 );
|
||||
ctr0 = _mm256_add_epi32( ctr0, chainIncrementUpper1 );
|
||||
ctr1 = _mm256_add_epi32( ctr0, chainIncrement2 );
|
||||
ctr2 = _mm256_add_epi32( ctr0, chainIncrement4 );
|
||||
ctr3 = _mm256_add_epi32( ctr1, chainIncrement4 );
|
||||
ctr4 = _mm256_add_epi32( ctr2, chainIncrement4 );
|
||||
ctr5 = _mm256_add_epi32( ctr3, chainIncrement4 );
|
||||
ctr6 = _mm256_add_epi32( ctr4, chainIncrement4 );
|
||||
ctr7 = _mm256_add_epi32( ctr5, chainIncrement4 );
|
||||
|
||||
CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0_xmm, a1_xmm, a2_xmm );
|
||||
a0 = a1 = a2 = _mm256_setzero_si256();
|
||||
|
@ -717,14 +717,14 @@ SymCryptAesGcmDecryptStitchedYmm_2048(
|
|||
c6 = _mm256_shuffle_epi8( ctr6, BYTE_REVERSE_ORDER );
|
||||
c7 = _mm256_shuffle_epi8( ctr7, BYTE_REVERSE_ORDER );
|
||||
|
||||
ctr0 = _mm256_add_epi64( ctr0, chainIncrement16 );
|
||||
ctr1 = _mm256_add_epi64( ctr1, chainIncrement16 );
|
||||
ctr2 = _mm256_add_epi64( ctr2, chainIncrement16 );
|
||||
ctr3 = _mm256_add_epi64( ctr3, chainIncrement16 );
|
||||
ctr4 = _mm256_add_epi64( ctr4, chainIncrement16 );
|
||||
ctr5 = _mm256_add_epi64( ctr5, chainIncrement16 );
|
||||
ctr6 = _mm256_add_epi64( ctr6, chainIncrement16 );
|
||||
ctr7 = _mm256_add_epi64( ctr7, chainIncrement16 );
|
||||
ctr0 = _mm256_add_epi32( ctr0, chainIncrement16 );
|
||||
ctr1 = _mm256_add_epi32( ctr1, chainIncrement16 );
|
||||
ctr2 = _mm256_add_epi32( ctr2, chainIncrement16 );
|
||||
ctr3 = _mm256_add_epi32( ctr3, chainIncrement16 );
|
||||
ctr4 = _mm256_add_epi32( ctr4, chainIncrement16 );
|
||||
ctr5 = _mm256_add_epi32( ctr5, chainIncrement16 );
|
||||
ctr6 = _mm256_add_epi32( ctr6, chainIncrement16 );
|
||||
ctr7 = _mm256_add_epi32( ctr7, chainIncrement16 );
|
||||
|
||||
AES_GCM_ENCRYPT_16_Ymm( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 );
|
||||
|
||||
|
|
|
@@ -244,6 +244,57 @@ SymCryptCbcMac(
    SymCryptWipeKnownSize( buf, sizeof( buf ));
}

VOID
SYMCRYPT_CALL
SymCryptCtrMsb32(
    _In_                        PCSYMCRYPT_BLOCKCIPHER  pBlockCipher,
    _In_                        PCVOID                  pExpandedKey,
    _Inout_updates_( pBlockCipher->blockSize )
                                PBYTE                   pbChainingValue,
    _In_reads_( cbData )        PCBYTE                  pbSrc,
    _Out_writes_( cbData )      PBYTE                   pbDst,
                                SIZE_T                  cbData )
{
    SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_MAX_BLOCK_SIZE];
    PBYTE count = &buf[0];
    PBYTE keystream = &buf[SYMCRYPT_MAX_BLOCK_SIZE];
    SIZE_T blockSize;
    PCBYTE pbSrcEnd;

    blockSize = pBlockCipher->blockSize;
    SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE );

    //
    // Compute the end of the data, rounding the size down to a multiple of the block size.
    //
    pbSrcEnd = &pbSrc[ cbData & ~(blockSize - 1) ];

    //
    // We keep the chaining state in a local buffer to enforce the read-once write-once rule.
    // It also improves memory locality.
    //
#pragma warning(suppress: 22105)
    memcpy( count, pbChainingValue, blockSize );
    while( pbSrc < pbSrcEnd )
    {
        SYMCRYPT_ASSERT( pbSrc <= pbSrcEnd - blockSize );       // help PreFast
        (*pBlockCipher->encryptFunc)( pExpandedKey, count, keystream );
        SymCryptXorBytes( keystream, pbSrc, pbDst, blockSize );

        //
        // We only need to increment the last 32 bits of the counter value.
        //
        SYMCRYPT_STORE_MSBFIRST32( &count[ blockSize-4 ], 1 + SYMCRYPT_LOAD_MSBFIRST32( &count[ blockSize-4 ] ) );

        pbSrc += blockSize;
        pbDst += blockSize;
    }

    memcpy( pbChainingValue, count, blockSize );

    SymCryptWipeKnownSize( buf, sizeof( buf ));
}

VOID
SYMCRYPT_CALL
SymCryptCtrMsb64(
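A small usage sketch of the new generic routine, assuming SymCryptCtrMsb32 is callable the same way as the existing SymCryptCtrMsb64 and using the exported SymCryptAesBlockCipher description and SymCryptAesExpandKey; the values are placeholders and the demo function is not part of the library:

    VOID
    CtrMsb32Demo( void )                            // hypothetical demo helper
    {
        SYMCRYPT_AES_EXPANDED_KEY expandedKey;
        BYTE key[16] = { 0 };
        BYTE counterBlock[16] = { 0 };              // bytes 0-11 stay fixed, bytes 12-15 are the counter
        BYTE zero[32] = { 0 };
        BYTE keystream[32];

        SymCryptAesExpandKey( &expandedKey, key, sizeof(key) );

        // Start the 32-bit counter at 0xFFFFFFFF to show the wrap-around behaviour.
        counterBlock[12] = counterBlock[13] = counterBlock[14] = counterBlock[15] = 0xFF;

        SymCryptCtrMsb32( SymCryptAesBlockCipher, &expandedKey,
                          counterBlock, zero, keystream, sizeof(zero) );

        // Two blocks were processed, so the low 32 bits went 0xFFFFFFFF -> 0x00000000 -> 0x00000001
        // while bytes 0-11 of counterBlock are untouched. A 64-bit increment would have carried
        // into byte 11, which is exactly what breaks GCM when the counter block is GHASH-derived.
    }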
lib/gcm.c (152 lines changed)
@@ -6,7 +6,7 @@
#include "precomp.h"

-#define GCM_REQUIRED_NONCE_SIZE (12)
+#define GCM_MIN_NONCE_SIZE      (1)
#define GCM_MIN_TAG_SIZE        (12)
#define GCM_MAX_TAG_SIZE        (16)

@@ -26,9 +26,10 @@ SymCryptGcmValidateParameters(
    }

    //
-   // We only support 12-byte nonces, are per SP800-38D recommendations.
+   // SP800-38D specifies that the nonce must be at least one bit, but we operate on bytes,
+   // so the minimum is one byte.
    //
-   if( cbNonce != GCM_REQUIRED_NONCE_SIZE )
+   if( cbNonce < GCM_MIN_NONCE_SIZE )
    {
        return SYMCRYPT_WRONG_NONCE_SIZE;
    }
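For context, from SP800-38D rather than from this diff: the pre-counter block J0 that seeds the CTR keystream is built differently depending on the nonce length, which is why the counter can start at an arbitrary value when cbNonce != 12 (pseudocode, bit lengths in brackets):

    if cbNonce == 12:
        J0 = nonce || 0x00000001                            // low 32 bits always start at 1
    else:
        s  = 128 * ceil( 8*cbNonce / 128 ) - 8*cbNonce      // zero padding up to a full block
        J0 = GHASH_H( nonce || 0^s || 0^[64] || [8*cbNonce]_[64] )  // arbitrary, including its low 32 bits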
@@ -161,21 +162,8 @@ SymCryptGcmEncryptDecryptPart(
    {
        bytesToProcess = cbData & SYMCRYPT_GCM_BLOCK_ROUND_MASK;

-       //
-       // We use the CTR mode function that increments 64 bits, rather than the 32 bits that GCM requires.
-       // As we only support 12-byte nonces, the 32-bit counter never overflows, and we can safely use
-       // the 64-bit incrementing primitive.
-       // If we ever support other nonce sizes this is going to be a big problem.
-       // You can't fake a 32-bit counter using a 64-bit counter function without side-channels that expose
-       // information about the current counter value.
-       // With other nonce sizes the actual counter value itself is not public, so we can't expose that.
-       // We can do two things:
-       // - create SymCryptCtrMsb32
-       // - Accept that we leak information about the counter value; after all it is not treated as a
-       //   secret when the nonce is 12 bytes.
-       //
        SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE );
-       SymCryptCtrMsb64( pState->pKey->pBlockCipher,
+       SymCryptCtrMsb32( pState->pKey->pBlockCipher,
                          &pState->pKey->blockcipherKey,
                          &pState->counterBlock[0],
                          pbSrc,
@@ -192,7 +180,7 @@ SymCryptGcmEncryptDecryptPart(
         SymCryptWipeKnownSize( &pState->keystreamBlock[0], SYMCRYPT_GCM_BLOCK_SIZE );

         SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE );
-        SymCryptCtrMsb64( pState->pKey->pBlockCipher,
+        SymCryptCtrMsb32( pState->pKey->pBlockCipher,
                           &pState->pKey->blockcipherKey,
                           &pState->counterBlock[0],
                           &pState->keystreamBlock[0],
@@ -209,7 +197,25 @@ SymCryptGcmEncryptDecryptPart(

 }

+FORCEINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptGcmResetCounterBlock(
+    _Inout_ PSYMCRYPT_GCM_STATE pState )
+{
+    // Computing the tag for GCM requires invoking the GCTR function with the pre-counter
+    // block which was computed when the nonce was set. Historically, we only supported 12-byte
+    // nonces, so we could trivially reset the counter block by just setting the last 4 bytes to
+    // (DWORD) 1. With support for larger IVs, the pre-counter block is computed from a GHash of
+    // the nonce, and we don't store the value. Adding a field in the GCM struct to store the value
+    // would be ABI-breaking, so instead we can recompute the value by decrementing the last 32 bits
+    // of the counter block by the number of blocks that have been processed (since the counter is
+    // incremented once per block), plus one for the initial increment.
+    UINT32 preCounter32 = SYMCRYPT_LOAD_MSBFIRST32( &pState->counterBlock[12] ) -
+        (UINT32) ((pState->cbData + SYMCRYPT_GCM_BLOCK_SIZE - 1) / SYMCRYPT_GCM_BLOCK_SIZE) - 1;
+
+    SYMCRYPT_STORE_MSBFIRST32( &pState->counterBlock[12], preCounter32 );
+}
+
 VOID
 SYMCRYPT_CALL
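A small standalone check of the arithmetic in SymCryptGcmResetCounterBlock above, with made-up numbers (not SymCrypt code): the counter was incremented once when the nonce was set and once per 16-byte block of data, so subtracting ceil(cbData / 16) + 1 from the current 32-bit counter value gets back to the pre-counter block.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t preCounter = 0x89ABCDEFu;   // pretend GHASH-derived pre-counter value
    uint64_t cbData     = 100;           // 100 bytes of data => 7 blocks processed
    uint64_t blocks     = (cbData + 16 - 1) / 16;

    // State after the nonce was set (+1) and all the data was encrypted (+blocks).
    uint32_t current    = preCounter + (uint32_t)blocks + 1;

    // What the reset computes: subtract the same amount again.
    uint32_t recovered  = current - (uint32_t)blocks - 1;

    printf( "pre=%08x current=%08x recovered=%08x\n", preCounter, current, recovered );
    return 0;
}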
@@ -218,7 +224,6 @@ SymCryptGcmComputeTag(
     _Out_writes_( SYMCRYPT_GCM_BLOCK_SIZE ) PBYTE pbTag )
 {
     SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_GCM_BLOCK_SIZE];
-    UINT64 cntLow;

     SYMCRYPT_STORE_MSBFIRST64( &buf[16], pState->cbAuthData * 8 );
     SYMCRYPT_STORE_MSBFIRST64( &buf[24], pState->cbData * 8 );
@@ -238,19 +243,7 @@ SymCryptGcmComputeTag(
         SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[16], SYMCRYPT_GCM_BLOCK_SIZE );
     }

-    //
-    // Set up the correct counter block value
-    // This is a bit tricky. Normally all we have to do is set the last
-    // 4 bytes to 00000001. But if the message is 2^36-32 bytes then
-    // our use of a 64-bit incrementing CTR function has incremented bytes
-    // 8-11 of the nonce. (The 4-byte counter has overflowed, and
-    // the carry went into the next byte(s).)
-    // We resolve this by decrementing the 8-byte value first,
-    // and then setting the proper bits.
-    //
-    cntLow = SYMCRYPT_LOAD_MSBFIRST64( &pState->counterBlock[8] );
-    cntLow = ((cntLow - 1) & 0xffffffff00000000) | 1;
-    SYMCRYPT_STORE_MSBFIRST64( &pState->counterBlock[8], cntLow );
+    SymCryptGcmResetCounterBlock(pState);

     //
     // Convert the GHash state to an array of bytes
@@ -259,7 +252,7 @@ SymCryptGcmComputeTag(
     SYMCRYPT_STORE_MSBFIRST64( &buf[8], pState->ghashState.ull[0] );

     SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE );
-    SymCryptCtrMsb64( pState->pKey->pBlockCipher,
+    SymCryptCtrMsb32( pState->pKey->pBlockCipher,
                       &pState->pKey->blockcipherKey,
                       &pState->counterBlock[0],
                       buf,
@@ -269,7 +262,6 @@ SymCryptGcmComputeTag(
     SymCryptWipeKnownSize( buf, sizeof( buf ) );
 }
-

 SYMCRYPT_NOINLINE
 SYMCRYPT_ERROR
 SYMCRYPT_CALL
@@ -337,7 +329,58 @@ SymCryptGcmKeyCopy( _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_GCM_E
     SYMCRYPT_ASSERT( status == SYMCRYPT_NO_ERROR );
 }

+VOID
+SYMCRYPT_CALL
+SymCryptGcmSetNonce(
+    _Out_ PSYMCRYPT_GCM_STATE pState,
+    _In_reads_( cbNonce ) PCBYTE pbNonce,
+    SIZE_T cbNonce )
+{
+    SYMCRYPT_ASSERT( cbNonce >= GCM_MIN_NONCE_SIZE );
+
+    // Handle the nonce depending on its size, as specified in NIST SP800-38D
+    if( cbNonce == 12 )
+    {
+        // If len(nonce) = 96 bits (12 bytes), pre-counter block = nonce || (DWORD) 1
+        memcpy( &pState->counterBlock[0], pbNonce, cbNonce );
+        SymCryptWipeKnownSize( &pState->counterBlock[12], 4 );
+        pState->counterBlock[15] = 1;
+    }
+    else
+    {
+        // If len(nonce) != 96 bits (12 bytes),
+        // pre-counter block = GHASH(nonce padded to a multiple of 128 bits || (QWORD) len(nonce))
+        BYTE buf[SYMCRYPT_GF128_BLOCK_SIZE];
+        SIZE_T cbNonceRemainder = cbNonce & 0xf;
+
+        SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, pbNonce,
+                                 cbNonce - cbNonceRemainder );
+
+        // If the nonce length is not a multiple of 128 bits, it needs to be padded with zeros
+        // until it is, as GHASH is only defined on multiples of 128 bits.
+        if( cbNonceRemainder > 0 )
+        {
+            SymCryptWipeKnownSize( buf, sizeof(buf) );
+            memcpy( buf, pbNonce + cbNonce - cbNonceRemainder, cbNonceRemainder );
+            SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, buf, sizeof(buf) );
+        }
+
+        // Now we append the length of the nonce in bits. We take the length as a 64-bit integer,
+        // but it too must be padded to 128 bits for use in GHASH.
+        SymCryptWipeKnownSize( buf, 8 );
+        SYMCRYPT_STORE_MSBFIRST64( &buf[8], cbNonce * 8 );
+        SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, buf, sizeof(buf) );
+
+        SymCryptGHashResult( &pState->ghashState, pState->counterBlock );
+        SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) );
+    }
+
+    // Increment the last 32 bits of the counter. We'll recalculate the pre-counter block later
+    // when computing the tag.
+    SYMCRYPT_STORE_MSBFIRST32(
+        &pState->counterBlock[12],
+        1 + SYMCRYPT_LOAD_MSBFIRST32( &pState->counterBlock[12] ) );
+}
+
 SYMCRYPT_NOINLINE
 VOID
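To make the non-96-bit nonce path in SymCryptGcmSetNonce concrete, the following standalone sketch (not SymCrypt code) builds the byte stream that gets GHASHed for a hypothetical 5-byte nonce, per SP800-38D: the nonce zero-padded to a 16-byte boundary, then a final block of 8 zero bytes followed by the nonce length in bits as a big-endian 64-bit integer. GHASH itself is omitted; only the input layout is shown.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    uint8_t nonce[5] = { 0xDE, 0xAD, 0xBE, 0xEF, 0x01 };    // arbitrary example nonce
    size_t  cbNonce  = sizeof(nonce);

    size_t  cbPadded = (cbNonce + 15) & ~(size_t)15;        // nonce padded to a 16-byte multiple
    uint8_t ghashInput[64] = { 0 };                         // padded nonce + length block

    memcpy( ghashInput, nonce, cbNonce );                   // the rest stays zero (the padding)

    uint64_t bitLen = (uint64_t)cbNonce * 8;
    for( int i = 0; i < 8; i++ )
    {
        // Final block: 8 zero bytes (already zero) then the bit length, MSB first.
        ghashInput[cbPadded + 8 + i] = (uint8_t)( bitLen >> (56 - 8 * i) );
    }

    // Prints two 16-byte blocks: de ad be ef 01 padded with zeros, then 00..00 28 (40 bits).
    for( size_t i = 0; i < cbPadded + 16; i++ )
    {
        printf( "%02x%s", ghashInput[i], ( i % 16 == 15 ) ? "\n" : " " );
    }
    return 0;
}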
@@ -350,8 +393,6 @@ SymCryptGcmInit(
 {
     UNREFERENCED_PARAMETER( cbNonce );  // It is used in an ASSERT, but only in CHKed builds.

-    SYMCRYPT_ASSERT( cbNonce == GCM_REQUIRED_NONCE_SIZE );
-
     SYMCRYPT_CHECK_MAGIC( pExpandedKey );

     pState->pKey = pExpandedKey;
@@ -360,12 +401,7 @@ SymCryptGcmInit(
     pState->bytesInMacBlock = 0;
     SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) );

-    //
-    // Set up the counter block value
-    //
-    memcpy( &pState->counterBlock[0], pbNonce, GCM_REQUIRED_NONCE_SIZE );
-    SymCryptWipeKnownSize( &pState->counterBlock[12], 4 );
-    pState->counterBlock[15] = 2;
+    SymCryptGcmSetNonce(pState, pbNonce, cbNonce);

     SYMCRYPT_SET_MAGIC( pState );
 }
@@ -591,12 +627,11 @@ SymCryptGcmEncrypt(
     SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_GCM_BLOCK_SIZE];
     SYMCRYPT_GCM_STATE state;
     PSYMCRYPT_GCM_STATE pState = &state;
-    UINT64 cntLow;

     // SymCryptGcmInit( &state, pExpandedKey, pbNonce, cbNonce );
     UNREFERENCED_PARAMETER( cbNonce );  // It is used in an ASSERT, but only in CHKed builds.

-    SYMCRYPT_ASSERT( cbNonce == GCM_REQUIRED_NONCE_SIZE );
+    SYMCRYPT_ASSERT( cbNonce >= GCM_MIN_NONCE_SIZE );
     SYMCRYPT_ASSERT( cbTag >= GCM_MIN_TAG_SIZE && cbTag <= GCM_MAX_TAG_SIZE );

     SYMCRYPT_CHECK_MAGIC( pExpandedKey );
@@ -607,14 +642,7 @@ SymCryptGcmEncrypt(
     pState->bytesInMacBlock = 0;
     SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) );

-    memcpy( &pState->counterBlock[0], pbNonce, GCM_REQUIRED_NONCE_SIZE );
-    SymCryptWipeKnownSize( &pState->counterBlock[12], 4 );
-    pState->counterBlock[15] = 1;
-    // Keep cntLow (for encrypting the tag) for later
-    cntLow = *((PUINT64) &pState->counterBlock[8]);
-
-    pState->counterBlock[15] = 2;
+    SymCryptGcmSetNonce( pState, pbNonce, cbNonce );

     // SymCryptGcmAuthPart( &state, pbAuthData, cbAuthData );
     pState->cbAuthData += cbAuthData;
@@ -670,7 +698,8 @@ SymCryptGcmEncrypt(
         SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[16], SYMCRYPT_GCM_BLOCK_SIZE );
     }

-    *((PUINT64) &pState->counterBlock[8]) = cntLow;
+    // Reset the counter block prior to computing the tag
+    SymCryptGcmResetCounterBlock( pState );

     //
     // Convert the GHash state to an array of bytes
@@ -679,7 +708,7 @@ SymCryptGcmEncrypt(
     SYMCRYPT_STORE_MSBFIRST64( &buf[8], pState->ghashState.ull[0] );

     SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE );
-    SymCryptCtrMsb64( pState->pKey->pBlockCipher,
+    SymCryptCtrMsb32( pState->pKey->pBlockCipher,
                       &pState->pKey->blockcipherKey,
                       &pState->counterBlock[0],
                       buf,
@@ -712,12 +741,11 @@ SymCryptGcmDecrypt(
     SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_GCM_BLOCK_SIZE];
     SYMCRYPT_GCM_STATE state;
     PSYMCRYPT_GCM_STATE pState = &state;
-    UINT64 cntLow;

     // SymCryptGcmInit( &state, pExpandedKey, pbNonce, cbNonce );
     UNREFERENCED_PARAMETER( cbNonce );  // It is used in an ASSERT, but only in CHKed builds.

-    SYMCRYPT_ASSERT( cbNonce == GCM_REQUIRED_NONCE_SIZE );
+    SYMCRYPT_ASSERT( cbNonce >= GCM_MIN_NONCE_SIZE );
     SYMCRYPT_ASSERT( cbTag >= GCM_MIN_TAG_SIZE && cbTag <= GCM_MAX_TAG_SIZE );

     SYMCRYPT_CHECK_MAGIC( pExpandedKey );
@@ -728,13 +756,7 @@ SymCryptGcmDecrypt(
     pState->bytesInMacBlock = 0;
     SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) );

-    memcpy( &pState->counterBlock[0], pbNonce, GCM_REQUIRED_NONCE_SIZE );
-    SymCryptWipeKnownSize( &pState->counterBlock[12], 4 );
-    pState->counterBlock[15] = 1;
-    // Keep cntLow (for encrypting the tag) for later
-    cntLow = *((PUINT64) &pState->counterBlock[8]);
-
-    pState->counterBlock[15] = 2;
+    SymCryptGcmSetNonce( pState, pbNonce, cbNonce );

     // SymCryptGcmAuthPart( &state, pbAuthData, cbAuthData );
     pState->cbAuthData += cbAuthData;
@@ -790,7 +812,7 @@ SymCryptGcmDecrypt(
         SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[16], SYMCRYPT_GCM_BLOCK_SIZE );
     }

-    *((PUINT64) &pState->counterBlock[8]) = cntLow;
+    SymCryptGcmResetCounterBlock( pState );

     //
     // Convert the GHash state to an array of bytes
@@ -799,7 +821,7 @@ SymCryptGcmDecrypt(
     SYMCRYPT_STORE_MSBFIRST64( &buf[8], pState->ghashState.ull[0] );

     SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE );
-    SymCryptCtrMsb64( pState->pKey->pBlockCipher,
+    SymCryptCtrMsb32( pState->pKey->pBlockCipher,
                       &pState->pKey->blockcipherKey,
                       &pState->counterBlock[0],
                       buf,
lib/ghash.c: 11 changed lines
@@ -59,11 +59,14 @@ SymCryptGHashExpandKeyC(
 VOID
 SYMCRYPT_CALL
 SymCryptGHashAppendDataC(
-    _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable,
-    _Inout_ PSYMCRYPT_GF128_ELEMENT pState,
-    _In_reads_( cbData ) PCBYTE pbData,
-    _In_ SIZE_T cbData )
+    _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable,
+    _Inout_ PSYMCRYPT_GF128_ELEMENT pState,
+    _In_reads_( cbData ) PCBYTE pbData,
+    _In_range_( SYMCRYPT_GF128_BLOCK_SIZE, SIZE_T_MAX & ~0xf) SIZE_T cbData )
 {
+    SYMCRYPT_ASSERT(cbData >= SYMCRYPT_GF128_BLOCK_SIZE);
+    SYMCRYPT_ASSERT((cbData & 0xf) == 0);
+
     UINT64 R0, R1;
     UINT64 mask;
     SYMCRYPT_ALIGN UINT32 state32[4];
lib/sc_lib.h: 52 changed lines
@@ -1407,6 +1407,24 @@ SymCryptAesCtrMsb64Neon(
     _Out_writes_( cbData ) PBYTE pbDst,
     SIZE_T cbData );

+VOID
+SYMCRYPT_CALL
+SymCryptAesCtrMsb32Xmm(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData );
+
+VOID
+SYMCRYPT_CALL
+SymCryptAesCtrMsb32Neon(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData );
+
 VOID
 SYMCRYPT_CALL
 SymCryptXtsAesEncryptDataUnitC(
@@ -1645,6 +1663,40 @@ SymCryptGcmDecryptPartTwoPass(
     _Out_writes_( cbData ) PBYTE pbDst,
     SIZE_T cbData );

+VOID
+SYMCRYPT_CALL
+SymCryptCtrMsb32(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _Inout_updates_( pBlockCipher->blockSize )
+                            PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData );
+//
+// SymCryptCtrMsb32 implements the CTR cipher mode with a 32-bit increment function.
+// It is not intended to be used as-is, rather it is a building block for modes like GCM.
+// See the description of SymCryptCtrMsb64 in symcrypt.h for more details.
+//
+// For now, this function is only intended for use with GCM, which specifies the use of a
+// 32-bit increment function. It's only used in cases where we can't use one of the optimized
+// implementations (i.e. on ARM32 or x86[-64] without AESNI). Therefore, unlike the 64-bit version,
+// there are no optimized implementations of the CTR function to call. If we ever need this
+// functionality for other block cipher modes, this function will need to be updated and we'll
+// need to add an additional pointer to SYMCRYPT_BLOCKCIPHER for the optimized CTR function.
+
+VOID
+SYMCRYPT_CALL
+SymCryptAesCtrMsb32(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData );
+
+// SymCryptAesCtrMsb32 is a dispatch function for the optimized AES CTR implementations that use
+// a 32-bit counter function (currently only relevant to GCM).
+
 SYMCRYPT_ERROR
 SYMCRYPT_CALL
 SymCryptParallelHashProcess_serial(
Some file diffs are hidden because one or more lines are too long.
@@ -344,6 +344,13 @@ AuthEncImp<ImpXxx, AlgXxx, ModeXxx>::encrypt(
         return status;
     }

+    auto nonceSizes = getNonceSizes();
+    if( nonceSizes.find( cbNonce ) == nonceSizes.end() )
+    {
+        status = STATUS_NOT_SUPPORTED;
+        return status;
+    }
+
     if( !state.inComputation )
     {
         // Only init the authInfo if we are starting a new computation
@@ -385,7 +392,6 @@ AuthEncImp<ImpXxx, AlgXxx, ModeXxx>::encrypt(
         return status;
     }

-
 template<>
 NTSTATUS
 AuthEncImp<ImpXxx, AlgXxx, ModeXxx>::decrypt(
@@ -408,20 +414,23 @@ AuthEncImp<ImpXxx, AlgXxx, ModeXxx>::decrypt(
     if( flags != 0 )
     {
         status = STATUS_NOT_SUPPORTED;
-        goto cleanup;
+        return status;
     }

+    auto nonceSizes = getNonceSizes();
+    if( nonceSizes.find( cbNonce ) == nonceSizes.end() )
+    {
+        status = STATUS_NOT_SUPPORTED;
+        return status;
+    }
+
     authInfo.pbNonce = (PBYTE) pbNonce;
     authInfo.cbNonce = (ULONG) cbNonce;
     authInfo.pbAuthData = (PBYTE) pbAuthData;
     authInfo.cbAuthData = (ULONG) cbAuthData;
     authInfo.pbTag = (PBYTE) pbTag;
     authInfo.cbTag = (ULONG) cbTag;
     //authInfo.pbMacContext = NULL;
     //authInfo.cbMacContext = 0;
     //authInfo.cbAAD = 0;
     //authInfo.cbData = 0;
     //authInfo.dwFlags = 0;

     ULONG res;
     status = CngDecryptFn( state.hKey, (PBYTE) pbSrc, (ULONG) cbData, &authInfo, NULL, 0, pbDst, (ULONG) cbData, &res, 0 );
@@ -438,7 +447,6 @@ AuthEncImp<ImpXxx, AlgXxx, ModeXxx>::decrypt(
         CHECK( res == cbData, "?" );
     }

-cleanup:
     return status;
 }
@@ -2945,6 +2945,12 @@ AuthEncImp<ImpRsa32, AlgAes, ModeGcm>::encrypt(
         goto cleanup;
     }

+    if( cbNonce != 12 )
+    {
+        status = STATUS_NOT_SUPPORTED;
+        goto cleanup;
+    }
+
     status = AesGcm ( &state.key,
                       NULL,
                       16,
@@ -2953,6 +2959,7 @@ AuthEncImp<ImpRsa32, AlgAes, ModeGcm>::encrypt(
                       (PBYTE) pbAuthData, (ULONG) cbAuthData,
                       pbTag, (ULONG) cbTag,
                       ENCRYPT );

     CHECK( NT_SUCCESS( status ), "GCM encrypt failure" );

 cleanup:
@@ -2982,6 +2989,12 @@ AuthEncImp<ImpRsa32, AlgAes, ModeGcm>::decrypt(
         goto cleanup;
     }

+    if( cbNonce != 12 )
+    {
+        status = STATUS_NOT_SUPPORTED;
+        goto cleanup;
+    }
+
     status = AesGcm ( &state.key,
                       NULL,
                       16,
@@ -874,7 +874,10 @@ AuthEncImp<ImpXxx, AlgAes, ModeGcm>::getNonceSizes()
 {
     std::set<SIZE_T> res;

-    res.insert( 12 );
+    for( int i = 1; i <= 256; ++i )
+    {
+        res.insert( i );
+    }

     return res;
 }
@@ -270,7 +270,7 @@ katAuthEncSingle(
     SIZE_T cbTag,
     ULONGLONG line)
 {
-    BYTE bufData[512];
+    BYTE bufData[4096];
     BYTE bufTag[32];
     NTSTATUS status;
@@ -333,24 +333,27 @@ testAuthEncRandom( AuthEncMultiImp * pImp, int rrep, PCBYTE pbResult, SIZE_T cbR
     std::sort( nonceSizes.begin(), nonceSizes.end() );
     std::sort( tagSizes.begin(), tagSizes.end() );

-    //iprint( "# sizes: %d, %d, %d\n", keySizes.size(), nonceSizes.size(), tagSizes.size() );
-    //
-    //for( int i=0; i< tagSizes.size(); i++ )
-    //{
-    //    iprint( "tag %d: %d\n", i, tagSizes[i] );
-    //}
-
     memset( buf, 0, sizeof( buf ) );

     for( int i=0; i<rrep; i++ )
     {
         SIZE_T cbKey = keySizes[ rng.sizet( keySizes.size() )];
         SIZE_T cbNonce = nonceSizes[ rng.sizet( nonceSizes.size() )];
         SIZE_T cbTag = tagSizes[ rng.sizet( tagSizes.size() )];

+        // Kludge: previous implementations of AES-GCM only support 12-byte nonces. That doesn't
+        // block this test case, as those implementations will just be ignored if a different
+        // nonce size is used, but since the nonce size is arbitrary for GCM, if we choose
+        // randomly we will rarely get a 12-byte nonce, meaning that we'll have few results to
+        // compare against. So, we force a 12-byte nonce 50% of the time.
+        if( pImp->m_algorithmName == "AesGcm" && (rng.byte() & 1) )
+        {
+            cbNonce = 12;
+        }
+
         CHECK( cbKey <= bufSize && cbNonce <= bufSize && cbTag <= sizeof( tagBuf ), "??" );

         SIZE_T keyIdx = rng.sizet( bufSize - cbKey );
         SIZE_T nonceIdx = rng.sizet( bufSize - cbNonce );
         SIZE_T tagIdx = rng.sizet( bufSize - cbTag );
@@ -363,6 +366,8 @@ testAuthEncRandom( AuthEncMultiImp * pImp, int rrep, PCBYTE pbResult, SIZE_T cbR
         rng.randomSubRange( bufSize, &srcIdx, &cbData );
         SIZE_T dstIdx = rng.sizet( bufSize - cbData );

+        pImp->setKey( &buf[keyIdx], cbKey );
+
         pImp->encrypt( &buf[nonceIdx], cbNonce,
                        &buf[authDataIdx], cbAuthData,
                        &buf[srcIdx], tmp1, cbData,