diff --git a/azure-build-template.yml b/azure-build-template.yml index a10c97b..29890cc 100644 --- a/azure-build-template.yml +++ b/azure-build-template.yml @@ -132,6 +132,14 @@ steps: ./symcryptunittest displayName: 'Execute unit tests' name: '${{parameters.env}}UnitTest_${{parameters.buildType}}' + - ${{ if eq(parameters.arch, 'AMD64') }}: + # Run unit tests in a mode where the Ymm registers must not be modified by the functional tests + # Set GLIBC to not use AVX using GLIBC_TUNABLES + - script: | + cd bin/exe/${{parameters.arch}}/${{parameters.env}} + GLIBC_TUNABLES=glibc.cpu.hwcaps=-AVX_Usable,-AVX_Fast_Unaligned_Load,-AVX2_Usable ./symcryptunittest testSaveYmm + displayName: 'Execute unit tests (Test Ymm Save/Restore)' + name: '${{parameters.env}}UnitTest_TestYmm_${{parameters.buildType}}' - ${{ if ne(parameters.buildType, 'Sanitize') }}: # Only run oe module test if rdseed is present on the CPU - script: | diff --git a/gen/main_gen.cpp b/gen/main_gen.cpp index 92243b5..0e64927 100644 --- a/gen/main_gen.cpp +++ b/gen/main_gen.cpp @@ -590,7 +590,7 @@ createIfxTpmWeakKeyTable() fclose( f ); } -int __cdecl +int SYMCRYPT_CDECL main( int argc, _In_reads_( argc ) char * argv[] ) { printf( "SymCrypt constants generation program\n" ); diff --git a/inc/symcrypt_internal.h b/inc/symcrypt_internal.h index d1ee8ef..a115159 100644 --- a/inc/symcrypt_internal.h +++ b/inc/symcrypt_internal.h @@ -390,7 +390,7 @@ C_ASSERT( (SYMCRYPT_ALIGN_VALUE & (SYMCRYPT_ALIGN_VALUE - 1 )) == 0 ); #define SYMCRYPT_CPU_FEATURE_SSSE3 0x0002 // includes SSE, SSE2, SSE3, SSSE3 #define SYMCRYPT_CPU_FEATURE_AESNI 0x0004 #define SYMCRYPT_CPU_FEATURE_PCLMULQDQ 0x0008 -#define SYMCRYPT_CPU_FEATURE_AVX2 0x0010 // includes AVX, AVX2 +#define SYMCRYPT_CPU_FEATURE_AVX2 0x0010 // includes AVX, AVX2 - also indicates support for saving/restoring Ymm registers #define SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL 0x0020 // if SymCryptSaveXmm() will never fail #define SYMCRYPT_CPU_FEATURE_SHANI 0x0040 #define SYMCRYPT_CPU_FEATURE_BMI2 0x0080 // MULX, RORX, SARX, SHLX, SHRX @@ -398,8 +398,9 @@ C_ASSERT( (SYMCRYPT_ALIGN_VALUE & (SYMCRYPT_ALIGN_VALUE - 1 )) == 0 ); #define SYMCRYPT_CPU_FEATURE_ADX 0x0100 // ADCX, ADOX #define SYMCRYPT_CPU_FEATURE_RDRAND 0x0200 #define SYMCRYPT_CPU_FEATURE_RDSEED 0x0400 -#define SYMCRYPT_CPU_FEATURE_VAES_256 0x0800 -#define SYMCRYPT_CPU_FEATURE_VAES_512 0x1000 +#define SYMCRYPT_CPU_FEATURE_VAES 0x0800 // support for VAES and VPCLMULQDQ (may only be supported on Ymm registers (i.e. Zen3)) +#define SYMCRYPT_CPU_FEATURE_AVX512 0x1000 // includes F, VL, DQ, BW (VL allows AVX-512 instructions to be used on Xmm and Ymm registers) + // also indicates support for saving/restoring additional AVX-512 state #define SYMCRYPT_CPU_FEATURE_CMPXCHG16B 0x2000 // Compare and Swap 128b value diff --git a/lib/cpuid.c b/lib/cpuid.c index 15af977..ee46b70 100644 --- a/lib/cpuid.c +++ b/lib/cpuid.c @@ -69,19 +69,19 @@ CPUID_BIT_INFO cpuidBitInfo[] = { {1, WORD_EDX, CPUID_1_EDX_SSE2_BIT, SYMCRYPT_CPU_FEATURE_SSE2 | SYMCRYPT_CPU_FEATURE_SSSE3 }, {1, WORD_ECX, CPUID_1_ECX_SSE3_BIT, SYMCRYPT_CPU_FEATURE_SSSE3 }, {1, WORD_ECX, CPUID_1_ECX_SSSE3_BIT, SYMCRYPT_CPU_FEATURE_SSSE3 }, - {1, WORD_ECX, CPUID_1_ECX_AVX_BIT, SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_VAES_256 }, + {1, WORD_ECX, CPUID_1_ECX_AVX_BIT, SYMCRYPT_CPU_FEATURE_AVX2 }, {1, WORD_ECX, CPUID_1_ECX_CMPXCHG16B_BIT, SYMCRYPT_CPU_FEATURE_CMPXCHG16B }, - {7, WORD_EBX, CPUID_70_EBX_AVX2_BIT, SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_VAES_256 }, + {7, WORD_EBX, CPUID_70_EBX_AVX2_BIT, SYMCRYPT_CPU_FEATURE_AVX2 }, {7, WORD_EBX, CPUID_70_EBX_RDSEED_BIT, SYMCRYPT_CPU_FEATURE_RDSEED }, {7, WORD_EBX, CPUID_70_EBX_SHANI_BIT, SYMCRYPT_CPU_FEATURE_SHANI }, {7, WORD_EBX, CPUID_70_EBX_ADX_BIT, SYMCRYPT_CPU_FEATURE_ADX }, {7, WORD_EBX, CPUID_70_EBX_BMI2_BIT, SYMCRYPT_CPU_FEATURE_BMI2 }, - {7, WORD_EBX, CPUID_70_EBX_AVX512F_BIT, SYMCRYPT_CPU_FEATURE_VAES_512 }, - {7, WORD_EBX, CPUID_70_EBX_AVX512VL_BIT, SYMCRYPT_CPU_FEATURE_VAES_512 }, - {7, WORD_EBX, CPUID_70_EBX_AVX512BW_BIT, SYMCRYPT_CPU_FEATURE_VAES_512 }, - {7, WORD_EBX, CPUID_70_EBX_AVX512DQ_BIT, SYMCRYPT_CPU_FEATURE_VAES_512 }, - {7, WORD_ECX, CPUID_70_ECX_VAES_BIT, SYMCRYPT_CPU_FEATURE_VAES_512 | SYMCRYPT_CPU_FEATURE_VAES_256 }, - {7, WORD_ECX, CPUID_70_ECX_VPCLMULQDQ_BIT, SYMCRYPT_CPU_FEATURE_VAES_512 | SYMCRYPT_CPU_FEATURE_VAES_256 }, + {7, WORD_EBX, CPUID_70_EBX_AVX512F_BIT, SYMCRYPT_CPU_FEATURE_AVX512 }, + {7, WORD_EBX, CPUID_70_EBX_AVX512VL_BIT, SYMCRYPT_CPU_FEATURE_AVX512 }, + {7, WORD_EBX, CPUID_70_EBX_AVX512BW_BIT, SYMCRYPT_CPU_FEATURE_AVX512 }, + {7, WORD_EBX, CPUID_70_EBX_AVX512DQ_BIT, SYMCRYPT_CPU_FEATURE_AVX512 }, + {7, WORD_ECX, CPUID_70_ECX_VAES_BIT, SYMCRYPT_CPU_FEATURE_VAES }, + {7, WORD_ECX, CPUID_70_ECX_VPCLMULQDQ_BIT, SYMCRYPT_CPU_FEATURE_VAES }, }; extern void __cpuid( _Out_writes_(4) int a[4], int b); // Add SAL annotation to intrinsic declaration to keep Prefast happy. @@ -95,7 +95,7 @@ SymCryptDetectCpuFeaturesByCpuid( UINT32 flags ) int InfoType; int maxInfoType; int i; - BOOLEAN allowYmm; + BOOLEAN allowYmm, allowZmm; INT64 xGetBvResult; // @@ -112,8 +112,8 @@ SymCryptDetectCpuFeaturesByCpuid( UINT32 flags ) SYMCRYPT_CPU_FEATURE_ADX | SYMCRYPT_CPU_FEATURE_RDRAND | SYMCRYPT_CPU_FEATURE_RDSEED | - // SYMCRYPT_CPU_FEATURE_VAES_512 | - SYMCRYPT_CPU_FEATURE_VAES_256 | + SYMCRYPT_CPU_FEATURE_AVX512 | + SYMCRYPT_CPU_FEATURE_VAES | SYMCRYPT_CPU_FEATURE_CMPXCHG16B ); @@ -150,6 +150,7 @@ SymCryptDetectCpuFeaturesByCpuid( UINT32 flags ) // all our (known) OSes have it. // allowYmm = FALSE; + allowZmm = FALSE; SymCryptCpuidExFunc( CPUInfo, 1, 0 ); if( (CPUInfo[WORD_ECX] & (1 << CPUID_1_ECX_OSXSAVE_BIT)) != 0 ) @@ -161,13 +162,40 @@ SymCryptDetectCpuFeaturesByCpuid( UINT32 flags ) if( (xGetBvResult & 0x6) == 0x6) { allowYmm = TRUE; + + // + // For AVX-512, also check that bits 5, 6, and 7 are set, corresponding to the + // opmask, ZMM (0-15), and ZMM (16-31) register states + // This follows the recommendation in the Intel 64 and IA-32 Architectures Software + // Developer's Manual, Volume 1, 15.3 / 15.4. + // + // It seems plausible that on some system the OS would not support save/restore of + // AVX-512 state, but use of AVX-512VL instructions on Ymm or Xmm registers would be + // OK, however Intel explicitly suggests that we should only use AVX512-VL if the + // support is indicated by xgetbv, so we use the same logic as for AVX2 (our + // SymCrypt feature indicates both CPU support, and OS support for saving/restoring + // the extended state) + // + if( (xGetBvResult & 0xe0) == 0xe0) + { + allowZmm = TRUE; + } } } if( !allowYmm ) { // Disallow the AVX2-dependent code because we don't have OS YMM support. - result |= SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_VAES_512 | SYMCRYPT_CPU_FEATURE_VAES_256; + result |= SYMCRYPT_CPU_FEATURE_AVX2; + } + + if( !allowZmm ) + { + // Disallow any AVX512-dependent code because we don't have OS ZMM support. + // Note that not all AVX-512 dependent code will need to save/restore ZMM state, but we + // do not support AVX-512 instructions (even acting on YMM or XMM registers), unless the + // OS indicates support via XCR0 + result |= SYMCRYPT_CPU_FEATURE_AVX512; } } diff --git a/lib/env_linuxUserMode.c b/lib/env_linuxUserMode.c index e614f8b..ae55453 100644 --- a/lib/env_linuxUserMode.c +++ b/lib/env_linuxUserMode.c @@ -25,19 +25,6 @@ SymCryptInitEnvLinuxUsermode( UINT32 version ) #if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 SymCryptDetectCpuFeaturesByCpuid( SYMCRYPT_CPUID_DETECT_FLAG_CHECK_OS_SUPPORT_FOR_YMM ); - // - // Don't use Ymm registers if the OS doesn't report them as available. - // We assume Ymm register swapping isn't supported unless we can verify that it is. - // - g_SymCryptCpuFeaturesNotPresent |= SYMCRYPT_CPU_FEATURE_AVX2; - - #if SYMCRYPT_MS_VC - if( (GetEnabledXStateFeatures() & XSTATE_MASK_AVX) != 0 ) - { - g_SymCryptCpuFeaturesNotPresent &= ~SYMCRYPT_CPU_FEATURE_AVX2; - } - #endif - // // Our SaveXmm function never fails because it doesn't have to do anything in User mode. // diff --git a/lib/sc_lib.h b/lib/sc_lib.h index 86d8b45..8571856 100644 --- a/lib/sc_lib.h +++ b/lib/sc_lib.h @@ -249,8 +249,8 @@ SymCryptCheckLibraryInitialized() #define SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE (SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_AESNI) #define SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE) -#define SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURE_VAES_256) -#define SYMCRYPT_CPU_FEATURES_FOR_VAES_512_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURE_VAES_512) +#define SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_VAES) +#define SYMCRYPT_CPU_FEATURES_FOR_VAES_512_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURE_AVX512 | SYMCRYPT_CPU_FEATURE_VAES) #define SYMCRYPT_CPU_FEATURES_FOR_SHANI_CODE (SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_SHANI) diff --git a/test/indirect_call_perf/main.cpp b/test/indirect_call_perf/main.cpp index dad8c05..75efe02 100644 --- a/test/indirect_call_perf/main.cpp +++ b/test/indirect_call_perf/main.cpp @@ -102,7 +102,7 @@ void printPerfNumbers() } -int __cdecl +int SYMCRYPT_CDECL main( int argc, _In_reads_( argc ) LPSTR * argv[] ) { UNREFERENCED_PARAMETER( argv ); diff --git a/unittest/exe_Win7nLater/main_exe.cpp b/unittest/exe_Win7nLater/main_exe.cpp index 5658404..585ed68 100644 --- a/unittest/exe_Win7nLater/main_exe.cpp +++ b/unittest/exe_Win7nLater/main_exe.cpp @@ -9,31 +9,4 @@ SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN7_N_LATER; -#include "main_exe_common.cpp" - -int __cdecl -main( int argc, _In_reads_( argc ) char * argv[] ) -{ - - initTestInfrastructure( argc, argv ); - - addAllAlgs(); - - if (g_profile) - { - runProfiling(); - } - else - { - runFunctionalTests(); - - testMultiThread(); - - runPerfTests(); - } - - exitTestInfrastructure(); - - return 0; -} - +#include "main_exe_common_windows.cpp" diff --git a/unittest/exe_Win8_1nLater/main_exe.cpp b/unittest/exe_Win8_1nLater/main_exe.cpp index b32a20d..5622694 100644 --- a/unittest/exe_Win8_1nLater/main_exe.cpp +++ b/unittest/exe_Win8_1nLater/main_exe.cpp @@ -9,31 +9,4 @@ SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN8_1_N_LATER; -#include "main_exe_common.cpp" - -int __cdecl -main( int argc, _In_reads_( argc ) char * argv[] ) -{ - - initTestInfrastructure( argc, argv ); - - addAllAlgs(); - - if (g_profile) - { - runProfiling(); - } - else - { - runFunctionalTests(); - - testMultiThread(); - - runPerfTests(); - } - - exitTestInfrastructure(); - - return 0; -} - +#include "main_exe_common_windows.cpp" diff --git a/unittest/exe_legacy/main_exe.cpp b/unittest/exe_legacy/main_exe.cpp index 6e5becb..2204e3a 100644 --- a/unittest/exe_legacy/main_exe.cpp +++ b/unittest/exe_legacy/main_exe.cpp @@ -9,31 +9,4 @@ SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_LEGACY; -#include "main_exe_common.cpp" - -int __cdecl -main( int argc, _In_reads_( argc ) char * argv[] ) -{ - - initTestInfrastructure( argc, argv ); - - addAllAlgs(); - - if (g_profile) - { - runProfiling(); - } - else - { - runFunctionalTests(); - - testMultiThread(); - - runPerfTests(); - } - - exitTestInfrastructure(); - - return 0; -} - +#include "main_exe_common_windows.cpp" diff --git a/unittest/exe_linux/main_exe.cpp b/unittest/exe_linux/main_exe.cpp index cb1d74c..faf569a 100644 --- a/unittest/exe_linux/main_exe.cpp +++ b/unittest/exe_linux/main_exe.cpp @@ -9,59 +9,81 @@ SYMCRYPT_ENVIRONMENT_DEFS( Unittest ); -const char * g_implementationNames[] = +#if SYMCRYPT_CPU_AMD64 +///////////////////////////////////////////////////////////// +// +// Code to set up the YMM registers for testing in SAVE_YMM mode + +__m256i g_ymmStartState[16]; +__m256i g_ymmTestState[16]; + +VOID +verifyVectorRegisters() { - ImpSc::name, - // ImpRsa32::name, - // ImpRsa32b::name, - // ImpCapi::name, - // ImpCng::name, - // ImpMsBignum::name, - ImpRef::name, - NULL, -}; - -// #include "main_exe_common.cpp" - -int -main( int argc, _In_reads_( argc ) char * argv[] ) -{ - - initTestInfrastructure( argc, argv ); - - TestSaveXmmEnabled = TRUE; - TestSaveYmmEnabled = TRUE; - - // addCapiAlgs(); - // addRsa32Algs(); - // addCngAlgs(); - // addMsBignumAlgs(); - addSymCryptAlgs(); - addRefAlgs(); - - if (!g_profile) + if( !SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 ) ) { - runFunctionalTests(); + return; } - TestSaveXmmEnabled = FALSE; - TestSaveYmmEnabled = FALSE; - - if (g_profile) + // + // We know that AVX2 is present from here on + // + if( TestSaveYmmEnabled ) { - runProfiling(); + SymCryptEnvUmSaveYmmRegistersAsm( g_ymmTestState ); + + // + // It is perfectly fine for the XMM register values to have been modified. + // We just test that the top half of the Ymm registers have been preserved. + // + for( int i=0; i>5, i&31); + } + } } - else - { - runPerfTests(); - - // testMultiThread(); - - testSelftest(); - } - - exitTestInfrastructure(); - - return 0; } +VOID +initVectorRegisters() +{ + if( !SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 ) ) + { + return; + } + if( TestSaveYmmEnabled ) + { + // + // Do the memsets outside the save area as it might use vector registers + // Set the initial Ymm registers to a non-trivial value. It is likely (for performance + // reasons) that the upper halves are already zero-ed and will be re-zeroed by any function + // we call. + // + memset( g_ymmTestState, 17, sizeof( g_ymmTestState ) ); + memset( g_ymmStartState, (__rdtsc() & 255) ^ 0x42, sizeof( g_ymmStartState ) ); + SymCryptEnvUmRestoreYmmRegistersAsm( g_ymmStartState ); + verifyVectorRegisters(); + } +} + +#else + +VOID verifyVectorRegisters() +{ +} + +VOID initVectorRegisters() +{ +} + +#endif + +VOID testMultiThread() +{ +} + +#include "main_exe_common.cpp" diff --git a/unittest/exe_test/main_exe.cpp b/unittest/exe_test/main_exe.cpp index a2fb26b..f7442cf 100644 --- a/unittest/exe_test/main_exe.cpp +++ b/unittest/exe_test/main_exe.cpp @@ -9,48 +9,4 @@ SYMCRYPT_ENVIRONMENT_DEFS( Unittest ); -#include "main_exe_common.cpp" - -int __cdecl -main( int argc, _In_reads_( argc ) char * argv[] ) -{ - - initTestInfrastructure( argc, argv ); - - // As of January 2020, we can't test XMM register saving and restoring because basic CRT - // functions like memcpy and memcmp use the XMM registers. This causes the test to fail on - // x86, but there's no point in testing this on AMD64 either because it effectively ignores - // the modified XMM values, meaning it's not actually testing anything. - TestSaveXmmEnabled = FALSE; - TestSaveYmmEnabled = TRUE; - - addAllAlgs(); - - if (!g_profile && !g_measure_specific_sizes) - { - runFunctionalTests(); - } - - TestSaveYmmEnabled = FALSE; - - if (g_profile) - { - runProfiling(); - } - else - { - runPerfTests(); - - if (!g_measure_specific_sizes) - { - testMultiThread(); - - testSelftest(); - } - } - - exitTestInfrastructure(); - - return 0; -} - +#include "main_exe_common_windows.cpp" diff --git a/unittest/inc/test_lib.h b/unittest/inc/test_lib.h index d3ec8ca..c33cad0 100644 --- a/unittest/inc/test_lib.h +++ b/unittest/inc/test_lib.h @@ -1076,7 +1076,6 @@ std::unique_ptr> getAlgorithmsOfOneType( ); extern BOOLEAN TestSelftestsEnabled; extern BOOLEAN TestSaveXmmEnabled; extern BOOLEAN TestSaveYmmEnabled; -extern BOOLEAN TestSaveYmmFallback; extern ULONGLONG TestFatalCount; extern ULONGLONG TestErrorInjectionCount; @@ -1085,21 +1084,14 @@ extern ULONG TestErrorInjectionProb; extern BYTE TestErrorInjectionSeed[ SYMCRYPT_SHA1_RESULT_SIZE ]; -#if SYMCRYPT_CPU_X86 +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 // -// These Save/Restore functions are used by user mode SaveXmm code, plus the testing code. +// These Save/Restore functions are used by user mode SaveXmm and Ymm code, plus the testing code. // extern "C" { VOID SYMCRYPT_CALL SymCryptEnvUmSaveXmmRegistersAsm( __m128i * buffer ); VOID SYMCRYPT_CALL SymCryptEnvUmRestoreXmmRegistersAsm( __m128i * buffer ); -} -#endif -#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 -// -// These Save/Restore functions are used by user mode SaveXmm code, plus the testing code. -// -extern "C" { VOID SYMCRYPT_CALL SymCryptEnvUmSaveYmmRegistersAsm( __m256i * buffer ); VOID SYMCRYPT_CALL SymCryptEnvUmRestoreYmmRegistersAsm( __m256i * buffer ); } @@ -1202,16 +1194,10 @@ CHAR charToLower( CHAR c ); extern double g_wipePerf[PERF_WIPE_MAX_SIZE+1][PERF_WIPE_N_OFFSETS]; VOID -initXmmRegisters(); +initVectorRegisters(); VOID -verifyXmmRegisters(); - -VOID -initYmmRegisters(); - -VOID -verifyYmmRegisters(); +verifyVectorRegisters(); VOID diff --git a/unittest/lib/CMakeLists.txt b/unittest/lib/CMakeLists.txt index bf87951..5adb1da 100644 --- a/unittest/lib/CMakeLists.txt +++ b/unittest/lib/CMakeLists.txt @@ -19,7 +19,6 @@ set(SOURCES testutil.cpp testKdf.cpp testTlsCbcHmac.cpp - env_SymCryptUnittest.cpp testMultiThread.cpp rndDriver.cpp testArithmetic.cpp @@ -39,6 +38,7 @@ set(SOURCES # Append Windows-specific sources if(WIN32) list(APPEND SOURCES + env_windowsSymCryptUnittest.cpp rsa32_implementations.cpp capi_implementations.cpp cng_implementations.cpp @@ -54,15 +54,19 @@ if(WIN32) # testDl_msbignum.cpp # testDl_cng.cpp ) +else() + list(APPEND SOURCES + env_linuxSymCryptUnittest.cpp + ) endif() if(WIN32 AND NOT SYMCRYPT_TARGET_ENV MATCHES "Generic") if(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64") - list(APPEND SOURCES amd64/saveymm.asm) - set_source_files_properties(amd64/saveymm.asm PROPERTY LANGUAGE ASM_MASM) + list(APPEND SOURCES amd64/savevectors.asm) + set_source_files_properties(amd64/savevectors.asm PROPERTY LANGUAGE ASM_MASM) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "X86") - list(APPEND SOURCES i386/savexmm.asm) - set_source_files_properties(i386/savexmm.asm PROPERTY LANGUAGE ASM_MASM) + list(APPEND SOURCES i386/savevectors.asm) + set_source_files_properties(i386/savevectors.asm PROPERTY LANGUAGE ASM_MASM) endif() elseif(NOT SYMCRYPT_TARGET_ENV MATCHES "Generic") if(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64") diff --git a/unittest/lib/amd64/saveymm.asm b/unittest/lib/amd64/savevectors.asm similarity index 51% rename from unittest/lib/amd64/saveymm.asm rename to unittest/lib/amd64/savevectors.asm index 9407eea..65ba68b 100644 --- a/unittest/lib/amd64/saveymm.asm +++ b/unittest/lib/amd64/savevectors.asm @@ -1,68 +1,121 @@ -; -; saveymm.asm -; -; Copyright (c) Microsoft Corporation. Licensed under the MIT license. -; - -include ksamd64.inc - - TITLE saveymm.asm - -;VOID SYMCRYPT_CALL SymCryptEnvUmSaveYmmRegistersAsm( __m256i * buffer ); -;VOID SYMCRYPT_CALL SymCryptEnvUmRestoreYmmRegistersAsm( __m256i * buffer ); - - LEAF_ENTRY SymCryptEnvUmSaveYmmRegistersAsm, _TEXT - - add rcx, 31 - and rcx, NOT 31 - - vmovaps [rcx+ 0 * 32 ], ymm0 - vmovaps [rcx+ 1 * 32 ], ymm1 - vmovaps [rcx+ 2 * 32 ], ymm2 - vmovaps [rcx+ 3 * 32 ], ymm3 - vmovaps [rcx+ 4 * 32 ], ymm4 - vmovaps [rcx+ 5 * 32 ], ymm5 - vmovaps [rcx+ 6 * 32 ], ymm6 - vmovaps [rcx+ 7 * 32 ], ymm7 - vmovaps [rcx+ 8 * 32 ], ymm8 - vmovaps [rcx+ 9 * 32 ], ymm9 - vmovaps [rcx+ 10 * 32 ], ymm10 - vmovaps [rcx+ 11 * 32 ], ymm11 - vmovaps [rcx+ 12 * 32 ], ymm12 - vmovaps [rcx+ 13 * 32 ], ymm13 - vmovaps [rcx+ 14 * 32 ], ymm14 - vmovaps [rcx+ 15 * 32 ], ymm15 - - ret - - LEAF_END SymCryptEnvUmSaveYmmRegistersAsm, _TEXT - - LEAF_ENTRY SymCryptEnvUmRestoreYmmRegistersAsm, _TEXT - - add rcx, 31 - and rcx, NOT 31 - - vmovaps ymm0 , [rcx+ 0 * 32 ] - vmovaps ymm1 , [rcx+ 1 * 32 ] - vmovaps ymm2 , [rcx+ 2 * 32 ] - vmovaps ymm3 , [rcx+ 3 * 32 ] - vmovaps ymm4 , [rcx+ 4 * 32 ] - vmovaps ymm5 , [rcx+ 5 * 32 ] - vmovaps ymm6 , [rcx+ 6 * 32 ] - vmovaps ymm7 , [rcx+ 7 * 32 ] - vmovaps ymm8 , [rcx+ 8 * 32 ] - vmovaps ymm9 , [rcx+ 9 * 32 ] - vmovaps ymm10, [rcx+ 10 * 32 ] - vmovaps ymm11, [rcx+ 11 * 32 ] - vmovaps ymm12, [rcx+ 12 * 32 ] - vmovaps ymm13, [rcx+ 13 * 32 ] - vmovaps ymm14, [rcx+ 14 * 32 ] - vmovaps ymm15, [rcx+ 15 * 32 ] - - ret - - LEAF_END SymCryptEnvUmRestoreYmmRegistersAsm, _TEXT - - -END - +; +; savevectors.asm +; +; Copyright (c) Microsoft Corporation. Licensed under the MIT license. +; + +include ksamd64.inc + + TITLE savevectors.asm + +;VOID SYMCRYPT_CALL SymCryptEnvUmSaveXmmRegistersAsm( __m128i * buffer ); +;VOID SYMCRYPT_CALL SymCryptEnvUmRestoreXmmRegistersAsm( __m128i * buffer ); + + LEAF_ENTRY SymCryptEnvUmSaveXmmRegistersAsm, _TEXT + + add rcx, 15 + and rcx, NOT 15 + + movaps [rcx+ 0 * 16 ], xmm0 + movaps [rcx+ 1 * 16 ], xmm1 + movaps [rcx+ 2 * 16 ], xmm2 + movaps [rcx+ 3 * 16 ], xmm3 + movaps [rcx+ 4 * 16 ], xmm4 + movaps [rcx+ 5 * 16 ], xmm5 + movaps [rcx+ 6 * 16 ], xmm6 + movaps [rcx+ 7 * 16 ], xmm7 + movaps [rcx+ 8 * 16 ], xmm8 + movaps [rcx+ 9 * 16 ], xmm9 + movaps [rcx+ 10 * 16 ], xmm10 + movaps [rcx+ 11 * 16 ], xmm11 + movaps [rcx+ 12 * 16 ], xmm12 + movaps [rcx+ 13 * 16 ], xmm13 + movaps [rcx+ 14 * 16 ], xmm14 + movaps [rcx+ 15 * 16 ], xmm15 + + ret + + LEAF_END SymCryptEnvUmSaveXmmRegistersAsm, _TEXT + + LEAF_ENTRY SymCryptEnvUmRestoreXmmRegistersAsm, _TEXT + + add rcx, 15 + and rcx, NOT 15 + + movaps xmm0 , [rcx+ 0 * 16 ] + movaps xmm1 , [rcx+ 1 * 16 ] + movaps xmm2 , [rcx+ 2 * 16 ] + movaps xmm3 , [rcx+ 3 * 16 ] + movaps xmm4 , [rcx+ 4 * 16 ] + movaps xmm5 , [rcx+ 5 * 16 ] + movaps xmm6 , [rcx+ 6 * 16 ] + movaps xmm7 , [rcx+ 7 * 16 ] + movaps xmm8 , [rcx+ 8 * 16 ] + movaps xmm9 , [rcx+ 9 * 16 ] + movaps xmm10, [rcx+ 10 * 16 ] + movaps xmm11, [rcx+ 11 * 16 ] + movaps xmm12, [rcx+ 12 * 16 ] + movaps xmm13, [rcx+ 13 * 16 ] + movaps xmm14, [rcx+ 14 * 16 ] + movaps xmm15, [rcx+ 15 * 16 ] + + ret + + LEAF_END SymCryptEnvUmRestoreXmmRegistersAsm, _TEXT + +;VOID SYMCRYPT_CALL SymCryptEnvUmSaveYmmRegistersAsm( __m256i * buffer ); +;VOID SYMCRYPT_CALL SymCryptEnvUmRestoreYmmRegistersAsm( __m256i * buffer ); + + LEAF_ENTRY SymCryptEnvUmSaveYmmRegistersAsm, _TEXT + + add rcx, 31 + and rcx, NOT 31 + + vmovaps [rcx+ 0 * 32 ], ymm0 + vmovaps [rcx+ 1 * 32 ], ymm1 + vmovaps [rcx+ 2 * 32 ], ymm2 + vmovaps [rcx+ 3 * 32 ], ymm3 + vmovaps [rcx+ 4 * 32 ], ymm4 + vmovaps [rcx+ 5 * 32 ], ymm5 + vmovaps [rcx+ 6 * 32 ], ymm6 + vmovaps [rcx+ 7 * 32 ], ymm7 + vmovaps [rcx+ 8 * 32 ], ymm8 + vmovaps [rcx+ 9 * 32 ], ymm9 + vmovaps [rcx+ 10 * 32 ], ymm10 + vmovaps [rcx+ 11 * 32 ], ymm11 + vmovaps [rcx+ 12 * 32 ], ymm12 + vmovaps [rcx+ 13 * 32 ], ymm13 + vmovaps [rcx+ 14 * 32 ], ymm14 + vmovaps [rcx+ 15 * 32 ], ymm15 + + ret + + LEAF_END SymCryptEnvUmSaveYmmRegistersAsm, _TEXT + + LEAF_ENTRY SymCryptEnvUmRestoreYmmRegistersAsm, _TEXT + + add rcx, 31 + and rcx, NOT 31 + + vmovaps ymm0 , [rcx+ 0 * 32 ] + vmovaps ymm1 , [rcx+ 1 * 32 ] + vmovaps ymm2 , [rcx+ 2 * 32 ] + vmovaps ymm3 , [rcx+ 3 * 32 ] + vmovaps ymm4 , [rcx+ 4 * 32 ] + vmovaps ymm5 , [rcx+ 5 * 32 ] + vmovaps ymm6 , [rcx+ 6 * 32 ] + vmovaps ymm7 , [rcx+ 7 * 32 ] + vmovaps ymm8 , [rcx+ 8 * 32 ] + vmovaps ymm9 , [rcx+ 9 * 32 ] + vmovaps ymm10, [rcx+ 10 * 32 ] + vmovaps ymm11, [rcx+ 11 * 32 ] + vmovaps ymm12, [rcx+ 12 * 32 ] + vmovaps ymm13, [rcx+ 13 * 32 ] + vmovaps ymm14, [rcx+ 14 * 32 ] + vmovaps ymm15, [rcx+ 15 * 32 ] + + ret + + LEAF_END SymCryptEnvUmRestoreYmmRegistersAsm, _TEXT + +END diff --git a/unittest/lib/amd64/saveymm-gas.asm b/unittest/lib/amd64/saveymm-gas.asm index 23f76bb..cd6bbde 100644 --- a/unittest/lib/amd64/saveymm-gas.asm +++ b/unittest/lib/amd64/saveymm-gas.asm @@ -5,8 +5,6 @@ # .intel_syntax noprefix - # TITLE saveymm.asm - .text #VOID SYMCRYPT_CALL SymCryptEnvUmSaveYmmRegistersAsm( __m256i * buffer ); diff --git a/unittest/lib/env_SymCryptUnittest.cpp b/unittest/lib/env_commonSymCryptUnittest.cpp similarity index 79% rename from unittest/lib/env_SymCryptUnittest.cpp rename to unittest/lib/env_commonSymCryptUnittest.cpp index a091d5e..b018283 100644 --- a/unittest/lib/env_SymCryptUnittest.cpp +++ b/unittest/lib/env_commonSymCryptUnittest.cpp @@ -1,412 +1,352 @@ -// -// env_SymCryptUnitTest -// Non-standard environment to support the unit test -// - -#include "precomp.h" - -// -// Some test hooks to allow the unit test to have its own environment. -// -extern "C" { -#if SYMCRYPT_APPLE_CC -#include "sc_lib-testhooks.h" -#else -#include "sc_lib-testhooks.h" -#endif -} - -// -// We hack and create a NEW environment for our unit test. -// - -BOOLEAN TestSelftestsEnabled = FALSE; -BOOLEAN TestSaveXmmEnabled = FALSE; -BOOLEAN TestSaveYmmEnabled = FALSE; -// Set to TRUE when unit tests artificially fail to save Ymm registers -BOOLEAN TestSaveYmmFallback = FALSE; - -ULONGLONG TestFatalCount = 0; -ULONGLONG TestErrorInjectionCount = 0; -ULONGLONG TestErrorInjectionCalls = 0; -ULONG TestErrorInjectionProb = 0; - -BYTE TestErrorInjectionSeed[ SYMCRYPT_SHA1_RESULT_SIZE ] = {0}; - -extern "C" { -; - - -/////////////////////////////////////////////////////// -// Start of the actual fake environment code - -SYMCRYPT_CPU_FEATURES SYMCRYPT_CALL SymCryptCpuFeaturesNeverPresentEnvUnittest() -{ - return 0; -} - -VOID -SYMCRYPT_CALL -SymCryptInitEnvUnittest( UINT32 version ) -{ - if( g_SymCryptFlags & SYMCRYPT_FLAG_LIB_INITIALIZED ) - { - return; - } - -#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 - SymCryptDetectCpuFeaturesByCpuid( SYMCRYPT_CPUID_DETECT_FLAG_CHECK_OS_SUPPORT_FOR_YMM ); - - - // - // Don't use Ymm registers if the OS doesn't report them as available. - // We assume Ymm register swapping isn't supported unless we can verify that it is. - // - g_SymCryptCpuFeaturesNotPresent |= SYMCRYPT_CPU_FEATURE_AVX2; - - #if SYMCRYPT_MS_VC - if( (GetEnabledXStateFeatures() & XSTATE_MASK_AVX) != 0 ) - { - g_SymCryptCpuFeaturesNotPresent &= ~SYMCRYPT_CPU_FEATURE_AVX2; - } - #endif - - // - // By default we don't fail XMM so that we get proper performance for GCM. - // We allow the nofail to be disabled by command-line option. - // - g_SymCryptCpuFeaturesNotPresent &= ~SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL; - -#elif SYMCRYPT_CPU_ARM - - g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) ~SYMCRYPT_CPU_FEATURE_NEON; - -#elif SYMCRYPT_CPU_ARM64 - - SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(); - -#endif - - SymCryptInitEnvCommon( version ); -} - -_Analysis_noreturn_ -VOID -SYMCRYPT_CALL -SymCryptFatalEnvUnittest( ULONG fatalCode ) -{ - if( TestSelftestsEnabled ) - { - TestFatalCount++; - return; - } - - FATAL5( "*\n\nSymCrypt fatal error '%c%c%c%c' ", (fatalCode >> 24) & 0xff, (fatalCode >> 16) & 0xff, (fatalCode >> 8) & 0xff, fatalCode & 0xff ); -} - -VOID SYMCRYPT_CALL SymCryptTestInjectErrorEnvUnittest( PBYTE pbBuf, SIZE_T cbBuf ) -{ - if( TestSelftestsEnabled ) - { - ++TestErrorInjectionCalls; - if( TestErrorInjectionSeed[10]% TestErrorInjectionProb == 1 ) - { - SIZE_T bitNo = (*(ULONGLONG *)TestErrorInjectionSeed) % (8*cbBuf); - pbBuf[ bitNo/8 ] ^= ( 1 << (bitNo % 8) ); - - ++TestErrorInjectionCount; - } - SymCryptSha1( TestErrorInjectionSeed, sizeof( TestErrorInjectionSeed ), TestErrorInjectionSeed ); - } -} - - - -PVOID malloc_align32( SIZE_T size ) -{ - PVOID pBase = malloc( size + 8 + 31 ); - if( pBase == NULL ) - { - return pBase; - } - PBYTE pAligned = (PBYTE)((((ULONG_PTR) pBase) + 8 + 31) & ~31); - *(PVOID *) (pAligned - 8) = pBase; - return pAligned; -} - -VOID free_align32( PVOID p ) -{ - CHECK( ((ULONG_PTR)p & 31) == 0, "?" ); - free( *(PVOID *) ((PBYTE)p - 8) ); -} - -#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 - -char g_saveInProgressType = 0; -PVOID g_savePtr = NULL; -extern "C" { -ULONG g_nSaves = 0; -} - -#endif - -#if SYMCRYPT_CPU_X86 -// -// We have XMM save/restore logic even in Windows user mode so that we can test the library in user mode -// This makes it much easier to do thorough testing. -// We can disable these tests through a flag to get reasonable performance measurements on the same code. -// - -#pragma warning(push) -#pragma warning(disable:4359) -typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_ENV_XMM_SAVE_DATA_REGS { - // - // The alignment on x86 is only 4, so we can't align the __m128i fields properly. - // We add some padding and let the assembler code adjust the alignmetn of the actual data. - // This is all transperant to the C code - // - __m128i xmm[8]; // 8 for the XMM registers. - SYMCRYPT_MAGIC_FIELD -} SYMCRYPT_ENV_XMM_SAVE_DATA_REGS, *PSYMCRYPT_ENV_XMM_SAVE_DATA_REGS; - -#pragma warning(pop) - -typedef struct _SYMCRYPT_ENV_XMM_SAVE_DATA { - PSYMCRYPT_ENV_XMM_SAVE_DATA_REGS pRegs; - SYMCRYPT_MAGIC_FIELD -} SYMCRYPT_ENV_XMM_SAVE_DATA, *PSYMCRYPT_ENV_XMM_SAVE_DATA; - -SYMCRYPT_ERROR -SYMCRYPT_CALL -SymCryptSaveXmmEnvUnittest( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) -{ - PSYMCRYPT_ENV_XMM_SAVE_DATA p = (PSYMCRYPT_ENV_XMM_SAVE_DATA) pSaveData; - PSYMCRYPT_ENV_XMM_SAVE_DATA_REGS pRegs; - __m128i regs[8]; - - if( TestSaveXmmEnabled ) - { - // - // To test the fallback from the failure of the savexmm function we introduce occasional errors - // - if( !SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL ) && __rdtsc() % 101 == 0 ) - { - return SYMCRYPT_EXTERNAL_FAILURE; - } - - // - // Malloc & Free can modify the XMM registers, so we save them first in a temp - // so that we call them inside the save/restore area. - // - SymCryptEnvUmSaveXmmRegistersAsm( ®s[0] ); - - pRegs = (PSYMCRYPT_ENV_XMM_SAVE_DATA_REGS) malloc_align32( sizeof( *pRegs ) ); - if( pRegs == NULL ) - { - return SYMCRYPT_EXTERNAL_FAILURE; - } - - memcpy( &pRegs->xmm[0], ®s[0], sizeof( regs ) ); - SYMCRYPT_SET_MAGIC( pRegs ); - p->pRegs = pRegs; - SYMCRYPT_SET_MAGIC( p ); - - CHECK( g_saveInProgressType == 0, "Nested register saves are not supported at IRQL=DISPATCH_LEVEL" ); - g_savePtr = pSaveData; - g_saveInProgressType = 'X'; - } - - return SYMCRYPT_NO_ERROR; -} - - -VOID -SYMCRYPT_CALL -SymCryptRestoreXmmEnvUnittest( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) -{ - PSYMCRYPT_ENV_XMM_SAVE_DATA p = (PSYMCRYPT_ENV_XMM_SAVE_DATA) pSaveData; - PSYMCRYPT_ENV_XMM_SAVE_DATA_REGS pRegs; - - __m128i regs[8]; - - if( TestSaveXmmEnabled ) - { - - SYMCRYPT_CHECK_MAGIC( p ); - pRegs = p->pRegs; - SYMCRYPT_CHECK_MAGIC( pRegs ); - - CHECK( g_saveInProgressType == 'X', "XMM not saved" ); - CHECK( g_savePtr == pSaveData, "?" ); - - memcpy( ®s[0], &pRegs->xmm[0], sizeof( regs ) ); - SYMCRYPT_WIPE_MAGIC( pRegs ); - free_align32( pRegs ); - p->pRegs = NULL; - SYMCRYPT_WIPE_MAGIC( p ); - - SymCryptEnvUmRestoreXmmRegistersAsm( ®s[0] ); - - g_saveInProgressType = 0; - } -} - -#elif SYMCRYPT_CPU_AMD64 - -SYMCRYPT_ERROR -SYMCRYPT_CALL -SymCryptSaveXmmEnvUnittest( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) -{ - UNREFERENCED_PARAMETER( pSaveData ); - - return SYMCRYPT_NO_ERROR; -} - - -VOID -SYMCRYPT_CALL -SymCryptRestoreXmmEnvUnittest( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) -{ - UNREFERENCED_PARAMETER( pSaveData ); -} - - -#endif - -#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 - -// -// We have YMM save/restore logic even in Windows user mode so that we can test the library in user mode -// This makes it much easier to do thorough testing. -// We can disable these tests through a flag to get reasonable performance measurements on the same code. -// - -typedef SYMCRYPT_ALIGN_AT(32) struct _SYMCRYPT_ENV_YMM_SAVE_DATA_REGS { - __m256i ymm[16]; // 16 for the XMM registers - SYMCRYPT_MAGIC_FIELD -} SYMCRYPT_ENV_YMM_SAVE_DATA_REGS, *PSYMCRYPT_ENV_YMM_SAVE_DATA_REGS; - -typedef struct _SYMCRYPT_ENV_YMM_SAVE_DATA { - PSYMCRYPT_ENV_YMM_SAVE_DATA_REGS pRegs; - SYMCRYPT_MAGIC_FIELD -} SYMCRYPT_ENV_YMM_SAVE_DATA, *PSYMCRYPT_ENV_YMM_SAVE_DATA; - -SYMCRYPT_ERROR -SYMCRYPT_CALL -SymCryptSaveYmmEnvUnittest( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) -{ - PSYMCRYPT_ENV_YMM_SAVE_DATA p = (PSYMCRYPT_ENV_YMM_SAVE_DATA) pSaveData; - PSYMCRYPT_ENV_YMM_SAVE_DATA_REGS pRegs; - __m256i regs[16]; - - CHECK( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 ), "?" ); - - if( TestSaveYmmEnabled ) - { - - // - // To test the fallback from the failure of the saveYmm function we introduce occasional errors - // - if( __rdtsc() % 101 == 0 ) - { - // If we are testing the fallback path, we want to record this so test for presence of - // Ymm save/restore logic is not triggered. If fallback code calls memcpy (for instance) - // the CRT may (correctly) use Ymm registers without saving/restoring in user mode. - // This is the case for our Parallel SHA implementations. - TestSaveYmmFallback = TRUE; - return SYMCRYPT_EXTERNAL_FAILURE; - } - - // - // Alloc can modify the regs, so save them first so that the modification happens - // inside the save block - // - SymCryptEnvUmSaveYmmRegistersAsm( regs ); - - pRegs = (PSYMCRYPT_ENV_YMM_SAVE_DATA_REGS) malloc_align32( sizeof( *pRegs ) ); - if( pRegs == NULL ) - { - return SYMCRYPT_EXTERNAL_FAILURE; - } - - memcpy( pRegs->ymm, regs, sizeof( regs ) ); - SYMCRYPT_SET_MAGIC( pRegs ); - SYMCRYPT_CHECK_MAGIC( pRegs ); - - p->pRegs = pRegs; - SYMCRYPT_SET_MAGIC( p ); - - - CHECK( g_saveInProgressType == 0, "Nested register saves are not supported at IRQL=DISPATCH_LEVEL" ); - g_savePtr = pSaveData; - g_saveInProgressType = 'Y'; - - } - - return SYMCRYPT_NO_ERROR; -} - - -VOID -SYMCRYPT_CALL -SymCryptRestoreYmmEnvUnittest( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) -{ - PSYMCRYPT_ENV_YMM_SAVE_DATA p = (PSYMCRYPT_ENV_YMM_SAVE_DATA) pSaveData; - PSYMCRYPT_ENV_YMM_SAVE_DATA_REGS pRegs; - __m256i regs[16]; - - CHECK( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 ), "?" ); - - if( TestSaveYmmEnabled ) - { - SYMCRYPT_CHECK_MAGIC( p ); - pRegs = p->pRegs; - SYMCRYPT_CHECK_MAGIC( pRegs ); - - CHECK( g_saveInProgressType == 'Y', "YMM not saved" ); - CHECK( g_savePtr == pSaveData, "?" ); - - memcpy( regs, pRegs->ymm, sizeof( regs ) ); - SYMCRYPT_WIPE_MAGIC( pRegs ); - free_align32( pRegs ); - p->pRegs = NULL; - SYMCRYPT_WIPE_MAGIC( p ); - - SymCryptEnvUmRestoreYmmRegistersAsm( regs ); - - g_saveInProgressType = 0; - } -} - -#endif - - -VOID -SYMCRYPT_CALL -SymCryptEnvUnittestDetectCpuFeatures( ULONG flags ) -{ -#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 - SymCryptDetectCpuFeaturesByCpuid( flags ); -#elif SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 - UNREFERENCED_PARAMETER( flags ); - g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) ~SYMCRYPT_CPU_FEATURE_NEON; -#else - UNREFERENCED_PARAMETER( flags ); - g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) (-1); -#endif -} - -#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 - -VOID -SYMCRYPT_CALL -SymCryptCpuidExFuncEnvUnittest( int cpuInfo[4], int function_id, int subfunction_id ) -{ - __cpuidex( cpuInfo, function_id, subfunction_id ); -} - -#endif -} // extern "C" - - - +// +// env_commonSymCryptUnitTest +// Common parts of non-standard environment to support the unit test +// + +// +// Some test hooks to allow the unit test to have its own environment. +// +extern "C" { +#include "sc_lib-testhooks.h" +} + +// +// We hack and create a NEW environment for our unit test. +// + +BOOLEAN TestSelftestsEnabled = FALSE; + +ULONGLONG TestFatalCount = 0; +ULONGLONG TestErrorInjectionCount = 0; +ULONGLONG TestErrorInjectionCalls = 0; +ULONG TestErrorInjectionProb = 0; + +BYTE TestErrorInjectionSeed[ SYMCRYPT_SHA1_RESULT_SIZE ] = {0}; + +extern "C" { +; + + +/////////////////////////////////////////////////////// +// Start of the actual fake environment code + +SYMCRYPT_CPU_FEATURES SYMCRYPT_CALL SymCryptCpuFeaturesNeverPresentEnvUnittest() +{ + return 0; +} + +_Analysis_noreturn_ +VOID +SYMCRYPT_CALL +SymCryptFatalEnvUnittest( ULONG fatalCode ) +{ + if( TestSelftestsEnabled ) + { + TestFatalCount++; + return; + } + + FATAL5( "*\n\nSymCrypt fatal error '%c%c%c%c' ", (fatalCode >> 24) & 0xff, (fatalCode >> 16) & 0xff, (fatalCode >> 8) & 0xff, fatalCode & 0xff ); +} + +VOID SYMCRYPT_CALL SymCryptTestInjectErrorEnvUnittest( PBYTE pbBuf, SIZE_T cbBuf ) +{ + if( TestSelftestsEnabled ) + { + ++TestErrorInjectionCalls; + if( TestErrorInjectionSeed[10]% TestErrorInjectionProb == 1 ) + { + SIZE_T bitNo = (*(ULONGLONG *)TestErrorInjectionSeed) % (8*cbBuf); + pbBuf[ bitNo/8 ] ^= ( 1 << (bitNo % 8) ); + + ++TestErrorInjectionCount; + } + SymCryptSha1( TestErrorInjectionSeed, sizeof( TestErrorInjectionSeed ), TestErrorInjectionSeed ); + } +} + + + +PVOID malloc_align32( SIZE_T size ) +{ + PVOID pBase = malloc( size + 8 + 31 ); + if( pBase == NULL ) + { + return pBase; + } + PBYTE pAligned = (PBYTE)((((ULONG_PTR) pBase) + 8 + 31) & ~31); + *(PVOID *) (pAligned - 8) = pBase; + return pAligned; +} + +VOID free_align32( PVOID p ) +{ + CHECK( ((ULONG_PTR)p & 31) == 0, "?" ); + free( *(PVOID *) ((PBYTE)p - 8) ); +} + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 + +char g_saveInProgressType = 0; +PVOID g_savePtr = NULL; +extern "C" { +ULONG g_nSaves = 0; +} + +#endif + +#if SYMCRYPT_CPU_X86 +// +// We have XMM save/restore logic even in Windows user mode so that we can test the library in user mode +// This makes it much easier to do thorough testing. +// We can disable these tests through a flag to get reasonable performance measurements on the same code. +// + +#pragma warning(push) +#pragma warning(disable:4359) +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_ENV_XMM_SAVE_DATA_REGS { + // + // The alignment on x86 is only 4, so we can't align the __m128i fields properly. + // We add some padding and let the assembler code adjust the alignmetn of the actual data. + // This is all transperant to the C code + // + __m128i xmm[8]; // 8 for the XMM registers. + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_ENV_XMM_SAVE_DATA_REGS, *PSYMCRYPT_ENV_XMM_SAVE_DATA_REGS; + +#pragma warning(pop) + +typedef struct _SYMCRYPT_ENV_XMM_SAVE_DATA { + PSYMCRYPT_ENV_XMM_SAVE_DATA_REGS pRegs; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_ENV_XMM_SAVE_DATA, *PSYMCRYPT_ENV_XMM_SAVE_DATA; + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSaveXmmEnvUnittest( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) +{ + PSYMCRYPT_ENV_XMM_SAVE_DATA p = (PSYMCRYPT_ENV_XMM_SAVE_DATA) pSaveData; + PSYMCRYPT_ENV_XMM_SAVE_DATA_REGS pRegs; + __m128i regs[8]; + + if( TestSaveXmmEnabled ) + { + // + // To test the fallback from the failure of the savexmm function we introduce occasional errors + // + if( !SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL ) && __rdtsc() % 101 == 0 ) + { + return SYMCRYPT_EXTERNAL_FAILURE; + } + + // + // Malloc & Free can modify the XMM registers, so we save them first in a temp + // so that we call them inside the save/restore area. + // + SymCryptEnvUmSaveXmmRegistersAsm( ®s[0] ); + + pRegs = (PSYMCRYPT_ENV_XMM_SAVE_DATA_REGS) malloc_align32( sizeof( *pRegs ) ); + if( pRegs == NULL ) + { + return SYMCRYPT_EXTERNAL_FAILURE; + } + + memcpy( &pRegs->xmm[0], ®s[0], sizeof( regs ) ); + SYMCRYPT_SET_MAGIC( pRegs ); + p->pRegs = pRegs; + SYMCRYPT_SET_MAGIC( p ); + + CHECK( g_saveInProgressType == 0, "Nested register saves are not supported at IRQL=DISPATCH_LEVEL" ); + g_savePtr = pSaveData; + g_saveInProgressType = 'X'; + } + + return SYMCRYPT_NO_ERROR; +} + + +VOID +SYMCRYPT_CALL +SymCryptRestoreXmmEnvUnittest( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) +{ + PSYMCRYPT_ENV_XMM_SAVE_DATA p = (PSYMCRYPT_ENV_XMM_SAVE_DATA) pSaveData; + PSYMCRYPT_ENV_XMM_SAVE_DATA_REGS pRegs; + + __m128i regs[8]; + + if( TestSaveXmmEnabled ) + { + + SYMCRYPT_CHECK_MAGIC( p ); + pRegs = p->pRegs; + SYMCRYPT_CHECK_MAGIC( pRegs ); + + CHECK( g_saveInProgressType == 'X', "XMM not saved" ); + CHECK( g_savePtr == pSaveData, "?" ); + + memcpy( ®s[0], &pRegs->xmm[0], sizeof( regs ) ); + SYMCRYPT_WIPE_MAGIC( pRegs ); + free_align32( pRegs ); + p->pRegs = NULL; + SYMCRYPT_WIPE_MAGIC( p ); + + SymCryptEnvUmRestoreXmmRegistersAsm( ®s[0] ); + + g_saveInProgressType = 0; + } +} + +#elif SYMCRYPT_CPU_AMD64 + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSaveXmmEnvUnittest( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) +{ + UNREFERENCED_PARAMETER( pSaveData ); + + return SYMCRYPT_NO_ERROR; +} + + +VOID +SYMCRYPT_CALL +SymCryptRestoreXmmEnvUnittest( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) +{ + UNREFERENCED_PARAMETER( pSaveData ); +} + + +#endif + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 + +// +// We have YMM save/restore logic even in Windows user mode so that we can test the library in user mode +// This makes it much easier to do thorough testing. +// We can disable these tests through a flag to get reasonable performance measurements on the same code. +// + +typedef SYMCRYPT_ALIGN_AT(32) struct _SYMCRYPT_ENV_YMM_SAVE_DATA_REGS { + __m256i ymm[16]; // 16 for the XMM registers + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_ENV_YMM_SAVE_DATA_REGS, *PSYMCRYPT_ENV_YMM_SAVE_DATA_REGS; + +typedef struct _SYMCRYPT_ENV_YMM_SAVE_DATA { + PSYMCRYPT_ENV_YMM_SAVE_DATA_REGS pRegs; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_ENV_YMM_SAVE_DATA, *PSYMCRYPT_ENV_YMM_SAVE_DATA; + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSaveYmmEnvUnittest( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) +{ + PSYMCRYPT_ENV_YMM_SAVE_DATA p = (PSYMCRYPT_ENV_YMM_SAVE_DATA) pSaveData; + PSYMCRYPT_ENV_YMM_SAVE_DATA_REGS pRegs; + __m256i regs[16]; + + CHECK( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 ), "?" ); + + if( TestSaveYmmEnabled ) + { + + // + // To test the fallback from the failure of the saveYmm function we introduce occasional errors + // + if( __rdtsc() % 101 == 0 ) + { + return SYMCRYPT_EXTERNAL_FAILURE; + } + + // + // Alloc can modify the regs, so save them first so that the modification happens + // inside the save block + // + SymCryptEnvUmSaveYmmRegistersAsm( regs ); + + pRegs = (PSYMCRYPT_ENV_YMM_SAVE_DATA_REGS) malloc_align32( sizeof( *pRegs ) ); + if( pRegs == NULL ) + { + return SYMCRYPT_EXTERNAL_FAILURE; + } + + memcpy( pRegs->ymm, regs, sizeof( regs ) ); + SYMCRYPT_SET_MAGIC( pRegs ); + SYMCRYPT_CHECK_MAGIC( pRegs ); + + p->pRegs = pRegs; + SYMCRYPT_SET_MAGIC( p ); + + + CHECK( g_saveInProgressType == 0, "Nested register saves are not supported at IRQL=DISPATCH_LEVEL" ); + g_savePtr = pSaveData; + g_saveInProgressType = 'Y'; + + } + + return SYMCRYPT_NO_ERROR; +} + + +VOID +SYMCRYPT_CALL +SymCryptRestoreYmmEnvUnittest( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ) +{ + PSYMCRYPT_ENV_YMM_SAVE_DATA p = (PSYMCRYPT_ENV_YMM_SAVE_DATA) pSaveData; + PSYMCRYPT_ENV_YMM_SAVE_DATA_REGS pRegs; + __m256i regs[16]; + + CHECK( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 ), "?" ); + + if( TestSaveYmmEnabled ) + { + SYMCRYPT_CHECK_MAGIC( p ); + pRegs = p->pRegs; + SYMCRYPT_CHECK_MAGIC( pRegs ); + + CHECK( g_saveInProgressType == 'Y', "YMM not saved" ); + CHECK( g_savePtr == pSaveData, "?" ); + + memcpy( regs, pRegs->ymm, sizeof( regs ) ); + SYMCRYPT_WIPE_MAGIC( pRegs ); + free_align32( pRegs ); + p->pRegs = NULL; + SYMCRYPT_WIPE_MAGIC( p ); + + SymCryptEnvUmRestoreYmmRegistersAsm( regs ); + + g_saveInProgressType = 0; + } +} + +#endif + + +VOID +SYMCRYPT_CALL +SymCryptEnvUnittestDetectCpuFeatures( ULONG flags ) +{ +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + SymCryptDetectCpuFeaturesByCpuid( flags ); +#elif SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + UNREFERENCED_PARAMETER( flags ); + g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) ~SYMCRYPT_CPU_FEATURE_NEON; +#else + UNREFERENCED_PARAMETER( flags ); + g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) (-1); +#endif +} + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 + +VOID +SYMCRYPT_CALL +SymCryptCpuidExFuncEnvUnittest( int cpuInfo[4], int function_id, int subfunction_id ) +{ + __cpuidex( cpuInfo, function_id, subfunction_id ); +} + +#endif +} // extern "C" + + + diff --git a/unittest/lib/env_linuxSymCryptUnittest.cpp b/unittest/lib/env_linuxSymCryptUnittest.cpp new file mode 100644 index 0000000..cc2156d --- /dev/null +++ b/unittest/lib/env_linuxSymCryptUnittest.cpp @@ -0,0 +1,45 @@ +// +// env_linuxSymCryptUnitTest +// Non-standard environment to support the unit test +// + +#include "precomp.h" +#include "env_commonSymCryptUnittest.cpp" + +BOOLEAN TestSaveXmmEnabled = FALSE; +BOOLEAN TestSaveYmmEnabled = FALSE; + +extern "C" { + +VOID +SYMCRYPT_CALL +SymCryptInitEnvUnittest( UINT32 version ) +{ + if( g_SymCryptFlags & SYMCRYPT_FLAG_LIB_INITIALIZED ) + { + return; + } + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + SymCryptDetectCpuFeaturesByCpuid( SYMCRYPT_CPUID_DETECT_FLAG_CHECK_OS_SUPPORT_FOR_YMM ); + + // + // By default we don't fail XMM so that we get proper performance for GCM. + // We allow the nofail to be disabled by command-line option. + // + g_SymCryptCpuFeaturesNotPresent &= ~SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL; + +#elif SYMCRYPT_CPU_ARM + + g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) ~SYMCRYPT_CPU_FEATURE_NEON; + +#elif SYMCRYPT_CPU_ARM64 + + SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(); + +#endif + + SymCryptInitEnvCommon( version ); +} + +} // extern "C" diff --git a/unittest/lib/env_windowsSymCryptUnittest.cpp b/unittest/lib/env_windowsSymCryptUnittest.cpp new file mode 100644 index 0000000..5ff3aab --- /dev/null +++ b/unittest/lib/env_windowsSymCryptUnittest.cpp @@ -0,0 +1,68 @@ +// +// env_windowsSymCryptUnitTest +// Non-standard environment to support the unit test +// + +#include "precomp.h" +#include "env_commonSymCryptUnittest.cpp" + +#if SYMCRYPT_CPU_AMD64 +BOOLEAN TestSaveXmmEnabled = TRUE; // For AMD64 we always test Xmm6-Xmm15 are preserved +#else +BOOLEAN TestSaveXmmEnabled = FALSE; +#endif +BOOLEAN TestSaveYmmEnabled = FALSE; + +extern "C" { + +VOID +SYMCRYPT_CALL +SymCryptInitEnvUnittest( UINT32 version ) +{ + if( g_SymCryptFlags & SYMCRYPT_FLAG_LIB_INITIALIZED ) + { + return; + } + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + SymCryptDetectCpuFeaturesByCpuid( SYMCRYPT_CPUID_DETECT_FLAG_CHECK_OS_SUPPORT_FOR_YMM ); + + // + // Check OS reports the same AVX2 availability through GetEnabledXStateFeatures and _xgetbv + // + if (((GetEnabledXStateFeatures() & XSTATE_MASK_AVX) != 0) ^ + ((g_SymCryptCpuFeaturesNotPresent & SYMCRYPT_CPU_FEATURE_AVX2) == 0) ) + { + FATAL3("GetEnabledXStateFeatures (%d) and g_SymCryptCpuFeaturesNotPresent (%d) set by _xgetbv disagree on whether AVX2 should be enabled!", + GetEnabledXStateFeatures() & XSTATE_MASK_AVX, g_SymCryptCpuFeaturesNotPresent & SYMCRYPT_CPU_FEATURE_AVX2); + } + // + // Check OS reports the same AVX512 availability through GetEnabledXStateFeatures and _xgetbv + // + if (((GetEnabledXStateFeatures() & XSTATE_MASK_AVX512) != 0) ^ + ((g_SymCryptCpuFeaturesNotPresent & SYMCRYPT_CPU_FEATURE_AVX512) == 0) ) + { + FATAL3("GetEnabledXStateFeatures (%d) and g_SymCryptCpuFeaturesNotPresent (%d) set by _xgetbv disagree on whether AVX512 should be enabled!", + GetEnabledXStateFeatures() & XSTATE_MASK_AVX512, g_SymCryptCpuFeaturesNotPresent & SYMCRYPT_CPU_FEATURE_AVX512); + } + + // + // By default we don't fail XMM so that we get proper performance for GCM. + // We allow the nofail to be disabled by command-line option. + // + g_SymCryptCpuFeaturesNotPresent &= ~SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL; + +#elif SYMCRYPT_CPU_ARM + + g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) ~SYMCRYPT_CPU_FEATURE_NEON; + +#elif SYMCRYPT_CPU_ARM64 + + SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(); + +#endif + + SymCryptInitEnvCommon( version ); +} + +} // extern "C" diff --git a/unittest/lib/i386/savexmm.asm b/unittest/lib/i386/savevectors.asm similarity index 95% rename from unittest/lib/i386/savexmm.asm rename to unittest/lib/i386/savevectors.asm index b6ca103..3c144cc 100644 --- a/unittest/lib/i386/savexmm.asm +++ b/unittest/lib/i386/savevectors.asm @@ -1,184 +1,184 @@ -; -; savexmm.asm -; -; Copyright (c) Microsoft Corporation. Licensed under the MIT license. -; -; Routines for saving and restoring XMM registers. -; - - TITLE savexmm.asm - - - .686 - .xmm - -_TEXT SEGMENT PARA PUBLIC USE32 'CODE' - ASSUME CS:_TEXT, DS:FLAT, SS:FLAT - - PUBLIC @SymCryptEnvUmSaveXmmRegistersAsm@4 - PUBLIC @SymCryptEnvUmRestoreXmmRegistersAsm@4 - PUBLIC @SymCryptEnvUmSaveYmmRegistersAsm@4 - PUBLIC @SymCryptEnvUmRestoreYmmRegistersAsm@4 - - -;VOID SYMCRYPT_CALL SymCryptEnvUmSaveXmmRegistersAsm( __m128i * buffer ); -;VOID SYMCRYPT_CALL SymCryptEnvUmRestoreXmmRegistersAsm( __m128i * buffer ); -;VOID SYMCRYPT_CALL SymCryptEnvUmSaveYmmRegistersAsm( __m256i * buffer ); -;VOID SYMCRYPT_CALL SymCryptEnvUmRestoreYmmRegistersAsm( __m256i * buffer ); - -@SymCryptEnvUmSaveXmmRegistersAsm@4 PROC - - ; - ; The .FPO provides debugging information for stack frames that do not use - ; ebp as a base pointer. - ; This stuff not well documented, - ; but here is the information I've gathered about the arguments to .FPO - ; - ; In order: - ; cdwLocals: Size of local variables, in DWords - ; cdwParams: Size of parameters, in DWords. Given that this is all about - ; stack stuff, I'm assuming this is only about parameters passed - ; on the stack. - ; cbProlog : Number of bytes in the prolog code. We sometimes interleaved the - ; prolog code with work for better performance. Most uses of - ; .FPO seem to set this value to 0. - ; The debugger seems to work if the prolog defined by this value - ; contains all the stack adjustments. - ; cbRegs : # registers saved in the prolog. 4 in our case - ; fUseBP : 0 if EBP is not used as base pointer, 1 if EBP is used as base pointer - ; cbFrame : Type of frame. - ; 0 = FPO frame (no frame pointer) - ; 1 = Trap frame (result of a CPU trap event) - ; 2 = TSS frame - ; - ; Having looked at various occurrences of .FPO in the Windows code it - ; seems to be used fairly sloppy, with lots of arguments left 0 even when - ; they probably shouldn't be according to the spec. - ; - .FPO(0,0,0,0,0,0) - - ; ecx = buffer - - ; - ; First we align ecx to the next multiple of 16. The buffer is defined to have 16*9 bytes so we have enough room - ; - add ecx, 15 - and ecx, NOT 15 - - movaps [ecx ], xmm0 - movaps [ecx+ 16], xmm1 - movaps [ecx+ 32], xmm2 - movaps [ecx+ 48], xmm3 - movaps [ecx+ 64], xmm4 - movaps [ecx+ 80], xmm5 - movaps [ecx+ 96], xmm6 - movaps [ecx+112], xmm7 - - ret - -@SymCryptEnvUmSaveXmmRegistersAsm@4 ENDP - -@SymCryptEnvUmRestoreXmmRegistersAsm@4 PROC - - - ; ecx = buffer - - ; - ; First we align ecx to the next multiple of 16. The buffer is defined to have 16*9 bytes so we have enough room - ; - add ecx, 15 - and ecx, NOT 15 - - movaps xmm0, [ecx ] - movaps xmm1, [ecx+ 16] - movaps xmm2, [ecx+ 32] - movaps xmm3, [ecx+ 48] - movaps xmm4, [ecx+ 64] - movaps xmm5, [ecx+ 80] - movaps xmm6, [ecx+ 96] - movaps xmm7, [ecx+112] - - ret - -@SymCryptEnvUmRestoreXmmRegistersAsm@4 ENDP - -@SymCryptEnvUmSaveYmmRegistersAsm@4 PROC - - ; - ; The .FPO provides debugging information for stack frames that do not use - ; ebp as a base pointer. - ; This stuff not well documented, - ; but here is the information I've gathered about the arguments to .FPO - ; - ; In order: - ; cdwLocals: Size of local variables, in DWords - ; cdwParams: Size of parameters, in DWords. Given that this is all about - ; stack stuff, I'm assuming this is only about parameters passed - ; on the stack. - ; cbProlog : Number of bytes in the prolog code. We sometimes interleaved the - ; prolog code with work for better performance. Most uses of - ; .FPO seem to set this value to 0. - ; The debugger seems to work if the prolog defined by this value - ; contains all the stack adjustments. - ; cbRegs : # registers saved in the prolog. 4 in our case - ; fUseBP : 0 if EBP is not used as base pointer, 1 if EBP is used as base pointer - ; cbFrame : Type of frame. - ; 0 = FPO frame (no frame pointer) - ; 1 = Trap frame (result of a CPU trap event) - ; 2 = TSS frame - ; - ; Having looked at various occurrences of .FPO in the Windows code it - ; seems to be used fairly sloppy, with lots of arguments left 0 even when - ; they probably shouldn't be according to the spec. - ; - .FPO(0,0,0,0,0,0) - - ; ecx = buffer - - ; - ; First we align ecx to the next multiple of 16. The buffer is defined to have 16*9 bytes so we have enough room - ; - add ecx, 31 - and ecx, NOT 31 - - vmovaps [ecx+ 0 * 32 ], ymm0 - vmovaps [ecx+ 1 * 32 ], ymm1 - vmovaps [ecx+ 2 * 32 ], ymm2 - vmovaps [ecx+ 3 * 32 ], ymm3 - vmovaps [ecx+ 4 * 32 ], ymm4 - vmovaps [ecx+ 5 * 32 ], ymm5 - vmovaps [ecx+ 6 * 32 ], ymm6 - vmovaps [ecx+ 7 * 32 ], ymm7 - - ret - -@SymCryptEnvUmSaveYmmRegistersAsm@4 ENDP - -@SymCryptEnvUmRestoreYmmRegistersAsm@4 PROC - - - ; ecx = buffer - - ; - ; First we align ecx to the next multiple of 16. The buffer is defined to have 16*9 bytes so we have enough room - ; - add ecx, 31 - and ecx, NOT 31 - - vmovaps ymm0, [ecx + 0 * 32 ] - vmovaps ymm1, [ecx + 1 * 32 ] - vmovaps ymm2, [ecx + 2 * 32 ] - vmovaps ymm3, [ecx + 3 * 32 ] - vmovaps ymm4, [ecx + 4 * 32 ] - vmovaps ymm5, [ecx + 5 * 32 ] - vmovaps ymm6, [ecx + 6 * 32 ] - vmovaps ymm7, [ecx + 7 * 32 ] - - ret - -@SymCryptEnvUmRestoreYmmRegistersAsm@4 ENDP - -_TEXT ENDS - -END - +; +; savevectors.asm +; +; Copyright (c) Microsoft Corporation. Licensed under the MIT license. +; +; Routines for saving and restoring XMM and YMM registers. +; + + TITLE savevectors.asm + + + .686 + .xmm + +_TEXT SEGMENT PARA PUBLIC USE32 'CODE' + ASSUME CS:_TEXT, DS:FLAT, SS:FLAT + + PUBLIC @SymCryptEnvUmSaveXmmRegistersAsm@4 + PUBLIC @SymCryptEnvUmRestoreXmmRegistersAsm@4 + PUBLIC @SymCryptEnvUmSaveYmmRegistersAsm@4 + PUBLIC @SymCryptEnvUmRestoreYmmRegistersAsm@4 + + +;VOID SYMCRYPT_CALL SymCryptEnvUmSaveXmmRegistersAsm( __m128i * buffer ); +;VOID SYMCRYPT_CALL SymCryptEnvUmRestoreXmmRegistersAsm( __m128i * buffer ); +;VOID SYMCRYPT_CALL SymCryptEnvUmSaveYmmRegistersAsm( __m256i * buffer ); +;VOID SYMCRYPT_CALL SymCryptEnvUmRestoreYmmRegistersAsm( __m256i * buffer ); + +@SymCryptEnvUmSaveXmmRegistersAsm@4 PROC + + ; + ; The .FPO provides debugging information for stack frames that do not use + ; ebp as a base pointer. + ; This stuff not well documented, + ; but here is the information I've gathered about the arguments to .FPO + ; + ; In order: + ; cdwLocals: Size of local variables, in DWords + ; cdwParams: Size of parameters, in DWords. Given that this is all about + ; stack stuff, I'm assuming this is only about parameters passed + ; on the stack. + ; cbProlog : Number of bytes in the prolog code. We sometimes interleaved the + ; prolog code with work for better performance. Most uses of + ; .FPO seem to set this value to 0. + ; The debugger seems to work if the prolog defined by this value + ; contains all the stack adjustments. + ; cbRegs : # registers saved in the prolog. 4 in our case + ; fUseBP : 0 if EBP is not used as base pointer, 1 if EBP is used as base pointer + ; cbFrame : Type of frame. + ; 0 = FPO frame (no frame pointer) + ; 1 = Trap frame (result of a CPU trap event) + ; 2 = TSS frame + ; + ; Having looked at various occurrences of .FPO in the Windows code it + ; seems to be used fairly sloppy, with lots of arguments left 0 even when + ; they probably shouldn't be according to the spec. + ; + .FPO(0,0,0,0,0,0) + + ; ecx = buffer + + ; + ; First we align ecx to the next multiple of 16. The buffer is defined to have 16*9 bytes so we have enough room + ; + add ecx, 15 + and ecx, NOT 15 + + movaps [ecx ], xmm0 + movaps [ecx+ 16], xmm1 + movaps [ecx+ 32], xmm2 + movaps [ecx+ 48], xmm3 + movaps [ecx+ 64], xmm4 + movaps [ecx+ 80], xmm5 + movaps [ecx+ 96], xmm6 + movaps [ecx+112], xmm7 + + ret + +@SymCryptEnvUmSaveXmmRegistersAsm@4 ENDP + +@SymCryptEnvUmRestoreXmmRegistersAsm@4 PROC + + + ; ecx = buffer + + ; + ; First we align ecx to the next multiple of 16. The buffer is defined to have 16*9 bytes so we have enough room + ; + add ecx, 15 + and ecx, NOT 15 + + movaps xmm0, [ecx ] + movaps xmm1, [ecx+ 16] + movaps xmm2, [ecx+ 32] + movaps xmm3, [ecx+ 48] + movaps xmm4, [ecx+ 64] + movaps xmm5, [ecx+ 80] + movaps xmm6, [ecx+ 96] + movaps xmm7, [ecx+112] + + ret + +@SymCryptEnvUmRestoreXmmRegistersAsm@4 ENDP + +@SymCryptEnvUmSaveYmmRegistersAsm@4 PROC + + ; + ; The .FPO provides debugging information for stack frames that do not use + ; ebp as a base pointer. + ; This stuff not well documented, + ; but here is the information I've gathered about the arguments to .FPO + ; + ; In order: + ; cdwLocals: Size of local variables, in DWords + ; cdwParams: Size of parameters, in DWords. Given that this is all about + ; stack stuff, I'm assuming this is only about parameters passed + ; on the stack. + ; cbProlog : Number of bytes in the prolog code. We sometimes interleaved the + ; prolog code with work for better performance. Most uses of + ; .FPO seem to set this value to 0. + ; The debugger seems to work if the prolog defined by this value + ; contains all the stack adjustments. + ; cbRegs : # registers saved in the prolog. 4 in our case + ; fUseBP : 0 if EBP is not used as base pointer, 1 if EBP is used as base pointer + ; cbFrame : Type of frame. + ; 0 = FPO frame (no frame pointer) + ; 1 = Trap frame (result of a CPU trap event) + ; 2 = TSS frame + ; + ; Having looked at various occurrences of .FPO in the Windows code it + ; seems to be used fairly sloppy, with lots of arguments left 0 even when + ; they probably shouldn't be according to the spec. + ; + .FPO(0,0,0,0,0,0) + + ; ecx = buffer + + ; + ; First we align ecx to the next multiple of 16. The buffer is defined to have 16*9 bytes so we have enough room + ; + add ecx, 31 + and ecx, NOT 31 + + vmovaps [ecx+ 0 * 32 ], ymm0 + vmovaps [ecx+ 1 * 32 ], ymm1 + vmovaps [ecx+ 2 * 32 ], ymm2 + vmovaps [ecx+ 3 * 32 ], ymm3 + vmovaps [ecx+ 4 * 32 ], ymm4 + vmovaps [ecx+ 5 * 32 ], ymm5 + vmovaps [ecx+ 6 * 32 ], ymm6 + vmovaps [ecx+ 7 * 32 ], ymm7 + + ret + +@SymCryptEnvUmSaveYmmRegistersAsm@4 ENDP + +@SymCryptEnvUmRestoreYmmRegistersAsm@4 PROC + + + ; ecx = buffer + + ; + ; First we align ecx to the next multiple of 16. The buffer is defined to have 16*9 bytes so we have enough room + ; + add ecx, 31 + and ecx, NOT 31 + + vmovaps ymm0, [ecx + 0 * 32 ] + vmovaps ymm1, [ecx + 1 * 32 ] + vmovaps ymm2, [ecx + 2 * 32 ] + vmovaps ymm3, [ecx + 3 * 32 ] + vmovaps ymm4, [ecx + 4 * 32 ] + vmovaps ymm5, [ecx + 5 * 32 ] + vmovaps ymm6, [ecx + 6 * 32 ] + vmovaps ymm7, [ecx + 7 * 32 ] + + ret + +@SymCryptEnvUmRestoreYmmRegistersAsm@4 ENDP + +_TEXT ENDS + +END + diff --git a/unittest/lib/main.cpp b/unittest/lib/main.cpp index 17a5773..e6f970d 100644 --- a/unittest/lib/main.cpp +++ b/unittest/lib/main.cpp @@ -596,7 +596,7 @@ usage() " sizeprefix: Only applies when sizes: parameter is also specified. Prefixes\n" " output of test command with a specific string. This can enable\n" " easier concatenation of many test runs on differing platforms into\n" - " a single .csv for postprocessing." + " a single .csv for postprocessing.\n" " kernel Run the kernel-mode tests \n" " verbose Print detailed information for some algorithms\n" " noperftests Skip running the performance tests - only run functional tests\n" @@ -609,6 +609,9 @@ usage() " rsakgp Run perf measurement of RSA key generation.\n" " sgx Run CNG and symcrypt test implementations against BCrypt in SGX enclave.\n" " This option is only valid for win8_1 version and newer of the tests.\n" + " testSaveYmm This option enables the unit tests to test the save/restore logic for\n" + " Ymm registers. Normally the C runtime may overwrite Ymm registers and\n" + " these tests will fail, so the test is disabled by default.\n" "\n" #if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 " CPU feature: aesni, pclmulqdq, sse2, sse3, ssse3, avx2,\n" @@ -677,8 +680,8 @@ const CPU_FEATURE_DATA g_cpuFeatureData[] = { "shani", SYMCRYPT_CPU_FEATURE_SHANI }, { "adx", SYMCRYPT_CPU_FEATURE_ADX }, { "bmi2", SYMCRYPT_CPU_FEATURE_BMI2 }, - { "vaes512", SYMCRYPT_CPU_FEATURE_VAES_512 }, - { "vaes256", SYMCRYPT_CPU_FEATURE_VAES_256 }, + { "vaes", SYMCRYPT_CPU_FEATURE_VAES }, + { "avx512", SYMCRYPT_CPU_FEATURE_AVX512 }, { "cmpxchg16b", SYMCRYPT_CPU_FEATURE_CMPXCHG16B }, #elif SYMCRYPT_CPU_ARM64 { "neon", SYMCRYPT_CPU_FEATURE_NEON }, @@ -707,6 +710,25 @@ VOID printSymCryptCpuInfo( PCSTR text, SYMCRYPT_CPU_FEATURES notPresent ) print( "\n" ); } +VOID printTestVectorSaveOptions() +{ + CHAR sep = ' '; + print("\nTest Vector Save/Restore options:"); + if (TestSaveXmmEnabled) + { + print("%cTestSaveXmmEnabled", sep); + sep = ','; + } + if (TestSaveYmmEnabled) + { + print("%cTestSaveYmmEnabled", sep); + sep = ','; + } + if (sep == ' ') + { + print(" None"); + } +} VOID processSingleOption( _In_ PSTR option ) @@ -858,6 +880,11 @@ processSingleOption( _In_ PSTR option ) g_sgx = TRUE; optionHandled = TRUE; } + if (STRICMP(&option[0], "testSaveYmm") == 0) + { + TestSaveYmmEnabled = TRUE; + optionHandled = TRUE; + } } if( !optionHandled ) { @@ -1244,6 +1271,8 @@ initTestInfrastructure( int argc, _In_reads_( argc ) char * argv[] ) printSymCryptCpuInfo( "Modified CPU features for this test", g_SymCryptCpuFeaturesNotPresent ); } + printTestVectorSaveOptions(); + if( g_rngSeed == 0 ) { CHECK( NT_SUCCESS( GENRANDOM(&g_rngSeed, sizeof( g_rngSeed )) ), "Failed to get random seed" ); @@ -1762,204 +1791,6 @@ rdrandTest() #endif } -// -// Below some of the code used to test the XMM registers. -// This is Unittest code, so outside the extern "C" block. -// - - -#if SYMCRYPT_CPU_X86 -///////////////////////////////////////////////////////////// -// -// Code to set up the XMM registers for testing in SAVE_XMM mode - -__m128i g_xmmStartState[8]; -__m128i g_xmmTestState[8]; - -// -// The save/restore functions work on an aligned subset of the structure. -// We don't care which part is used, we copy the start structure, store the -// XMM registers in it, and check that it is the same. -// - -VOID -verifyXmmRegisters() -{ - BOOL difference = FALSE; - if( TestSaveXmmEnabled && SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) && !SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL ) ) - { - memset( g_xmmTestState, 0, sizeof( g_xmmTestState ) ); - SymCryptEnvUmSaveXmmRegistersAsm( g_xmmTestState ); - - difference = memcmp( g_xmmTestState, g_xmmStartState, sizeof( g_xmmStartState ) ) != 0; - - if( difference ) - { - // - // Starting late 2018 our compiler & CRT are now using XMM registers for transient things. - // In particular, the compiler calls memset() on a large local struct to wipe the memory. - // (Part of the security mitigations against leaking data from uninitialized stack variables.) - // The CRT in turn uses XMM0 to wipe more efficiently. - // This is indistinguishable from a SymCrypt bug where we use XMM registers in X86 code without - // proper save/restore logic. - // In short: we cannot test this anymore in user mode. We'd have to compile for Win7 kernel mode - // to even run this test. - // For now we will relax this test to not be triggered by the compiler/CRT. This means that we - // no longer test this property, but we can at least detect some violations, which is better - // than none. - // - if( (g_xmmTestState[0].m128i_u64[0] | g_xmmTestState[0].m128i_u64[1]) == 0 && - memcmp( &g_xmmTestState[1], &g_xmmStartState[1], 7 * sizeof( g_xmmStartState[0] ) ) == 0 ) - { - difference = FALSE; - } - } - - if( difference ) - { - print( "\n" ); - print( "Registers different: " ); - for( int i=0; i<8; i++ ) - { - if( memcmp( &g_xmmTestState[i], &g_xmmStartState[i], 16 ) != 0 ) - { - print( "xmm%d ", i ); - } - - } - print( "\nStartState:\n" ); - printHexArray( (PCBYTE) g_xmmStartState, 8, 16 ); - print( "TestState:\n"); - printHexArray( (PCBYTE) g_xmmTestState, 8, 16 ); - - ULONGLONG checksum; - SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) g_xmmStartState, 8*16, (PBYTE) &checksum ); - print( "%04x\n", (ULONG) checksum & 0xffff ); - SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) g_xmmTestState, 8*16, (PBYTE) &checksum ); - print( "%04x\n", (ULONG) checksum & 0xffff ); - - FATAL( "Xmm registers modified without proper save/restore" ); - } - } -} - - -VOID -initXmmRegisters() -{ -/* -#pragma prefast(push) -#pragma prefast(disable:6031) - BCryptGenRandom( NULL, (PBYTE) g_xmmStartState, sizeof( g_xmmStartState ), BCRYPT_USE_SYSTEM_PREFERRED_RNG ); -#pragma prefast(pop) - memcpy( g_xmmTestState, g_xmmStartState, sizeof( g_xmmStartState ) ); - - SymCryptEnvUmRestoreXmmRegistersAsm( g_xmmStartState ); -*/ - if( TestSaveXmmEnabled && SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) && !SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL ) ) - { - SymCryptEnvUmSaveXmmRegistersAsm( g_xmmStartState ); - verifyXmmRegisters(); - } -} - -#else - -VOID verifyXmmRegisters() -{ -} - -VOID initXmmRegisters() -{ -} -#endif - -#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 -///////////////////////////////////////////////////////////// -// -// Code to set up the YMM registers for testing in SAVE_YMM mode - -#if SYMCRYPT_CPU_AMD64 -__m256i g_ymmStartState[16]; -__m256i g_ymmTestState[16]; -#else -__m256i g_ymmStartState[8]; -__m256i g_ymmTestState[8]; -#endif - - -VOID -verifyYmmRegisters() -{ - if( !SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 ) ) - { - verifyXmmRegisters(); - return; - } - - // - // We know that AVX2 is present from here on - // - if( TestSaveYmmEnabled && (SYMCRYPT_CPU_X86 || !TestSaveYmmFallback) ) - { - SymCryptEnvUmSaveYmmRegistersAsm( g_ymmTestState ); - - // - // On AMD64 it is perfectly fine for the XMM register values to have been modified. - // Similarly, on x86 C runtime functions which SymCrypt uses may now use XMM registers and - // fail to preserve them. - // We just test that the top half of the Ymm registers have been preserved. - // - for( int i=0; i>5, i&31); - } - } - } -} - - -VOID -initYmmRegisters() -{ - if( !SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 ) ) - { - initXmmRegisters(); - return; - } - if( TestSaveYmmEnabled ) - { - // - // Do the memsets outside the save area as it might use XMM registers on x86 - // Set the initial Ymm registers to a non-trivial value. It is likely (for performance - // reasons) that the upper halves are already zero-ed and will be re-zeroed by any function - // we call. - // - memset( g_ymmTestState, 17, sizeof( g_ymmTestState ) ); - memset( g_ymmStartState, (__rdtsc() & 255) ^ 0x42, sizeof( g_ymmStartState ) ); - // Reset TestSaveYmmFallback (set to TRUE when unit-test artificially fails save Ymm, in - // which case user mode code can clobber volatile Ymm registers on AMD64) - TestSaveYmmFallback = FALSE; - SymCryptEnvUmRestoreYmmRegistersAsm( g_ymmStartState ); - verifyYmmRegisters(); - } -} - -#else - -VOID verifyYmmRegisters() -{ -} - -VOID initYmmRegisters() -{ -} -#endif - VOID printHexArray( PCBYTE pData, SIZE_T nElements, SIZE_T elementSize ) { diff --git a/unittest/lib/main_exe_common.cpp b/unittest/lib/main_exe_common.cpp index 553a5f8..e679a56 100644 --- a/unittest/lib/main_exe_common.cpp +++ b/unittest/lib/main_exe_common.cpp @@ -2,9 +2,6 @@ // Copyright (c) Microsoft Corporation. Licensed under the MIT license. // - -PSTR testDriverName = TESTDRIVER_NAME; - const char * g_implementationNames[] = { ImpSc::name, @@ -56,49 +53,55 @@ addAllAlgs() #endif } - -DWORD WINAPI umThreadFunc( LPVOID param ) +int SYMCRYPT_CDECL +main( int argc, _In_reads_( argc ) char * argv[] ) { - runTestThread( param ); + initTestInfrastructure( argc, argv ); + + // Set up vector registers to be in a state that should not be modified by unit tests + // This may do nothing if TestSaveXXXEnabled is FALSE, but it can also: + // On Windows AMD64 set Xmm6-Xmm15 to random values + // these values are non-volatile in Window x64 ABI, so should be preserved. If they are not + // preserved it indicates a problem with our assembly not adhering to the Windows ABI + // On Linux AMD64 set Ymm0-Ymm15 to random values + // these values are naturally volatile on Linux, but symcryptunittest callers may specify the + // following environment variable: + // GLIBC_TUNABLES=glibc.cpu.hwcaps=-AVX_Usable,-AVX_Fast_Unaligned_Load,-AVX2_Usable + // to avoid use of AVX in glibc. This means we can test the Ymm save/restore logic that is + // used in Windows kernel using Linux user mode. + initVectorRegisters(); + + addAllAlgs(); + + if (!g_profile && !g_measure_specific_sizes) + { + runFunctionalTests(); + } + + // Check that all uses of vector registers in the functional unit tests correctly saved/restored + verifyVectorRegisters(); + + + if (g_profile) + { + runProfiling(); + } + else + { + runPerfTests(); + + if (!g_measure_specific_sizes) + { + testSelftest(); + + // Disable Vector save tests for multithreaded tests + TestSaveXmmEnabled = FALSE; + TestSaveYmmEnabled = FALSE; + testMultiThread(); + } + } + + exitTestInfrastructure(); + return 0; } - -VOID -scheduleAsyncTest( SelfTestFn f ) -{ - // - // No async testing in user mode, just run the test in-line. - // - f(); -} - - -VOID -testMultiThread() -{ - HANDLE threads[64]; - int i; - g_fExitMultithreadTest = FALSE; - g_nMultithreadTestsRun = 0; - - iprint( "\nMulti-thread test..." ); - - for( i=0; i 6*sizeof( g_ymmStartState[0] ))) + ) + { + FATAL3( "Ymm registers modified without proper save/restore Ymm%d[%d]", i>>5, i&31); + } + } + } +#if SYMCRYPT_CPU_AMD64 + // + // For x86 all vector registers Xmm0-Xmm7 are volatile by default - so we cannot test that + // they are not modified. E.g. In the unit tests we call BCrypt to generate random numbers + // and BCrypt can trash the full Xmm state, as this is how our AES intrinsics are compiled + // (using all registers and no save/restore in prologue/epilogue). + // The CRT is also free to trash the state semi-arbitrarily (observationally the CRT tends to + // only trash Xmm0 - Xmm5, same as AMD64, but it is free to use all Xmm registers) + // + if (TestSaveXmmEnabled && SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_SSE2) && !SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL)) + { + memset( g_xmmTestState, 0, sizeof( g_xmmTestState ) ); + SymCryptEnvUmSaveXmmRegistersAsm(g_xmmTestState); + + // + // For MSFT x64 ABI Xmm6-Xmm15 are non-volatile so should be preserved. We just check this + // is done, which gives us confidence none of our assembly breaks the ABI. + // + for( int i = 6 * sizeof(g_xmmStartState[0]); i < sizeof(g_xmmStartState); i++ ) + { + if( ((volatile BYTE * )&g_xmmStartState[0])[i] != ((volatile BYTE * )&g_xmmTestState[0])[i] ) + { + FATAL3( "Xmm registers modified without proper save/restore Xmm%d[%d]", i>>4, i&15); + } + } + } +#endif +} + +VOID +initVectorRegisters() +{ + if (TestSaveYmmEnabled && SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_AVX2)) + { + // + // Do the memsets outside the save area as it might use vector registers + // Set the initial Ymm registers to a non-trivial value. It is likely (for performance + // reasons) that the upper halves are already zero-ed and will be re-zeroed by any function + // we call. + // + memset( g_ymmTestState, 17, sizeof( g_ymmTestState ) ); + memset( g_ymmStartState, (__rdtsc() & 255) ^ 0x42, sizeof( g_ymmStartState ) ); + SymCryptEnvUmRestoreYmmRegistersAsm( g_ymmStartState ); + verifyVectorRegisters(); + } +#if SYMCRYPT_CPU_AMD64 + if (TestSaveXmmEnabled && SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_SSE2) && !SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL)) + { + // + // Do the memsets outside the save area as it might use Xmm registers + // Set the initial Xmm registers to a non-trivial value. + // + memset( g_xmmTestState, 17, sizeof( g_xmmTestState ) ); + memset( g_xmmStartState, (__rdtsc() & 255) ^ 0x42, sizeof( g_xmmStartState ) ); + SymCryptEnvUmRestoreXmmRegistersAsm( g_xmmStartState ); + verifyVectorRegisters(); + } +#endif +} + +#else + +VOID verifyVectorRegisters() +{ +} + +VOID initVectorRegisters() +{ +} + +#endif + +DWORD WINAPI umThreadFunc( LPVOID param ) +{ + runTestThread( param ); + return 0; +} + +VOID +scheduleAsyncTest( SelfTestFn f ) +{ + // + // No async testing in user mode, just run the test in-line. + // + f(); +} + +VOID +testMultiThread() +{ + HANDLE threads[64]; + int i; + g_fExitMultithreadTest = FALSE; + g_nMultithreadTestsRun = 0; + + iprint( "\nMulti-thread test..." ); + + for( i=0; i::setKey( PCBYTE pbKey, SIZE_T cbKey ) { SYMCRYPT_ERROR e; - initXmmRegisters(); e = SYMCRYPT_XxxExpandKey( &state.key, pbKey, cbKey ); - verifyXmmRegisters(); + verifyVectorRegisters(); if( e != SYMCRYPT_NO_ERROR ) { @@ -88,9 +87,8 @@ BlockCipherImp::encrypt( PBYTE pbChain, SIZE_T cbChain, CHECK( cbData % msgBlockLen() == 0, "Wrong data length" ); CHECK( cbChain == chainBlockLen(), "Wrong chain len" ); - initXmmRegisters(); SYMCRYPT_XxxXxxEncrypt( &state.key, pbChain, pbSrc, pbDst, cbData ); - verifyXmmRegisters(); + verifyVectorRegisters(); } template<> @@ -101,8 +99,7 @@ BlockCipherImp::decrypt( PBYTE pbChain, SIZE_T cbChain, CHECK( cbData % msgBlockLen() == 0, "Wrong data length" ); CHECK( cbChain == chainBlockLen(), "Wrong chain len" ); - initXmmRegisters(); SYMCRYPT_XxxXxxDecrypt( &state.key, pbChain, pbSrc, pbDst, cbData ); - verifyXmmRegisters(); + verifyVectorRegisters(); } diff --git a/unittest/lib/sc_imp_hashpattern.cpp b/unittest/lib/sc_imp_hashpattern.cpp index 655f0af..334ec03 100644 --- a/unittest/lib/sc_imp_hashpattern.cpp +++ b/unittest/lib/sc_imp_hashpattern.cpp @@ -25,7 +25,6 @@ HashImp< ImpXxx, AlgXxx >::HashImp() SYMCRYPT_XXX_STATE hashState; BYTE exportBlob[SYMCRYPT_XXX_STATE_EXPORT_SIZE]; - initXmmRegisters(); SYMCRYPT_XxxInit( &hashState ); for( int i=0; i<200; i++ ) { @@ -44,7 +43,7 @@ HashImp< ImpXxx, AlgXxx >::HashImp() CHECK( SymCryptHashStateSize( SYMCRYPT_XxxAlgorithm ) == sizeof( SYMCRYPT_XXX_STATE ), "State size mismatch" ); state.isReset = FALSE; - verifyXmmRegisters(); + verifyVectorRegisters(); } // @@ -90,15 +89,13 @@ VOID HashImp::hash( SYMCRYPT_XXX_STATE state2; SIZE_T halfSize = cbData >> 1; - initXmmRegisters(); CHECK( cbResult == SYMCRYPT_XXX_RESULT_SIZE, "Result len error in SymCrypt" STRING( ALG_Name ) ); SYMCRYPT_Xxx( pbData, cbData, pbResult ); - verifyXmmRegisters(); + verifyVectorRegisters(); - initXmmRegisters(); SymCryptHash( SYMCRYPT_XxxAlgorithm, pbData, cbData, splitResult, cbResult ); CHECK( memcmp( splitResult, pbResult, SYMCRYPT_XXX_RESULT_SIZE ) == 0, "Generic hash error in SymCrypt" STRING( ALG_Name ) ); - verifyXmmRegisters(); + verifyVectorRegisters(); SYMCRYPT_XxxInit( &state1 ); SYMCRYPT_XxxAppend( &state1, pbData, halfSize ); @@ -106,7 +103,7 @@ VOID HashImp::hash( SYMCRYPT_XxxAppend( &state2, pbData+halfSize, cbData-halfSize ); SYMCRYPT_XxxResult( &state2, splitResult ); CHECK( memcmp( splitResult, pbResult, SYMCRYPT_XXX_RESULT_SIZE ) == 0, "State copy error in SymCrypt" STRING( ALG_Name ) ); - verifyXmmRegisters(); + verifyVectorRegisters(); SYMCRYPT_XxxInit( &state1 ); SYMCRYPT_XxxAppend( &state1, pbData, halfSize ); @@ -117,7 +114,7 @@ VOID HashImp::hash( SYMCRYPT_XxxAppend( &state2, pbData+halfSize, cbData-halfSize ); SYMCRYPT_XxxResult( &state2, splitResult ); CHECK( memcmp( splitResult, pbResult, SYMCRYPT_XXX_RESULT_SIZE ) == 0, "Import/Export error in SymCrypt" STRING( ALG_Name ) ); - verifyXmmRegisters(); + verifyVectorRegisters(); } @@ -129,24 +126,22 @@ VOID HashImp::hash( template<> VOID HashImp::init() { - initXmmRegisters(); if( !state.isReset || (g_rng.byte() & 1) == 0 ) { SYMCRYPT_XxxInit( &state.sc ); SymCryptHashInit( SYMCRYPT_XxxAlgorithm, &state.scHash ); } state.isReset = TRUE; - verifyXmmRegisters(); + verifyVectorRegisters(); } template<> VOID HashImp::append( _In_reads_( cbData ) PCBYTE pbData, SIZE_T cbData ) { - initXmmRegisters(); SYMCRYPT_XxxAppend( &state.sc, pbData, cbData ); SymCryptHashAppend( SYMCRYPT_XxxAlgorithm, &state.scHash, pbData, cbData ); state.isReset = FALSE; - verifyXmmRegisters(); + verifyVectorRegisters(); } template<> @@ -155,12 +150,11 @@ VOID HashImp::result( _Out_writes_( cbResult ) PBYTE pbResult, SI BYTE buf[SYMCRYPT_HASH_MAX_RESULT_SIZE]; CHECK( cbResult == SYMCRYPT_XXX_RESULT_SIZE, "Result len error in SymCrypt " STRING( ALG_Name ) ); - initXmmRegisters(); SYMCRYPT_XxxResult( &state.sc, pbResult ); SymCryptHashResult( SYMCRYPT_XxxAlgorithm, &state.scHash, buf, sizeof( buf ) ); CHECK( memcmp( pbResult, buf, cbResult ) == 0, "Inconsistent result" ); state.isReset = TRUE; - verifyXmmRegisters(); + verifyVectorRegisters(); } template<> @@ -171,12 +165,11 @@ NTSTATUS HashImp::exportSymCryptFormat( { CHECK( cbResultBufferSize >= SYMCRYPT_XXX_STATE_EXPORT_SIZE, "Export buffer too small" ); - initXmmRegisters(); SYMCRYPT_XxxStateExport( &state.sc, pbResult ); *pcbResult = SYMCRYPT_XXX_STATE_EXPORT_SIZE; SymCryptWipeKnownSize( &state.sc, sizeof( state.sc ) ); SYMCRYPT_XxxStateImport( &state.sc, pbResult ); - verifyXmmRegisters(); + verifyVectorRegisters(); return STATUS_SUCCESS; } diff --git a/unittest/lib/sc_imp_hkdfpattern.cpp b/unittest/lib/sc_imp_hkdfpattern.cpp index 656f3ea..0ec644d 100644 --- a/unittest/lib/sc_imp_hkdfpattern.cpp +++ b/unittest/lib/sc_imp_hkdfpattern.cpp @@ -86,14 +86,13 @@ KdfImp::derive( } // 1) Full HKDF - initXmmRegisters(); scError = SymCryptHkdf( SYMCRYPT_BaseXxxAlgorithm, pbKey, cbKey, pbSalt, cbSalt, pbInfo, cbInfo, &buf1[0], cbDst); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt HKDF"); @@ -102,7 +101,7 @@ KdfImp::derive( SYMCRYPT_BaseXxxAlgorithm, pbKey, cbKey, pbSalt, cbSalt ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt HKDF"); SymCryptMarvin32(SymCryptMarvin32DefaultSeed, (PCBYTE)&expandedKey, sizeof(expandedKey), expandedKeyChecksum); @@ -110,7 +109,7 @@ KdfImp::derive( scError = SymCryptHkdfDerive( &expandedKey, pbInfo, cbInfo, &buf2[0], cbDst); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt HKDF"); CHECK(memcmp(buf1, buf2, cbDst) == 0, "SymCrypt HKDF calling versions disagree"); @@ -123,13 +122,13 @@ KdfImp::derive( pbKey, cbKey, pbSalt, cbSalt, rbPrk, SYMCRYPT_BaseXxxAlgorithm->resultSize ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt HKDF"); scError = SymCryptHkdfPrkExpandKey( &expandedKey, SYMCRYPT_BaseXxxAlgorithm, rbPrk, SYMCRYPT_BaseXxxAlgorithm->resultSize ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt HKDF"); SymCryptMarvin32(SymCryptMarvin32DefaultSeed, (PCBYTE)&expandedKey, sizeof(expandedKey), expandedKeyChecksum); @@ -137,7 +136,7 @@ KdfImp::derive( scError = SymCryptHkdfDerive( &expandedKey, pbInfo, cbInfo, &buf2[0], cbDst); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt HKDF"); CHECK(memcmp(buf1, buf2, cbDst) == 0, "SymCrypt HKDF calling versions disagree"); diff --git a/unittest/lib/sc_imp_macpattern.cpp b/unittest/lib/sc_imp_macpattern.cpp index 34af240..dfddd51 100644 --- a/unittest/lib/sc_imp_macpattern.cpp +++ b/unittest/lib/sc_imp_macpattern.cpp @@ -64,13 +64,11 @@ NTSTATUS MacImp::mac( CHECK( cbResult == SYMCRYPT_XXX_RESULT_SIZE, "Result len error in SymCrypt" STRING( MAC_Name ) ); - initXmmRegisters(); - SYMCRYPT_XxxExpandKey( &state.key, pbKey, cbKey ); - verifyXmmRegisters(); + verifyVectorRegisters(); SYMCRYPT_Xxx( &state.key, pbData, cbData, pbResult ); - verifyXmmRegisters(); + verifyVectorRegisters(); // // Test the key & state duplication functions @@ -78,7 +76,7 @@ NTSTATUS MacImp::mac( SYMCRYPT_XxxExpandKey( &key1, pbKey, cbKey ); SYMCRYPT_XxxKeyCopy( &key1, &key2 ); SymCryptWipe( &key1, sizeof( key1 ) ); - verifyXmmRegisters(); + verifyVectorRegisters(); SYMCRYPT_XxxInit( &state1, &key2 ); SYMCRYPT_XxxAppend( &state1, pbData, halfSize ); @@ -86,13 +84,13 @@ NTSTATUS MacImp::mac( SYMCRYPT_XxxAppend( &state2, pbData+halfSize, cbData-halfSize ); SYMCRYPT_XxxResult( &state2, splitResult ); CHECK( memcmp( splitResult, pbResult, SYMCRYPT_XXX_RESULT_SIZE ) == 0, "State copy error in SymCrypt" STRING( ALG_Name ) ); - verifyXmmRegisters(); + verifyVectorRegisters(); SYMCRYPT_XxxStateCopy( &state1, &state.key, &state2 ); SYMCRYPT_XxxAppend( &state2, pbData+halfSize, cbData-halfSize ); SYMCRYPT_XxxResult( &state2, splitResult ); CHECK( memcmp( splitResult, pbResult, SYMCRYPT_XXX_RESULT_SIZE ) == 0, "State copy error in SymCrypt" STRING( ALG_Name ) ); - verifyXmmRegisters(); + verifyVectorRegisters(); SymCryptWipeKnownSize( &state.key, sizeof( state.key ) ); SymCryptWipeKnownSize( &state1, sizeof( state1 ) ); @@ -110,10 +108,9 @@ NTSTATUS MacImp::mac( template<> NTSTATUS MacImp::init( _In_reads_( cbKey ) PCBYTE pbKey, SIZE_T cbKey ) { - initXmmRegisters(); SYMCRYPT_XxxExpandKey( &state.key, pbKey, cbKey ); SYMCRYPT_XxxInit( &state.state, &state.key ); - verifyXmmRegisters(); + verifyVectorRegisters(); return STATUS_SUCCESS; } @@ -121,9 +118,8 @@ NTSTATUS MacImp::init( _In_reads_( cbKey ) PCBYTE pbKey, SIZE_T template<> VOID MacImp::append( _In_reads_( cbData ) PCBYTE pbData, SIZE_T cbData ) { - initXmmRegisters(); SYMCRYPT_XxxAppend( &state.state, pbData, cbData ); - verifyXmmRegisters(); + verifyVectorRegisters(); } template<> @@ -131,9 +127,8 @@ VOID MacImp::result( _Out_writes_( cbResult ) PBYTE pbResult, SI { CHECK( cbResult == SYMCRYPT_XXX_RESULT_SIZE, "Result len error in SymCrypt " STRING( MAC_Name ) ); - initXmmRegisters(); SYMCRYPT_XxxResult( &state.state, pbResult ); - verifyXmmRegisters(); + verifyVectorRegisters(); } template<> diff --git a/unittest/lib/sc_imp_pbkdf2pattern.cpp b/unittest/lib/sc_imp_pbkdf2pattern.cpp index 80cdc77..370220d 100644 --- a/unittest/lib/sc_imp_pbkdf2pattern.cpp +++ b/unittest/lib/sc_imp_pbkdf2pattern.cpp @@ -43,21 +43,20 @@ KdfImp::derive( CHECK( cbDst <= sizeof( buf1 ), "PBKDF2 output too large" ); - initXmmRegisters(); scError = SymCryptPbkdf2( SYMCRYPT_BaseXxxAlgorithm, pbKey, cbKey, pbSalt, cbSalt, iterationCnt, &buf1[0], cbDst ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt PBKDF2" ); scError = SymCryptPbkdf2ExpandKey( &expandedKey, SYMCRYPT_BaseXxxAlgorithm, pbKey, cbKey ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt PBKDF2" ); SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &expandedKey, sizeof( expandedKey ), expandedKeyChecksum ); @@ -66,7 +65,7 @@ KdfImp::derive( pbSalt, cbSalt, iterationCnt, &buf2[0], cbDst ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt PBKDF2" ); CHECK( memcmp( buf1, buf2, cbDst ) == 0, "SymCrypt PBKDF2 calling versions disagree" ); diff --git a/unittest/lib/sc_imp_sp800_108pattern.cpp b/unittest/lib/sc_imp_sp800_108pattern.cpp index 6cc46b4..d4a6c39 100644 --- a/unittest/lib/sc_imp_sp800_108pattern.cpp +++ b/unittest/lib/sc_imp_sp800_108pattern.cpp @@ -47,21 +47,20 @@ KdfImp::derive( return; } - initXmmRegisters(); scError = SymCryptSp800_108( SYMCRYPT_BaseXxxAlgorithm, pbKey, cbKey, pbLabel, cbLabel, pbContext, cbContext, &buf1[0], cbDst ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt SP800_108" ); scError = SymCryptSp800_108ExpandKey( &expandedKey, SYMCRYPT_BaseXxxAlgorithm, pbKey, cbKey ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt SP800_108" ); SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &expandedKey, sizeof( expandedKey ), expandedKeyChecksum ); @@ -70,7 +69,7 @@ KdfImp::derive( pbLabel, cbLabel, pbContext, cbContext, &buf2[0], cbDst ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt SP800_108" ); CHECK( memcmp( buf1, buf2, cbDst ) == 0, "SymCrypt SP800_108 calling versions disagree" ); diff --git a/unittest/lib/sc_imp_tlsprf1_1pattern.cpp b/unittest/lib/sc_imp_tlsprf1_1pattern.cpp index 2138920..fcf9751 100644 --- a/unittest/lib/sc_imp_tlsprf1_1pattern.cpp +++ b/unittest/lib/sc_imp_tlsprf1_1pattern.cpp @@ -85,19 +85,18 @@ KdfImp::derive( return; } - initXmmRegisters(); scError = SymCryptTlsPrf1_1( pbKey, cbKey, pbLabel, cbLabel, pbSeed, cbSeed, &buf1[0], cbDst); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt TLS PRF 1.1"); scError = SymCryptTlsPrf1_1ExpandKey(&expandedKey, pbKey, cbKey); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt TLS PRF 1.1"); SymCryptMarvin32(SymCryptMarvin32DefaultSeed, (PCBYTE)&expandedKey, sizeof(expandedKey), expandedKeyChecksum); @@ -106,7 +105,7 @@ KdfImp::derive( pbLabel, cbLabel, pbSeed, cbSeed, &buf2[0], cbDst); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt TLS PRF 1.1"); CHECK(memcmp(buf1, buf2, cbDst) == 0, "SymCrypt TLS PRF 1.1 calling versions disagree"); diff --git a/unittest/lib/sc_imp_tlsprf1_2pattern.cpp b/unittest/lib/sc_imp_tlsprf1_2pattern.cpp index a094f17..669c59a 100644 --- a/unittest/lib/sc_imp_tlsprf1_2pattern.cpp +++ b/unittest/lib/sc_imp_tlsprf1_2pattern.cpp @@ -40,21 +40,20 @@ KdfImp::derive( return; } - initXmmRegisters(); scError = SymCryptTlsPrf1_2( SYMCRYPT_BaseXxxAlgorithm, pbKey, cbKey, pbLabel, cbLabel, pbSeed, cbSeed, &buf1[0], cbDst); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt TLS PRF 1.2"); scError = SymCryptTlsPrf1_2ExpandKey( &expandedKey, SYMCRYPT_BaseXxxAlgorithm, pbKey, cbKey); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt TLS PRF 1.2"); SymCryptMarvin32(SymCryptMarvin32DefaultSeed, (PCBYTE)&expandedKey, sizeof(expandedKey), expandedKeyChecksum); @@ -63,7 +62,7 @@ KdfImp::derive( pbLabel, cbLabel, pbSeed, cbSeed, &buf2[0], cbDst); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK(scError == SYMCRYPT_NO_ERROR, "Error in SymCrypt TLS PRF 1.2"); CHECK(memcmp(buf1, buf2, cbDst) == 0, "SymCrypt TLS PRF 1.2 calling versions disagree"); diff --git a/unittest/lib/sc_implementations.cpp b/unittest/lib/sc_implementations.cpp index 0652557..738a0f9 100644 --- a/unittest/lib/sc_implementations.cpp +++ b/unittest/lib/sc_implementations.cpp @@ -970,9 +970,8 @@ NTSTATUS AuthEncImp::setKey( PCBYTE pbKey, SIZE_T cbKey ) { CHECK( cbKey == 16 || cbKey == 24 || cbKey == 32, "?" ); - initXmmRegisters(); SymCryptAesExpandKey( &state.key, pbKey, cbKey ); - verifyXmmRegisters(); + verifyVectorRegisters(); state.inComputation = FALSE; return STATUS_SUCCESS; @@ -1008,19 +1007,18 @@ AuthEncImp::encrypt( if( (flags & AUTHENC_FLAG_PARTIAL) == 0 ) { // simple straight CCM computation. - initXmmRegisters(); CHECK( SymCryptCcmValidateParameters( SymCryptAesBlockCipher, cbNonce, cbAuthData, cbData, cbTag ) == SYMCRYPT_NO_ERROR, "?" ); - verifyXmmRegisters(); + verifyVectorRegisters(); SymCryptCcmEncrypt( SymCryptAesBlockCipher, &state.key, pbNonce, cbNonce, pbAuthData, cbAuthData, pbSrc, pbDst, cbData, pbTag, cbTag ); - verifyXmmRegisters(); + verifyVectorRegisters(); // Done goto cleanup; @@ -1030,23 +1028,20 @@ AuthEncImp::encrypt( { CHECK( (flags & AUTHENC_FLAG_PARTIAL) != 0, "?" ); // total cbData is passed in the cbTag parameter in the first partial call - initXmmRegisters(); SymCryptCcmInit( &state.ccmState, SymCryptAesBlockCipher, &state.key, pbNonce, cbNonce, pbAuthData, cbAuthData, state.totalCbData, cbTag ); - verifyXmmRegisters(); + verifyVectorRegisters(); state.inComputation = TRUE; } // We can process the next part before we decide whether to produce the tag. - initXmmRegisters(); SymCryptCcmEncryptPart( &state.ccmState, pbSrc, pbDst, cbData ); - verifyXmmRegisters(); + verifyVectorRegisters(); if( pbTag != NULL ) { - initXmmRegisters(); SymCryptCcmEncryptFinal( &state.ccmState, pbTag, cbTag ); - verifyXmmRegisters(); + verifyVectorRegisters(); state.inComputation = FALSE; } @@ -1081,19 +1076,18 @@ AuthEncImp::decrypt( if( (flags & AUTHENC_FLAG_PARTIAL) == 0 ) { // simple straight CCM computation. - initXmmRegisters(); CHECK( SymCryptCcmValidateParameters( SymCryptAesBlockCipher, cbNonce, cbAuthData, cbData, cbTag ) == SYMCRYPT_NO_ERROR, "?" ); - verifyXmmRegisters(); + verifyVectorRegisters(); scError = SymCryptCcmDecrypt( SymCryptAesBlockCipher, &state.key, pbNonce, cbNonce, pbAuthData, cbAuthData, pbSrc, pbDst, cbData, pbTag, cbTag ); - verifyXmmRegisters(); + verifyVectorRegisters(); if( scError == SYMCRYPT_AUTHENTICATION_FAILURE ) { @@ -1109,21 +1103,18 @@ AuthEncImp::decrypt( if( !state.inComputation ) { // First call of a partial computation. - initXmmRegisters(); SymCryptCcmInit( &state.ccmState, SymCryptAesBlockCipher, &state.key, pbNonce, cbNonce, pbAuthData, cbAuthData, state.totalCbData, cbTag ); - verifyXmmRegisters(); + verifyVectorRegisters(); state.inComputation = TRUE; } // We can process the next part before we decide whether to produce the tag. - initXmmRegisters(); SymCryptCcmDecryptPart( &state.ccmState, pbSrc, pbDst, cbData ); - verifyXmmRegisters(); + verifyVectorRegisters(); if( pbTag != NULL ) { - initXmmRegisters(); scError = SymCryptCcmDecryptFinal( &state.ccmState, pbTag, cbTag ); if( scError == SYMCRYPT_AUTHENTICATION_FAILURE ) { @@ -1131,7 +1122,7 @@ AuthEncImp::decrypt( } else { CHECK( scError == SYMCRYPT_NO_ERROR, "?" ); } - verifyXmmRegisters(); + verifyVectorRegisters(); state.inComputation = FALSE; } @@ -1252,9 +1243,8 @@ AuthEncImp::setKey( PCBYTE pbKey, SIZE_T cbKey ) { CHECK( cbKey == 16 || cbKey == 24 || cbKey == 32, "?" ); - initXmmRegisters(); SymCryptGcmExpandKey( &state.key, SymCryptAesBlockCipher, pbKey, cbKey ); - verifyXmmRegisters(); + verifyVectorRegisters(); state.inComputation = FALSE; return STATUS_SUCCESS; @@ -1288,20 +1278,18 @@ AuthEncImp::encrypt( if( (flags & AUTHENC_FLAG_PARTIAL) == 0 ) { // simple straight GCM computation. - initXmmRegisters(); CHECK( SymCryptGcmValidateParameters( SymCryptAesBlockCipher, cbNonce, cbAuthData, cbData, cbTag ) == SYMCRYPT_NO_ERROR, "?" ); - verifyXmmRegisters(); + verifyVectorRegisters(); - initYmmRegisters(); SymCryptGcmEncrypt( &state.key, pbNonce, cbNonce, pbAuthData, cbAuthData, pbSrc, pbDst, cbData, pbTag, cbTag ); - verifyYmmRegisters(); + verifyVectorRegisters(); // Done goto cleanup; @@ -1317,37 +1305,32 @@ AuthEncImp::encrypt( { CHECK( (flags & AUTHENC_FLAG_PARTIAL) != 0, "?" ); // total cbData is passed in the cbTag parameter in the first partial call - initXmmRegisters(); SymCryptGcmInit( &gcmState1, (g_rng.byte() & 1) ? &state.key : &gcmKey2, pbNonce, cbNonce ); - verifyXmmRegisters(); + verifyVectorRegisters(); SIZE_T bytesDone = 0; while( bytesDone != cbAuthData ) { SIZE_T bytesThisLoop = g_rng.sizet( cbAuthData - bytesDone + 1); - initXmmRegisters(); SymCryptGcmAuthPart( &gcmState1, &pbAuthData[bytesDone], bytesThisLoop ); - verifyXmmRegisters(); + verifyVectorRegisters(); bytesDone += bytesThisLoop; } state.inComputation = TRUE; } else { - initXmmRegisters(); SymCryptGcmStateCopy( &state.gcmState, (g_rng.byte() & 1) ? &gcmKey2 : NULL , &gcmState1 ); - verifyXmmRegisters(); + verifyVectorRegisters(); } // Using gcmState1 which is using gcmKey2 or state.key. - initYmmRegisters(); SymCryptGcmEncryptPart( &gcmState1, pbSrc, pbDst, cbData ); - verifyYmmRegisters(); + verifyVectorRegisters(); if( pbTag != NULL ) { - initXmmRegisters(); SymCryptGcmEncryptFinal( &gcmState1, pbTag, cbTag ); - verifyXmmRegisters(); + verifyVectorRegisters(); state.inComputation = FALSE; } else { @@ -1381,20 +1364,18 @@ AuthEncImp::decrypt( if( (flags & AUTHENC_FLAG_PARTIAL) == 0 ) { // simple straight GCM computation. - initXmmRegisters(); CHECK( SymCryptGcmValidateParameters( SymCryptAesBlockCipher, cbNonce, cbAuthData, cbData, cbTag ) == SYMCRYPT_NO_ERROR, "?" ); - verifyXmmRegisters(); + verifyVectorRegisters(); - initYmmRegisters(); scError = SymCryptGcmDecrypt( &state.key, pbNonce, cbNonce, pbAuthData, cbAuthData, pbSrc, pbDst, cbData, pbTag, cbTag ); - verifyYmmRegisters(); + verifyVectorRegisters(); // Done goto cleanup; @@ -1410,37 +1391,32 @@ AuthEncImp::decrypt( { CHECK( (flags & AUTHENC_FLAG_PARTIAL) != 0, "?" ); // total cbData is passed in the cbTag parameter in the first partial call - initXmmRegisters(); SymCryptGcmInit( &gcmState1, (g_rng.byte() & 1) ? &state.key : &gcmKey2, pbNonce, cbNonce ); - verifyXmmRegisters(); + verifyVectorRegisters(); SIZE_T bytesDone = 0; while( bytesDone != cbAuthData ) { SIZE_T bytesThisLoop = g_rng.sizet( cbAuthData - bytesDone + 1); - initXmmRegisters(); SymCryptGcmAuthPart( &gcmState1, &pbAuthData[bytesDone], bytesThisLoop ); - verifyXmmRegisters(); + verifyVectorRegisters(); bytesDone += bytesThisLoop; } state.inComputation = TRUE; } else { - initXmmRegisters(); SymCryptGcmStateCopy( &state.gcmState, (g_rng.byte() & 1) ? &gcmKey2 : NULL , &gcmState1 ); - verifyXmmRegisters(); + verifyVectorRegisters(); } // Using gcmState1 which is using gcmKey2 or state.key. - initYmmRegisters(); SymCryptGcmDecryptPart( &gcmState1, pbSrc, pbDst, cbData ); - verifyYmmRegisters(); + verifyVectorRegisters(); if( pbTag != NULL ) { - initXmmRegisters(); scError = SymCryptGcmDecryptFinal( &gcmState1, pbTag, cbTag ); - verifyXmmRegisters(); + verifyVectorRegisters(); state.inComputation = FALSE; } else { @@ -1982,9 +1958,8 @@ RngSp800_90Imp::instantiate( _In_reads_( cbEntropy ) PCBYT { SYMCRYPT_ERROR scError; - initXmmRegisters(); scError = SymCryptRngAesInstantiate( &state.state, pbEntropy, cbEntropy ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Error during instantiation" ); @@ -1997,9 +1972,8 @@ RngSp800_90Imp::reseed( _In_reads_( cbEntropy ) PCBYTE pbE { SYMCRYPT_ERROR scError; - initXmmRegisters(); scError = SymCryptRngAesReseed( &state.state, pbEntropy, cbEntropy ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK3( scError == SYMCRYPT_NO_ERROR, "Error during reseed, len=%lld", (ULONGLONG) cbEntropy ); @@ -2010,10 +1984,8 @@ template<> VOID RngSp800_90Imp::generate( _Out_writes_( cbData ) PBYTE pbData, SIZE_T cbData ) { - - initXmmRegisters(); SymCryptRngAesGenerate( &state.state, pbData, cbData ); - verifyXmmRegisters(); + verifyVectorRegisters(); } @@ -2067,9 +2039,8 @@ RngSp800_90Imp::instantiate( _In_reads_( cbEntropy ) PCBYT { SYMCRYPT_ERROR scError; - initXmmRegisters(); scError = SymCryptRngAesFips140_2Instantiate( &state.state, pbEntropy, cbEntropy ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Error during instantiation" ); @@ -2082,9 +2053,8 @@ RngSp800_90Imp::reseed( _In_reads_( cbEntropy ) PCBYTE pbE { SYMCRYPT_ERROR scError; - initXmmRegisters(); scError = SymCryptRngAesFips140_2Reseed( &state.state, pbEntropy, cbEntropy ); - verifyXmmRegisters(); + verifyVectorRegisters(); CHECK3( scError == SYMCRYPT_NO_ERROR, "Error during reseed, len=%lld", (ULONGLONG) cbEntropy ); @@ -2095,10 +2065,8 @@ template<> VOID RngSp800_90Imp::generate( _Out_writes_( cbData ) PBYTE pbData, SIZE_T cbData ) { - - initXmmRegisters(); SymCryptRngAesFips140_2Generate( &state.state, pbData, cbData ); - verifyXmmRegisters(); + verifyVectorRegisters(); } @@ -2201,9 +2169,8 @@ ParallelHashImp::init( SIZE_T nHashes ) CHECK( nHashes <= MAX_PARALLEL_HASH_STATES, "Too many hash states requested" ); state.nHashes = nHashes; - initYmmRegisters(); SymCryptParallelSha256Init( &state.sc[0], nHashes ); - verifyYmmRegisters(); + verifyVectorRegisters(); } template<> @@ -2237,14 +2204,13 @@ ParallelHashImp::process( scratch[scratchOffset + nScratch] = sentinel; SYMCRYPT_ASSERT( state.nHashes <= MAX_PARALLEL_HASH_STATES ); - initYmmRegisters(); scError = SymCryptParallelSha256Process( &state.sc[0], state.nHashes, &op[0], nOperations, &scratch[scratchOffset], nScratch ); - verifyYmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Parallel SHA256 returned an error" ); CHECK( scratch[scratchOffset + nScratch] == sentinel, "Parallel SHA256 used too much scratch space" ); } @@ -2360,9 +2326,8 @@ ParallelHashImp::init( SIZE_T nHashes ) { CHECK( nHashes <= MAX_PARALLEL_HASH_STATES, "Too many hash states requested" ); state.nHashes = nHashes; - initYmmRegisters(); SymCryptParallelSha384Init( &state.sc[0], nHashes ); - verifyYmmRegisters(); + verifyVectorRegisters(); } template<> @@ -2396,14 +2361,13 @@ ParallelHashImp::process( scratch[scratchOffset + nScratch] = sentinel; SYMCRYPT_ASSERT( state.nHashes <= MAX_PARALLEL_HASH_STATES ); - initYmmRegisters(); scError = SymCryptParallelSha384Process( &state.sc[0], state.nHashes, &op[0], nOperations, &scratch[scratchOffset], nScratch ); - verifyYmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Parallel SHA384 returned an error" ); CHECK( scratch[scratchOffset + nScratch] == sentinel, "Parallel SHA384 used too much scratch space" ); } @@ -2519,9 +2483,8 @@ ParallelHashImp::init( SIZE_T nHashes ) { CHECK( nHashes <= MAX_PARALLEL_HASH_STATES, "Too many hash states requested" ); state.nHashes = nHashes; - initYmmRegisters(); SymCryptParallelSha512Init( &state.sc[0], nHashes ); - verifyYmmRegisters(); + verifyVectorRegisters(); } template<> @@ -2555,14 +2518,13 @@ ParallelHashImp::process( scratch[scratchOffset + nScratch] = sentinel; SYMCRYPT_ASSERT( state.nHashes <= MAX_PARALLEL_HASH_STATES ); - initYmmRegisters(); scError = SymCryptParallelSha512Process( &state.sc[0], state.nHashes, &op[0], nOperations, &scratch[scratchOffset], nScratch ); - verifyYmmRegisters(); + verifyVectorRegisters(); CHECK( scError == SYMCRYPT_NO_ERROR, "Parallel SHA512 returned an error" ); CHECK( scratch[scratchOffset + nScratch] == sentinel, "Parallel SHA512 used too much scratch space" ); } @@ -2656,9 +2618,8 @@ XtsImp::setKey( PCBYTE pbKey, SIZE_T cbKey ) { SYMCRYPT_ERROR scError; - initXmmRegisters(); scError = SymCryptXtsAesExpandKey( &state.key, pbKey, cbKey ); - verifyXmmRegisters(); + verifyVectorRegisters(); return scError == SYMCRYPT_NO_ERROR ? 0 : STATUS_NOT_SUPPORTED; } @@ -2672,14 +2633,13 @@ XtsImp::encrypt( _Out_writes_( cbData ) PBYTE pbDst, SIZE_T cbData ) { - initYmmRegisters(); SymCryptXtsAesEncrypt( &state.key, cbDataUnit, tweak, pbSrc, pbDst, cbData ); - verifyYmmRegisters(); + verifyVectorRegisters(); } template<> @@ -2691,14 +2651,13 @@ XtsImp::decrypt( _Out_writes_( cbData ) PBYTE pbDst, SIZE_T cbData ) { - initYmmRegisters(); SymCryptXtsAesDecrypt( &state.key, cbDataUnit, tweak, pbSrc, pbDst, cbData ); - verifyYmmRegisters(); + verifyVectorRegisters(); } diff --git a/unittest/lib/sources b/unittest/lib/sources index 5faa2b2..60195ac 100644 --- a/unittest/lib/sources +++ b/unittest/lib/sources @@ -50,7 +50,7 @@ SOURCES= \ testUtil.cpp \ testKdf.cpp \ testTlsCbcHmac.cpp \ - env_symcryptunittest.cpp \ + env_windowssymcryptunittest.cpp \ testMultiThread.cpp \ rndDriver.cpp \ testArithmetic.cpp \ @@ -76,10 +76,10 @@ SOURCES= \ testPaddingPkcs7.cpp \ I386_SOURCES = \ - savexmm.asm \ + savevectors.asm \ AMD64_SOURCES = \ - saveymm.asm + savevectors.asm TARGETLIBS= \ # $(DS_LIB_PATH)\rsa32.lib \