зеркало из https://github.com/microsoft/STL.git
Activate vectorized algorithms for ARM64EC (#1798)
This commit is contained in:
Родитель
62137922ab
Коммит
90ea364a57
|
@@ -21,7 +21,7 @@ _STL_DISABLE_CLANG_WARNINGS
|
|||
#pragma push_macro("new")
|
||||
#undef new
|
||||
|
||||
#if (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_CEE_PURE) && !defined(_M_HYBRID) && !defined(_M_ARM64EC)
|
||||
#if (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_CEE_PURE) && !defined(_M_HYBRID)
|
||||
#ifndef _USE_STD_VECTOR_ALGORITHMS
|
||||
#define _USE_STD_VECTOR_ALGORITHMS 1
|
||||
#endif // _USE_STD_VECTOR_ALGORITHMS
|
||||
|
|
|
@@ -10,11 +10,15 @@
|
|||
#error _M_CEE_PURE should not be defined when compiling vector_algorithms.cpp.
|
||||
#endif
|
||||
|
||||
#if (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_ARM64EC)
|
||||
#if defined(_M_IX86) || defined(_M_X64)
|
||||
|
||||
#if defined(_M_ARM64EC)
|
||||
#include <intrin.h>
|
||||
#else // defined(_M_ARM64EC)
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#include <intrin0.h>
|
||||
#endif // defined(_M_ARM64EC)
|
||||
#include <isa_availability.h>
|
||||
|
||||
extern "C" long __isa_enabled;
|
||||
|
@@ -50,6 +54,7 @@ static void _Advance_bytes(const void*& _Target, ptrdiff_t _Offset) noexcept {
|
|||
extern "C" {
|
||||
__declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias(
|
||||
void* _First1, void* _Last1, void* _First2) noexcept {
|
||||
#if !defined(_M_ARM64EC)
|
||||
constexpr size_t _Mask_32 = ~((static_cast<size_t>(1) << 5) - 1);
|
||||
if (_Byte_length(_First1, _Last1) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
|
||||
const void* _Stop_at = _First1;
|
||||
|
@@ -63,6 +68,7 @@ __declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias(
|
|||
_Advance_bytes(_First2, 32);
|
||||
} while (_First1 != _Stop_at);
|
||||
}
|
||||
#endif // !defined(_M_ARM64EC)
|
||||
|
||||
constexpr size_t _Mask_16 = ~((static_cast<size_t>(1) << 4) - 1);
|
||||
if (_Byte_length(_First1, _Last1) >= 16
|
||||
|
@@ -131,6 +137,7 @@ void* __cdecl __std_swap_ranges_trivially_swappable(void* _First1, void* _Last1,
|
|||
}
|
||||
|
||||
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _First, void* _Last) noexcept {
|
||||
#if !defined(_M_ARM64EC)
|
||||
if (_Byte_length(_First, _Last) >= 64 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
|
||||
const __m256i _Reverse_char_lanes_avx = _mm256_set_epi8( //
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
|
||||
|
@@ -150,6 +157,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _Firs
|
|||
_Advance_bytes(_First, 32);
|
||||
} while (_First != _Stop_at);
|
||||
}
|
||||
#endif // !defined(_M_ARM64EC)
|
||||
|
||||
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_SSE42)) {
|
||||
const __m128i _Reverse_char_sse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
|
@@ -171,6 +179,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _Firs
|
|||
}
|
||||
|
||||
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _First, void* _Last) noexcept {
|
||||
#if !defined(_M_ARM64EC)
|
||||
if (_Byte_length(_First, _Last) >= 64 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
|
||||
const __m256i _Reverse_short_lanes_avx = _mm256_set_epi8( //
|
||||
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, //
|
||||
|
@@ -188,6 +197,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
|
|||
_Advance_bytes(_First, 32);
|
||||
} while (_First != _Stop_at);
|
||||
}
|
||||
#endif // !defined(_M_ARM64EC)
|
||||
|
||||
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_SSE42)) {
|
||||
const __m128i _Reverse_short_sse = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
|
||||
|
@@ -209,6 +219,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
|
|||
}
|
||||
|
||||
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _First, void* _Last) noexcept {
|
||||
#if !defined(_M_ARM64EC)
|
||||
if (_Byte_length(_First, _Last) >= 64 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
|
||||
const void* _Stop_at = _First;
|
||||
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
|
||||
|
@@ -223,6 +234,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs
|
|||
_Advance_bytes(_First, 32);
|
||||
} while (_First != _Stop_at);
|
||||
}
|
||||
#endif // !defined(_M_ARM64EC)
|
||||
|
||||
if (_Byte_length(_First, _Last) >= 32
|
||||
#ifdef _M_IX86
|
||||
|
@@ -247,6 +259,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs
|
|||
}
|
||||
|
||||
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _First, void* _Last) noexcept {
|
||||
#if !defined(_M_ARM64EC)
|
||||
if (_Byte_length(_First, _Last) >= 64 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
|
||||
const void* _Stop_at = _First;
|
||||
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
|
||||
|
@@ -261,6 +274,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs
|
|||
_Advance_bytes(_First, 32);
|
||||
} while (_First != _Stop_at);
|
||||
}
|
||||
#endif // !defined(_M_ARM64EC)
|
||||
|
||||
if (_Byte_length(_First, _Last) >= 32
|
||||
#ifdef _M_IX86
|
||||
|
@@ -286,6 +300,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs
|
|||
|
||||
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
|
||||
const void* _First, const void* _Last, void* _Dest) noexcept {
|
||||
#if !defined(_M_ARM64EC)
|
||||
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
|
||||
const __m256i _Reverse_char_lanes_avx = _mm256_set_epi8( //
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
|
||||
|
@@ -300,6 +315,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
|
|||
_Advance_bytes(_Dest, 32);
|
||||
} while (_Dest != _Stop_at);
|
||||
}
|
||||
#endif // !defined(_M_ARM64EC)
|
||||
|
||||
if (_Byte_length(_First, _Last) >= 16 && _bittest(&__isa_enabled, __ISA_AVAILABLE_SSE42)) {
|
||||
const __m128i _Reverse_char_sse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
|
@@ -320,6 +336,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
|
|||
|
||||
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
|
||||
const void* _First, const void* _Last, void* _Dest) noexcept {
|
||||
#if !defined(_M_ARM64EC)
|
||||
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
|
||||
const __m256i _Reverse_short_lanes_avx = _mm256_set_epi8( //
|
||||
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, //
|
||||
|
@@ -334,6 +351,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
|
|||
_Advance_bytes(_Dest, 32);
|
||||
} while (_Dest != _Stop_at);
|
||||
}
|
||||
#endif // !defined(_M_ARM64EC)
|
||||
|
||||
if (_Byte_length(_First, _Last) >= 16 && _bittest(&__isa_enabled, __ISA_AVAILABLE_SSE42)) {
|
||||
const __m128i _Reverse_short_sse = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
|
||||
|
@@ -354,6 +372,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
|
|||
|
||||
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
|
||||
const void* _First, const void* _Last, void* _Dest) noexcept {
|
||||
#if !defined(_M_ARM64EC)
|
||||
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
|
||||
const void* _Stop_at = _Dest;
|
||||
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
|
||||
|
@@ -365,6 +384,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
|
|||
_Advance_bytes(_Dest, 32);
|
||||
} while (_Dest != _Stop_at);
|
||||
}
|
||||
#endif // !defined(_M_ARM64EC)
|
||||
|
||||
if (_Byte_length(_First, _Last) >= 16
|
||||
#ifdef _M_IX86
|
||||
|
@@ -388,6 +408,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
|
|||
|
||||
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
|
||||
const void* _First, const void* _Last, void* _Dest) noexcept {
|
||||
#if !defined(_M_ARM64EC)
|
||||
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
|
||||
const void* _Stop_at = _Dest;
|
||||
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
|
||||
|
@@ -399,6 +420,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
|
|||
_Advance_bytes(_Dest, 32);
|
||||
} while (_Dest != _Stop_at);
|
||||
}
|
||||
#endif // !defined(_M_ARM64EC)
|
||||
|
||||
if (_Byte_length(_First, _Last) >= 16
|
||||
#ifdef _M_IX86
|
||||
|
@@ -423,4 +445,4 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
|
|||
|
||||
} // extern "C"
|
||||
|
||||
#endif // (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_ARM64EC)
|
||||
#endif // defined(_M_IX86) || defined(_M_X64)
|
||||
|
|
Загрузка…
Ссылка в новой задаче