Activate vectorized algorithms for ARM64EC (#1798)

This commit is contained in:
Stephan T. Lavavej 2021-04-05 15:27:45 -07:00 коммит произвёл GitHub
Родитель 62137922ab
Коммит 90ea364a57
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файла: 25 добавлений и 3 удаления

Просмотреть файл

@@ -21,7 +21,7 @@ _STL_DISABLE_CLANG_WARNINGS
#pragma push_macro("new")
#undef new
-#if (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_CEE_PURE) && !defined(_M_HYBRID) && !defined(_M_ARM64EC)
+#if (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_CEE_PURE) && !defined(_M_HYBRID)
#ifndef _USE_STD_VECTOR_ALGORITHMS
#define _USE_STD_VECTOR_ALGORITHMS 1
#endif // _USE_STD_VECTOR_ALGORITHMS

Просмотреть файл

@@ -10,11 +10,15 @@
#error _M_CEE_PURE should not be defined when compiling vector_algorithms.cpp.
#endif
-#if (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_ARM64EC)
+#if defined(_M_IX86) || defined(_M_X64)
+#if defined(_M_ARM64EC)
+#include <intrin.h>
+#else // defined(_M_ARM64EC)
#include <emmintrin.h>
#include <immintrin.h>
#include <intrin0.h>
+#endif // defined(_M_ARM64EC)
#include <isa_availability.h>
extern "C" long __isa_enabled;
@@ -50,6 +54,7 @@ static void _Advance_bytes(const void*& _Target, ptrdiff_t _Offset) noexcept {
extern "C" {
__declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias(
void* _First1, void* _Last1, void* _First2) noexcept {
+#if !defined(_M_ARM64EC)
constexpr size_t _Mask_32 = ~((static_cast<size_t>(1) << 5) - 1);
if (_Byte_length(_First1, _Last1) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
const void* _Stop_at = _First1;
@@ -63,6 +68,7 @@ __declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias(
_Advance_bytes(_First2, 32);
} while (_First1 != _Stop_at);
}
+#endif // !defined(_M_ARM64EC)
constexpr size_t _Mask_16 = ~((static_cast<size_t>(1) << 4) - 1);
if (_Byte_length(_First1, _Last1) >= 16
@@ -131,6 +137,7 @@ void* __cdecl __std_swap_ranges_trivially_swappable(void* _First1, void* _Last1,
}
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _First, void* _Last) noexcept {
+#if !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 64 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
const __m256i _Reverse_char_lanes_avx = _mm256_set_epi8( //
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
@@ -150,6 +157,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _Firs
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);
}
+#endif // !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_SSE42)) {
const __m128i _Reverse_char_sse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -171,6 +179,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_1(void* _Firs
}
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _First, void* _Last) noexcept {
+#if !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 64 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
const __m256i _Reverse_short_lanes_avx = _mm256_set_epi8( //
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, //
@@ -188,6 +197,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);
}
+#endif // !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_SSE42)) {
const __m128i _Reverse_short_sse = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
@@ -209,6 +219,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_2(void* _Firs
}
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _First, void* _Last) noexcept {
+#if !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 64 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
@@ -223,6 +234,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);
}
+#endif // !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 32
#ifdef _M_IX86
@@ -247,6 +259,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_4(void* _Firs
}
__declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _First, void* _Last) noexcept {
+#if !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 64 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
const void* _Stop_at = _First;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 6 << 5);
@@ -261,6 +274,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs
_Advance_bytes(_First, 32);
} while (_First != _Stop_at);
}
+#endif // !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 32
#ifdef _M_IX86
@@ -286,6 +300,7 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
const void* _First, const void* _Last, void* _Dest) noexcept {
+#if !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
const __m256i _Reverse_char_lanes_avx = _mm256_set_epi8( //
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //
@@ -300,6 +315,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);
}
+#endif // !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 16 && _bittest(&__isa_enabled, __ISA_AVAILABLE_SSE42)) {
const __m128i _Reverse_char_sse = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -320,6 +336,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_1(
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
const void* _First, const void* _Last, void* _Dest) noexcept {
+#if !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
const __m256i _Reverse_short_lanes_avx = _mm256_set_epi8( //
1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, //
@@ -334,6 +351,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);
}
+#endif // !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 16 && _bittest(&__isa_enabled, __ISA_AVAILABLE_SSE42)) {
const __m128i _Reverse_short_sse = _mm_set_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
@@ -354,6 +372,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_2(
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
const void* _First, const void* _Last, void* _Dest) noexcept {
+#if !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
const void* _Stop_at = _Dest;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
@@ -365,6 +384,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);
}
+#endif // !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 16
#ifdef _M_IX86
@@ -388,6 +408,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_4(
__declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
const void* _First, const void* _Last, void* _Dest) noexcept {
+#if !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 32 && _bittest(&__isa_enabled, __ISA_AVAILABLE_AVX2)) {
const void* _Stop_at = _Dest;
_Advance_bytes(_Stop_at, _Byte_length(_First, _Last) >> 5 << 5);
@@ -399,6 +420,7 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
_Advance_bytes(_Dest, 32);
} while (_Dest != _Stop_at);
}
+#endif // !defined(_M_ARM64EC)
if (_Byte_length(_First, _Last) >= 16
#ifdef _M_IX86
@@ -423,4 +445,4 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
} // extern "C"
-#endif // (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_ARM64EC)
+#endif // defined(_M_IX86) || defined(_M_X64)