зеркало из https://github.com/microsoft/STL.git
vectorize `min/max_element` using SSE4.1 for floats (#3928)
Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
This commit is contained in:
Родитель
f49ffd2e58
Коммит
192a84008a
|
@ -110,6 +110,7 @@ endfunction()
|
|||
|
||||
add_benchmark(bitset_to_string src/bitset_to_string.cpp)
|
||||
add_benchmark(locale_classic src/locale_classic.cpp)
|
||||
add_benchmark(minmax_element src/minmax_element.cpp)
|
||||
add_benchmark(path_lexically_normal src/path_lexically_normal.cpp)
|
||||
add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp)
|
||||
add_benchmark(random_integer_generation src/random_integer_generation.cpp)
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#include <algorithm>
|
||||
#include <benchmark/benchmark.h>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <random>
|
||||
#include <ranges>
|
||||
#include <type_traits>
|
||||
|
||||
// Selects which algorithm a `bm` instantiation measures.
enum class Op {
    Min, // ranges::min_element
    Max, // ranges::max_element
    Both, // ranges::minmax_element
};
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class T, size_t Size, Op Operation>
|
||||
void bm(benchmark::State& state) {
|
||||
T a[Size];
|
||||
|
||||
mt19937 gen(84710);
|
||||
|
||||
if constexpr (is_floating_point_v<T>) {
|
||||
normal_distribution<T> dis(0, 10000.0);
|
||||
ranges::generate(a, [&] { return dis(gen); });
|
||||
} else {
|
||||
uniform_int_distribution<conditional_t<sizeof(T) != 1, T, int>> dis(1, 20);
|
||||
ranges::generate(a, [&] { return static_cast<T>(dis(gen)); });
|
||||
}
|
||||
|
||||
for (auto _ : state) {
|
||||
if constexpr (Operation == Op::Min) {
|
||||
benchmark::DoNotOptimize(ranges::min_element(a));
|
||||
} else if constexpr (Operation == Op::Max) {
|
||||
benchmark::DoNotOptimize(ranges::max_element(a));
|
||||
} else if constexpr (Operation == Op::Both) {
|
||||
benchmark::DoNotOptimize(ranges::minmax_element(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BENCHMARK(bm<uint8_t, 8021, Op::Min>);
|
||||
BENCHMARK(bm<uint8_t, 8021, Op::Max>);
|
||||
BENCHMARK(bm<uint8_t, 8021, Op::Both>);
|
||||
|
||||
BENCHMARK(bm<uint16_t, 8021, Op::Min>);
|
||||
BENCHMARK(bm<uint16_t, 8021, Op::Max>);
|
||||
BENCHMARK(bm<uint16_t, 8021, Op::Both>);
|
||||
|
||||
BENCHMARK(bm<uint32_t, 8021, Op::Min>);
|
||||
BENCHMARK(bm<uint32_t, 8021, Op::Max>);
|
||||
BENCHMARK(bm<uint32_t, 8021, Op::Both>);
|
||||
|
||||
BENCHMARK(bm<uint64_t, 8021, Op::Min>);
|
||||
BENCHMARK(bm<uint64_t, 8021, Op::Max>);
|
||||
BENCHMARK(bm<uint64_t, 8021, Op::Both>);
|
||||
|
||||
BENCHMARK(bm<int8_t, 8021, Op::Min>);
|
||||
BENCHMARK(bm<int8_t, 8021, Op::Max>);
|
||||
BENCHMARK(bm<int8_t, 8021, Op::Both>);
|
||||
|
||||
BENCHMARK(bm<int16_t, 8021, Op::Min>);
|
||||
BENCHMARK(bm<int16_t, 8021, Op::Max>);
|
||||
BENCHMARK(bm<int16_t, 8021, Op::Both>);
|
||||
|
||||
BENCHMARK(bm<int32_t, 8021, Op::Min>);
|
||||
BENCHMARK(bm<int32_t, 8021, Op::Max>);
|
||||
BENCHMARK(bm<int32_t, 8021, Op::Both>);
|
||||
|
||||
BENCHMARK(bm<int64_t, 8021, Op::Min>);
|
||||
BENCHMARK(bm<int64_t, 8021, Op::Max>);
|
||||
BENCHMARK(bm<int64_t, 8021, Op::Both>);
|
||||
|
||||
BENCHMARK(bm<float, 8021, Op::Min>);
|
||||
BENCHMARK(bm<float, 8021, Op::Max>);
|
||||
BENCHMARK(bm<float, 8021, Op::Both>);
|
||||
|
||||
BENCHMARK(bm<double, 8021, Op::Min>);
|
||||
BENCHMARK(bm<double, 8021, Op::Max>);
|
||||
BENCHMARK(bm<double, 8021, Op::Both>);
|
||||
|
||||
|
||||
BENCHMARK_MAIN();
|
|
@ -54,6 +54,8 @@ _Min_max_element_t __stdcall __std_minmax_element_1(const void* _First, const vo
|
|||
_Min_max_element_t __stdcall __std_minmax_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
|
||||
_Min_max_element_t __stdcall __std_minmax_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
|
||||
_Min_max_element_t __stdcall __std_minmax_element_8(const void* _First, const void* _Last, bool _Signed) noexcept;
|
||||
_Min_max_element_t __stdcall __std_minmax_element_f(const void* _First, const void* _Last, bool _Unused) noexcept;
|
||||
_Min_max_element_t __stdcall __std_minmax_element_d(const void* _First, const void* _Last, bool _Unused) noexcept;
|
||||
|
||||
const void* __stdcall __std_find_last_trivial_1(const void* _First, const void* _Last, uint8_t _Val) noexcept;
|
||||
const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* _Last, uint16_t _Val) noexcept;
|
||||
|
@ -68,7 +70,11 @@ _STD pair<_Ty*, _Ty*> __std_minmax_element(_Ty* _First, _Ty* _Last) noexcept {
|
|||
|
||||
_Min_max_element_t _Res;
|
||||
|
||||
if constexpr (sizeof(_Ty) == 1) {
|
||||
if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) {
|
||||
_Res = ::__std_minmax_element_f(_First, _Last, false);
|
||||
} else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) {
|
||||
_Res = ::__std_minmax_element_d(_First, _Last, false);
|
||||
} else if constexpr (sizeof(_Ty) == 1) {
|
||||
_Res = ::__std_minmax_element_1(_First, _Last, _Signed);
|
||||
} else if constexpr (sizeof(_Ty) == 2) {
|
||||
_Res = ::__std_minmax_element_2(_First, _Last, _Signed);
|
||||
|
|
|
@ -48,6 +48,18 @@ _STL_DISABLE_CLANG_WARNINGS
|
|||
#endif // ^^^ _USE_STD_VECTOR_ALGORITHMS != 0 ^^^
|
||||
#endif // ^^^ no support for vector algorithms ^^^
|
||||
|
||||
// _USE_STD_VECTOR_FLOATING_ALGORITHMS gates the floating-point paths of the vectorized algorithms.
// When the user has not defined it, it defaults to 1 only if _USE_STD_VECTOR_ALGORITHMS is nonzero and
// _M_FP_EXCEPT is not defined; otherwise it defaults to 0.
// When the user has defined it, a nonzero value combined with a zero _USE_STD_VECTOR_ALGORITHMS is rejected.
// NOTE(review): _M_FP_EXCEPT is presumably the MSVC macro set by /fp:except or /fp:strict, so the default also
// enables these paths under /fp:precise, not only under fast math - confirm against the compiler docs.
#ifndef _USE_STD_VECTOR_FLOATING_ALGORITHMS
|
||||
#if _USE_STD_VECTOR_ALGORITHMS && !defined(_M_FP_EXCEPT)
|
||||
#define _USE_STD_VECTOR_FLOATING_ALGORITHMS 1
|
||||
#else // ^^^ vector algorithms enabled and _M_FP_EXCEPT not defined / vector algorithms disabled or _M_FP_EXCEPT defined vvv
|
||||
#define _USE_STD_VECTOR_FLOATING_ALGORITHMS 0
|
||||
#endif // ^^^ vector algorithms disabled or _M_FP_EXCEPT defined ^^^
|
||||
#else // ^^^ !defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) / defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) vvv
|
||||
#if _USE_STD_VECTOR_FLOATING_ALGORITHMS && !_USE_STD_VECTOR_ALGORITHMS
|
||||
#error _USE_STD_VECTOR_FLOATING_ALGORITHMS must imply _USE_STD_VECTOR_ALGORITHMS.
|
||||
#endif // _USE_STD_VECTOR_FLOATING_ALGORITHMS && !_USE_STD_VECTOR_ALGORITHMS
|
||||
#endif // ^^^ defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) ^^^
|
||||
|
||||
#if _USE_STD_VECTOR_ALGORITHMS
|
||||
extern "C" {
|
||||
// The "noalias" attribute tells the compiler optimizer that pointers going into these hand-vectorized algorithms
|
||||
|
@ -87,11 +99,15 @@ const void* __stdcall __std_min_element_1(const void* _First, const void* _Last,
|
|||
const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
|
||||
const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
|
||||
const void* __stdcall __std_min_element_8(const void* _First, const void* _Last, bool _Signed) noexcept;
|
||||
const void* __stdcall __std_min_element_f(const void* _First, const void* _Last, bool _Unused) noexcept;
|
||||
const void* __stdcall __std_min_element_d(const void* _First, const void* _Last, bool _Unused) noexcept;
|
||||
|
||||
const void* __stdcall __std_max_element_1(const void* _First, const void* _Last, bool _Signed) noexcept;
|
||||
const void* __stdcall __std_max_element_2(const void* _First, const void* _Last, bool _Signed) noexcept;
|
||||
const void* __stdcall __std_max_element_4(const void* _First, const void* _Last, bool _Signed) noexcept;
|
||||
const void* __stdcall __std_max_element_8(const void* _First, const void* _Last, bool _Signed) noexcept;
|
||||
const void* __stdcall __std_max_element_f(const void* _First, const void* _Last, bool _Unused) noexcept;
|
||||
const void* __stdcall __std_max_element_d(const void* _First, const void* _Last, bool _Unused) noexcept;
|
||||
} // extern "C"
|
||||
|
||||
_STD_BEGIN
|
||||
|
@ -158,7 +174,11 @@ template <class _Ty>
|
|||
_Ty* __std_min_element(_Ty* _First, _Ty* _Last) noexcept {
|
||||
constexpr bool _Signed = _STD is_signed_v<_Ty>;
|
||||
|
||||
if constexpr (sizeof(_Ty) == 1) {
|
||||
if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) {
|
||||
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_min_element_f(_First, _Last, false)));
|
||||
} else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) {
|
||||
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_min_element_d(_First, _Last, false)));
|
||||
} else if constexpr (sizeof(_Ty) == 1) {
|
||||
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_min_element_1(_First, _Last, _Signed)));
|
||||
} else if constexpr (sizeof(_Ty) == 2) {
|
||||
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_min_element_2(_First, _Last, _Signed)));
|
||||
|
@ -175,7 +195,11 @@ template <class _Ty>
|
|||
_Ty* __std_max_element(_Ty* _First, _Ty* _Last) noexcept {
|
||||
constexpr bool _Signed = _STD is_signed_v<_Ty>;
|
||||
|
||||
if constexpr (sizeof(_Ty) == 1) {
|
||||
if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) {
|
||||
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_max_element_f(_First, _Last, false)));
|
||||
} else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) {
|
||||
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_max_element_d(_First, _Last, false)));
|
||||
} else if constexpr (sizeof(_Ty) == 1) {
|
||||
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_max_element_1(_First, _Last, _Signed)));
|
||||
} else if constexpr (sizeof(_Ty) == 2) {
|
||||
return const_cast<_Ty*>(static_cast<const _Ty*>(::__std_max_element_2(_First, _Last, _Signed)));
|
||||
|
@ -6607,7 +6631,15 @@ template <class _Iter, class _Pr, class _Elem = _Iter_value_t<_Iter>>
|
|||
_INLINE_VAR constexpr bool _Is_min_max_optimization_safe = // Activate the vector algorithms for min_/max_element?
|
||||
_Iterator_is_contiguous<_Iter> // The iterator must be contiguous so we can get raw pointers.
|
||||
&& !_Iterator_is_volatile<_Iter> // The iterator must not be volatile.
|
||||
&& conjunction_v<disjunction<is_integral<_Elem>, is_pointer<_Elem>>, // Element is of integral or pointer type.
|
||||
&& conjunction_v<disjunction<
|
||||
#if _USE_STD_VECTOR_FLOATING_ALGORITHMS
|
||||
#if defined(__LDBL_DIG__) && __LDBL_DIG__ == 18
|
||||
is_same<_Elem, float>, is_same<_Elem, double>,
|
||||
#else // ^^^ 80-bit long double (not supported by MSVC in general, see GH-1316) / 64-bit long double vvv
|
||||
is_floating_point<_Elem>, // Element is floating point or...
|
||||
#endif // ^^^ 64-bit long double ^^^
|
||||
#endif // _USE_STD_VECTOR_FLOATING_ALGORITHMS
|
||||
is_integral<_Elem>, is_pointer<_Elem>>, // ... integral or pointer type.
|
||||
disjunction< // And either of the following:
|
||||
#if _HAS_CXX20
|
||||
is_same<_Pr, _RANGES less>, // predicate is ranges::less
|
||||
|
|
|
@ -545,6 +545,8 @@ namespace {
|
|||
};
|
||||
|
||||
struct _Minmax_traits_1 {
|
||||
static constexpr bool _Is_floating = false;
|
||||
|
||||
using _Signed_t = int8_t;
|
||||
using _Unsigned_t = uint8_t;
|
||||
|
||||
|
@ -555,6 +557,10 @@ namespace {
|
|||
static constexpr bool _Has_portion_max = true;
|
||||
static constexpr size_t _Portion_max = 256;
|
||||
|
||||
static __m128i _Load(const void* _Src) noexcept {
|
||||
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Src));
|
||||
}
|
||||
|
||||
static __m128i _Sign_correction(const __m128i _Val, const bool _Sign) noexcept {
|
||||
alignas(16) static constexpr _Unsigned_t _Sign_corrections[2][16] = {
|
||||
{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, {}};
|
||||
|
@ -610,6 +616,10 @@ namespace {
|
|||
return _mm_cmpgt_epi8(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept {
|
||||
return _mm_cmpeq_epi8(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i) noexcept {
|
||||
return _mm_min_epi8(_First, _Second);
|
||||
}
|
||||
|
@ -617,10 +627,16 @@ namespace {
|
|||
static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i) noexcept {
|
||||
return _mm_max_epi8(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Mask_cast(__m128i _Mask) noexcept {
|
||||
return _Mask;
|
||||
}
|
||||
#endif // !_M_ARM64EC
|
||||
};
|
||||
|
||||
struct _Minmax_traits_2 {
|
||||
static constexpr bool _Is_floating = false;
|
||||
|
||||
using _Signed_t = int16_t;
|
||||
using _Unsigned_t = uint16_t;
|
||||
|
||||
|
@ -631,6 +647,10 @@ namespace {
|
|||
static constexpr bool _Has_portion_max = true;
|
||||
static constexpr size_t _Portion_max = 65536;
|
||||
|
||||
static __m128i _Load(const void* _Src) noexcept {
|
||||
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Src));
|
||||
}
|
||||
|
||||
static __m128i _Sign_correction(const __m128i _Val, const bool _Sign) noexcept {
|
||||
alignas(16) static constexpr _Unsigned_t _Sign_corrections[2][8] = {
|
||||
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, {}};
|
||||
|
@ -687,6 +707,10 @@ namespace {
|
|||
return _mm_cmpgt_epi16(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept {
|
||||
return _mm_cmpeq_epi16(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i) noexcept {
|
||||
return _mm_min_epi16(_First, _Second);
|
||||
}
|
||||
|
@ -694,10 +718,16 @@ namespace {
|
|||
static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i) noexcept {
|
||||
return _mm_max_epi16(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Mask_cast(__m128i _Mask) noexcept {
|
||||
return _Mask;
|
||||
}
|
||||
#endif // !_M_ARM64EC
|
||||
};
|
||||
|
||||
struct _Minmax_traits_4 {
|
||||
static constexpr bool _Is_floating = false;
|
||||
|
||||
using _Signed_t = int32_t;
|
||||
using _Unsigned_t = uint32_t;
|
||||
|
||||
|
@ -712,6 +742,10 @@ namespace {
|
|||
static constexpr size_t _Portion_max = 0x1'0000'0000ULL;
|
||||
#endif // ^^^ 64-bit ^^^
|
||||
|
||||
static __m128i _Load(const void* _Src) noexcept {
|
||||
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Src));
|
||||
}
|
||||
|
||||
static __m128i _Sign_correction(const __m128i _Val, const bool _Sign) noexcept {
|
||||
alignas(16) static constexpr _Unsigned_t _Sign_corrections[2][4] = {
|
||||
0x8000'0000UL, 0x8000'0000UL, 0x8000'0000UL, 0x8000'0000UL, {}};
|
||||
|
@ -764,6 +798,10 @@ namespace {
|
|||
return _mm_cmpgt_epi32(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept {
|
||||
return _mm_cmpeq_epi32(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i) noexcept {
|
||||
return _mm_min_epi32(_First, _Second);
|
||||
}
|
||||
|
@ -771,10 +809,16 @@ namespace {
|
|||
static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i) noexcept {
|
||||
return _mm_max_epi32(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Mask_cast(__m128i _Mask) noexcept {
|
||||
return _Mask;
|
||||
}
|
||||
#endif // !_M_ARM64EC
|
||||
};
|
||||
|
||||
struct _Minmax_traits_8 {
|
||||
static constexpr bool _Is_floating = false;
|
||||
|
||||
using _Signed_t = int64_t;
|
||||
using _Unsigned_t = uint64_t;
|
||||
|
||||
|
@ -784,6 +828,10 @@ namespace {
|
|||
#ifndef _M_ARM64EC
|
||||
static constexpr bool _Has_portion_max = false;
|
||||
|
||||
static __m128i _Load(const void* _Src) noexcept {
|
||||
return _mm_loadu_si128(reinterpret_cast<const __m128i*>(_Src));
|
||||
}
|
||||
|
||||
static __m128i _Sign_correction(const __m128i _Val, const bool _Sign) noexcept {
|
||||
alignas(16) static constexpr _Unsigned_t _Sign_corrections[2][2] = {
|
||||
0x8000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL, {}};
|
||||
|
@ -844,6 +892,10 @@ namespace {
|
|||
return _mm_cmpgt_epi64(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept {
|
||||
return _mm_cmpeq_epi64(_First, _Second);
|
||||
}
|
||||
|
||||
static __m128i _Min(const __m128i _First, const __m128i _Second, const __m128i _Mask) noexcept {
|
||||
return _mm_blendv_epi8(_First, _Second, _Mask);
|
||||
}
|
||||
|
@ -851,6 +903,206 @@ namespace {
|
|||
static __m128i _Max(const __m128i _First, const __m128i _Second, const __m128i _Mask) noexcept {
|
||||
return _mm_blendv_epi8(_First, _Second, _Mask);
|
||||
}
|
||||
|
||||
static __m128i _Mask_cast(__m128i _Mask) noexcept {
|
||||
return _Mask;
|
||||
}
|
||||
#endif // !_M_ARM64EC
|
||||
};
|
||||
|
||||
// Traits for the vectorized min/max element search over `float` elements.
// Values are processed as __m128 (four floats); the matching indices are kept as four 32-bit lanes in an __m128i.
struct _Minmax_traits_f {
    static constexpr bool _Is_floating = true;

    using _Signed_t = float;

    // Starting values for the running minimum and maximum: HUGE_VALF and its negation.
    static constexpr _Signed_t _Init_min_val = __builtin_huge_valf();
    static constexpr _Signed_t _Init_max_val = -__builtin_huge_valf();

#ifndef _M_ARM64EC
#ifdef _M_IX86
    static constexpr bool _Has_portion_max = false;
#else // ^^^ 32-bit / 64-bit vvv
    // Presumably the element count at which the 32-bit per-lane indices would wrap - confirm against the caller.
    static constexpr bool _Has_portion_max = true;
    static constexpr size_t _Portion_max   = 0x1'0000'0000ULL;
#endif // ^^^ 64-bit ^^^

    // Unaligned load of four floats starting at _Src.
    static __m128 _Load(const void* _Src) noexcept {
        return _mm_loadu_ps(reinterpret_cast<const float*>(_Src));
    }

    // Floating-point values need no sign adjustment; the value is returned unchanged and the flag is ignored.
    static __m128 _Sign_correction(const __m128 _Val, bool) noexcept {
        return _Val;
    }

    // Adds 1 to each 32-bit index lane.
    static __m128i _Inc(__m128i _Idx) noexcept {
        return _mm_add_epi32(_Idx, _mm_set1_epi32(1));
    }

    // Horizontal reduction of four float lanes: combines the vector with its half-swapped copy, then with its
    // pair-swapped copy. With min or max as _Funct, every lane of the result holds the reduction of all four inputs.
    template <class _Fn>
    static __m128 _H_func(const __m128 _Cur, _Fn _Funct) noexcept {
        __m128 _H_min_val = _Cur;
        _H_min_val        = _Funct(_H_min_val, _mm_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(1, 0, 3, 2)));
        _H_min_val        = _Funct(_H_min_val, _mm_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(2, 3, 0, 1)));
        return _H_min_val;
    }

    // Same reduction shape as _H_func, applied to four 32-bit integer lanes (used for index vectors).
    template <class _Fn>
    static __m128i _H_func_u(const __m128i _Cur, _Fn _Funct) noexcept {
        __m128i _H_min_val = _Cur;
        _H_min_val         = _Funct(_H_min_val, _mm_shuffle_epi32(_H_min_val, _MM_SHUFFLE(1, 0, 3, 2)));
        _H_min_val         = _Funct(_H_min_val, _mm_shuffle_epi32(_H_min_val, _MM_SHUFFLE(2, 3, 0, 1)));
        return _H_min_val;
    }

    // Vector whose lanes all hold the smallest float of _Cur.
    static __m128 _H_min(const __m128 _Cur) noexcept {
        return _H_func(_Cur, [](__m128 _First, __m128 _Second) { return _mm_min_ps(_First, _Second); });
    }

    // Vector whose lanes all hold the largest float of _Cur.
    static __m128 _H_max(const __m128 _Cur) noexcept {
        return _H_func(_Cur, [](__m128 _First, __m128 _Second) { return _mm_max_ps(_First, _Second); });
    }

    // Vector whose lanes all hold the smallest unsigned 32-bit value of _Cur.
    static __m128i _H_min_u(const __m128i _Cur) noexcept {
        return _H_func_u(_Cur, [](__m128i _First, __m128i _Second) { return _mm_min_epu32(_First, _Second); });
    }

    // Vector whose lanes all hold the largest unsigned 32-bit value of _Cur.
    static __m128i _H_max_u(const __m128i _Cur) noexcept {
        return _H_func_u(_Cur, [](__m128i _First, __m128i _Second) { return _mm_max_epu32(_First, _Second); });
    }

    // Extracts lane 0.
    static float _Get_any(const __m128 _Cur) noexcept {
        return _mm_cvtss_f32(_Cur);
    }

    // Returns the 32-bit index lane selected by _H_pos, where _H_pos is a byte offset within the 16-byte vector
    // (hence the division by 4 via `>> 2`).
    static uint32_t _Get_v_pos(const __m128i _Idx, const unsigned long _H_pos) noexcept {
        uint32_t _Array[4];
        _mm_storeu_si128(reinterpret_cast<__m128i*>(&_Array), _Idx);
        return _Array[_H_pos >> 2];
    }

    // Lane-wise float equality mask.
    static __m128 _Cmp_eq(const __m128 _First, const __m128 _Second) noexcept {
        return _mm_cmpeq_ps(_First, _Second);
    }

    // Lane-wise float mask of _First > _Second.
    static __m128 _Cmp_gt(const __m128 _First, const __m128 _Second) noexcept {
        return _mm_cmpgt_ps(_First, _Second);
    }

    // Lane-wise equality mask for 32-bit index lanes.
    static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept {
        return _mm_cmpeq_epi32(_First, _Second);
    }

    // Lane-wise minimum; the mask parameter is unused for floats.
    static __m128 _Min(const __m128 _First, const __m128 _Second, __m128) noexcept {
        return _mm_min_ps(_First, _Second);
    }

    // Lane-wise maximum; the mask parameter is unused for floats.
    static __m128 _Max(const __m128 _First, const __m128 _Second, __m128) noexcept {
        return _mm_max_ps(_First, _Second);
    }

    // Reinterprets a float comparison mask as an integer vector so it can be passed to integer intrinsics.
    static __m128i _Mask_cast(__m128 _Mask) noexcept {
        return _mm_castps_si128(_Mask);
    }
#endif // !_M_ARM64EC
};
|
||||
|
||||
// Traits for the vectorized min/max element search over `double` elements.
// Values are processed as __m128d (two doubles); the matching indices are kept as two 64-bit lanes in an __m128i.
struct _Minmax_traits_d {
    static constexpr bool _Is_floating = true;

    using _Signed_t = double;

    // Starting values for the running minimum and maximum: HUGE_VAL and its negation.
    static constexpr _Signed_t _Init_min_val = __builtin_huge_val();
    static constexpr _Signed_t _Init_max_val = -__builtin_huge_val();

#ifndef _M_ARM64EC
    static constexpr bool _Has_portion_max = false;

    // Unaligned load of two doubles starting at _Src.
    static __m128d _Load(const void* _Src) noexcept {
        return _mm_loadu_pd(reinterpret_cast<const double*>(_Src));
    }

    // Floating-point values need no sign adjustment; the value is returned unchanged and the flag is ignored.
    static __m128d _Sign_correction(const __m128d _Val, bool) noexcept {
        return _Val;
    }

    // Adds 1 to each 64-bit index lane.
    static __m128i _Inc(__m128i _Idx) noexcept {
        return _mm_add_epi64(_Idx, _mm_set1_epi64x(1));
    }

    // Horizontal reduction of two double lanes: combines the vector with its lane-swapped copy.
    // With min or max as _Funct, both lanes of the result hold the reduction of the two inputs.
    template <class _Fn>
    static __m128d _H_func(const __m128d _Cur, _Fn _Funct) noexcept {
        __m128d _H_min_val = _Cur;
        _H_min_val         = _Funct(_H_min_val, _mm_shuffle_pd(_H_min_val, _H_min_val, 1));
        return _H_min_val;
    }

    // Scalar reduction of two 64-bit index lanes: keeps the low lane unless _Funct(high, low) is true,
    // then broadcasts the chosen value to both lanes.
    template <class _Fn>
    static __m128i _H_func_u(const __m128i _Cur, _Fn _Funct) noexcept {
        uint64_t _H_min_a = _Get_any_u(_Cur);
        uint64_t _H_min_b = _Get_any_u(_mm_bsrli_si128(_Cur, 8));
        if (_Funct(_H_min_b, _H_min_a)) {
            _H_min_a = _H_min_b;
        }
        return _mm_set1_epi64x(_H_min_a);
    }

    // Vector whose lanes both hold the smaller double of _Cur.
    static __m128d _H_min(const __m128d _Cur) noexcept {
        return _H_func(_Cur, [](__m128d _First, __m128d _Second) { return _mm_min_pd(_First, _Second); });
    }

    // Vector whose lanes both hold the larger double of _Cur.
    static __m128d _H_max(const __m128d _Cur) noexcept {
        return _H_func(_Cur, [](__m128d _First, __m128d _Second) { return _mm_max_pd(_First, _Second); });
    }

    // Vector whose lanes both hold the smaller unsigned 64-bit value of _Cur.
    static __m128i _H_min_u(const __m128i _Cur) noexcept {
        return _H_func_u(_Cur, [](uint64_t _Lhs, uint64_t _Rhs) { return _Lhs < _Rhs; });
    }

    // Vector whose lanes both hold the larger unsigned 64-bit value of _Cur.
    static __m128i _H_max_u(const __m128i _Cur) noexcept {
        return _H_func_u(_Cur, [](uint64_t _Lhs, uint64_t _Rhs) { return _Lhs > _Rhs; });
    }

    // Extracts lane 0 of a double vector.
    static double _Get_any(const __m128d _Cur) noexcept {
        return _mm_cvtsd_f64(_Cur);
    }

    // Extracts the low 64-bit lane of an integer vector.
    static uint64_t _Get_any_u(const __m128i _Cur) noexcept {
#ifdef _M_IX86
        // On x86 the 64-bit value is assembled from its two 32-bit halves.
        return (static_cast<uint64_t>(static_cast<uint32_t>(_mm_extract_epi32(_Cur, 1))) << 32)
             | static_cast<uint64_t>(static_cast<uint32_t>(_mm_cvtsi128_si32(_Cur)));
#else // ^^^ x86 / x64 vvv
        return static_cast<uint64_t>(_mm_cvtsi128_si64(_Cur));
#endif // ^^^ x64 ^^^
    }

    // Returns the 64-bit index lane selected by _H_pos, where _H_pos is a byte offset within the 16-byte vector
    // (hence the division by 8 via `>> 3`).
    static uint64_t _Get_v_pos(const __m128i _Idx, const unsigned long _H_pos) noexcept {
        uint64_t _Array[2];
        _mm_storeu_si128(reinterpret_cast<__m128i*>(&_Array), _Idx);
        return _Array[_H_pos >> 3];
    }

    // Lane-wise double equality mask.
    static __m128d _Cmp_eq(const __m128d _First, const __m128d _Second) noexcept {
        return _mm_cmpeq_pd(_First, _Second);
    }

    // Lane-wise double mask of _First > _Second.
    static __m128d _Cmp_gt(const __m128d _First, const __m128d _Second) noexcept {
        return _mm_cmpgt_pd(_First, _Second);
    }

    // Lane-wise equality mask for 64-bit index lanes.
    static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept {
        return _mm_cmpeq_epi64(_First, _Second);
    }

    // Lane-wise minimum; the mask parameter is unused for doubles.
    static __m128d _Min(const __m128d _First, const __m128d _Second, __m128d) noexcept {
        return _mm_min_pd(_First, _Second);
    }

    // Lane-wise maximum; the mask parameter is unused for doubles.
    static __m128d _Max(const __m128d _First, const __m128d _Second, __m128d) noexcept {
        return _mm_max_pd(_First, _Second);
    }

    // Reinterprets a double comparison mask as an integer vector so it can be passed to integer intrinsics.
    static __m128i _Mask_cast(__m128d _Mask) noexcept {
        return _mm_castpd_si128(_Mask);
    }
#endif // !_M_ARM64EC
};
|
||||
|
||||
|
@ -882,13 +1134,12 @@ namespace {
|
|||
_Advance_bytes(_Stop_at, _Portion_byte_size);
|
||||
|
||||
// Load values and if unsigned adjust them to be signed (for signed vector comparisons)
|
||||
__m128i _Cur_vals =
|
||||
_Traits::_Sign_correction(_mm_loadu_si128(reinterpret_cast<const __m128i*>(_First)), _Sign);
|
||||
__m128i _Cur_vals_min = _Cur_vals; // vector of vertical minimum values
|
||||
__m128i _Cur_idx_min = _mm_setzero_si128(); // vector of vertical minimum indices
|
||||
__m128i _Cur_vals_max = _Cur_vals; // vector of vertical maximum values
|
||||
__m128i _Cur_idx_max = _mm_setzero_si128(); // vector of vertical maximum indices
|
||||
__m128i _Cur_idx = _mm_setzero_si128(); // current vector of indices
|
||||
auto _Cur_vals = _Traits::_Sign_correction(_Traits::_Load(_First), _Sign);
|
||||
auto _Cur_vals_min = _Cur_vals; // vector of vertical minimum values
|
||||
auto _Cur_idx_min = _mm_setzero_si128(); // vector of vertical minimum indices
|
||||
auto _Cur_vals_max = _Cur_vals; // vector of vertical maximum values
|
||||
auto _Cur_idx_max = _mm_setzero_si128(); // vector of vertical maximum indices
|
||||
auto _Cur_idx = _mm_setzero_si128(); // current vector of indices
|
||||
|
||||
for (;;) {
|
||||
_Advance_bytes(_First, 16);
|
||||
|
@ -901,21 +1152,21 @@ namespace {
|
|||
// Compute horizontal min and/or max. Determine horizontal and vertical position of it.
|
||||
|
||||
if constexpr ((_Mode & _Mode_min) != 0) {
|
||||
const __m128i _H_min =
|
||||
_Traits::_H_min(_Cur_vals_min); // Vector populated by the smallest element
|
||||
const auto _H_min = _Traits::_H_min(_Cur_vals_min); // Vector populated by the smallest element
|
||||
const auto _H_min_val = _Traits::_Get_any(_H_min); // Get any element of it
|
||||
|
||||
if (_H_min_val < _Cur_min_val) { // Current horizontal min is less than the old
|
||||
_Cur_min_val = _H_min_val; // update min
|
||||
const __m128i _Eq_mask =
|
||||
const auto _Eq_mask =
|
||||
_Traits::_Cmp_eq(_H_min, _Cur_vals_min); // Mask of all elems eq to min
|
||||
int _Mask = _mm_movemask_epi8(_Eq_mask);
|
||||
int _Mask = _mm_movemask_epi8(_Traits::_Mask_cast(_Eq_mask));
|
||||
// Indices of minimum elements or the greatest index if none
|
||||
const __m128i _All_max = _mm_set1_epi8(static_cast<char>(0xFF));
|
||||
const __m128i _Idx_min_val = _mm_blendv_epi8(_All_max, _Cur_idx_min, _Eq_mask);
|
||||
__m128i _Idx_min = _Traits::_H_min_u(_Idx_min_val); // The smallest indices
|
||||
const auto _All_max = _mm_set1_epi8(static_cast<char>(0xFF));
|
||||
const auto _Idx_min_val =
|
||||
_mm_blendv_epi8(_All_max, _Cur_idx_min, _Traits::_Mask_cast(_Eq_mask));
|
||||
auto _Idx_min = _Traits::_H_min_u(_Idx_min_val); // The smallest indices
|
||||
// Select the smallest vertical indices from the smallest element mask
|
||||
_Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq(_Idx_min, _Idx_min_val));
|
||||
_Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq_idx(_Idx_min, _Idx_min_val));
|
||||
unsigned long _H_pos;
|
||||
|
||||
// Find the smallest horizontal index
|
||||
|
@ -928,8 +1179,7 @@ namespace {
|
|||
}
|
||||
|
||||
if constexpr ((_Mode & _Mode_max) != 0) {
|
||||
const __m128i _H_max =
|
||||
_Traits::_H_max(_Cur_vals_max); // Vector populated by the largest element
|
||||
const auto _H_max = _Traits::_H_max(_Cur_vals_max); // Vector populated by the largest element
|
||||
const auto _H_max_val = _Traits::_Get_any(_H_max); // Get any element of it
|
||||
|
||||
if (_Mode == _Mode_both && _Cur_max_val <= _H_max_val
|
||||
|
@ -937,19 +1187,19 @@ namespace {
|
|||
// max_element: current horizontal max is greater than the old, update max
|
||||
// minmax_element: current horizontal max is not less than the old, update max
|
||||
_Cur_max_val = _H_max_val;
|
||||
const __m128i _Eq_mask =
|
||||
const auto _Eq_mask =
|
||||
_Traits::_Cmp_eq(_H_max, _Cur_vals_max); // Mask of all elems eq to max
|
||||
int _Mask = _mm_movemask_epi8(_Eq_mask);
|
||||
int _Mask = _mm_movemask_epi8(_Traits::_Mask_cast(_Eq_mask));
|
||||
|
||||
unsigned long _H_pos;
|
||||
if constexpr (_Mode == _Mode_both) {
|
||||
// Looking for the last occurrence of maximum
|
||||
// Indices of maximum elements or zero if none
|
||||
const __m128i _Idx_max_val =
|
||||
_mm_blendv_epi8(_mm_setzero_si128(), _Cur_idx_max, _Eq_mask);
|
||||
const __m128i _Idx_max = _Traits::_H_max_u(_Idx_max_val); // The greatest indices
|
||||
const auto _Idx_max_val =
|
||||
_mm_blendv_epi8(_mm_setzero_si128(), _Cur_idx_max, _Traits::_Mask_cast(_Eq_mask));
|
||||
const auto _Idx_max = _Traits::_H_max_u(_Idx_max_val); // The greatest indices
|
||||
// Select the greatest vertical indices from the largest element mask
|
||||
_Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq(_Idx_max, _Idx_max_val));
|
||||
_Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq_idx(_Idx_max, _Idx_max_val));
|
||||
|
||||
// Find the largest horizontal index
|
||||
_BitScanReverse(&_H_pos, _Mask); // lgtm [cpp/conditionallyuninitializedvariable]
|
||||
|
@ -958,11 +1208,12 @@ namespace {
|
|||
} else {
|
||||
// Looking for the first occurrence of maximum
|
||||
// Indices of maximum elements or the greatest index if none
|
||||
const __m128i _All_max = _mm_set1_epi8(static_cast<char>(0xFF));
|
||||
const __m128i _Idx_max_val = _mm_blendv_epi8(_All_max, _Cur_idx_max, _Eq_mask);
|
||||
const __m128i _Idx_max = _Traits::_H_min_u(_Idx_max_val); // The smallest indices
|
||||
const auto _All_max = _mm_set1_epi8(static_cast<char>(0xFF));
|
||||
const auto _Idx_max_val =
|
||||
_mm_blendv_epi8(_All_max, _Cur_idx_max, _Traits::_Mask_cast(_Eq_mask));
|
||||
const auto _Idx_max = _Traits::_H_min_u(_Idx_max_val); // The smallest indices
|
||||
// Select the smallest vertical indices from the largest element mask
|
||||
_Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq(_Idx_max, _Idx_max_val));
|
||||
_Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq_idx(_Idx_max, _Idx_max_val));
|
||||
|
||||
// Find the smallest horizontal index
|
||||
_BitScanForward(&_H_pos, _Mask); // lgtm [cpp/conditionallyuninitializedvariable]
|
||||
|
@ -991,8 +1242,7 @@ namespace {
|
|||
// Indices will be relative to the new base
|
||||
_Base = static_cast<const char*>(_First);
|
||||
// Load values and if unsigned adjust them to be signed (for signed vector comparisons)
|
||||
_Cur_vals =
|
||||
_Traits::_Sign_correction(_mm_loadu_si128(reinterpret_cast<const __m128i*>(_First)), _Sign);
|
||||
_Cur_vals = _Traits::_Sign_correction(_Traits::_Load(_First), _Sign);
|
||||
|
||||
if constexpr ((_Mode & _Mode_min) != 0) {
|
||||
_Cur_vals_min = _Cur_vals;
|
||||
|
@ -1012,55 +1262,67 @@ namespace {
|
|||
// This is the main part, finding vertical minimum/maximum
|
||||
|
||||
// Load values and if unsigned adjust them to be signed (for signed vector comparisons)
|
||||
_Cur_vals = _Traits::_Sign_correction(_mm_loadu_si128(reinterpret_cast<const __m128i*>(_First)), _Sign);
|
||||
_Cur_vals = _Traits::_Sign_correction(_Traits::_Load(_First), _Sign);
|
||||
|
||||
if constexpr ((_Mode & _Mode_min) != 0) {
|
||||
// Looking for the first occurrence of minimum, don't overwrite with newly found occurrences
|
||||
const __m128i _Is_less = _Traits::_Cmp_gt(_Cur_vals_min, _Cur_vals); // _Cur_vals < _Cur_vals_min
|
||||
_Cur_idx_min = _mm_blendv_epi8(_Cur_idx_min, _Cur_idx, _Is_less); // Remember their vertical indices
|
||||
const auto _Is_less = _Traits::_Cmp_gt(_Cur_vals_min, _Cur_vals); // _Cur_vals < _Cur_vals_min
|
||||
_Cur_idx_min = _mm_blendv_epi8(
|
||||
_Cur_idx_min, _Cur_idx, _Traits::_Mask_cast(_Is_less)); // Remember their vertical indices
|
||||
_Cur_vals_min = _Traits::_Min(_Cur_vals_min, _Cur_vals, _Is_less); // Update the current minimum
|
||||
}
|
||||
|
||||
if constexpr (_Mode == _Mode_max) {
|
||||
// Looking for the first occurrence of maximum, don't overwrite with newly found occurrences
|
||||
const __m128i _Is_greater = _Traits::_Cmp_gt(_Cur_vals, _Cur_vals_max); // _Cur_vals > _Cur_vals_max
|
||||
_Cur_idx_max =
|
||||
_mm_blendv_epi8(_Cur_idx_max, _Cur_idx, _Is_greater); // Remember their vertical indices
|
||||
const auto _Is_greater = _Traits::_Cmp_gt(_Cur_vals, _Cur_vals_max); // _Cur_vals > _Cur_vals_max
|
||||
_Cur_idx_max = _mm_blendv_epi8(
|
||||
_Cur_idx_max, _Cur_idx, _Traits::_Mask_cast(_Is_greater)); // Remember their vertical indices
|
||||
_Cur_vals_max = _Traits::_Max(_Cur_vals_max, _Cur_vals, _Is_greater); // Update the current maximum
|
||||
} else if constexpr (_Mode == _Mode_both) {
|
||||
// Looking for the last occurrence of maximum, do overwrite with newly found occurrences
|
||||
const __m128i _Is_less =
|
||||
_Traits::_Cmp_gt(_Cur_vals_max, _Cur_vals); // !(_Cur_vals >= _Cur_vals_max)
|
||||
_Cur_idx_max = _mm_blendv_epi8(_Cur_idx, _Cur_idx_max, _Is_less); // Remember their vertical indices
|
||||
const auto _Is_less = _Traits::_Cmp_gt(_Cur_vals_max, _Cur_vals); // !(_Cur_vals >= _Cur_vals_max)
|
||||
_Cur_idx_max = _mm_blendv_epi8(_Cur_idx, _Cur_idx_max,
|
||||
_Traits::_Mask_cast(_Is_less)); // Remember their vertical indices
|
||||
_Cur_vals_max = _Traits::_Max(_Cur_vals, _Cur_vals_max, _Is_less); // Update the current maximum
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // !_M_ARM64EC
|
||||
using _STy = _Traits::_Signed_t;
|
||||
using _UTy = _Traits::_Unsigned_t;
|
||||
|
||||
constexpr _UTy _Correction = _UTy{1} << (sizeof(_UTy) * 8 - 1);
|
||||
|
||||
if constexpr (_Mode == _Mode_min) {
|
||||
if (_Sign) {
|
||||
return _Min_tail(_First, _Last, _Res._Min, static_cast<_STy>(_Cur_min_val));
|
||||
if constexpr (_Traits::_Is_floating) {
|
||||
if constexpr (_Mode == _Mode_min) {
|
||||
return _Min_tail(_First, _Last, _Res._Min, _Cur_min_val);
|
||||
} else if constexpr (_Mode == _Mode_max) {
|
||||
return _Max_tail(_First, _Last, _Res._Max, _Cur_max_val);
|
||||
} else {
|
||||
return _Min_tail(_First, _Last, _Res._Min, static_cast<_UTy>(_Cur_min_val + _Correction));
|
||||
}
|
||||
} else if constexpr (_Mode == _Mode_max) {
|
||||
if (_Sign) {
|
||||
return _Max_tail(_First, _Last, _Res._Max, static_cast<_STy>(_Cur_max_val));
|
||||
} else {
|
||||
return _Max_tail(_First, _Last, _Res._Max, static_cast<_UTy>(_Cur_max_val + _Correction));
|
||||
return _Both_tail(_First, _Last, _Res, _Cur_min_val, _Cur_max_val);
|
||||
}
|
||||
} else {
|
||||
if (_Sign) {
|
||||
return _Both_tail(
|
||||
_First, _Last, _Res, static_cast<_STy>(_Cur_min_val), static_cast<_STy>(_Cur_max_val));
|
||||
using _STy = _Traits::_Signed_t;
|
||||
using _UTy = _Traits::_Unsigned_t;
|
||||
|
||||
constexpr _UTy _Correction = _UTy{1} << (sizeof(_UTy) * 8 - 1);
|
||||
|
||||
if constexpr (_Mode == _Mode_min) {
|
||||
if (_Sign) {
|
||||
return _Min_tail(_First, _Last, _Res._Min, static_cast<_STy>(_Cur_min_val));
|
||||
} else {
|
||||
return _Min_tail(_First, _Last, _Res._Min, static_cast<_UTy>(_Cur_min_val + _Correction));
|
||||
}
|
||||
} else if constexpr (_Mode == _Mode_max) {
|
||||
if (_Sign) {
|
||||
return _Max_tail(_First, _Last, _Res._Max, static_cast<_STy>(_Cur_max_val));
|
||||
} else {
|
||||
return _Max_tail(_First, _Last, _Res._Max, static_cast<_UTy>(_Cur_max_val + _Correction));
|
||||
}
|
||||
} else {
|
||||
return _Both_tail(_First, _Last, _Res, static_cast<_UTy>(_Cur_min_val + _Correction),
|
||||
static_cast<_UTy>(_Cur_max_val + _Correction));
|
||||
if (_Sign) {
|
||||
return _Both_tail(
|
||||
_First, _Last, _Res, static_cast<_STy>(_Cur_min_val), static_cast<_STy>(_Cur_max_val));
|
||||
} else {
|
||||
return _Both_tail(_First, _Last, _Res, static_cast<_UTy>(_Cur_min_val + _Correction),
|
||||
static_cast<_UTy>(_Cur_max_val + _Correction));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1089,6 +1351,16 @@ const void* __stdcall __std_min_element_8(
|
|||
return _Minmax_element<_Mode_min, _Minmax_traits_8>(_First, _Last, _Signed);
|
||||
}
|
||||
|
||||
// min_element entry point for the `_f` element kind (presumably `float`; _Minmax_traits_f is defined
// outside this view). Forwards [_First, _Last) to _Minmax_element in _Mode_min and returns its result.
// The third argument is named `_Unused` here (the integer overloads call it `_Signed`), but it is still
// passed through unchanged.
const void* __stdcall __std_min_element_f(
|
||||
const void* const _First, const void* const _Last, const bool _Unused) noexcept {
|
||||
return _Minmax_element<_Mode_min, _Minmax_traits_f>(_First, _Last, _Unused);
|
||||
}
|
||||
|
||||
// min_element entry point for the `_d` element kind (presumably `double`; _Minmax_traits_d is defined
// outside this view). Forwards [_First, _Last) to _Minmax_element in _Mode_min and returns its result.
// The third argument is named `_Unused` here (the integer overloads call it `_Signed`), but it is still
// passed through unchanged.
const void* __stdcall __std_min_element_d(
|
||||
const void* const _First, const void* const _Last, const bool _Unused) noexcept {
|
||||
return _Minmax_element<_Mode_min, _Minmax_traits_d>(_First, _Last, _Unused);
|
||||
}
|
||||
|
||||
const void* __stdcall __std_max_element_1(
|
||||
const void* const _First, const void* const _Last, const bool _Signed) noexcept {
|
||||
return _Minmax_element<_Mode_max, _Minmax_traits_1>(_First, _Last, _Signed);
|
||||
|
@ -1109,6 +1381,16 @@ const void* __stdcall __std_max_element_8(
|
|||
return _Minmax_element<_Mode_max, _Minmax_traits_8>(_First, _Last, _Signed);
|
||||
}
|
||||
|
||||
// max_element entry point for the `_f` element kind (presumably `float`; _Minmax_traits_f is defined
// outside this view). Forwards [_First, _Last) to _Minmax_element in _Mode_max and returns its result.
// The third argument is named `_Unused` here (the integer overloads call it `_Signed`), but it is still
// passed through unchanged.
const void* __stdcall __std_max_element_f(
|
||||
const void* const _First, const void* const _Last, const bool _Unused) noexcept {
|
||||
return _Minmax_element<_Mode_max, _Minmax_traits_f>(_First, _Last, _Unused);
|
||||
}
|
||||
|
||||
// max_element entry point for the `_d` element kind (presumably `double`; _Minmax_traits_d is defined
// outside this view). Forwards [_First, _Last) to _Minmax_element in _Mode_max and returns its result.
// The third argument is named `_Unused` here (the integer overloads call it `_Signed`), but it is still
// passed through unchanged.
const void* __stdcall __std_max_element_d(
|
||||
const void* const _First, const void* const _Last, const bool _Unused) noexcept {
|
||||
return _Minmax_element<_Mode_max, _Minmax_traits_d>(_First, _Last, _Unused);
|
||||
}
|
||||
|
||||
_Min_max_element_t __stdcall __std_minmax_element_1(
|
||||
const void* const _First, const void* const _Last, const bool _Signed) noexcept {
|
||||
return _Minmax_element<_Mode_both, _Minmax_traits_1>(_First, _Last, _Signed);
|
||||
|
@ -1129,6 +1411,15 @@ _Min_max_element_t __stdcall __std_minmax_element_8(
|
|||
return _Minmax_element<_Mode_both, _Minmax_traits_8>(_First, _Last, _Signed);
|
||||
}
|
||||
|
||||
// minmax_element entry point for the `_f` element kind (presumably `float`; _Minmax_traits_f is defined
// outside this view). Forwards [_First, _Last) to _Minmax_element in _Mode_both and returns the resulting
// _Min_max_element_t. The third argument is named `_Unused` here (the integer overloads call it
// `_Signed`), but it is still passed through unchanged.
_Min_max_element_t __stdcall __std_minmax_element_f(
|
||||
const void* const _First, const void* const _Last, const bool _Unused) noexcept {
|
||||
return _Minmax_element<_Mode_both, _Minmax_traits_f>(_First, _Last, _Unused);
|
||||
}
|
||||
|
||||
// minmax_element entry point for the `_d` element kind (presumably `double`; _Minmax_traits_d is defined
// outside this view). Forwards [_First, _Last) to _Minmax_element in _Mode_both and returns the resulting
// _Min_max_element_t. The third argument is named `_Unused` here (the integer overloads call it
// `_Signed`), but it is still passed through unchanged.
_Min_max_element_t __stdcall __std_minmax_element_d(
|
||||
const void* const _First, const void* const _Last, const bool _Unused) noexcept {
|
||||
return _Minmax_element<_Mode_both, _Minmax_traits_d>(_First, _Last, _Unused);
|
||||
}
|
||||
} // extern "C"
|
||||
|
||||
namespace {
|
||||
|
|
|
@ -178,6 +178,31 @@ void test_min_max_element(mt19937_64& gen) {
|
|||
}
|
||||
}
|
||||
|
||||
// Randomized min/max/minmax element test for a floating-point type T.
// Builds a pool of dataCount / 2 candidate values, then grows `input` one element at a time by sampling
// that pool, calling test_case_min_max_element on every prefix length from 0 up to dataCount.
// `gen` is the shared random engine; it drives both the value distribution and the pool sampling.
template <class T>
|
||||
void test_min_max_element_floating(mt19937_64& gen) {
|
||||
// Note: normal_distribution takes (mean, stddev), so these arguments are not a [low, high] range.
normal_distribution<T> dis(-100000.0, 100000.0);
|
||||
|
||||
constexpr auto input_of_input_size = dataCount / 2;
|
||||
vector<T> input_of_input(input_of_input_size);
|
||||
// The first four pool slots are fixed special values: both infinities and both signed zeros.
// No NaN is placed in the pool.
input_of_input[0] = -numeric_limits<T>::infinity();
|
||||
input_of_input[1] = +numeric_limits<T>::infinity();
|
||||
input_of_input[2] = -0.0;
|
||||
input_of_input[3] = +0.0;
|
||||
// The remaining pool slots are filled with random draws from `dis`.
for (size_t i = 4; i < input_of_input_size; ++i) {
|
||||
input_of_input[i] = dis(gen);
|
||||
}
|
||||
|
||||
// Uniform index into the pool; the pool is half the size of the final input, so values repeat
// (presumably to exercise the first/last-occurrence tie handling -- not stated in this block).
uniform_int_distribution<size_t> idx_dis(0, input_of_input_size - 1);
|
||||
|
||||
vector<T> input;
|
||||
input.reserve(dataCount);
|
||||
// Check the empty range first, then re-check after every appended element.
test_case_min_max_element(input);
|
||||
for (size_t attempts = 0; attempts < dataCount; ++attempts) {
|
||||
input.push_back(input_of_input[idx_dis(gen)]);
|
||||
test_case_min_max_element(input);
|
||||
}
|
||||
}
|
||||
|
||||
void test_min_max_element_pointers(mt19937_64& gen) {
|
||||
const short arr[20]{};
|
||||
|
||||
|
@ -367,6 +392,10 @@ void test_vector_algorithms(mt19937_64& gen) {
|
|||
test_min_max_element<long long>(gen);
|
||||
test_min_max_element<unsigned long long>(gen);
|
||||
|
||||
test_min_max_element_floating<float>(gen);
|
||||
test_min_max_element_floating<double>(gen);
|
||||
test_min_max_element_floating<long double>(gen);
|
||||
|
||||
test_min_max_element_pointers(gen);
|
||||
|
||||
test_min_max_element_special_cases<int8_t, 16>(); // SSE2 vectors
|
||||
|
|
Загрузка…
Ссылка в новой задаче