diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 237710c5f..74b694f03 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -118,6 +118,7 @@ add_benchmark(mismatch src/mismatch.cpp) add_benchmark(path_lexically_normal src/path_lexically_normal.cpp) add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp) add_benchmark(random_integer_generation src/random_integer_generation.cpp) +add_benchmark(remove src/remove.cpp) add_benchmark(replace src/replace.cpp) add_benchmark(search src/search.cpp) add_benchmark(std_copy src/std_copy.cpp) diff --git a/benchmarks/inc/lorem.hpp b/benchmarks/inc/lorem.hpp new file mode 100644 index 000000000..5671e2ae7 --- /dev/null +++ b/benchmarks/inc/lorem.hpp @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#include + +inline constexpr std::string_view lorem_ipsum = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum " + "ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, " + "ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. " + "Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam " + "velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate " + "ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam " + "eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero " + "accumsan velit elementum, eu laoreet metus convallis. Donec pellentesque lacus ut iaculis iaculis. Curabitur orci " + "elit, bibendum sit amet feugiat at, iaculis sit amet massa. Maecenas imperdiet lacus at vehicula iaculis. Donec " + "volutpat nunc sit amet accumsan tempor. Quisque pretium vestibulum ultricies. Suspendisse potenti. Aenean at diam " + "iaculis, condimentum felis venenatis, condimentum erat. Nam quis elit dui. Duis quis odio vitae metus hendrerit " + "rhoncus ut et magna. Cras ac augue quis nibh pharetra sagittis. Donec ullamcorper vel eros semper pretium. Proin " + "vel sollicitudin eros. Nulla sollicitudin mattis turpis id suscipit. Aliquam sed risus velit. Aliquam iaculis nec " + "nibh ac egestas. Duis finibus semper est sed consequat. Sed in sapien quis nibh dignissim mattis. Vestibulum nec " + "metus sodales, euismod mauris ac, sollicitudin libero. Maecenas non arcu ac velit ullamcorper fringilla et quis " + "nulla. Curabitur posuere leo eget ipsum tincidunt dignissim. Cras ultricies suscipit neque, quis suscipit tortor " + "venenatis non. Cras nisl mi, bibendum in vulputate quis, vestibulum ornare enim. Nunc hendrerit placerat dui, " + "aliquam mollis sem convallis et. Integer vitae urna diam. Phasellus et imperdiet est. Maecenas auctor facilisis " + "nibh non commodo. Suspendisse iaculis quam id bibendum feugiat. Pellentesque felis erat, egestas a libero ac, " + "laoreet consectetur elit. Cras ut suscipit ex. Etiam gravida sem quis ex porta, eu lacinia tortor fermentum. " + "Nulla consequat odio enim, sed condimentum est sagittis a. Quisque nec commodo tellus. Phasellus elementum " + "feugiat dolor et feugiat. Praesent sed mattis tortor. In vitae sodales purus. Morbi accumsan, ligula et interdum " + "lacinia, leo risus suscipit urna, non luctus mi justo eu ipsum. Curabitur venenatis pretium orci id porttitor. " + "Quisque dapibus nisl sit amet elit lobortis sagittis. Orci varius natoque penatibus et magnis dis parturient " + "montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum " + "justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum " + "vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium " + "euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus " + "sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis."; diff --git a/benchmarks/src/remove.cpp b/benchmarks/src/remove.cpp new file mode 100644 index 000000000..f0d28f6d7 --- /dev/null +++ b/benchmarks/src/remove.cpp @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include + +#include "lorem.hpp" + +enum class alg_type { std_fn, rng }; + +template +void r(benchmark::State& state) { + const std::vector src(lorem_ipsum.begin(), lorem_ipsum.end()); + std::vector v; + v.reserve(lorem_ipsum.size()); + for (auto _ : state) { + v = src; + benchmark::DoNotOptimize(v); + if constexpr (Type == alg_type::std_fn) { + benchmark::DoNotOptimize(std::remove(v.begin(), v.end(), T{'l'})); + } else { + benchmark::DoNotOptimize(std::ranges::remove(v, T{'l'})); + } + } +} + +BENCHMARK(r); +BENCHMARK(r); +BENCHMARK(r); +BENCHMARK(r); + +BENCHMARK(r); +BENCHMARK(r); +BENCHMARK(r); +BENCHMARK(r); + +BENCHMARK_MAIN(); diff --git a/benchmarks/src/replace.cpp b/benchmarks/src/replace.cpp index 5740edaaa..7a41e0415 100644 --- a/benchmarks/src/replace.cpp +++ b/benchmarks/src/replace.cpp @@ -6,41 +6,12 @@ #include #include -const char src[] = - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum " - "ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, " - "ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. " - "Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam " - "velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate " - "ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam " - "eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero " - "accumsan velit elementum, eu laoreet metus convallis. Donec pellentesque lacus ut iaculis iaculis. Curabitur orci " - "elit, bibendum sit amet feugiat at, iaculis sit amet massa. Maecenas imperdiet lacus at vehicula iaculis. Donec " - "volutpat nunc sit amet accumsan tempor. Quisque pretium vestibulum ultricies. Suspendisse potenti. Aenean at diam " - "iaculis, condimentum felis venenatis, condimentum erat. Nam quis elit dui. Duis quis odio vitae metus hendrerit " - "rhoncus ut et magna. Cras ac augue quis nibh pharetra sagittis. Donec ullamcorper vel eros semper pretium. Proin " - "vel sollicitudin eros. Nulla sollicitudin mattis turpis id suscipit. Aliquam sed risus velit. Aliquam iaculis nec " - "nibh ac egestas. Duis finibus semper est sed consequat. Sed in sapien quis nibh dignissim mattis. Vestibulum nec " - "metus sodales, euismod mauris ac, sollicitudin libero. Maecenas non arcu ac velit ullamcorper fringilla et quis " - "nulla. Curabitur posuere leo eget ipsum tincidunt dignissim. Cras ultricies suscipit neque, quis suscipit tortor " - "venenatis non. Cras nisl mi, bibendum in vulputate quis, vestibulum ornare enim. Nunc hendrerit placerat dui, " - "aliquam mollis sem convallis et. Integer vitae urna diam. Phasellus et imperdiet est. Maecenas auctor facilisis " - "nibh non commodo. Suspendisse iaculis quam id bibendum feugiat. Pellentesque felis erat, egestas a libero ac, " - "laoreet consectetur elit. Cras ut suscipit ex. Etiam gravida sem quis ex porta, eu lacinia tortor fermentum. " - "Nulla consequat odio enim, sed condimentum est sagittis a. Quisque nec commodo tellus. Phasellus elementum " - "feugiat dolor et feugiat. Praesent sed mattis tortor. In vitae sodales purus. Morbi accumsan, ligula et interdum " - "lacinia, leo risus suscipit urna, non luctus mi justo eu ipsum. Curabitur venenatis pretium orci id porttitor. " - "Quisque dapibus nisl sit amet elit lobortis sagittis. Orci varius natoque penatibus et magnis dis parturient " - "montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum " - "justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum " - "vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium " - "euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus " - "sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis."; +#include "lorem.hpp" template void r(benchmark::State& state) { - const std::vector a(std::begin(src), std::end(src)); - std::vector b(std::size(src)); + const std::vector a(lorem_ipsum.begin(), lorem_ipsum.end()); + std::vector b(lorem_ipsum.size()); for (auto _ : state) { b = a; @@ -50,8 +21,8 @@ void r(benchmark::State& state) { template void rc(benchmark::State& state) { - const std::vector a(std::begin(src), std::end(src)); - std::vector b(std::size(src)); + const std::vector a(lorem_ipsum.begin(), lorem_ipsum.end()); + std::vector b(lorem_ipsum.size()); for (auto _ : state) { std::replace_copy(std::begin(a), std::end(a), std::begin(b), T{'m'}, T{'w'}); @@ -60,8 +31,8 @@ void rc(benchmark::State& state) { template void rc_if(benchmark::State& state) { - const std::vector a(std::begin(src), std::end(src)); - std::vector b(std::size(src)); + const std::vector a(lorem_ipsum.begin(), lorem_ipsum.end()); + std::vector b(lorem_ipsum.size()); for (auto _ : state) { (void) std::replace_copy_if( diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index f28b97219..3bbf45f70 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -10,38 +10,10 @@ #include #include #include + +#include "lorem.hpp" using namespace std::string_view_literals; -constexpr std::string_view common_src_data = - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit interdum " - "ac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque nunc nunc, " - "ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, ultricies erat. " - "Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem nisi aliquam " - "velit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros volutpat, vulputate " - "ex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales magna. Mauris et quam " - "eu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam aliquam libero " - "accumsan velit elementum, eu laoreet metus convallis. Donec pellentesque lacus ut iaculis iaculis. Curabitur orci " - "elit, bibendum sit amet feugiat at, iaculis sit amet massa. Maecenas imperdiet lacus at vehicula iaculis. Donec " - "volutpat nunc sit amet accumsan tempor. Quisque pretium vestibulum ultricies. Suspendisse potenti. Aenean at diam " - "iaculis, condimentum felis venenatis, condimentum erat. Nam quis elit dui. Duis quis odio vitae metus hendrerit " - "rhoncus ut et magna. Cras ac augue quis nibh pharetra sagittis. Donec ullamcorper vel eros semper pretium. Proin " - "vel sollicitudin eros. Nulla sollicitudin mattis turpis id suscipit. Aliquam sed risus velit. Aliquam iaculis nec " - "nibh ac egestas. Duis finibus semper est sed consequat. Sed in sapien quis nibh dignissim mattis. Vestibulum nec " - "metus sodales, euismod mauris ac, sollicitudin libero. Maecenas non arcu ac velit ullamcorper fringilla et quis " - "nulla. Curabitur posuere leo eget ipsum tincidunt dignissim. Cras ultricies suscipit neque, quis suscipit tortor " - "venenatis non. Cras nisl mi, bibendum in vulputate quis, vestibulum ornare enim. Nunc hendrerit placerat dui, " - "aliquam mollis sem convallis et. Integer vitae urna diam. Phasellus et imperdiet est. Maecenas auctor facilisis " - "nibh non commodo. Suspendisse iaculis quam id bibendum feugiat. Pellentesque felis erat, egestas a libero ac, " - "laoreet consectetur elit. Cras ut suscipit ex. Etiam gravida sem quis ex porta, eu lacinia tortor fermentum. " - "Nulla consequat odio enim, sed condimentum est sagittis a. Quisque nec commodo tellus. Phasellus elementum " - "feugiat dolor et feugiat. Praesent sed mattis tortor. In vitae sodales purus. Morbi accumsan, ligula et interdum " - "lacinia, leo risus suscipit urna, non luctus mi justo eu ipsum. Curabitur venenatis pretium orci id porttitor. " - "Quisque dapibus nisl sit amet elit lobortis sagittis. Orci varius natoque penatibus et magnis dis parturient " - "montes, nascetur ridiculus mus. Mauris varius dui sit amet tortor facilisis vestibulum. Curabitur condimentum " - "justo nec orci mattis auctor. Quisque aliquet condimentum arcu ac sollicitudin. Maecenas elit elit, condimentum " - "vitae auctor a, cursus et sem. Cras vehicula ante in consequat fermentum. Praesent at massa nisi. Mauris pretium " - "euismod eros, ut posuere ligula ullamcorper id. Nullam aliquet malesuada est at dignissim. Pellentesque finibus " - "sagittis libero nec bibendum. Phasellus dolor ipsum, finibus quis turpis quis, mollis interdum felis."; template constexpr auto make_fill_pattern_array() { @@ -67,10 +39,10 @@ struct data_and_pattern { }; constexpr data_and_pattern patterns[] = { - /* 0. Small, closer to end */ {common_src_data, "aliquet"sv}, - /* 1. Large, closer to end */ {common_src_data, "aliquet malesuada"sv}, - /* 2. Small, closer to begin */ {common_src_data, "pulvinar"sv}, - /* 3. Large, closer to begin */ {common_src_data, "dapibus elit interdum"sv}, + /* 0. Small, closer to end */ {lorem_ipsum, "aliquet"sv}, + /* 1. Large, closer to end */ {lorem_ipsum, "aliquet malesuada"sv}, + /* 2. Small, closer to begin */ {lorem_ipsum, "pulvinar"sv}, + /* 3. Large, closer to begin */ {lorem_ipsum, "dapibus elit interdum"sv}, /* 4. Small, evil */ {fill_pattern_view<3000, false>, fill_pattern_view<7, true>}, /* 5. Large, evil */ {fill_pattern_view<3000, false>, fill_pattern_view<20, true>}, @@ -80,8 +52,8 @@ void c_strstr(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::string haystack(std::begin(src_haystack), std::end(src_haystack)); - const std::string needle(std::begin(src_needle), std::end(src_needle)); + const std::string haystack(src_haystack); + const std::string needle(src_needle); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -96,8 +68,8 @@ void classic_search(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); - const std::vector needle(std::begin(src_needle), std::end(src_needle)); + const std::vector haystack(src_haystack.begin(), src_haystack.end()); + const std::vector needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -112,8 +84,8 @@ void ranges_search(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); - const std::vector needle(std::begin(src_needle), std::end(src_needle)); + const std::vector haystack(src_haystack.begin(), src_haystack.end()); + const std::vector needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -128,8 +100,8 @@ void search_default_searcher(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); - const std::vector needle(std::begin(src_needle), std::end(src_needle)); + const std::vector haystack(src_haystack.begin(), src_haystack.end()); + const std::vector needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -144,8 +116,8 @@ void classic_find_end(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); - const std::vector needle(std::begin(src_needle), std::end(src_needle)); + const std::vector haystack(src_haystack.begin(), src_haystack.end()); + const std::vector needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -160,8 +132,8 @@ void ranges_find_end(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector haystack(std::begin(src_haystack), std::end(src_haystack)); - const std::vector needle(std::begin(src_needle), std::end(src_needle)); + const std::vector haystack(src_haystack.begin(), src_haystack.end()); + const std::vector needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); diff --git a/benchmarks/src/sv_equal.cpp b/benchmarks/src/sv_equal.cpp index ee47ab63c..219b6b927 100644 --- a/benchmarks/src/sv_equal.cpp +++ b/benchmarks/src/sv_equal.cpp @@ -8,28 +8,9 @@ #include #include -using namespace std::string_view_literals; +#include "lorem.hpp" -constexpr auto haystack = - "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam mollis imperdiet massa, at dapibus elit " - "interdumac. In eget sollicitudin mi. Nam at tellus at sapien tincidunt sollicitudin vel non eros. Pellentesque " - "nunc nunc,ullamcorper eu accumsan at, pulvinar non turpis. Quisque vel mauris pulvinar, pretium purus vel, " - "ultricies erat.Curabitur a magna in ligula tristique ornare. Quisque commodo, massa viverra laoreet luctus, sem " - "nisi aliquamvelit, fermentum pulvinar velit leo eget justo. Suspendisse vel erat efficitur, pulvinar eros " - "volutpat, vulputateex. Phasellus non purus vel velit tristique tristique id at ligula. Quisque mollis sodales " - "magna. Mauris et quameu quam viverra tempus. Nullam tempus maximus porta. Nunc mattis eleifend fermentum. Nullam " - "aliquam liberoaccumsan velit elementum, eu laoreet metus convallis. Donec pellentesque lacus ut iaculis iaculis. " - "Curabitur orcielit, bibendum sit amet feugiat at, iaculis sit amet massa. Maecenas imperdiet lacus at vehicula " - "iaculis. Donecvolutpat nunc sit amet accumsan tempor. Quisque pretium vestibulum ultricies. Suspendisse potenti. " - "Aenean at diamiaculis, condimentum felis venenatis, condimentum erat. Nam quis elit dui. Duis quis odio vitae " - "metus hendreritrhoncus ut et magna. Cras ac augue quis nibh pharetra sagittis. Donec ullamcorper vel eros semper " - "pretium. Proinvel sollicitudin eros. Nulla sollicitudin mattis turpis id suscipit. Aliquam sed risus velit. " - "Aliquam iaculis necnibh ac egestas. Duis finibus semper est sed consequat. Sed in sapien quis nibh dignissim " - "mattis. Vestibulum necmetus sodales, euismod mauris ac, sollicitudin libero. Maecenas non arcu ac velit " - "ullamcorper fringilla et quisnulla. Curabitur posuere leo eget ipsum tincidunt dignissim. Cras ultricies suscipit " - "neque, quis suscipit tortorvenenatis non. Cras nisl mi, bibendum in vulputate quis, vestibulum ornare enim. Nunc " - "hendrerit placerat dui,aliquam mollis sem convallis et. Integer vitae urna diam. Phasellus et imperdiet est. " - "Maecenas auctor facilisisnibh non commodo. Suspendisse iaculis quam "sv; +constexpr std::string_view haystack = lorem_ipsum.substr(0, 2048); constexpr std::size_t Count = 8u; diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 436f26846..1b6603756 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -84,7 +84,7 @@ __declspec(noalias) void __stdcall __std_replace_8( _STD_BEGIN template -__declspec(noalias) void _Reverse_copy_vectorized(const void* _First, const void* _Last, void* _Dest) { +__declspec(noalias) void _Reverse_copy_vectorized(const void* _First, const void* _Last, void* _Dest) noexcept { if constexpr (_Nx == 1) { ::__std_reverse_copy_trivially_copyable_1(_First, _Last, _Dest); } else if constexpr (_Nx == 2) { @@ -4661,6 +4661,24 @@ namespace ranges { return {_STD move(_Next), _STD move(_First)}; } +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Vector_alg_in_find_is_safe<_It, _Ty> && sized_sentinel_for<_Se, _It> + && is_same_v<_Pj, identity>) { + if (!_STD is_constant_evaluated()) { + const auto _Size = _Last - _First; + const auto _First_ptr = _STD to_address(_First); + const auto _Last_ptr = _First_ptr + static_cast(_Size); + const auto _Result = _STD _Remove_vectorized(_First_ptr, _Last_ptr, _Val); + + if constexpr (is_pointer_v<_It>) { + return {_Result, _Last_ptr}; + } else { + return {_First + (_Result - _First_ptr), _First + _Size}; + } + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS + while (++_First != _Last) { if (_STD invoke(_Proj, *_First) != _Val) { *_Next = _RANGES iter_move(_First); diff --git a/stl/inc/xmemory b/stl/inc/xmemory index 9a0850296..b8e62c261 100644 --- a/stl/inc/xmemory +++ b/stl/inc/xmemory @@ -25,6 +25,39 @@ _STL_DISABLE_CLANG_WARNINGS #pragma push_macro("new") #undef new +#if _USE_STD_VECTOR_ALGORITHMS +extern "C" { +void* __stdcall __std_remove_1(void* _First, void* _Last, uint8_t _Val) noexcept; +void* __stdcall __std_remove_2(void* _First, void* _Last, uint16_t _Val) noexcept; +void* __stdcall __std_remove_4(void* _First, void* _Last, uint32_t _Val) noexcept; +void* __stdcall __std_remove_8(void* _First, void* _Last, uint64_t _Val) noexcept; +} // extern "C" + +_STD_BEGIN +template +_Ty* _Remove_vectorized(_Ty* const _First, _Ty* const _Last, const _TVal _Val) noexcept { + if constexpr (is_pointer_v<_Ty>) { +#ifdef _WIN64 + return reinterpret_cast<_Ty*>(::__std_remove_8(_First, _Last, reinterpret_cast(_Val))); +#else + return reinterpret_cast<_Ty*>(::__std_remove_4(_First, _Last, reinterpret_cast(_Val))); +#endif + } else if constexpr (sizeof(_Ty) == 1) { + return reinterpret_cast<_Ty*>(::__std_remove_1(_First, _Last, static_cast(_Val))); + } else if constexpr (sizeof(_Ty) == 2) { + return reinterpret_cast<_Ty*>(::__std_remove_2(_First, _Last, static_cast(_Val))); + } else if constexpr (sizeof(_Ty) == 4) { + return reinterpret_cast<_Ty*>(::__std_remove_4(_First, _Last, static_cast(_Val))); + } else if constexpr (sizeof(_Ty) == 8) { + return reinterpret_cast<_Ty*>(::__std_remove_8(_First, _Last, static_cast(_Val))); + } else { + _STL_INTERNAL_STATIC_ASSERT(false); // Unexpected size + } +} +_STD_END + +#endif // _USE_STD_VECTOR_ALGORITHMS + _STD_BEGIN template _NODISCARD constexpr auto _Unfancy(_Ptrty _Ptr) noexcept { // converts from a fancy pointer to a plain pointer @@ -2198,6 +2231,27 @@ _NODISCARD_REMOVE_ALG _CONSTEXPR20 _FwdIt remove(_FwdIt _First, const _FwdIt _La _UFirst = _STD _Find_unchecked(_UFirst, _ULast, _Val); auto _UNext = _UFirst; if (_UFirst != _ULast) { +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Vector_alg_in_find_is_safe) { +#if _HAS_CXX20 + if (!_STD is_constant_evaluated()) +#endif // _HAS_CXX20 + { + const auto _First_ptr = _STD _To_address(_UFirst); + const auto _Result = _STD _Remove_vectorized(_First_ptr, _STD _To_address(_ULast), _Val); + + if constexpr (is_pointer_v) { + _UNext = _Result; + } else { + _UNext += _Result - _First_ptr; + } + + _STD _Seek_wrapped(_First, _UNext); + return _First; + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS + while (++_UFirst != _ULast) { if (!(*_UFirst == _Val)) { *_UNext = _STD move(*_UFirst); diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index a55129bc1..2e6436843 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -4107,6 +4107,216 @@ __declspec(noalias) void __stdcall __std_replace_8( } // extern "C" +namespace { + template + void* _Remove_fallback(void* const _First, void* const _Last, void* const _Out, const _Ty _Val) noexcept { + _Ty* _Src = reinterpret_cast<_Ty*>(_First); + _Ty* _Dest = reinterpret_cast<_Ty*>(_Out); + + while (_Src != _Last) { + if (*_Src != _Val) { + *_Dest = *_Src; + ++_Dest; + } + + ++_Src; + } + + return _Dest; + } + +#ifndef _M_ARM64EC + template + struct _Remove_tables { + uint8_t _Shuf[_Size_v][_Size_h]; + uint8_t _Size[_Size_v]; + }; + + template + constexpr auto _Make_remove_tables(const uint32_t _Mul, const uint32_t _Ew) { + _Remove_tables<_Size_v, _Size_h> _Result; + + for (uint32_t _Vx = 0; _Vx != _Size_v; ++_Vx) { + uint32_t _Nx = 0; + + // Make shuffle mask for pshufb / vpermd corresponding to _Vx bit value. + // Every bit set corresponds to an element skipped. + for (uint32_t _Hx = 0; _Hx != _Size_h / _Ew; ++_Hx) { + if ((_Vx & (1 << _Hx)) == 0) { + // Inner loop needed for cases where the shuffle mask operates on element parts rather than whole + // elements; for whole elements there would be one iteration. + for (uint32_t _Ex = 0; _Ex != _Ew; ++_Ex) { + _Result._Shuf[_Vx][_Nx * _Ew + _Ex] = static_cast(_Hx * _Ew + _Ex); + } + ++_Nx; + } + } + + // Size of elements that are not removed in bytes. + _Result._Size[_Vx] = static_cast(_Nx * _Mul); + + // Fill the remaining with arbitrary elements. + // It is not possible to leave them untouched while keeping this optimization efficient. + // This should not be a problem though, as they should be either overwritten by the next step, + // or left in the removed range. + for (; _Nx != _Size_h / _Ew; ++_Nx) { + // Inner loop needed for cases where the shuffle mask operates on element parts rather than whole + // elements; for whole elements there would be one iteration. + for (uint32_t _Ex = 0; _Ex != _Ew; ++_Ex) { + _Result._Shuf[_Vx][_Nx * _Ew + _Ex] = static_cast(_Nx * _Ew + _Ex); + } + } + } + + return _Result; + } + + constexpr auto _Remove_tables_1_sse = _Make_remove_tables<256, 8>(1, 1); + constexpr auto _Remove_tables_2_sse = _Make_remove_tables<256, 16>(2, 2); + constexpr auto _Remove_tables_4_sse = _Make_remove_tables<16, 16>(4, 4); + constexpr auto _Remove_tables_4_avx = _Make_remove_tables<256, 8>(4, 1); + constexpr auto _Remove_tables_8_sse = _Make_remove_tables<4, 16>(8, 8); + constexpr auto _Remove_tables_8_avx = _Make_remove_tables<16, 8>(8, 2); +#endif // !defined(_M_ARM64EC) +} // unnamed namespace + +extern "C" { + +void* __stdcall __std_remove_1(void* _First, void* const _Last, const uint8_t _Val) noexcept { + void* _Out = _First; + +#ifndef _M_ARM64EC + if (const size_t _Size_bytes = _Byte_length(_First, _Last); _Use_sse42() && _Size_bytes >= 8) { + const __m128i _Match = _mm_shuffle_epi8(_mm_cvtsi32_si128(_Val), _mm_setzero_si128()); + + void* _Stop = _First; + _Advance_bytes(_Stop, _Size_bytes & ~size_t{7}); + do { + const __m128i _Src = _mm_loadu_si64(_First); + const uint32_t _Bingo = _mm_movemask_epi8(_mm_cmpeq_epi8(_Src, _Match)) & 0xFF; + const __m128i _Shuf = _mm_loadu_si64(_Remove_tables_1_sse._Shuf[_Bingo]); + const __m128i _Dest = _mm_shuffle_epi8(_Src, _Shuf); + _mm_storeu_si64(_Out, _Dest); + _Advance_bytes(_Out, _Remove_tables_1_sse._Size[_Bingo]); + _Advance_bytes(_First, 8); + } while (_First != _Stop); + } +#endif // !defined(_M_ARM64EC) + + return _Remove_fallback(_First, _Last, _Out, _Val); +} + +void* __stdcall __std_remove_2(void* _First, void* const _Last, const uint16_t _Val) noexcept { + void* _Out = _First; + +#ifndef _M_ARM64EC + if (const size_t _Size_bytes = _Byte_length(_First, _Last); _Use_sse42() && _Size_bytes >= 16) { + const __m128i _Match = _mm_set1_epi16(_Val); + + void* _Stop = _First; + _Advance_bytes(_Stop, _Size_bytes & ~size_t{0xF}); + do { + const __m128i _Src = _mm_loadu_si128(reinterpret_cast(_First)); + const __m128i _Mask = _mm_cmpeq_epi16(_Src, _Match); + const uint32_t _Bingo = _mm_movemask_epi8(_mm_packs_epi16(_Mask, _mm_setzero_si128())); + const __m128i _Shuf = _mm_loadu_si128(reinterpret_cast(_Remove_tables_2_sse._Shuf[_Bingo])); + const __m128i _Dest = _mm_shuffle_epi8(_Src, _Shuf); + _mm_storeu_si128(reinterpret_cast<__m128i*>(_Out), _Dest); + _Advance_bytes(_Out, _Remove_tables_2_sse._Size[_Bingo]); + _Advance_bytes(_First, 16); + } while (_First != _Stop); + } +#endif // !defined(_M_ARM64EC) + + return _Remove_fallback(_First, _Last, _Out, _Val); +} + +void* __stdcall __std_remove_4(void* _First, void* const _Last, const uint32_t _Val) noexcept { + void* _Out = _First; + +#ifndef _M_ARM64EC + if (const size_t _Size_bytes = _Byte_length(_First, _Last); _Use_avx2() && _Size_bytes >= 32) { + const __m256i _Match = _mm256_set1_epi32(_Val); + + void* _Stop = _First; + _Advance_bytes(_Stop, _Size_bytes & ~size_t{0x1F}); + do { + const __m256i _Src = _mm256_loadu_si256(reinterpret_cast(_First)); + const __m256i _Mask = _mm256_cmpeq_epi32(_Src, _Match); + const uint32_t _Bingo = _mm256_movemask_ps(_mm256_castsi256_ps(_Mask)); + const __m256i _Shuf = _mm256_cvtepu8_epi32(_mm_loadu_si64(_Remove_tables_4_avx._Shuf[_Bingo])); + const __m256i _Dest = _mm256_permutevar8x32_epi32(_Src, _Shuf); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(_Out), _Dest); + _Advance_bytes(_Out, _Remove_tables_4_avx._Size[_Bingo]); + _Advance_bytes(_First, 32); + } while (_First != _Stop); + + _mm256_zeroupper(); // TRANSITION, DevCom-10331414 + } else if (_Use_sse42() && _Size_bytes >= 16) { + const __m128i _Match = _mm_set1_epi32(_Val); + + void* _Stop = _First; + _Advance_bytes(_Stop, _Size_bytes & ~size_t{0xF}); + do { + const __m128i _Src = _mm_loadu_si128(reinterpret_cast(_First)); + const __m128i _Mask = _mm_cmpeq_epi32(_Src, _Match); + const uint32_t _Bingo = _mm_movemask_ps(_mm_castsi128_ps(_Mask)); + const __m128i _Shuf = _mm_loadu_si128(reinterpret_cast(_Remove_tables_4_sse._Shuf[_Bingo])); + const __m128i _Dest = _mm_shuffle_epi8(_Src, _Shuf); + _mm_storeu_si128(reinterpret_cast<__m128i*>(_Out), _Dest); + _Advance_bytes(_Out, _Remove_tables_4_sse._Size[_Bingo]); + _Advance_bytes(_First, 16); + } while (_First != _Stop); + } +#endif // !defined(_M_ARM64EC) + + return _Remove_fallback(_First, _Last, _Out, _Val); +} + +void* __stdcall __std_remove_8(void* _First, void* const _Last, const uint64_t _Val) noexcept { + void* _Out = _First; + +#ifndef _M_ARM64EC + if (const size_t _Size_bytes = _Byte_length(_First, _Last); _Use_avx2() && _Size_bytes >= 32) { + const __m256i _Match = _mm256_set1_epi64x(_Val); + + void* _Stop = _First; + _Advance_bytes(_Stop, _Size_bytes & ~size_t{0x1F}); + do { + const __m256i _Src = _mm256_loadu_si256(reinterpret_cast(_First)); + const __m256i _Mask = _mm256_cmpeq_epi64(_Src, _Match); + const uint32_t _Bingo = _mm256_movemask_pd(_mm256_castsi256_pd(_Mask)); + const __m256i _Shuf = _mm256_cvtepu8_epi32(_mm_loadu_si64(_Remove_tables_8_avx._Shuf[_Bingo])); + const __m256i _Dest = _mm256_permutevar8x32_epi32(_Src, _Shuf); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(_Out), _Dest); + _Advance_bytes(_Out, _Remove_tables_8_avx._Size[_Bingo]); + _Advance_bytes(_First, 32); + } while (_First != _Stop); + + _mm256_zeroupper(); // TRANSITION, DevCom-10331414 + } else if (_Use_sse42() && _Size_bytes >= 16) { + const __m128i _Match = _mm_set1_epi64x(_Val); + + void* _Stop = _First; + _Advance_bytes(_Stop, _Size_bytes & ~size_t{0xF}); + do { + const __m128i _Src = _mm_loadu_si128(reinterpret_cast(_First)); + const __m128i _Mask = _mm_cmpeq_epi64(_Src, _Match); + const uint32_t _Bingo = _mm_movemask_pd(_mm_castsi128_pd(_Mask)); + const __m128i _Shuf = _mm_loadu_si128(reinterpret_cast(_Remove_tables_8_sse._Shuf[_Bingo])); + const __m128i _Dest = _mm_shuffle_epi8(_Src, _Shuf); + _mm_storeu_si128(reinterpret_cast<__m128i*>(_Out), _Dest); + _Advance_bytes(_Out, _Remove_tables_8_sse._Size[_Bingo]); + _Advance_bytes(_First, 16); + } while (_First != _Stop); + } +#endif // !defined(_M_ARM64EC) + + return _Remove_fallback(_First, _Last, _Out, _Val); +} + +} // extern "C" + namespace { namespace __std_bitset_to_string { #ifdef _M_ARM64EC diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 586e4a0f4..6a10cb29b 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -786,6 +786,62 @@ FwdIt2 last_known_good_swap_ranges(FwdIt1 first1, const FwdIt1 last1, FwdIt2 des return dest; } +template +FwdIt last_known_good_remove(FwdIt first, FwdIt last, T val) { + FwdIt dest = first; + + while (first != last) { + if (*first != val) { + *dest = *first; + ++dest; + } + + ++first; + } + + return dest; +} + +template +void test_case_remove(vector& in_out_expected, vector& in_out_actual, vector& in_out_actual_r, const T val) { + auto rem_expected = last_known_good_remove(in_out_expected.begin(), in_out_expected.end(), val); + auto rem_actual = remove(in_out_actual.begin(), in_out_actual.end(), val); + assert(equal(in_out_expected.begin(), rem_expected, in_out_actual.begin(), rem_actual)); + +#if _HAS_CXX20 + auto rem_actual_r = ranges::remove(in_out_actual_r, val); + assert(equal(in_out_expected.begin(), rem_expected, begin(in_out_actual_r), begin(rem_actual_r))); +#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv + (void) in_out_actual_r; +#endif // ^^^ !_HAS_CXX20 ^^^ +} + +template +void test_remove(mt19937_64& gen) { + using TD = conditional_t; + binomial_distribution dis(10); + + vector source; + vector in_out_expected; + vector in_out_actual; + vector in_out_actual_r; + + for (const auto& v : {&source, &in_out_expected, &in_out_actual, &in_out_actual_r}) { + v->reserve(dataCount); + } + + test_case_remove(in_out_expected, in_out_actual, in_out_actual_r, static_cast(dis(gen))); + for (size_t attempts = 0; attempts < dataCount; ++attempts) { + source.push_back(static_cast(dis(gen))); + + for (const auto& v : {&in_out_expected, &in_out_actual, &in_out_actual_r}) { + *v = source; + } + + test_case_remove(in_out_expected, in_out_actual, in_out_actual_r, static_cast(dis(gen))); + } +} + template void test_swap_ranges(mt19937_64& gen) { const auto fn = [&]() { return static_cast(gen()); }; @@ -956,6 +1012,16 @@ void test_vector_algorithms(mt19937_64& gen) { test_reverse_copy(gen); test_reverse_copy(gen); + test_remove(gen); + test_remove(gen); + test_remove(gen); + test_remove(gen); + test_remove(gen); + test_remove(gen); + test_remove(gen); + test_remove(gen); + test_remove(gen); + test_swap_ranges(gen); test_swap_ranges(gen); test_swap_ranges(gen);