Concentrated header for internal bit utilities (#3721)

Co-authored-by: Stephan T. Lavavej <stl@microsoft.com>
This commit is contained in:
A. Jiang 2023-06-15 16:21:12 +08:00 коммит произвёл GitHub
Родитель ef7df32f98
Коммит 47679bbaa8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
16 изменённых файлов: 536 добавлений и 457 удалений

Просмотреть файл

@ -7,6 +7,7 @@
set(HEADERS
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_all_public_headers.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_bit_utils.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_chrono.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_cxx_stdatomic.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_filebuf.hpp

Просмотреть файл

@ -0,0 +1,448 @@
// __msvc_bit_utils.hpp internal header (core)
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once
#ifndef __MSVC_BIT_UTILS_HPP
#define __MSVC_BIT_UTILS_HPP
#include <yvals_core.h>
#if _STL_COMPILER_PREPROCESSOR
#include <climits>
#include <xtr1common>
#include _STL_INTRIN_HEADER
// TRANSITION, GH-2129, move down to _Arm64_popcount
// _HAS_NEON_INTRINSICS is 1 only when targeting _M_ARM64 or _M_ARM64EC and none of _M_CEE_PURE, __CUDACC__,
// __INTEL_COMPILER, or __clang__ is defined; otherwise it is 0. It gates the <arm64_neon.h> include below
// and the _Arm64_popcount helper later in this header, and is #undef'd before the header ends.
#if (defined(_M_ARM64) || defined(_M_ARM64EC)) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
&& !defined(__INTEL_COMPILER) && !defined(__clang__) // TRANSITION, LLVM-51488
#define _HAS_NEON_INTRINSICS 1
#else // ^^^ intrinsics available / intrinsics unavailable vvv
#define _HAS_NEON_INTRINSICS 0
#endif // ^^^ intrinsics unavailable ^^^
#if _HAS_NEON_INTRINSICS
#include <arm64_neon.h> // TRANSITION, GH-2129
#endif // _HAS_NEON_INTRINSICS
#pragma pack(push, _CRT_PACKING)
#pragma warning(push, _STL_WARNING_LEVEL)
#pragma warning(disable : _STL_DISABLED_WARNINGS)
_STL_DISABLE_CLANG_WARNINGS
#pragma push_macro("new")
#undef new
_STD_BEGIN
// Declaration only: __isa_available is defined outside this header. The x86/x64 dispatch helpers below
// compare it against the thresholds that follow to decide at run time which instructions to use.
extern "C" {
extern int __isa_available;
}
// Local copies of the ISA level constants, so this header does not need the macros they mirror.
_INLINE_VAR constexpr int _Stl_isa_available_sse42 = 2; // equal to __ISA_AVAILABLE_SSE42
_INLINE_VAR constexpr int _Stl_isa_available_avx2 = 5; // equal to __ISA_AVAILABLE_AVX2
// Number of bits in the object representation of _UInt (sizeof * CHAR_BIT); for the unsigned integer types
// this header works with, that is used as the number of value bits.
template <class _UInt>
_INLINE_VAR constexpr int _Unsigned_integer_digits = sizeof(_UInt) * CHAR_BIT;
// Portable countl_zero: counts the leading zero bits of _Val using only shifts and comparisons.
// Used during constant evaluation and whenever the dedicated CPU instructions cannot be used.
// Halving search for the highest set bit; see "Hacker's Delight" section 5-3.
template <class _Ty>
_NODISCARD constexpr int _Countl_zero_fallback(_Ty _Val) noexcept {
    constexpr unsigned int _Width = _Unsigned_integer_digits<_Ty>;

    // Starts as "every bit is zero" and shrinks each time a set bit is found in an upper part.
    unsigned int _Zeros = _Width;
    for (unsigned int _Shift = _Width / 2; _Shift != 0; _Shift >>= 1) {
        const _Ty _Upper = static_cast<_Ty>(_Val >> _Shift);
        if (_Upper != 0) {
            // The highest set bit is above position _Shift: drop the low part and keep searching the rest.
            _Zeros -= _Shift;
            _Val = _Upper;
        }
    }

    // After the final 1-bit step _Val is at most 1; subtracting it accounts for the last remaining bit.
    return static_cast<int>(_Zeros) - static_cast<int>(_Val);
}
// _HAS_COUNTL_ZERO_INTRINSICS is 1 unless _M_CEE_PURE, __CUDACC__, or __INTEL_COMPILER is defined.
// It only says that intrinsic-based countl_zero helpers may be declared; the architecture-specific
// #if blocks that follow decide which helpers actually exist.
#if !defined(_M_CEE_PURE) && !defined(__CUDACC__) && !defined(__INTEL_COMPILER)
#define _HAS_COUNTL_ZERO_INTRINSICS 1
#else // ^^^ intrinsics available / intrinsics unavailable vvv
#define _HAS_COUNTL_ZERO_INTRINSICS 0
#endif // ^^^ intrinsics unavailable ^^^
#if _HAS_COUNTL_ZERO_INTRINSICS
#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
// Counts the leading zero bits of _Val with the __lzcnt16 / __lzcnt / __lzcnt64 intrinsics.
// Runtime only (not constexpr). No CPU capability check is made here; see _Checked_x86_x64_countl_zero.
template <class _Ty>
_NODISCARD int _Countl_zero_lzcnt(const _Ty _Val) noexcept {
    constexpr int _Bits = _Unsigned_integer_digits<_Ty>;

    if constexpr (_Bits <= 16) {
        // The count is taken over 16 bits, so remove the zeros that come from widening a narrower type.
        constexpr int _Padding = 16 - _Bits;
        return static_cast<int>(__lzcnt16(_Val) - _Padding);
    } else if constexpr (_Bits == 32) {
        return static_cast<int>(__lzcnt(_Val));
    } else {
#ifdef _M_IX86
        // 32-bit x86: handle the 64-bit value as two 32-bit halves, recursing into the 32-bit case.
        const auto _Lower_half         = static_cast<unsigned int>(_Val);
        const unsigned int _Upper_half = _Val >> 32;
        return _Upper_half == 0 ? 32 + _Countl_zero_lzcnt(_Lower_half) : _Countl_zero_lzcnt(_Upper_half);
#else // ^^^ _M_IX86 / !_M_IX86 vvv
        return static_cast<int>(__lzcnt64(_Val));
#endif // _M_IX86
    }
}
// Counts the leading zero bits of _Val using the _BitScanReverse intrinsics. This is the path
// _Checked_x86_x64_countl_zero takes when it cannot confirm AVX2-level ISA support.
// Returns _Digits when the scan intrinsic reports that no bit is set (i.e. _Val == 0).
template <class _Ty>
_NODISCARD int _Countl_zero_bsr(const _Ty _Val) noexcept {
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
unsigned long _Result;
if constexpr (_Digits <= 32) {
if (!_BitScanReverse(&_Result, _Val)) {
return _Digits;
}
} else {
#ifdef _M_IX86
// 32-bit x86: scan the high 32 bits first and only look at the low 32 bits when the high half is empty
const unsigned int _High = _Val >> 32;
if (_BitScanReverse(&_Result, _High)) {
// index within the high half, so the count is relative to bit 31 of that half
return static_cast<int>(31 - _Result);
}
const auto _Low = static_cast<unsigned int>(_Val);
if (!_BitScanReverse(&_Result, _Low)) {
return _Digits;
}
#else // ^^^ _M_IX86 / !_M_IX86 vvv
if (!_BitScanReverse64(&_Result, _Val)) {
return _Digits;
}
#endif // _M_IX86
}
// Shared exit for the paths that fall through: _Result holds the bit index written by the scan,
// which is converted into a count of zeros above that bit.
return static_cast<int>(_Digits - 1 - _Result);
}
// Runtime countl_zero for x86/x64: uses the lzcnt-based helper when it is known to be usable and the
// bsr-based helper otherwise.
template <class _Ty>
_NODISCARD int _Checked_x86_x64_countl_zero(const _Ty _Val) noexcept {
#ifndef __AVX2__
    // Not compiled with __AVX2__ defined, so consult the ISA level reported at run time.
    // Anything below the AVX2 level is treated as "lzcnt might be missing".
    if (__isa_available < _Stl_isa_available_avx2) {
        return _Countl_zero_bsr(_Val);
    }
#endif // !defined(__AVX2__)
    return _Countl_zero_lzcnt(_Val);
}
#endif // defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
#if defined(_M_ARM) || defined(_M_ARM64)
#ifdef __clang__ // TRANSITION, GH-1586
// Overload set that forwards to the Clang count-leading-zeros builtin matching the argument's type.
// There is no unsigned char overload; _Checked_arm_arm64_countl_zero widens that case to unsigned short
// and corrects the result.
// Each body is a single compiler builtin call, so the overloads are noexcept, consistent with the
// noexcept function that calls them.
// NOTE(review): the builtins are presumably undefined for a zero argument; the caller filters zero out first.
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned short _Val) noexcept {
    return __builtin_clzs(_Val);
}

_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned int _Val) noexcept {
    return __builtin_clz(_Val);
}

_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long _Val) noexcept {
    return __builtin_clzl(_Val);
}

_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long long _Val) noexcept {
    return __builtin_clzll(_Val);
}
#endif // TRANSITION, GH-1586
// Runtime countl_zero for ARM/ARM64. Returns _Digits for zero; otherwise delegates to the Clang builtin
// wrappers (Clang) or to _CountLeadingZeros / _CountLeadingZeros64 (other compilers).
template <class _Ty>
_NODISCARD int _Checked_arm_arm64_countl_zero(const _Ty _Val) noexcept {
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
// Zero is answered directly, so the code below only ever sees nonzero input.
// NOTE(review): presumably required because the Clang builtins are undefined for 0 - confirm.
if (_Val == 0) {
return _Digits;
}
#ifdef __clang__ // TRANSITION, GH-1586
if constexpr (is_same_v<remove_cv_t<_Ty>, unsigned char>) {
// No unsigned char overload exists: count in unsigned short, then subtract the extra leading
// zeros introduced by the widening.
return _Clang_arm_arm64_countl_zero(static_cast<unsigned short>(_Val))
- (_Unsigned_integer_digits<unsigned short> - _Digits);
} else {
return _Clang_arm_arm64_countl_zero(_Val);
}
#else // ^^^ workaround / no workaround vvv
if constexpr (_Digits <= 32) {
// The count is taken at the width of unsigned long; subtract the difference for narrower _Ty.
return static_cast<int>(_CountLeadingZeros(_Val)) - (_Unsigned_integer_digits<unsigned long> - _Digits);
} else {
return static_cast<int>(_CountLeadingZeros64(_Val));
}
#endif // TRANSITION, GH-1586
}
#endif // defined(_M_ARM) || defined(_M_ARM64)
#endif // _HAS_COUNTL_ZERO_INTRINSICS
// Portable countr_zero: counts the trailing zero bits of _Val without specialized CPU instructions.
// Used during constant evaluation and whenever those instructions cannot be used.
// Reduces the problem to _Countl_zero_fallback; see "Hacker's Delight" section 5-4.
template <class _Ty>
_NODISCARD constexpr int _Countr_zero_fallback(const _Ty _Val) noexcept {
    // ~x & (x - 1) has ones exactly in the positions below the lowest set bit of x
    // (and in every position when x is 0). Each step is truncated back to _Ty after integral promotion.
    const _Ty _Inverted      = static_cast<_Ty>(~_Val);
    const _Ty _Decremented   = static_cast<_Ty>(_Val - 1);
    const _Ty _Trailing_mask = static_cast<_Ty>(_Inverted & _Decremented);

    // The mask is a run of ones at the bottom, so its length is the width minus its leading zeros.
    return _Unsigned_integer_digits<_Ty> - _Countl_zero_fallback(_Trailing_mask);
}
// Portable popcount: counts the set bits of _Val without specialized CPU instructions.
// Used during constant evaluation and whenever those instructions cannot be used.
template <class _Ty>
_NODISCARD constexpr int _Popcount_fallback(_Ty _Val) noexcept {
    constexpr int _Bits = _Unsigned_integer_digits<_Ty>;
#if defined(_M_IX86) || defined(_M_ARM)
    if constexpr (_Bits == 64) {
        // 64-bit bit operations are less efficient on architectures without 64-bit registers,
        // so count each 32-bit half on its own and add the results.
        const auto _Lower_half = static_cast<unsigned long>(_Val);
        const auto _Upper_half = static_cast<unsigned long>(_Val >> 32);
        return _Popcount_fallback(_Lower_half) + _Popcount_fallback(_Upper_half);
    }
#endif // defined(_M_IX86) || defined(_M_ARM)

    // The 64-bit patterns are deliberately truncated to the width of _Ty by the casts.
    constexpr _Ty _Mask_55 = static_cast<_Ty>(0x5555'5555'5555'5555ull);
    constexpr _Ty _Mask_33 = static_cast<_Ty>(0x3333'3333'3333'3333ull);
    constexpr _Ty _Mask_0f = static_cast<_Ty>(0x0F0F'0F0F'0F0F'0F0Full);
    constexpr _Ty _Ones    = static_cast<_Ty>(0x0101'0101'0101'0101ull);

    // Fold bit counts into 2-bit, then 4-bit, then 8-bit fields.
    _Val = static_cast<_Ty>(_Val - ((_Val >> 1) & _Mask_55));
    _Val = static_cast<_Ty>((_Val & _Mask_33) + ((_Val >> 2) & _Mask_33));
    _Val = static_cast<_Ty>((_Val + (_Val >> 4)) & _Mask_0f);

    // Multiplying by a one in every byte accumulates the sum of all byte counts in the highest byte.
    _Val = static_cast<_Ty>(_Val * _Ones);

    // Extract that highest byte.
    return static_cast<int>(_Val >> (_Bits - 8));
}
// _HAS_TZCNT_BSF_INTRINSICS is 1 for x86, or x64 that is not ARM64EC, provided none of _M_CEE_PURE,
// __CUDACC__, or __INTEL_COMPILER is defined. It guards the tzcnt/bsf-based countr_zero helpers and is
// #undef'd before the header ends.
#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
&& !defined(__INTEL_COMPILER)
#define _HAS_TZCNT_BSF_INTRINSICS 1
#else // ^^^ intrinsics available / intrinsics unavailable vvv
#define _HAS_TZCNT_BSF_INTRINSICS 0
#endif // ^^^ intrinsics unavailable ^^^
#if _HAS_TZCNT_BSF_INTRINSICS
// _TZCNT_U32 / _TZCNT_U64 name the compiler-specific trailing-zero-count intrinsics. They are private to
// _Countr_zero_tzcnt and are #undef'd immediately after it.
#ifdef __clang__
#define _TZCNT_U32 __builtin_ia32_tzcnt_u32
#define _TZCNT_U64 __builtin_ia32_tzcnt_u64
#else // ^^^ __clang__ / !__clang__ vvv
#define _TZCNT_U32 _tzcnt_u32
#define _TZCNT_U64 _tzcnt_u64
#endif // __clang__
// Counts the trailing zero bits of _Val with the tzcnt intrinsics. Runtime only (not constexpr).
// No CPU capability check is made here; see _Checked_x86_x64_countr_zero.
template <class _Ty>
_NODISCARD int _Countr_zero_tzcnt(const _Ty _Val) noexcept {
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
constexpr _Ty _Max = static_cast<_Ty>(-1); // equal to (numeric_limits<_Ty>::max)()
if constexpr (_Digits <= 32) {
// Intended widening to int. This operation means that a narrow 0 will widen
// to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros
// of the wider type.
// (~_Max sets every bit above the width of _Ty after promotion; for a 32-bit _Ty it is 0.)
return static_cast<int>(_TZCNT_U32(static_cast<unsigned int>(~_Max | _Val)));
} else {
#ifdef _M_IX86
// 32-bit x86: the low half decides unless it is entirely zero, in which case the count is
// 32 plus the count of the high half.
const auto _Low = static_cast<unsigned int>(_Val);
if (_Low == 0) {
const unsigned int _High = _Val >> 32;
return static_cast<int>(32 + _TZCNT_U32(_High));
} else {
return static_cast<int>(_TZCNT_U32(_Low));
}
#else // ^^^ _M_IX86 / !_M_IX86 vvv
return static_cast<int>(_TZCNT_U64(_Val));
#endif // _M_IX86
}
}
#undef _TZCNT_U32
#undef _TZCNT_U64
// Counts the trailing zero bits of _Val using the _BitScanForward intrinsics. This is the path
// _Checked_x86_x64_countr_zero takes when it cannot confirm AVX2-level ISA support.
// Returns _Digits when the scan intrinsic reports that no bit is set.
template <class _Ty>
_NODISCARD int _Countr_zero_bsf(const _Ty _Val) noexcept {
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
constexpr _Ty _Max = static_cast<_Ty>(-1); // equal to (numeric_limits<_Ty>::max)()
unsigned long _Result;
if constexpr (_Digits <= 32) {
// Intended widening to int. This operation means that a narrow 0 will widen
// to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros
// of the wider type.
if (!_BitScanForward(&_Result, static_cast<unsigned int>(~_Max | _Val))) {
return _Digits;
}
} else {
#ifdef _M_IX86
// 32-bit x86: scan the low 32 bits first; only when they are all zero scan the high 32 bits
// and offset the result by 32.
const auto _Low = static_cast<unsigned int>(_Val);
if (_BitScanForward(&_Result, _Low)) {
return static_cast<int>(_Result);
}
const unsigned int _High = _Val >> 32;
if (!_BitScanForward(&_Result, _High)) {
return _Digits;
} else {
return static_cast<int>(_Result + 32);
}
#else // ^^^ _M_IX86 / !_M_IX86 vvv
if (!_BitScanForward64(&_Result, _Val)) {
return _Digits;
}
#endif // _M_IX86
}
// Shared exit for the paths that fall through: the bit index written by the scan is the answer.
return static_cast<int>(_Result);
}
// Runtime countr_zero for x86/x64: uses the tzcnt-based helper when it is known to be usable and the
// bsf-based helper otherwise.
template <class _Ty>
_NODISCARD int _Checked_x86_x64_countr_zero(const _Ty _Val) noexcept {
#ifndef __AVX2__
    // Not compiled with __AVX2__ defined, so consult the ISA level reported at run time.
    // Anything below the AVX2 level is treated as "tzcnt might be missing".
    if (__isa_available < _Stl_isa_available_avx2) {
        return _Countr_zero_bsf(_Val);
    }
#endif // !defined(__AVX2__)
    return _Countr_zero_tzcnt(_Val);
}
#endif // _HAS_TZCNT_BSF_INTRINSICS
// _HAS_POPCNT_INTRINSICS is 1 for x86, or x64 that is not ARM64EC, provided none of _M_CEE_PURE,
// __CUDACC__, or __INTEL_COMPILER is defined. It guards the __popcnt-based helpers and is #undef'd
// before the header ends.
#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
&& !defined(__INTEL_COMPILER)
#define _HAS_POPCNT_INTRINSICS 1
#else // ^^^ intrinsics available / intrinsics unavailable vvv
#define _HAS_POPCNT_INTRINSICS 0
#endif // ^^^ intrinsics unavailable ^^^
#if _HAS_POPCNT_INTRINSICS
// Counts the set bits of _Val with the __popcnt16 / __popcnt / __popcnt64 intrinsics.
// "Unchecked": no CPU capability test is made here; _Checked_x86_x64_popcount performs it.
template <class _Ty>
_NODISCARD int _Unchecked_x86_x64_popcount(const _Ty _Val) noexcept {
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
if constexpr (_Digits <= 16) {
return static_cast<int>(__popcnt16(_Val));
} else if constexpr (_Digits == 32) {
return static_cast<int>(__popcnt(_Val));
} else {
#ifdef _M_IX86
// 32-bit x86: sum the counts of the high and low 32-bit halves
return static_cast<int>(__popcnt(_Val >> 32) + __popcnt(static_cast<unsigned int>(_Val)));
#else // ^^^ _M_IX86 / !_M_IX86 vvv
return static_cast<int>(__popcnt64(_Val));
#endif // _M_IX86
}
}
// Runtime popcount for x86/x64: uses the intrinsic-based helper when it is known to be usable and the
// portable fallback otherwise.
template <class _Ty>
_NODISCARD int _Checked_x86_x64_popcount(const _Ty _Val) noexcept {
#ifndef __AVX__
    // Not compiled with __AVX__ defined, so consult the ISA level reported at run time.
    // Anything below the SSE4.2 level is treated as "popcnt might be missing".
    if (__isa_available < _Stl_isa_available_sse42) {
        return _Popcount_fallback(_Val);
    }
#endif // !defined(__AVX__)
    return _Unchecked_x86_x64_popcount(_Val);
}
#endif // _HAS_POPCNT_INTRINSICS
#if _HAS_NEON_INTRINSICS
// Counts the set bits of a 64-bit value with NEON intrinsics. Narrower arguments reach this function
// through implicit conversion to unsigned long long.
// NOTE(review): neon_cnt / neon_addv8 are presumably the per-byte bit count followed by a sum across
// the byte lanes - confirm against <arm64_neon.h>.
_NODISCARD inline int _Arm64_popcount(const unsigned long long _Val) noexcept {
    const __n64 _Lane_counts = neon_cnt(__uint64ToN64_v(_Val));
    const auto _Lane_sum     = neon_addv8(_Lane_counts);
    return _Lane_sum.n8_i8[0];
}
#endif // _HAS_NEON_INTRINSICS
// True exactly when _Ty, ignoring cv-qualifiers, is unsigned char, unsigned short, unsigned int,
// unsigned long, or unsigned long long. Used to constrain the bit utilities below.
template <class _Ty>
_INLINE_VAR constexpr bool _Is_standard_unsigned_integer =
_Is_any_of_v<remove_cv_t<_Ty>, unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long>;
// Internal countr_zero, constrained to the standard unsigned integer types.
// With tzcnt/bsf intrinsics available: in C++20 the intrinsic path is taken only when the call is not
// being constant-evaluated; before C++20 the is_constant_evaluated() test is compiled out, so the
// braced block below always runs and always returns.
// Without those intrinsics, or during constant evaluation, the portable fallback is used.
template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
_NODISCARD _CONSTEXPR20 int _Countr_zero(const _Ty _Val) noexcept {
#if _HAS_TZCNT_BSF_INTRINSICS
#if _HAS_CXX20
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
return _Checked_x86_x64_countr_zero(_Val);
}
#endif // _HAS_TZCNT_BSF_INTRINSICS
return _Countr_zero_fallback(_Val);
}
// Chooses a countr_zero implementation once and passes it to _Callback as a stateless lambda taking
// _Ty; returns whatever _Callback returns. This lets a caller make the ISA decision a single time
// rather than on every countr_zero call.
// The intrinsic-based lambdas are considered only in C++20 mode, when intrinsics are available, and
// when the call is not being constant-evaluated; in every other case the fallback lambda is passed.
template <class _Ty, class _Fn>
constexpr decltype(auto) _Select_countr_zero_impl(_Fn _Callback) {
// TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining
#if _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20
if (!_STD is_constant_evaluated()) {
#ifdef __AVX2__
return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); });
#else // ^^^ AVX2 / not AVX2 vvv
// same runtime test as _Checked_x86_x64_countr_zero, hoisted out of the per-value call
const bool _Definitely_have_tzcnt = __isa_available >= _Stl_isa_available_avx2;
if (_Definitely_have_tzcnt) {
return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); });
} else {
return _Callback([](_Ty _Val) { return _Countr_zero_bsf(_Val); });
}
#endif // ^^^ not AVX2 ^^^
}
#endif // ^^^ _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20 ^^^
// C++17 constexpr gcd() calls this function, so it should be constexpr unless we detect runtime evaluation.
return _Callback([](_Ty _Val) { return _Countr_zero_fallback(_Val); });
}
// Internal popcount, constrained to the standard unsigned integer types.
// With x86/x64 popcnt or ARM64 NEON intrinsics available: in C++20 the intrinsic path is taken only
// when the call is not being constant-evaluated; before C++20 the is_constant_evaluated() test is
// compiled out, so the braced block below always runs and always returns.
// Without intrinsics, or during constant evaluation, the portable fallback is used.
template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
_NODISCARD _CONSTEXPR20 int _Popcount(const _Ty _Val) noexcept {
#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS
#if _HAS_CXX20
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
#if _HAS_POPCNT_INTRINSICS
return _Checked_x86_x64_popcount(_Val);
#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv
return _Arm64_popcount(_Val);
#endif // ^^^ ARM64 intrinsics available ^^^
}
#endif // ^^^ any intrinsics available ^^^
return _Popcount_fallback(_Val);
}
// Chooses a popcount implementation once and passes it to _Callback as a stateless lambda taking _Ty;
// returns whatever _Callback returns. This lets a caller make the ISA decision a single time rather
// than on every popcount call.
// Unlike _Select_countr_zero_impl, the intrinsic paths here are also used before C++20: the
// is_constant_evaluated() test is then compiled out and the braced block always runs and returns.
template <class _Ty, class _Fn>
_CONSTEXPR20 decltype(auto) _Select_popcount_impl(_Fn _Callback) {
// TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining
#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS
#if _HAS_CXX20
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
#if _HAS_POPCNT_INTRINSICS
#ifndef __AVX__
// same runtime test as _Checked_x86_x64_popcount, hoisted out of the per-value call
const bool _Definitely_have_popcnt = __isa_available >= _Stl_isa_available_sse42;
if (!_Definitely_have_popcnt) {
return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); });
}
#endif // !defined(__AVX__)
return _Callback([](_Ty _Val) { return _Unchecked_x86_x64_popcount(_Val); });
#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv
return _Callback([](_Ty _Val) { return _Arm64_popcount(_Val); });
#endif // ^^^ ARM64 intrinsics available ^^^
}
#endif // ^^^ any intrinsics available ^^^
return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); });
}
#undef _HAS_POPCNT_INTRINSICS
#undef _HAS_TZCNT_BSF_INTRINSICS
_STD_END
#undef _HAS_NEON_INTRINSICS
#pragma pop_macro("new")
_STL_RESTORE_CLANG_WARNINGS
#pragma warning(pop)
#pragma pack(pop)
#endif // _STL_COMPILER_PREPROCESSOR
#endif // __MSVC_BIT_UTILS_HPP

Просмотреть файл

@ -61,7 +61,6 @@
#if _STL_COMPILER_PREPROCESSOR
#include <cstdint>
#include <limits>
#include <xutility>
#pragma pack(push, _CRT_PACKING)
@ -79,7 +78,7 @@ struct _Unicode_property_data {
uint16_t _Props_and_size[_NumRanges];
_NODISCARD constexpr _ValueEnum _Get_property_for_codepoint(const uint32_t _Code_point) const noexcept {
ptrdiff_t _Upper_idx = _STD upper_bound(_Lower_bounds, _STD end(_Lower_bounds), _Code_point) - _Lower_bounds;
constexpr auto _No_value_constant = static_cast<_ValueEnum>((numeric_limits<uint8_t>::max)());
constexpr auto _No_value_constant = static_cast<_ValueEnum>(UINT8_MAX);
if (_Upper_idx == 0) {
return _No_value_constant;
}

Просмотреть файл

@ -9,6 +9,7 @@
#include <yvals_core.h>
#if _STL_COMPILER_PREPROCESSOR
#include <__msvc_bit_utils.hpp>
#include <cstdint>
#include <limits>
#include <type_traits>
@ -16,7 +17,6 @@
#include _STL_INTRIN_HEADER
#if _HAS_CXX20
#include <bit>
#include <compare>
#define _ZERO_OR_NO_INIT
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
@ -56,6 +56,24 @@ _STD_BEGIN
#define _STL_128_DIV_INTRINSICS 0
#endif // ^^^ intrinsics unavailable ^^^
// countl_zero for internal use that does not depend on C++20 <bit>: it tests _Is_constant_evaluated()
// and uses the architecture-specific runtime helper only outside constant evaluation, falling back to
// _Countl_zero_fallback otherwise (and on targets with no intrinsic helper).
// _Ty must be one of the standard unsigned integer types; this is enforced by an internal static assertion.
template <class _Ty>
_NODISCARD constexpr int _Countl_zero_internal(const _Ty _Val) noexcept {
_STL_INTERNAL_STATIC_ASSERT(_Is_standard_unsigned_integer<_Ty>);
#if _HAS_COUNTL_ZERO_INTRINSICS
#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
if (!_Is_constant_evaluated()) {
return _Checked_x86_x64_countl_zero(_Val);
}
#elif defined(_M_ARM) || defined(_M_ARM64)
if (!_Is_constant_evaluated()) {
return _Checked_arm_arm64_countl_zero(_Val);
}
#endif // defined(_M_ARM) || defined(_M_ARM64)
#endif // _HAS_COUNTL_ZERO_INTRINSICS
return _Countl_zero_fallback(_Val);
}
struct
#ifndef _M_ARM
alignas(16)
@ -143,7 +161,7 @@ struct
static constexpr void _Knuth_4_3_1_M(
const uint32_t (&__u)[__m], const uint32_t (&__v)[__n], uint32_t (&__w)[__n + __m]) noexcept {
#ifdef _ENABLE_STL_INTERNAL_CHECK
constexpr auto _Int_max = static_cast<size_t>((numeric_limits<int>::max)());
constexpr auto _Int_max = static_cast<size_t>(INT_MAX);
_STL_INTERNAL_STATIC_ASSERT(__m <= _Int_max);
_STL_INTERNAL_STATIC_ASSERT(__n <= _Int_max);
#endif // _ENABLE_STL_INTERNAL_CHECK
@ -192,7 +210,7 @@ struct
static constexpr void _Knuth_4_3_1_D(uint32_t* const __u, const size_t __u_size, const uint32_t* const __v,
const size_t __v_size, uint32_t* const __q) noexcept {
// Pre: __u + [0, __u_size), __v + [0, __v_size), and __q + [0, __u_size - __v_size) are all valid ranges
// constexpr auto _Int_max = static_cast<size_t>((numeric_limits<int>::max)());
// constexpr auto _Int_max = static_cast<size_t>(INT_MAX);
// _STL_INTERNAL_CHECK(__v_size <= _Int_max);
const int __n = static_cast<int>(__v_size);
// _STL_INTERNAL_CHECK(__u_size > __v_size);
@ -252,11 +270,7 @@ struct
}
#endif // _STL_128_DIV_INTRINSICS
#if _HAS_CXX20
const auto __d = _STD countl_zero(static_cast<uint32_t>(_Div >> 32));
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
const auto __d = _Countl_zero_fallback(static_cast<uint32_t>(_Div >> 32));
#endif // ^^^ !_HAS_CXX20 ^^^
const auto __d = _Countl_zero_internal(static_cast<uint32_t>(_Div >> 32));
if (__d >= 32) { // _Div < 2^32
auto _Rem = (_High << 32) | (_Low >> 32);
auto _Result = _Rem / static_cast<uint32_t>(_Div);
@ -464,11 +478,7 @@ struct
// _STL_INTERNAL_CHECK(_Den._Word[1] != 0);
// _STL_INTERNAL_CHECK(_Num._Word[1] > _Den._Word[1]);
// Normalize by shifting both left until _Den's high bit is set (So _Den's high digit is >= b / 2)
#if _HAS_CXX20
const auto __d = _STD countl_zero(_Den._Word[1]);
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
const auto __d = _Countl_zero_fallback(_Den._Word[1]);
#endif // ^^^ !_HAS_CXX20 ^^^
const auto __d = _Countl_zero_internal(_Den._Word[1]);
_Den <<= __d;
auto _High_digit = __d == 0 ? 0 : _Num._Word[1] >> (64 - __d); // This creates a third digit for _Num
_Num <<= __d;
@ -513,11 +523,7 @@ struct
}
return __qhat;
#else // ^^^ 128-bit intrinsics / no such intrinsics vvv
#if _HAS_CXX20
auto __d = _STD countl_zero(_Den._Word[1]);
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
auto __d = _Countl_zero_fallback(_Den._Word[1]);
#endif // ^^^ !_HAS_CXX20 ^^^
auto __d = _Countl_zero_internal(_Den._Word[1]);
const bool _Three_word_den = __d >= 32;
__d &= 31;
uint32_t __u[5]{
@ -597,11 +603,7 @@ struct
// _STL_INTERNAL_CHECK(_Den._Word[1] != 0);
// _STL_INTERNAL_CHECK(_Num._Word[1] > _Den._Word[1]);
// Normalize by shifting both left until _Den's high bit is set (So _Den's high digit is >= b / 2)
#if _HAS_CXX20
const auto __d = _STD countl_zero(_Den._Word[1]);
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
const auto __d = _Countl_zero_fallback(_Den._Word[1]);
#endif // ^^^ !_HAS_CXX20 ^^^
const auto __d = _Countl_zero_internal(_Den._Word[1]);
_Den <<= __d;
auto _High_digit = __d == 0 ? 0 : _Num._Word[1] >> (64 - __d); // This creates a third digit for _Num
_Num <<= __d;
@ -648,11 +650,7 @@ struct
(void) _AddCarry64(_Carry, _Num._Word[1], _Den._Word[1], _Num._Word[1]);
}
#else // ^^^ 128-bit intrinsics / no such intrinsics vvv
#if _HAS_CXX20
auto __d = _STD countl_zero(_Den._Word[1]);
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
auto __d = _Countl_zero_fallback(_Den._Word[1]);
#endif // ^^^ !_HAS_CXX20 ^^^
auto __d = _Countl_zero_internal(_Den._Word[1]);
const bool _Three_word_den = __d >= 32;
__d &= 31;
uint32_t __u[5]{

Просмотреть файл

@ -12,9 +12,8 @@
_EMIT_STL_WARNING(STL4038, "The contents of <bit> are available only with C++20 or later.");
#else // ^^^ !_HAS_CXX20 / _HAS_CXX20 vvv
#include <__msvc_bit_utils.hpp>
#include <cstdlib>
#include <isa_availability.h>
#include <limits>
#include <type_traits>
#include _STL_INTRIN_HEADER
@ -95,7 +94,7 @@ _NODISCARD constexpr _Ty bit_ceil(const _Ty _Val) noexcept /* strengthened */ {
return _Ty{1};
}
const int _Num = numeric_limits<_Ty>::digits - _STD countl_zero(static_cast<_Ty>(_Val - 1));
const int _Num = _Unsigned_integer_digits<_Ty> - _STD countl_zero(static_cast<_Ty>(_Val - 1));
if constexpr (sizeof(_Ty) < sizeof(unsigned int)) { // for types subject to integral promotion
if (_STD is_constant_evaluated()) {
@ -108,7 +107,7 @@ _NODISCARD constexpr _Ty bit_ceil(const _Ty _Val) noexcept /* strengthened */ {
// "Preconditions: N is representable as a value of type T."
// "Remarks: A function call expression that violates the precondition in the Preconditions: element
// is not a core constant expression (7.7)."
if (_Num == numeric_limits<_Ty>::digits) {
if (_Num == _Unsigned_integer_digits<_Ty>) {
_Precondition_violation_in_bit_ceil();
}
}
@ -123,12 +122,12 @@ _NODISCARD constexpr _Ty bit_floor(const _Ty _Val) noexcept {
return 0;
}
return static_cast<_Ty>(_Ty{1} << (numeric_limits<_Ty>::digits - 1 - _STD countl_zero(_Val)));
return static_cast<_Ty>(_Ty{1} << (_Unsigned_integer_digits<_Ty> - 1 - _STD countl_zero(_Val)));
}
_EXPORT_STD template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
_NODISCARD constexpr int bit_width(const _Ty _Val) noexcept {
return numeric_limits<_Ty>::digits - _STD countl_zero(_Val);
return _Unsigned_integer_digits<_Ty> - _STD countl_zero(_Val);
}
_EXPORT_STD template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
@ -136,7 +135,7 @@ _NODISCARD constexpr _Ty rotr(_Ty _Val, int _Rotation) noexcept;
_EXPORT_STD template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
_NODISCARD constexpr _Ty rotl(const _Ty _Val, const int _Rotation) noexcept {
constexpr auto _Digits = numeric_limits<_Ty>::digits;
constexpr auto _Digits = _Unsigned_integer_digits<_Ty>;
if (!_STD is_constant_evaluated()) {
if constexpr (_Digits == 64) {
@ -164,7 +163,7 @@ _NODISCARD constexpr _Ty rotl(const _Ty _Val, const int _Rotation) noexcept {
_EXPORT_STD template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> /* = 0 */>
_NODISCARD constexpr _Ty rotr(const _Ty _Val, const int _Rotation) noexcept {
constexpr auto _Digits = numeric_limits<_Ty>::digits;
constexpr auto _Digits = _Unsigned_integer_digits<_Ty>;
if (!_STD is_constant_evaluated()) {
if constexpr (_Digits == 64) {
@ -190,124 +189,9 @@ _NODISCARD constexpr _Ty rotr(const _Ty _Val, const int _Rotation) noexcept {
}
}
#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
extern "C" {
extern int __isa_available;
}
template <class _Ty>
_NODISCARD int _Countl_zero_lzcnt(const _Ty _Val) noexcept {
constexpr int _Digits = numeric_limits<_Ty>::digits;
if constexpr (_Digits <= 16) {
return static_cast<int>(__lzcnt16(_Val) - (16 - _Digits));
} else if constexpr (_Digits == 32) {
return static_cast<int>(__lzcnt(_Val));
} else {
#ifdef _M_IX86
const unsigned int _High = _Val >> 32;
const auto _Low = static_cast<unsigned int>(_Val);
if (_High == 0) {
return 32 + _Countl_zero_lzcnt(_Low);
} else {
return _Countl_zero_lzcnt(_High);
}
#else // ^^^ _M_IX86 / !_M_IX86 vvv
return static_cast<int>(__lzcnt64(_Val));
#endif // _M_IX86
}
}
template <class _Ty>
_NODISCARD int _Countl_zero_bsr(const _Ty _Val) noexcept {
constexpr int _Digits = numeric_limits<_Ty>::digits;
unsigned long _Result;
if constexpr (_Digits <= 32) {
if (!_BitScanReverse(&_Result, _Val)) {
return _Digits;
}
} else {
#ifdef _M_IX86
const unsigned int _High = _Val >> 32;
if (_BitScanReverse(&_Result, _High)) {
return static_cast<int>(31 - _Result);
}
const auto _Low = static_cast<unsigned int>(_Val);
if (!_BitScanReverse(&_Result, _Low)) {
return _Digits;
}
#else // ^^^ _M_IX86 / !_M_IX86 vvv
if (!_BitScanReverse64(&_Result, _Val)) {
return _Digits;
}
#endif // _M_IX86
}
return static_cast<int>(_Digits - 1 - _Result);
}
template <class _Ty>
_NODISCARD int _Checked_x86_x64_countl_zero(const _Ty _Val) noexcept {
#ifdef __AVX2__
return _Countl_zero_lzcnt(_Val);
#else // __AVX2__
const bool _Definitely_have_lzcnt = __isa_available >= __ISA_AVAILABLE_AVX2;
if (_Definitely_have_lzcnt) {
return _Countl_zero_lzcnt(_Val);
} else {
return _Countl_zero_bsr(_Val);
}
#endif // __AVX2__
}
#endif // defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
#if defined(_M_ARM) || defined(_M_ARM64)
#ifdef __clang__ // TRANSITION, GH-1586
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned short _Val) {
return __builtin_clzs(_Val);
}
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned int _Val) {
return __builtin_clz(_Val);
}
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long _Val) {
return __builtin_clzl(_Val);
}
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long long _Val) {
return __builtin_clzll(_Val);
}
#endif // TRANSITION, GH-1586
template <class _Ty>
_NODISCARD int _Checked_arm_arm64_countl_zero(const _Ty _Val) noexcept {
constexpr int _Digits = numeric_limits<_Ty>::digits;
if (_Val == 0) {
return _Digits;
}
#ifdef __clang__ // TRANSITION, GH-1586
if constexpr (is_same_v<remove_cv_t<_Ty>, unsigned char>) {
return _Clang_arm_arm64_countl_zero(static_cast<unsigned short>(_Val))
- (numeric_limits<unsigned short>::digits - _Digits);
} else {
return _Clang_arm_arm64_countl_zero(_Val);
}
#else // ^^^ workaround / no workaround vvv
if constexpr (_Digits <= 32) {
return static_cast<int>(_CountLeadingZeros(_Val)) - (numeric_limits<unsigned long>::digits - _Digits);
} else {
return static_cast<int>(_CountLeadingZeros64(_Val));
}
#endif // TRANSITION, GH-1586
}
#endif // defined(_M_ARM) || defined(_M_ARM64)
_EXPORT_STD template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> /* = 0 */>
_NODISCARD constexpr int countl_zero(const _Ty _Val) noexcept {
#if _HAS_COUNTL_ZERO_INTRINSICS
#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
if (!_STD is_constant_evaluated()) {
return _Checked_x86_x64_countl_zero(_Val);
@ -317,6 +201,7 @@ _NODISCARD constexpr int countl_zero(const _Ty _Val) noexcept {
return _Checked_arm_arm64_countl_zero(_Val);
}
#endif // defined(_M_ARM) || defined(_M_ARM64)
#endif // _HAS_COUNTL_ZERO_INTRINSICS
return _Countl_zero_fallback(_Val);
}

Просмотреть файл

@ -8,8 +8,8 @@
#define _BITSET_
#include <yvals_core.h>
#if _STL_COMPILER_PREPROCESSOR
#include <__msvc_bit_utils.hpp>
#include <iosfwd>
#include <limits>
#include <xstring>
#pragma pack(push, _CRT_PACKING)

Просмотреть файл

@ -26,9 +26,8 @@
// TRANSITION, not using x86/x64 FMA intrinsics for Clang yet
#elif defined(_M_IX86) || defined(_M_X64)
#define _FMP_USING_X86_X64_INTRINSICS
#include <__msvc_bit_utils.hpp>
#include <emmintrin.h>
#include <isa_availability.h>
extern "C" int __isa_available;
extern "C" __m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d);
#endif // ^^^ defined(_M_IX86) || defined(_M_X64) ^^^
@ -177,7 +176,7 @@ namespace _Float_multi_prec {
#ifdef __AVX2__
return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)};
#else // ^^^ defined(__AVX2__) / !defined(__AVX2__) vvv
const bool _Definitely_have_fma = __isa_available >= __ISA_AVAILABLE_AVX2;
const bool _Definitely_have_fma = __isa_available >= _Stl_isa_available_avx2;
if (_Definitely_have_fma) {
return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)};
} else {

Просмотреть файл

@ -5,6 +5,7 @@
"Version": "1.0",
"BuildAsHeaderUnits": [
// "__msvc_all_public_headers.hpp", // for testing, not production
"__msvc_bit_utils.hpp",
"__msvc_chrono.hpp",
"__msvc_cxx_stdatomic.hpp",
"__msvc_filebuf.hpp",

Просмотреть файл

@ -11,23 +11,10 @@
#include <cfloat>
#include <climits>
#include <cwchar>
#include <isa_availability.h>
#include <xtr1common>
#include _STL_INTRIN_HEADER
// TRANSITION, GH-2129, move down to _Arm64_popcount
#if (defined(_M_ARM64) || defined(_M_ARM64EC)) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
&& !defined(__INTEL_COMPILER) && !defined(__clang__) // TRANSITION, LLVM-51488
#define _HAS_NEON_INTRINSICS 1
#else // ^^^ intrinsics available / intrinsics unavailable vvv
#define _HAS_NEON_INTRINSICS 0
#endif // ^^^ intrinsics unavailable ^^^
#if _HAS_NEON_INTRINSICS
#include <arm64_neon.h> // TRANSITION, GH-2129
#endif
#pragma pack(push, _CRT_PACKING)
#pragma warning(push, _STL_WARNING_LEVEL)
#pragma warning(disable : _STL_DISABLED_WARNINGS)
@ -1006,284 +993,6 @@ public:
static constexpr int min_exponent10 = LDBL_MIN_10_EXP;
};
// Implementation of countl_zero without using specialized CPU instructions.
// Used at compile time and when said instructions are not supported.
// see "Hacker's Delight" section 5-3
template <class _Ty>
_NODISCARD constexpr int _Countl_zero_fallback(_Ty _Val) noexcept {
_Ty _Yy = 0;
unsigned int _Nn = numeric_limits<_Ty>::digits;
unsigned int _Cc = numeric_limits<_Ty>::digits / 2;
do {
_Yy = static_cast<_Ty>(_Val >> _Cc);
if (_Yy != 0) {
_Nn -= _Cc;
_Val = _Yy;
}
_Cc >>= 1;
} while (_Cc != 0);
return static_cast<int>(_Nn) - static_cast<int>(_Val);
}
// Implementation of countr_zero without using specialized CPU instructions.
// Used at compile time and when said instructions are not supported.
// see "Hacker's Delight" section 5-4
template <class _Ty>
_NODISCARD constexpr int _Countr_zero_fallback(const _Ty _Val) noexcept {
constexpr int _Digits = numeric_limits<_Ty>::digits;
return _Digits - _Countl_zero_fallback(static_cast<_Ty>(static_cast<_Ty>(~_Val) & static_cast<_Ty>(_Val - 1)));
}
// Implementation of popcount without using specialized CPU instructions.
// Used at compile time and when said instructions are not supported.
template <class _Ty>
_NODISCARD constexpr int _Popcount_fallback(_Ty _Val) noexcept {
constexpr int _Digits = numeric_limits<_Ty>::digits;
#if defined(_M_IX86) || defined(_M_ARM)
if constexpr (_Digits == 64) {
// 64-bit bit operations on architectures without 64-bit registers are less efficient,
// hence we split the value so that it fits in 32-bit registers
return _Popcount_fallback(static_cast<unsigned long>(_Val))
+ _Popcount_fallback(static_cast<unsigned long>(_Val >> 32));
}
#endif // defined(_M_IX86) || defined(_M_ARM)
// we static_cast these bit patterns in order to truncate them to the correct size
_Val = static_cast<_Ty>(_Val - ((_Val >> 1) & static_cast<_Ty>(0x5555'5555'5555'5555ull)));
_Val = static_cast<_Ty>((_Val & static_cast<_Ty>(0x3333'3333'3333'3333ull))
+ ((_Val >> 2) & static_cast<_Ty>(0x3333'3333'3333'3333ull)));
_Val = static_cast<_Ty>((_Val + (_Val >> 4)) & static_cast<_Ty>(0x0F0F'0F0F'0F0F'0F0Full));
// Multiply by one in each byte, so that it will have the sum of all source bytes in the highest byte
_Val = static_cast<_Ty>(_Val * static_cast<_Ty>(0x0101'0101'0101'0101ull));
// Extract highest byte
return static_cast<int>(_Val >> (_Digits - 8));
}
// _HAS_TZCNT_BSF_INTRINSICS is 1 when targeting x86, or x64 that is not ARM64EC, and none of
// _M_CEE_PURE, __CUDACC__, or __INTEL_COMPILER is defined; otherwise it is 0.
// It gates the TZCNT/BSF-based countr_zero helpers that follow.
#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
&& !defined(__INTEL_COMPILER)
#define _HAS_TZCNT_BSF_INTRINSICS 1
#else // ^^^ intrinsics available / intrinsics unavailable vvv
#define _HAS_TZCNT_BSF_INTRINSICS 0
#endif // ^^^ intrinsics unavailable ^^^
#if _HAS_TZCNT_BSF_INTRINSICS
// Externally defined variable with C linkage; the checked helpers below compare it against
// __ISA_AVAILABLE_* levels at run time to decide which instruction path to take.
extern "C" {
extern int __isa_available;
}
// Spell the TZCNT operations per compiler: the __builtin_ia32_tzcnt_* builtins under Clang,
// the _tzcnt_u32 / _tzcnt_u64 intrinsics otherwise.
// These helper macros are temporary; they are #undef'd right after _Countr_zero_tzcnt.
#ifdef __clang__
#define _TZCNT_U32 __builtin_ia32_tzcnt_u32
#define _TZCNT_U64 __builtin_ia32_tzcnt_u64
#else // ^^^ __clang__ / !__clang__ vvv
#define _TZCNT_U32 _tzcnt_u32
#define _TZCNT_U64 _tzcnt_u64
#endif // __clang__
// Counts the trailing zero bits of _Val through TZCNT.
// This function performs no check that the CPU supports the instruction.
template <class _Ty>
_NODISCARD int _Countr_zero_tzcnt(const _Ty _Val) noexcept {
    constexpr int _Digits = numeric_limits<_Ty>::digits;

    if constexpr (_Digits <= 32) {
        // The widening is deliberate: all bits above _Ty's width are set, so a narrow 0 becomes
        // 0xFFFF....FFFF0... rather than 0. Without this, the zeros of the wider type
        // would be counted as well.
        constexpr _Ty _Max = (numeric_limits<_Ty>::max)();
        const auto _Padded = static_cast<unsigned int>(~_Max | _Val);
        return static_cast<int>(_TZCNT_U32(_Padded));
    } else {
#ifdef _M_IX86
        // No 64-bit TZCNT here: inspect the low half first, and fall back to the high half
        // (offset by 32) only when the low half is entirely zero.
        const auto _Lower = static_cast<unsigned int>(_Val);
        if (_Lower != 0) {
            return static_cast<int>(_TZCNT_U32(_Lower));
        }

        const unsigned int _Upper = _Val >> 32;
        return static_cast<int>(32 + _TZCNT_U32(_Upper));
#else // ^^^ _M_IX86 / !_M_IX86 vvv
        return static_cast<int>(_TZCNT_U64(_Val));
#endif // _M_IX86
    }
}

#undef _TZCNT_U32
#undef _TZCNT_U64
// Counts the trailing zero bits of _Val through the _BitScanForward intrinsics.
// Whenever a scan reports that no bit is set, the result is the full width of _Ty.
template <class _Ty>
_NODISCARD int _Countr_zero_bsf(const _Ty _Val) noexcept {
    constexpr int _Digits = numeric_limits<_Ty>::digits;
    constexpr _Ty _Max    = (numeric_limits<_Ty>::max)();

    unsigned long _Index;
    if constexpr (_Digits <= 32) {
        // The widening is deliberate: all bits above _Ty's width are set, so a narrow 0 becomes
        // 0xFFFF....FFFF0... rather than 0. Without this, the zeros of the wider type
        // would be counted as well.
        const auto _Padded = static_cast<unsigned int>(~_Max | _Val);
        return _BitScanForward(&_Index, _Padded) ? static_cast<int>(_Index) : _Digits;
    } else {
#ifdef _M_IX86
        // Scan the low half first; only when it has no set bit is the high half scanned,
        // with its index shifted up by 32.
        if (_BitScanForward(&_Index, static_cast<unsigned int>(_Val))) {
            return static_cast<int>(_Index);
        }

        const unsigned int _Upper = _Val >> 32;
        return _BitScanForward(&_Index, _Upper) ? static_cast<int>(_Index + 32) : _Digits;
#else // ^^^ _M_IX86 / !_M_IX86 vvv
        return _BitScanForward64(&_Index, _Val) ? static_cast<int>(_Index) : _Digits;
#endif // _M_IX86
    }
}
// Counts trailing zeros on x86/x64, preferring TZCNT and using BSF when TZCNT is not
// known to be present.
template <class _Ty>
_NODISCARD int _Checked_x86_x64_countr_zero(const _Ty _Val) noexcept {
#ifdef __AVX2__
    // Built with __AVX2__ defined: take the TZCNT path without a runtime test.
    return _Countr_zero_tzcnt(_Val);
#else // ^^^ defined(__AVX2__) / !defined(__AVX2__) vvv
    // An ISA level of at least AVX2 is treated as proof that TZCNT exists.
    if (__isa_available >= __ISA_AVAILABLE_AVX2) {
        return _Countr_zero_tzcnt(_Val);
    }

    return _Countr_zero_bsf(_Val);
#endif // ^^^ !defined(__AVX2__) ^^^
}
#endif // _HAS_TZCNT_BSF_INTRINSICS
// _HAS_POPCNT_INTRINSICS is 1 when targeting x86, or x64 that is not ARM64EC, and none of
// _M_CEE_PURE, __CUDACC__, or __INTEL_COMPILER is defined; otherwise it is 0.
// It gates the __popcnt-based helpers that follow.
#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
&& !defined(__INTEL_COMPILER)
#define _HAS_POPCNT_INTRINSICS 1
#else // ^^^ intrinsics available / intrinsics unavailable vvv
#define _HAS_POPCNT_INTRINSICS 0
#endif // ^^^ intrinsics unavailable ^^^
#if _HAS_POPCNT_INTRINSICS
// Counts the set bits of _Val through the __popcnt family of intrinsics.
// This function performs no check that the CPU supports the instruction.
template <class _Ty>
_NODISCARD int _Unchecked_x86_x64_popcount(const _Ty _Val) noexcept {
    constexpr int _Width = numeric_limits<_Ty>::digits;

    if constexpr (_Width == 32) {
        return static_cast<int>(__popcnt(_Val));
    } else if constexpr (_Width <= 16) {
        return static_cast<int>(__popcnt16(_Val));
    } else {
#ifdef _M_IX86
        // No 64-bit intrinsic is used on x86: count each 32-bit half and add the counts.
        const auto _High_count = __popcnt(_Val >> 32);
        const auto _Low_count  = __popcnt(static_cast<unsigned int>(_Val));
        return static_cast<int>(_High_count + _Low_count);
#else // ^^^ _M_IX86 / !_M_IX86 vvv
        return static_cast<int>(__popcnt64(_Val));
#endif // _M_IX86
    }
}
// Counts set bits on x86/x64, using the popcount intrinsics when they are known to be usable
// and the portable fallback otherwise.
template <class _Ty>
_NODISCARD int _Checked_x86_x64_popcount(const _Ty _Val) noexcept {
#ifdef __AVX__
    // Built with __AVX__ defined: take the intrinsic path without a runtime test.
    return _Unchecked_x86_x64_popcount(_Val);
#else // ^^^ defined(__AVX__) / !defined(__AVX__) vvv
    // An ISA level of at least SSE4.2 is treated as proof that the instruction exists.
    if (__isa_available >= __ISA_AVAILABLE_SSE42) {
        return _Unchecked_x86_x64_popcount(_Val);
    }

    return _Popcount_fallback(_Val);
#endif // ^^^ !defined(__AVX__) ^^^
}
#endif // _HAS_POPCNT_INTRINSICS
#if _HAS_NEON_INTRINSICS
// Counts the set bits of a 64-bit value through NEON intrinsics.
// NOTE(review): neon_cnt / neon_addv8 presumably correspond to a per-byte bit count followed by
// an add-across-lanes, with the total read from lane 0 - confirm against <arm64_neon.h>.
_NODISCARD inline int _Arm64_popcount(const unsigned long long _Val) noexcept {
    return neon_addv8(neon_cnt(__uint64ToN64_v(_Val))).n8_i8[0];
}
#endif // _HAS_NEON_INTRINSICS
// True when _Ty, with cv-qualifiers removed, is exactly one of unsigned char, unsigned short,
// unsigned int, unsigned long, or unsigned long long. Any type not in that list (for example bool
// or the character types other than unsigned char) yields false.
template <class _Ty>
constexpr bool _Is_standard_unsigned_integer =
_Is_any_of_v<remove_cv_t<_Ty>, unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long>;
// Returns the number of trailing zero bits in _Val; only participates for standard unsigned
// integer types.
// Where the x86/x64 TZCNT/BSF intrinsics are usable, runtime calls go through them; constant
// evaluation (C++20) and every other configuration use the portable fallback.
template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
_NODISCARD _CONSTEXPR20 int _Countr_zero(const _Ty _Val) noexcept {
#if _HAS_TZCNT_BSF_INTRINSICS
#if _HAS_CXX20
    // The intrinsics cannot be used during constant evaluation.
    if (_STD is_constant_evaluated()) {
        return _Countr_zero_fallback(_Val);
    }
#endif // _HAS_CXX20
    return _Checked_x86_x64_countr_zero(_Val);
#else // ^^^ intrinsics available / intrinsics unavailable vvv
    return _Countr_zero_fallback(_Val);
#endif // ^^^ intrinsics unavailable ^^^
}
// Picks a countr_zero implementation for _Ty, wraps it in a lambda taking a _Ty, and hands that
// lambda to _Callback; the result of _Callback is returned unchanged (decltype(auto)).
// Selection, as written below:
//   - TZCNT/BSF intrinsics usable, C++20, and not constant-evaluated:
//       _Countr_zero_tzcnt when __AVX2__ is defined or __isa_available is at least
//       __ISA_AVAILABLE_AVX2, otherwise _Countr_zero_bsf;
//   - every other case: _Countr_zero_fallback.
// Unlike _Select_popcount_impl this function is plain constexpr rather than _CONSTEXPR20.
template <class _Ty, class _Fn>
constexpr decltype(auto) _Select_countr_zero_impl(_Fn _Callback) {
// TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining
#if _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20
if (!_STD is_constant_evaluated()) {
#ifdef __AVX2__
return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); });
#else // ^^^ AVX2 / not AVX2 vvv
// Runtime dispatch on the detected ISA level.
const bool _Definitely_have_tzcnt = __isa_available >= __ISA_AVAILABLE_AVX2;
if (_Definitely_have_tzcnt) {
return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); });
} else {
return _Callback([](_Ty _Val) { return _Countr_zero_bsf(_Val); });
}
#endif // ^^^ not AVX2 ^^^
}
#endif // ^^^ _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20 ^^^
// C++17 constexpr gcd() calls this function, so it should be constexpr unless we detect runtime evaluation.
return _Callback([](_Ty _Val) { return _Countr_zero_fallback(_Val); });
}
// Returns the number of set bits in _Val; only participates for standard unsigned integer types.
// Where x86/x64 or ARM64 NEON intrinsics are usable, runtime calls go through them; constant
// evaluation (C++20) and every other configuration use the portable fallback.
template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
_NODISCARD _CONSTEXPR20 int _Popcount(const _Ty _Val) noexcept {
#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS
#if _HAS_CXX20
    // The intrinsics cannot be used during constant evaluation.
    if (_STD is_constant_evaluated()) {
        return _Popcount_fallback(_Val);
    }
#endif // _HAS_CXX20
#if _HAS_POPCNT_INTRINSICS
    return _Checked_x86_x64_popcount(_Val);
#else // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv
    return _Arm64_popcount(_Val);
#endif // ^^^ ARM64 intrinsics available ^^^
#else // ^^^ any intrinsics available / no intrinsics available vvv
    return _Popcount_fallback(_Val);
#endif // ^^^ no intrinsics available ^^^
}
// Picks a popcount implementation for _Ty, wraps it in a lambda taking a _Ty, and hands that
// lambda to _Callback; the result of _Callback is returned unchanged (decltype(auto)).
// Selection, as written below, when some intrinsics are usable and the call is not
// constant-evaluated (the constant-evaluation test exists only under C++20):
//   - x86/x64: _Unchecked_x86_x64_popcount, except that without __AVX__ the portable
//     _Popcount_fallback is chosen when __isa_available is below __ISA_AVAILABLE_SSE42;
//   - ARM64 NEON: _Arm64_popcount.
// In every other case: _Popcount_fallback.
template <class _Ty, class _Fn>
_CONSTEXPR20 decltype(auto) _Select_popcount_impl(_Fn _Callback) {
// TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining
#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS
#if _HAS_CXX20
if (!_STD is_constant_evaluated())
#endif // _HAS_CXX20
{
#if _HAS_POPCNT_INTRINSICS
#ifndef __AVX__
// Runtime dispatch on the detected ISA level.
const bool _Definitely_have_popcnt = __isa_available >= __ISA_AVAILABLE_SSE42;
if (!_Definitely_have_popcnt) {
return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); });
}
#endif // !defined(__AVX__)
return _Callback([](_Ty _Val) { return _Unchecked_x86_x64_popcount(_Val); });
#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv
return _Callback([](_Ty _Val) { return _Arm64_popcount(_Val); });
#endif // ^^^ ARM64 intrinsics available ^^^
}
#endif // ^^^ any intrinsics available ^^^
return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); });
}
#undef _HAS_POPCNT_INTRINSICS
#undef _HAS_TZCNT_BSF_INTRINSICS
#undef _HAS_NEON_INTRINSICS
_STD_END
#pragma pop_macro("new")
_STL_RESTORE_CLANG_WARNINGS

Просмотреть файл

@ -11,9 +11,13 @@
#include <xutility>
#if _HAS_CXX17
#include <limits>
#include <__msvc_bit_utils.hpp>
#endif // _HAS_CXX17
#if _HAS_CXX20
#include <cfloat>
#endif // _HAS_CXX20
#pragma pack(push, _CRT_PACKING)
#pragma warning(push, _STL_WARNING_LEVEL)
#pragma warning(disable : _STL_DISABLED_WARNINGS)
@ -648,6 +652,24 @@ _NODISCARD constexpr common_type_t<_Mt, _Nt> lcm(const _Mt _Mx, const _Nt _Nx) n
#endif // _HAS_CXX17
#if _HAS_CXX20
// Largest finite value of the floating-point type _Flt, taken from the <cfloat> macros.
// The primary template is value-initialized; only the float, double, and long double
// specializations carry a meaningful limit.
template <class _Flt>
inline constexpr _Flt _Floating_max{};
template <>
inline constexpr float _Floating_max<float> = FLT_MAX;
template <>
inline constexpr double _Floating_max<double> = DBL_MAX;
template <>
inline constexpr long double _Floating_max<long double> = LDBL_MAX;
// Smallest positive normalized value of the floating-point type _Flt, taken from the <cfloat>
// macros. The primary template is value-initialized; only the float, double, and long double
// specializations carry a meaningful limit.
template <class _Flt>
inline constexpr _Flt _Floating_min{};
template <>
inline constexpr float _Floating_min<float> = FLT_MIN;
template <>
inline constexpr double _Floating_min<double> = DBL_MIN;
template <>
inline constexpr long double _Floating_min<long double> = LDBL_MIN;
_EXPORT_STD template <class _Ty, enable_if_t<is_arithmetic_v<_Ty> && !is_same_v<remove_cv_t<_Ty>, bool>, int> = 0>
_NODISCARD constexpr _Ty midpoint(const _Ty _Val1, const _Ty _Val2) noexcept {
if constexpr (is_floating_point_v<_Ty>) {
@ -666,7 +688,7 @@ _NODISCARD constexpr _Ty midpoint(const _Ty _Val1, const _Ty _Val2) noexcept {
}
}
constexpr _Ty _High_limit = (numeric_limits<_Ty>::max)() / 2;
constexpr _Ty _High_limit = _Floating_max<remove_cv_t<_Ty>> / 2;
const auto _Val1_a = _Float_abs(_Val1);
const auto _Val2_a = _Float_abs(_Val2);
if (_Val1_a <= _High_limit && _Val2_a <= _High_limit) {
@ -690,7 +712,7 @@ _NODISCARD constexpr _Ty midpoint(const _Ty _Val1, const _Ty _Val2) noexcept {
// In the default rounding mode this less than one ULP difference will always be rounded away, so under
// /fp:fast we could avoid these tests if we had some means of detecting it in the caller.
constexpr _Ty _Low_limit = (numeric_limits<_Ty>::min)() * 2;
constexpr _Ty _Low_limit = _Floating_min<remove_cv_t<_Ty>> * 2;
if (_Val1_a < _Low_limit) {
return _Val1 + _Val2 / 2;
}

Просмотреть файл

@ -2169,7 +2169,7 @@ _NODISCARD _Flt _Float_upper_bound(_Ty _Val) {
constexpr auto _Mask = static_cast<_Ty>(-1) << (_Ty_digits - _Flt_digits);
#ifdef _M_CEE_PURE
constexpr auto _Ty_32or64_digits = numeric_limits<_Ty_32or64>::digits;
const auto _Log_plus1 = _Ty_32or64_digits - _Countl_zero_fallback(static_cast<_Ty_32or64>(_Val | _Ty{1}));
const auto _Log_plus1 = _Ty_32or64_digits - _Countl_zero_internal(static_cast<_Ty_32or64>(_Val | _Ty{1}));
#else // _M_CEE_PURE
const auto _Log_plus1 = _Bit_scan_reverse(static_cast<_Ty_32or64>(_Val | _Ty{1}));
#endif // _M_CEE_PURE

Просмотреть файл

@ -21,6 +21,7 @@ _EMIT_STL_WARNING(STL4038, "The contents of <ranges> are available only with C++
#if _HAS_CXX23
#include <array>
#include <bit>
#endif // _HAS_CXX23
#pragma pack(push, _CRT_PACKING)

Просмотреть файл

@ -8,6 +8,7 @@
#define _VECTOR_
#include <yvals_core.h>
#if _STL_COMPILER_PREPROCESSOR
#include <__msvc_bit_utils.hpp>
#include <xmemory>
#if _HAS_CXX17

Просмотреть файл

@ -19,11 +19,24 @@
#include <xfilesystem_abi.h>
#endif // _HAS_CXX17
#if _HAS_CXX23
#include <__msvc_print.hpp>
#endif // _HAS_CXX23
// <__msvc_bit_utils.hpp> is included by <bit> and <limits>
// <__msvc_iter_core.hpp> is included by <tuple>
// <xkeycheck.h> should not be included outside of <yvals_core.h>
// <xtr1common> is included by <cstddef>
// <yvals_core.h> is included by every public core header
// Also test GH-3692 "Including <isa_availability.h> emits a non-reserved name"
#include <isa_availability.h>
// STATIC_ASSERT uses the stringized condition as the static_assert message.
// The two checks below verify that the STL's own copies of the ISA level constants equal the
// __ISA_AVAILABLE_* values (presumably supplied by <isa_availability.h> - confirm).
#define STATIC_ASSERT(...) static_assert(__VA_ARGS__, #__VA_ARGS__)
STATIC_ASSERT(std::_Stl_isa_available_sse42 == __ISA_AVAILABLE_SSE42);
STATIC_ASSERT(std::_Stl_isa_available_avx2 == __ISA_AVAILABLE_AVX2);
#ifdef _YVALS
#error Core headers should not include <yvals.h>.
#endif

Просмотреть файл

@ -1,9 +1,10 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#define nsec delete
#define sec delete
#define xtime delete
#define xtime_get delete
#define ISA_AVAILABILITY delete
#define nsec delete
#define sec delete
#define xtime delete
#define xtime_get delete
#include <__msvc_all_public_headers.hpp>

Просмотреть файл

@ -4,6 +4,7 @@
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <isa_availability.h>
#include <ranges>
#include <Windows.h>