зеркало из https://github.com/microsoft/STL.git
Concentrated header for internal bit utilities (#3721)
Co-authored-by: Stephan T. Lavavej <stl@microsoft.com>
This commit is contained in:
Родитель
ef7df32f98
Коммит
47679bbaa8
|
@ -7,6 +7,7 @@
|
|||
|
||||
set(HEADERS
|
||||
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_all_public_headers.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_bit_utils.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_chrono.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_cxx_stdatomic.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_filebuf.hpp
|
||||
|
|
|
@ -0,0 +1,448 @@
|
|||
// __msvc_bit_utils.hpp internal header (core)
|
||||
|
||||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#pragma once
|
||||
#ifndef __MSVC_BIT_UTILS_HPP
|
||||
#define __MSVC_BIT_UTILS_HPP
|
||||
#include <yvals_core.h>
|
||||
#if _STL_COMPILER_PREPROCESSOR
|
||||
|
||||
#include <climits>
|
||||
#include <xtr1common>
|
||||
|
||||
#include _STL_INTRIN_HEADER
|
||||
|
||||
// TRANSITION, GH-2129, move down to _Arm64_popcount
|
||||
#if (defined(_M_ARM64) || defined(_M_ARM64EC)) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
|
||||
&& !defined(__INTEL_COMPILER) && !defined(__clang__) // TRANSITION, LLVM-51488
|
||||
#define _HAS_NEON_INTRINSICS 1
|
||||
#else // ^^^ intrinsics available / intrinsics unavailable vvv
|
||||
#define _HAS_NEON_INTRINSICS 0
|
||||
#endif // ^^^ intrinsics unavailable ^^^
|
||||
|
||||
#if _HAS_NEON_INTRINSICS
|
||||
#include <arm64_neon.h> // TRANSITION, GH-2129
|
||||
#endif // _HAS_NEON_INTRINSICS
|
||||
|
||||
#pragma pack(push, _CRT_PACKING)
|
||||
#pragma warning(push, _STL_WARNING_LEVEL)
|
||||
#pragma warning(disable : _STL_DISABLED_WARNINGS)
|
||||
_STL_DISABLE_CLANG_WARNINGS
|
||||
#pragma push_macro("new")
|
||||
#undef new
|
||||
|
||||
_STD_BEGIN
|
||||
// Declared here and defined elsewhere. The x86/x64 dispatchers in this header compare it against the
// _Stl_isa_available_* constants below to choose between intrinsic-based and fallback implementations.
extern "C" {
|
||||
extern int __isa_available;
|
||||
}
|
||||
|
||||
// Thresholds for __isa_available, spelled as local constants rather than the __ISA_AVAILABLE_* names.
_INLINE_VAR constexpr int _Stl_isa_available_sse42 = 2; // equal to __ISA_AVAILABLE_SSE42
|
||||
_INLINE_VAR constexpr int _Stl_isa_available_avx2 = 5; // equal to __ISA_AVAILABLE_AVX2
|
||||
|
||||
// Number of bits in _UInt, computed as sizeof(_UInt) * CHAR_BIT.
template <class _UInt>
|
||||
_INLINE_VAR constexpr int _Unsigned_integer_digits = sizeof(_UInt) * CHAR_BIT;
|
||||
|
||||
// Implementation of countl_zero without using specialized CPU instructions.
|
||||
// Used at compile time and when said instructions are not supported.
|
||||
// see "Hacker's Delight" section 5-3
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr int _Countl_zero_fallback(_Ty _Val) noexcept {
|
||||
_Ty _Yy = 0;
|
||||
|
||||
unsigned int _Nn = _Unsigned_integer_digits<_Ty>;
|
||||
unsigned int _Cc = _Unsigned_integer_digits<_Ty> / 2;
|
||||
do {
|
||||
_Yy = static_cast<_Ty>(_Val >> _Cc);
|
||||
if (_Yy != 0) {
|
||||
_Nn -= _Cc;
|
||||
_Val = _Yy;
|
||||
}
|
||||
_Cc >>= 1;
|
||||
} while (_Cc != 0);
|
||||
return static_cast<int>(_Nn) - static_cast<int>(_Val);
|
||||
}
|
||||
|
||||
#if !defined(_M_CEE_PURE) && !defined(__CUDACC__) && !defined(__INTEL_COMPILER)
|
||||
#define _HAS_COUNTL_ZERO_INTRINSICS 1
|
||||
#else // ^^^ intrinsics available / intrinsics unavailable vvv
|
||||
#define _HAS_COUNTL_ZERO_INTRINSICS 0
|
||||
#endif // ^^^ intrinsics unavailable ^^^
|
||||
|
||||
#if _HAS_COUNTL_ZERO_INTRINSICS
|
||||
#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Countl_zero_lzcnt(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
|
||||
|
||||
if constexpr (_Digits <= 16) {
|
||||
return static_cast<int>(__lzcnt16(_Val) - (16 - _Digits));
|
||||
} else if constexpr (_Digits == 32) {
|
||||
return static_cast<int>(__lzcnt(_Val));
|
||||
} else {
|
||||
#ifdef _M_IX86
|
||||
const unsigned int _High = _Val >> 32;
|
||||
const auto _Low = static_cast<unsigned int>(_Val);
|
||||
if (_High == 0) {
|
||||
return 32 + _Countl_zero_lzcnt(_Low);
|
||||
} else {
|
||||
return _Countl_zero_lzcnt(_High);
|
||||
}
|
||||
#else // ^^^ _M_IX86 / !_M_IX86 vvv
|
||||
return static_cast<int>(__lzcnt64(_Val));
|
||||
#endif // _M_IX86
|
||||
}
|
||||
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Countl_zero_bsr(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
|
||||
|
||||
unsigned long _Result;
|
||||
if constexpr (_Digits <= 32) {
|
||||
if (!_BitScanReverse(&_Result, _Val)) {
|
||||
return _Digits;
|
||||
}
|
||||
} else {
|
||||
#ifdef _M_IX86
|
||||
const unsigned int _High = _Val >> 32;
|
||||
if (_BitScanReverse(&_Result, _High)) {
|
||||
return static_cast<int>(31 - _Result);
|
||||
}
|
||||
|
||||
const auto _Low = static_cast<unsigned int>(_Val);
|
||||
if (!_BitScanReverse(&_Result, _Low)) {
|
||||
return _Digits;
|
||||
}
|
||||
#else // ^^^ _M_IX86 / !_M_IX86 vvv
|
||||
if (!_BitScanReverse64(&_Result, _Val)) {
|
||||
return _Digits;
|
||||
}
|
||||
#endif // _M_IX86
|
||||
}
|
||||
return static_cast<int>(_Digits - 1 - _Result);
|
||||
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Checked_x86_x64_countl_zero(const _Ty _Val) noexcept {
|
||||
#ifdef __AVX2__
|
||||
return _Countl_zero_lzcnt(_Val);
|
||||
#else // __AVX2__
|
||||
const bool _Definitely_have_lzcnt = __isa_available >= _Stl_isa_available_avx2;
|
||||
if (_Definitely_have_lzcnt) {
|
||||
return _Countl_zero_lzcnt(_Val);
|
||||
} else {
|
||||
return _Countl_zero_bsr(_Val);
|
||||
}
|
||||
#endif // __AVX2__
|
||||
}
|
||||
#endif // defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
|
||||
|
||||
#if defined(_M_ARM) || defined(_M_ARM64)
|
||||
#ifdef __clang__ // TRANSITION, GH-1586
|
||||
// Overload set forwarding to Clang's __builtin_clz* builtins, one overload per operand type.
// Within this header the only caller is _Checked_arm_arm64_countl_zero, which returns early for
// _Val == 0 before dispatching here.
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned short _Val) {
|
||||
return __builtin_clzs(_Val);
|
||||
}
|
||||
|
||||
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned int _Val) {
|
||||
return __builtin_clz(_Val);
|
||||
}
|
||||
|
||||
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long _Val) {
|
||||
return __builtin_clzl(_Val);
|
||||
}
|
||||
|
||||
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long long _Val) {
|
||||
return __builtin_clzll(_Val);
|
||||
}
|
||||
#endif // TRANSITION, GH-1586
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Checked_arm_arm64_countl_zero(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
|
||||
if (_Val == 0) {
|
||||
return _Digits;
|
||||
}
|
||||
|
||||
#ifdef __clang__ // TRANSITION, GH-1586
|
||||
if constexpr (is_same_v<remove_cv_t<_Ty>, unsigned char>) {
|
||||
return _Clang_arm_arm64_countl_zero(static_cast<unsigned short>(_Val))
|
||||
- (_Unsigned_integer_digits<unsigned short> - _Digits);
|
||||
} else {
|
||||
return _Clang_arm_arm64_countl_zero(_Val);
|
||||
}
|
||||
#else // ^^^ workaround / no workaround vvv
|
||||
if constexpr (_Digits <= 32) {
|
||||
return static_cast<int>(_CountLeadingZeros(_Val)) - (_Unsigned_integer_digits<unsigned long> - _Digits);
|
||||
} else {
|
||||
return static_cast<int>(_CountLeadingZeros64(_Val));
|
||||
}
|
||||
#endif // TRANSITION, GH-1586
|
||||
}
|
||||
#endif // defined(_M_ARM) || defined(_M_ARM64)
|
||||
#endif // _HAS_COUNTL_ZERO_INTRINSICS
|
||||
|
||||
// Implementation of countr_zero without using specialized CPU instructions.
|
||||
// Used at compile time and when said instructions are not supported.
|
||||
// see "Hacker's Delight" section 5-4
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr int _Countr_zero_fallback(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
|
||||
return _Digits - _Countl_zero_fallback(static_cast<_Ty>(static_cast<_Ty>(~_Val) & static_cast<_Ty>(_Val - 1)));
|
||||
}
|
||||
|
||||
// Implementation of popcount without using specialized CPU instructions.
|
||||
// Used at compile time and when said instructions are not supported.
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr int _Popcount_fallback(_Ty _Val) noexcept {
|
||||
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
|
||||
#if defined(_M_IX86) || defined(_M_ARM)
|
||||
if constexpr (_Digits == 64) {
|
||||
// 64-bit bit operations on architectures without 64-bit registers are less efficient,
|
||||
// hence we split the value so that it fits in 32-bit registers
|
||||
return _Popcount_fallback(static_cast<unsigned long>(_Val))
|
||||
+ _Popcount_fallback(static_cast<unsigned long>(_Val >> 32));
|
||||
}
|
||||
#endif // defined(_M_IX86) || defined(_M_ARM)
|
||||
// we static_cast these bit patterns in order to truncate them to the correct size
|
||||
_Val = static_cast<_Ty>(_Val - ((_Val >> 1) & static_cast<_Ty>(0x5555'5555'5555'5555ull)));
|
||||
_Val = static_cast<_Ty>((_Val & static_cast<_Ty>(0x3333'3333'3333'3333ull))
|
||||
+ ((_Val >> 2) & static_cast<_Ty>(0x3333'3333'3333'3333ull)));
|
||||
_Val = static_cast<_Ty>((_Val + (_Val >> 4)) & static_cast<_Ty>(0x0F0F'0F0F'0F0F'0F0Full));
|
||||
// Multiply by one in each byte, so that it will have the sum of all source bytes in the highest byte
|
||||
_Val = static_cast<_Ty>(_Val * static_cast<_Ty>(0x0101'0101'0101'0101ull));
|
||||
// Extract highest byte
|
||||
return static_cast<int>(_Val >> (_Digits - 8));
|
||||
}
|
||||
|
||||
#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
|
||||
&& !defined(__INTEL_COMPILER)
|
||||
#define _HAS_TZCNT_BSF_INTRINSICS 1
|
||||
#else // ^^^ intrinsics available / intrinsics unavailable vvv
|
||||
#define _HAS_TZCNT_BSF_INTRINSICS 0
|
||||
#endif // ^^^ intrinsics unavailable ^^^
|
||||
|
||||
#if _HAS_TZCNT_BSF_INTRINSICS
|
||||
#ifdef __clang__
|
||||
#define _TZCNT_U32 __builtin_ia32_tzcnt_u32
|
||||
#define _TZCNT_U64 __builtin_ia32_tzcnt_u64
|
||||
#else // ^^^ __clang__ / !__clang__ vvv
|
||||
#define _TZCNT_U32 _tzcnt_u32
|
||||
#define _TZCNT_U64 _tzcnt_u64
|
||||
#endif // __clang__
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Countr_zero_tzcnt(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
|
||||
constexpr _Ty _Max = static_cast<_Ty>(-1); // equal to (numeric_limits<_Ty>::max)()
|
||||
|
||||
if constexpr (_Digits <= 32) {
|
||||
// Intended widening to int. This operation means that a narrow 0 will widen
|
||||
// to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros
|
||||
// of the wider type.
|
||||
return static_cast<int>(_TZCNT_U32(static_cast<unsigned int>(~_Max | _Val)));
|
||||
} else {
|
||||
#ifdef _M_IX86
|
||||
const auto _Low = static_cast<unsigned int>(_Val);
|
||||
if (_Low == 0) {
|
||||
const unsigned int _High = _Val >> 32;
|
||||
return static_cast<int>(32 + _TZCNT_U32(_High));
|
||||
} else {
|
||||
return static_cast<int>(_TZCNT_U32(_Low));
|
||||
}
|
||||
#else // ^^^ _M_IX86 / !_M_IX86 vvv
|
||||
return static_cast<int>(_TZCNT_U64(_Val));
|
||||
#endif // _M_IX86
|
||||
}
|
||||
}
|
||||
|
||||
#undef _TZCNT_U32
|
||||
#undef _TZCNT_U64
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Countr_zero_bsf(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
|
||||
constexpr _Ty _Max = static_cast<_Ty>(-1); // equal to (numeric_limits<_Ty>::max)()
|
||||
|
||||
unsigned long _Result;
|
||||
if constexpr (_Digits <= 32) {
|
||||
// Intended widening to int. This operation means that a narrow 0 will widen
|
||||
// to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros
|
||||
// of the wider type.
|
||||
if (!_BitScanForward(&_Result, static_cast<unsigned int>(~_Max | _Val))) {
|
||||
return _Digits;
|
||||
}
|
||||
} else {
|
||||
#ifdef _M_IX86
|
||||
const auto _Low = static_cast<unsigned int>(_Val);
|
||||
if (_BitScanForward(&_Result, _Low)) {
|
||||
return static_cast<int>(_Result);
|
||||
}
|
||||
|
||||
const unsigned int _High = _Val >> 32;
|
||||
if (!_BitScanForward(&_Result, _High)) {
|
||||
return _Digits;
|
||||
} else {
|
||||
return static_cast<int>(_Result + 32);
|
||||
}
|
||||
#else // ^^^ _M_IX86 / !_M_IX86 vvv
|
||||
if (!_BitScanForward64(&_Result, _Val)) {
|
||||
return _Digits;
|
||||
}
|
||||
#endif // _M_IX86
|
||||
}
|
||||
return static_cast<int>(_Result);
|
||||
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Checked_x86_x64_countr_zero(const _Ty _Val) noexcept {
|
||||
#ifdef __AVX2__
|
||||
return _Countr_zero_tzcnt(_Val);
|
||||
#else // __AVX2__
|
||||
const bool _Definitely_have_tzcnt = __isa_available >= _Stl_isa_available_avx2;
|
||||
if (_Definitely_have_tzcnt) {
|
||||
return _Countr_zero_tzcnt(_Val);
|
||||
} else {
|
||||
return _Countr_zero_bsf(_Val);
|
||||
}
|
||||
#endif // __AVX2__
|
||||
}
|
||||
|
||||
#endif // _HAS_TZCNT_BSF_INTRINSICS
|
||||
|
||||
#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
|
||||
&& !defined(__INTEL_COMPILER)
|
||||
#define _HAS_POPCNT_INTRINSICS 1
|
||||
#else // ^^^ intrinsics available / intrinsics unavailable vvv
|
||||
#define _HAS_POPCNT_INTRINSICS 0
|
||||
#endif // ^^^ intrinsics unavailable ^^^
|
||||
|
||||
#if _HAS_POPCNT_INTRINSICS
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Unchecked_x86_x64_popcount(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = _Unsigned_integer_digits<_Ty>;
|
||||
if constexpr (_Digits <= 16) {
|
||||
return static_cast<int>(__popcnt16(_Val));
|
||||
} else if constexpr (_Digits == 32) {
|
||||
return static_cast<int>(__popcnt(_Val));
|
||||
} else {
|
||||
#ifdef _M_IX86
|
||||
return static_cast<int>(__popcnt(_Val >> 32) + __popcnt(static_cast<unsigned int>(_Val)));
|
||||
#else // ^^^ _M_IX86 / !_M_IX86 vvv
|
||||
return static_cast<int>(__popcnt64(_Val));
|
||||
#endif // _M_IX86
|
||||
}
|
||||
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Checked_x86_x64_popcount(const _Ty _Val) noexcept {
|
||||
#ifndef __AVX__
|
||||
const bool _Definitely_have_popcnt = __isa_available >= _Stl_isa_available_sse42;
|
||||
if (!_Definitely_have_popcnt) {
|
||||
return _Popcount_fallback(_Val);
|
||||
}
|
||||
#endif // !defined(__AVX__)
|
||||
return _Unchecked_x86_x64_popcount(_Val);
|
||||
}
|
||||
#endif // _HAS_POPCNT_INTRINSICS
|
||||
|
||||
#if _HAS_NEON_INTRINSICS
|
||||
// popcount for ARM64 using NEON intrinsics.
// NOTE(review): presumably neon_cnt yields per-byte bit counts and neon_addv8 sums the eight bytes - confirm
// against the arm64_neon.h documentation.
_NODISCARD inline int _Arm64_popcount(const unsigned long long _Val) noexcept {
    const __n64 _As_vector = __uint64ToN64_v(_Val);
    const __n64 _Counted   = neon_cnt(_As_vector);
    const auto _Summed     = neon_addv8(_Counted);
    return _Summed.n8_i8[0]; // first 8-bit lane of the result
}
|
||||
#endif // _HAS_NEON_INTRINSICS
|
||||
|
||||
// Trait over _Ty with cv-qualifiers removed, checked against unsigned char, unsigned short, unsigned int,
// unsigned long, and unsigned long long. Used in this header to constrain _Countr_zero and _Popcount.
// NOTE(review): presumably _Is_any_of_v is true when its first argument matches any of the others - confirm
// against its definition (not shown here).
template <class _Ty>
|
||||
_INLINE_VAR constexpr bool _Is_standard_unsigned_integer =
|
||||
_Is_any_of_v<remove_cv_t<_Ty>, unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long>;
|
||||
|
||||
template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
|
||||
_NODISCARD _CONSTEXPR20 int _Countr_zero(const _Ty _Val) noexcept {
|
||||
#if _HAS_TZCNT_BSF_INTRINSICS
|
||||
#if _HAS_CXX20
|
||||
if (!_STD is_constant_evaluated())
|
||||
#endif // _HAS_CXX20
|
||||
{
|
||||
return _Checked_x86_x64_countr_zero(_Val);
|
||||
}
|
||||
#endif // _HAS_TZCNT_BSF_INTRINSICS
|
||||
return _Countr_zero_fallback(_Val);
|
||||
}
|
||||
|
||||
// Chooses one countr_zero implementation for _Ty, wraps it in a captureless lambda, passes that lambda
// to _Callback, and returns whatever _Callback returns.
template <class _Ty, class _Fn>
constexpr decltype(auto) _Select_countr_zero_impl(_Fn _Callback) {
    // TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining
#if _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20
    if (!_STD is_constant_evaluated()) {
#ifndef __AVX2__
        if (__isa_available < _Stl_isa_available_avx2) { // tzcnt is not definitely available
            return _Callback([](_Ty _Val) { return _Countr_zero_bsf(_Val); });
        }
#endif // !defined(__AVX2__)

        return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); });
    }
#endif // ^^^ _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20 ^^^

    // C++17 constexpr gcd() calls this function, so it should be constexpr unless we detect runtime evaluation.
    return _Callback([](_Ty _Val) { return _Countr_zero_fallback(_Val); });
}
|
||||
|
||||
template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
|
||||
_NODISCARD _CONSTEXPR20 int _Popcount(const _Ty _Val) noexcept {
|
||||
#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS
|
||||
#if _HAS_CXX20
|
||||
if (!_STD is_constant_evaluated())
|
||||
#endif // _HAS_CXX20
|
||||
{
|
||||
#if _HAS_POPCNT_INTRINSICS
|
||||
return _Checked_x86_x64_popcount(_Val);
|
||||
#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv
|
||||
return _Arm64_popcount(_Val);
|
||||
#endif // ^^^ ARM64 intrinsics available ^^^
|
||||
}
|
||||
#endif // ^^^ any intrinsics available ^^^
|
||||
return _Popcount_fallback(_Val);
|
||||
}
|
||||
|
||||
template <class _Ty, class _Fn>
|
||||
_CONSTEXPR20 decltype(auto) _Select_popcount_impl(_Fn _Callback) {
|
||||
// TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining
|
||||
#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS
|
||||
#if _HAS_CXX20
|
||||
if (!_STD is_constant_evaluated())
|
||||
#endif // _HAS_CXX20
|
||||
{
|
||||
#if _HAS_POPCNT_INTRINSICS
|
||||
#ifndef __AVX__
|
||||
const bool _Definitely_have_popcnt = __isa_available >= _Stl_isa_available_sse42;
|
||||
if (!_Definitely_have_popcnt) {
|
||||
return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); });
|
||||
}
|
||||
#endif // !defined(__AVX__)
|
||||
return _Callback([](_Ty _Val) { return _Unchecked_x86_x64_popcount(_Val); });
|
||||
#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv
|
||||
return _Callback([](_Ty _Val) { return _Arm64_popcount(_Val); });
|
||||
#endif // ^^^ ARM64 intrinsics available ^^^
|
||||
}
|
||||
#endif // ^^^ any intrinsics available ^^^
|
||||
return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); });
|
||||
}
|
||||
|
||||
#undef _HAS_POPCNT_INTRINSICS
|
||||
#undef _HAS_TZCNT_BSF_INTRINSICS
|
||||
|
||||
_STD_END
|
||||
|
||||
#undef _HAS_NEON_INTRINSICS
|
||||
|
||||
#pragma pop_macro("new")
|
||||
_STL_RESTORE_CLANG_WARNINGS
|
||||
#pragma warning(pop)
|
||||
#pragma pack(pop)
|
||||
#endif // _STL_COMPILER_PREPROCESSOR
|
||||
#endif // __MSVC_BIT_UTILS_HPP
|
|
@ -61,7 +61,6 @@
|
|||
#if _STL_COMPILER_PREPROCESSOR
|
||||
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <xutility>
|
||||
|
||||
#pragma pack(push, _CRT_PACKING)
|
||||
|
@ -79,7 +78,7 @@ struct _Unicode_property_data {
|
|||
uint16_t _Props_and_size[_NumRanges];
|
||||
_NODISCARD constexpr _ValueEnum _Get_property_for_codepoint(const uint32_t _Code_point) const noexcept {
|
||||
ptrdiff_t _Upper_idx = _STD upper_bound(_Lower_bounds, _STD end(_Lower_bounds), _Code_point) - _Lower_bounds;
|
||||
constexpr auto _No_value_constant = static_cast<_ValueEnum>((numeric_limits<uint8_t>::max)());
|
||||
constexpr auto _No_value_constant = static_cast<_ValueEnum>(UINT8_MAX);
|
||||
if (_Upper_idx == 0) {
|
||||
return _No_value_constant;
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include <yvals_core.h>
|
||||
#if _STL_COMPILER_PREPROCESSOR
|
||||
#include <__msvc_bit_utils.hpp>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
|
@ -16,7 +17,6 @@
|
|||
#include _STL_INTRIN_HEADER
|
||||
|
||||
#if _HAS_CXX20
|
||||
#include <bit>
|
||||
#include <compare>
|
||||
#define _ZERO_OR_NO_INIT
|
||||
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
|
||||
|
@ -56,6 +56,24 @@ _STD_BEGIN
|
|||
#define _STL_128_DIV_INTRINSICS 0
|
||||
#endif // ^^^ intrinsics unavailable ^^^
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr int _Countl_zero_internal(const _Ty _Val) noexcept {
|
||||
_STL_INTERNAL_STATIC_ASSERT(_Is_standard_unsigned_integer<_Ty>);
|
||||
#if _HAS_COUNTL_ZERO_INTRINSICS
|
||||
#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
|
||||
if (!_Is_constant_evaluated()) {
|
||||
return _Checked_x86_x64_countl_zero(_Val);
|
||||
}
|
||||
#elif defined(_M_ARM) || defined(_M_ARM64)
|
||||
if (!_Is_constant_evaluated()) {
|
||||
return _Checked_arm_arm64_countl_zero(_Val);
|
||||
}
|
||||
#endif // defined(_M_ARM) || defined(_M_ARM64)
|
||||
#endif // _HAS_COUNTL_ZERO_INTRINSICS
|
||||
|
||||
return _Countl_zero_fallback(_Val);
|
||||
}
|
||||
|
||||
struct
|
||||
#ifndef _M_ARM
|
||||
alignas(16)
|
||||
|
@ -143,7 +161,7 @@ struct
|
|||
static constexpr void _Knuth_4_3_1_M(
|
||||
const uint32_t (&__u)[__m], const uint32_t (&__v)[__n], uint32_t (&__w)[__n + __m]) noexcept {
|
||||
#ifdef _ENABLE_STL_INTERNAL_CHECK
|
||||
constexpr auto _Int_max = static_cast<size_t>((numeric_limits<int>::max)());
|
||||
constexpr auto _Int_max = static_cast<size_t>(INT_MAX);
|
||||
_STL_INTERNAL_STATIC_ASSERT(__m <= _Int_max);
|
||||
_STL_INTERNAL_STATIC_ASSERT(__n <= _Int_max);
|
||||
#endif // _ENABLE_STL_INTERNAL_CHECK
|
||||
|
@ -192,7 +210,7 @@ struct
|
|||
static constexpr void _Knuth_4_3_1_D(uint32_t* const __u, const size_t __u_size, const uint32_t* const __v,
|
||||
const size_t __v_size, uint32_t* const __q) noexcept {
|
||||
// Pre: __u + [0, __u_size), __v + [0, __v_size), and __q + [0, __u_size - __v_size) are all valid ranges
|
||||
// constexpr auto _Int_max = static_cast<size_t>((numeric_limits<int>::max)());
|
||||
// constexpr auto _Int_max = static_cast<size_t>(INT_MAX);
|
||||
// _STL_INTERNAL_CHECK(__v_size <= _Int_max);
|
||||
const int __n = static_cast<int>(__v_size);
|
||||
// _STL_INTERNAL_CHECK(__u_size > __v_size);
|
||||
|
@ -252,11 +270,7 @@ struct
|
|||
}
|
||||
#endif // _STL_128_DIV_INTRINSICS
|
||||
|
||||
#if _HAS_CXX20
|
||||
const auto __d = _STD countl_zero(static_cast<uint32_t>(_Div >> 32));
|
||||
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
|
||||
const auto __d = _Countl_zero_fallback(static_cast<uint32_t>(_Div >> 32));
|
||||
#endif // ^^^ !_HAS_CXX20 ^^^
|
||||
const auto __d = _Countl_zero_internal(static_cast<uint32_t>(_Div >> 32));
|
||||
if (__d >= 32) { // _Div < 2^32
|
||||
auto _Rem = (_High << 32) | (_Low >> 32);
|
||||
auto _Result = _Rem / static_cast<uint32_t>(_Div);
|
||||
|
@ -464,11 +478,7 @@ struct
|
|||
// _STL_INTERNAL_CHECK(_Den._Word[1] != 0);
|
||||
// _STL_INTERNAL_CHECK(_Num._Word[1] > _Den._Word[1]);
|
||||
// Normalize by shifting both left until _Den's high bit is set (So _Den's high digit is >= b / 2)
|
||||
#if _HAS_CXX20
|
||||
const auto __d = _STD countl_zero(_Den._Word[1]);
|
||||
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
|
||||
const auto __d = _Countl_zero_fallback(_Den._Word[1]);
|
||||
#endif // ^^^ !_HAS_CXX20 ^^^
|
||||
const auto __d = _Countl_zero_internal(_Den._Word[1]);
|
||||
_Den <<= __d;
|
||||
auto _High_digit = __d == 0 ? 0 : _Num._Word[1] >> (64 - __d); // This creates a third digit for _Num
|
||||
_Num <<= __d;
|
||||
|
@ -513,11 +523,7 @@ struct
|
|||
}
|
||||
return __qhat;
|
||||
#else // ^^^ 128-bit intrinsics / no such intrinsics vvv
|
||||
#if _HAS_CXX20
|
||||
auto __d = _STD countl_zero(_Den._Word[1]);
|
||||
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
|
||||
auto __d = _Countl_zero_fallback(_Den._Word[1]);
|
||||
#endif // ^^^ !_HAS_CXX20 ^^^
|
||||
auto __d = _Countl_zero_internal(_Den._Word[1]);
|
||||
const bool _Three_word_den = __d >= 32;
|
||||
__d &= 31;
|
||||
uint32_t __u[5]{
|
||||
|
@ -597,11 +603,7 @@ struct
|
|||
// _STL_INTERNAL_CHECK(_Den._Word[1] != 0);
|
||||
// _STL_INTERNAL_CHECK(_Num._Word[1] > _Den._Word[1]);
|
||||
// Normalize by shifting both left until _Den's high bit is set (So _Den's high digit is >= b / 2)
|
||||
#if _HAS_CXX20
|
||||
const auto __d = _STD countl_zero(_Den._Word[1]);
|
||||
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
|
||||
const auto __d = _Countl_zero_fallback(_Den._Word[1]);
|
||||
#endif // ^^^ !_HAS_CXX20 ^^^
|
||||
const auto __d = _Countl_zero_internal(_Den._Word[1]);
|
||||
_Den <<= __d;
|
||||
auto _High_digit = __d == 0 ? 0 : _Num._Word[1] >> (64 - __d); // This creates a third digit for _Num
|
||||
_Num <<= __d;
|
||||
|
@ -648,11 +650,7 @@ struct
|
|||
(void) _AddCarry64(_Carry, _Num._Word[1], _Den._Word[1], _Num._Word[1]);
|
||||
}
|
||||
#else // ^^^ 128-bit intrinsics / no such intrinsics vvv
|
||||
#if _HAS_CXX20
|
||||
auto __d = _STD countl_zero(_Den._Word[1]);
|
||||
#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
|
||||
auto __d = _Countl_zero_fallback(_Den._Word[1]);
|
||||
#endif // ^^^ !_HAS_CXX20 ^^^
|
||||
auto __d = _Countl_zero_internal(_Den._Word[1]);
|
||||
const bool _Three_word_den = __d >= 32;
|
||||
__d &= 31;
|
||||
uint32_t __u[5]{
|
||||
|
|
133
stl/inc/bit
133
stl/inc/bit
|
@ -12,9 +12,8 @@
|
|||
_EMIT_STL_WARNING(STL4038, "The contents of <bit> are available only with C++20 or later.");
|
||||
#else // ^^^ !_HAS_CXX20 / _HAS_CXX20 vvv
|
||||
|
||||
#include <__msvc_bit_utils.hpp>
|
||||
#include <cstdlib>
|
||||
#include <isa_availability.h>
|
||||
#include <limits>
|
||||
#include <type_traits>
|
||||
|
||||
#include _STL_INTRIN_HEADER
|
||||
|
@ -95,7 +94,7 @@ _NODISCARD constexpr _Ty bit_ceil(const _Ty _Val) noexcept /* strengthened */ {
|
|||
return _Ty{1};
|
||||
}
|
||||
|
||||
const int _Num = numeric_limits<_Ty>::digits - _STD countl_zero(static_cast<_Ty>(_Val - 1));
|
||||
const int _Num = _Unsigned_integer_digits<_Ty> - _STD countl_zero(static_cast<_Ty>(_Val - 1));
|
||||
|
||||
if constexpr (sizeof(_Ty) < sizeof(unsigned int)) { // for types subject to integral promotion
|
||||
if (_STD is_constant_evaluated()) {
|
||||
|
@ -108,7 +107,7 @@ _NODISCARD constexpr _Ty bit_ceil(const _Ty _Val) noexcept /* strengthened */ {
|
|||
// "Preconditions: N is representable as a value of type T."
|
||||
// "Remarks: A function call expression that violates the precondition in the Preconditions: element
|
||||
// is not a core constant expression (7.7)."
|
||||
if (_Num == numeric_limits<_Ty>::digits) {
|
||||
if (_Num == _Unsigned_integer_digits<_Ty>) {
|
||||
_Precondition_violation_in_bit_ceil();
|
||||
}
|
||||
}
|
||||
|
@ -123,12 +122,12 @@ _NODISCARD constexpr _Ty bit_floor(const _Ty _Val) noexcept {
|
|||
return 0;
|
||||
}
|
||||
|
||||
return static_cast<_Ty>(_Ty{1} << (numeric_limits<_Ty>::digits - 1 - _STD countl_zero(_Val)));
|
||||
return static_cast<_Ty>(_Ty{1} << (_Unsigned_integer_digits<_Ty> - 1 - _STD countl_zero(_Val)));
|
||||
}
|
||||
|
||||
_EXPORT_STD template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
|
||||
_NODISCARD constexpr int bit_width(const _Ty _Val) noexcept {
|
||||
return numeric_limits<_Ty>::digits - _STD countl_zero(_Val);
|
||||
return _Unsigned_integer_digits<_Ty> - _STD countl_zero(_Val);
|
||||
}
|
||||
|
||||
_EXPORT_STD template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
|
||||
|
@ -136,7 +135,7 @@ _NODISCARD constexpr _Ty rotr(_Ty _Val, int _Rotation) noexcept;
|
|||
|
||||
_EXPORT_STD template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
|
||||
_NODISCARD constexpr _Ty rotl(const _Ty _Val, const int _Rotation) noexcept {
|
||||
constexpr auto _Digits = numeric_limits<_Ty>::digits;
|
||||
constexpr auto _Digits = _Unsigned_integer_digits<_Ty>;
|
||||
|
||||
if (!_STD is_constant_evaluated()) {
|
||||
if constexpr (_Digits == 64) {
|
||||
|
@ -164,7 +163,7 @@ _NODISCARD constexpr _Ty rotl(const _Ty _Val, const int _Rotation) noexcept {
|
|||
|
||||
_EXPORT_STD template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> /* = 0 */>
|
||||
_NODISCARD constexpr _Ty rotr(const _Ty _Val, const int _Rotation) noexcept {
|
||||
constexpr auto _Digits = numeric_limits<_Ty>::digits;
|
||||
constexpr auto _Digits = _Unsigned_integer_digits<_Ty>;
|
||||
|
||||
if (!_STD is_constant_evaluated()) {
|
||||
if constexpr (_Digits == 64) {
|
||||
|
@ -190,124 +189,9 @@ _NODISCARD constexpr _Ty rotr(const _Ty _Val, const int _Rotation) noexcept {
|
|||
}
|
||||
}
|
||||
|
||||
#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
|
||||
|
||||
extern "C" {
|
||||
extern int __isa_available;
|
||||
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Countl_zero_lzcnt(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = numeric_limits<_Ty>::digits;
|
||||
|
||||
if constexpr (_Digits <= 16) {
|
||||
return static_cast<int>(__lzcnt16(_Val) - (16 - _Digits));
|
||||
} else if constexpr (_Digits == 32) {
|
||||
return static_cast<int>(__lzcnt(_Val));
|
||||
} else {
|
||||
#ifdef _M_IX86
|
||||
const unsigned int _High = _Val >> 32;
|
||||
const auto _Low = static_cast<unsigned int>(_Val);
|
||||
if (_High == 0) {
|
||||
return 32 + _Countl_zero_lzcnt(_Low);
|
||||
} else {
|
||||
return _Countl_zero_lzcnt(_High);
|
||||
}
|
||||
#else // ^^^ _M_IX86 / !_M_IX86 vvv
|
||||
return static_cast<int>(__lzcnt64(_Val));
|
||||
#endif // _M_IX86
|
||||
}
|
||||
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Countl_zero_bsr(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = numeric_limits<_Ty>::digits;
|
||||
|
||||
unsigned long _Result;
|
||||
if constexpr (_Digits <= 32) {
|
||||
if (!_BitScanReverse(&_Result, _Val)) {
|
||||
return _Digits;
|
||||
}
|
||||
} else {
|
||||
#ifdef _M_IX86
|
||||
const unsigned int _High = _Val >> 32;
|
||||
if (_BitScanReverse(&_Result, _High)) {
|
||||
return static_cast<int>(31 - _Result);
|
||||
}
|
||||
|
||||
const auto _Low = static_cast<unsigned int>(_Val);
|
||||
if (!_BitScanReverse(&_Result, _Low)) {
|
||||
return _Digits;
|
||||
}
|
||||
#else // ^^^ _M_IX86 / !_M_IX86 vvv
|
||||
if (!_BitScanReverse64(&_Result, _Val)) {
|
||||
return _Digits;
|
||||
}
|
||||
#endif // _M_IX86
|
||||
}
|
||||
return static_cast<int>(_Digits - 1 - _Result);
|
||||
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Checked_x86_x64_countl_zero(const _Ty _Val) noexcept {
|
||||
#ifdef __AVX2__
|
||||
return _Countl_zero_lzcnt(_Val);
|
||||
#else // __AVX2__
|
||||
const bool _Definitely_have_lzcnt = __isa_available >= __ISA_AVAILABLE_AVX2;
|
||||
if (_Definitely_have_lzcnt) {
|
||||
return _Countl_zero_lzcnt(_Val);
|
||||
} else {
|
||||
return _Countl_zero_bsr(_Val);
|
||||
}
|
||||
#endif // __AVX2__
|
||||
}
|
||||
#endif // defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
|
||||
|
||||
#if defined(_M_ARM) || defined(_M_ARM64)
|
||||
#ifdef __clang__ // TRANSITION, GH-1586
|
||||
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned short _Val) {
|
||||
return __builtin_clzs(_Val);
|
||||
}
|
||||
|
||||
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned int _Val) {
|
||||
return __builtin_clz(_Val);
|
||||
}
|
||||
|
||||
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long _Val) {
|
||||
return __builtin_clzl(_Val);
|
||||
}
|
||||
|
||||
_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long long _Val) {
|
||||
return __builtin_clzll(_Val);
|
||||
}
|
||||
#endif // TRANSITION, GH-1586
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Checked_arm_arm64_countl_zero(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = numeric_limits<_Ty>::digits;
|
||||
if (_Val == 0) {
|
||||
return _Digits;
|
||||
}
|
||||
|
||||
#ifdef __clang__ // TRANSITION, GH-1586
|
||||
if constexpr (is_same_v<remove_cv_t<_Ty>, unsigned char>) {
|
||||
return _Clang_arm_arm64_countl_zero(static_cast<unsigned short>(_Val))
|
||||
- (numeric_limits<unsigned short>::digits - _Digits);
|
||||
} else {
|
||||
return _Clang_arm_arm64_countl_zero(_Val);
|
||||
}
|
||||
#else // ^^^ workaround / no workaround vvv
|
||||
if constexpr (_Digits <= 32) {
|
||||
return static_cast<int>(_CountLeadingZeros(_Val)) - (numeric_limits<unsigned long>::digits - _Digits);
|
||||
} else {
|
||||
return static_cast<int>(_CountLeadingZeros64(_Val));
|
||||
}
|
||||
#endif // TRANSITION, GH-1586
|
||||
}
|
||||
#endif // defined(_M_ARM) || defined(_M_ARM64)
|
||||
|
||||
_EXPORT_STD template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> /* = 0 */>
|
||||
_NODISCARD constexpr int countl_zero(const _Ty _Val) noexcept {
|
||||
#if _HAS_COUNTL_ZERO_INTRINSICS
|
||||
#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
|
||||
if (!_STD is_constant_evaluated()) {
|
||||
return _Checked_x86_x64_countl_zero(_Val);
|
||||
|
@ -317,6 +201,7 @@ _NODISCARD constexpr int countl_zero(const _Ty _Val) noexcept {
|
|||
return _Checked_arm_arm64_countl_zero(_Val);
|
||||
}
|
||||
#endif // defined(_M_ARM) || defined(_M_ARM64)
|
||||
#endif // _HAS_COUNTL_ZERO_INTRINSICS
|
||||
|
||||
return _Countl_zero_fallback(_Val);
|
||||
}
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
#define _BITSET_
|
||||
#include <yvals_core.h>
|
||||
#if _STL_COMPILER_PREPROCESSOR
|
||||
#include <__msvc_bit_utils.hpp>
|
||||
#include <iosfwd>
|
||||
#include <limits>
|
||||
#include <xstring>
|
||||
|
||||
#pragma pack(push, _CRT_PACKING)
|
||||
|
|
|
@ -26,9 +26,8 @@
|
|||
// TRANSITION, not using x86/x64 FMA intrinsics for Clang yet
|
||||
#elif defined(_M_IX86) || defined(_M_X64)
|
||||
#define _FMP_USING_X86_X64_INTRINSICS
|
||||
#include <__msvc_bit_utils.hpp>
|
||||
#include <emmintrin.h>
|
||||
#include <isa_availability.h>
|
||||
extern "C" int __isa_available;
|
||||
extern "C" __m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d);
|
||||
#endif // ^^^ defined(_M_IX86) || defined(_M_X64) ^^^
|
||||
|
||||
|
@ -177,7 +176,7 @@ namespace _Float_multi_prec {
|
|||
#ifdef __AVX2__
|
||||
return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)};
|
||||
#else // ^^^ defined(__AVX2__) / !defined(__AVX2__) vvv
|
||||
const bool _Definitely_have_fma = __isa_available >= __ISA_AVAILABLE_AVX2;
|
||||
const bool _Definitely_have_fma = __isa_available >= _Stl_isa_available_avx2;
|
||||
if (_Definitely_have_fma) {
|
||||
return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)};
|
||||
} else {
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
"Version": "1.0",
|
||||
"BuildAsHeaderUnits": [
|
||||
// "__msvc_all_public_headers.hpp", // for testing, not production
|
||||
"__msvc_bit_utils.hpp",
|
||||
"__msvc_chrono.hpp",
|
||||
"__msvc_cxx_stdatomic.hpp",
|
||||
"__msvc_filebuf.hpp",
|
||||
|
|
291
stl/inc/limits
291
stl/inc/limits
|
@ -11,23 +11,10 @@
|
|||
#include <cfloat>
|
||||
#include <climits>
|
||||
#include <cwchar>
|
||||
#include <isa_availability.h>
|
||||
#include <xtr1common>
|
||||
|
||||
#include _STL_INTRIN_HEADER
|
||||
|
||||
// TRANSITION, GH-2129, move down to _Arm64_popcount
|
||||
#if (defined(_M_ARM64) || defined(_M_ARM64EC)) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
|
||||
&& !defined(__INTEL_COMPILER) && !defined(__clang__) // TRANSITION, LLVM-51488
|
||||
#define _HAS_NEON_INTRINSICS 1
|
||||
#else // ^^^ intrinsics available / intrinsics unavailable vvv
|
||||
#define _HAS_NEON_INTRINSICS 0
|
||||
#endif // ^^^ intrinsics unavailable ^^^
|
||||
|
||||
#if _HAS_NEON_INTRINSICS
|
||||
#include <arm64_neon.h> // TRANSITION, GH-2129
|
||||
#endif
|
||||
|
||||
#pragma pack(push, _CRT_PACKING)
|
||||
#pragma warning(push, _STL_WARNING_LEVEL)
|
||||
#pragma warning(disable : _STL_DISABLED_WARNINGS)
|
||||
|
@ -1006,284 +993,6 @@ public:
|
|||
static constexpr int min_exponent10 = LDBL_MIN_10_EXP;
|
||||
};
|
||||
|
||||
// Implementation of countl_zero without using specialized CPU instructions.
|
||||
// Used at compile time and when said instructions are not supported.
|
||||
// see "Hacker's Delight" section 5-3
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr int _Countl_zero_fallback(_Ty _Val) noexcept {
|
||||
_Ty _Yy = 0;
|
||||
|
||||
unsigned int _Nn = numeric_limits<_Ty>::digits;
|
||||
unsigned int _Cc = numeric_limits<_Ty>::digits / 2;
|
||||
do {
|
||||
_Yy = static_cast<_Ty>(_Val >> _Cc);
|
||||
if (_Yy != 0) {
|
||||
_Nn -= _Cc;
|
||||
_Val = _Yy;
|
||||
}
|
||||
_Cc >>= 1;
|
||||
} while (_Cc != 0);
|
||||
return static_cast<int>(_Nn) - static_cast<int>(_Val);
|
||||
}
|
||||
|
||||
// Implementation of countr_zero without using specialized CPU instructions.
|
||||
// Used at compile time and when said instructions are not supported.
|
||||
// see "Hacker's Delight" section 5-4
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr int _Countr_zero_fallback(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = numeric_limits<_Ty>::digits;
|
||||
return _Digits - _Countl_zero_fallback(static_cast<_Ty>(static_cast<_Ty>(~_Val) & static_cast<_Ty>(_Val - 1)));
|
||||
}
|
||||
|
||||
// Implementation of popcount without using specialized CPU instructions.
|
||||
// Used at compile time and when said instructions are not supported.
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr int _Popcount_fallback(_Ty _Val) noexcept {
|
||||
constexpr int _Digits = numeric_limits<_Ty>::digits;
|
||||
#if defined(_M_IX86) || defined(_M_ARM)
|
||||
if constexpr (_Digits == 64) {
|
||||
// 64-bit bit operations on architectures without 64-bit registers are less efficient,
|
||||
// hence we split the value so that it fits in 32-bit registers
|
||||
return _Popcount_fallback(static_cast<unsigned long>(_Val))
|
||||
+ _Popcount_fallback(static_cast<unsigned long>(_Val >> 32));
|
||||
}
|
||||
#endif // defined(_M_IX86) || defined(_M_ARM)
|
||||
// we static_cast these bit patterns in order to truncate them to the correct size
|
||||
_Val = static_cast<_Ty>(_Val - ((_Val >> 1) & static_cast<_Ty>(0x5555'5555'5555'5555ull)));
|
||||
_Val = static_cast<_Ty>((_Val & static_cast<_Ty>(0x3333'3333'3333'3333ull))
|
||||
+ ((_Val >> 2) & static_cast<_Ty>(0x3333'3333'3333'3333ull)));
|
||||
_Val = static_cast<_Ty>((_Val + (_Val >> 4)) & static_cast<_Ty>(0x0F0F'0F0F'0F0F'0F0Full));
|
||||
// Multiply by one in each byte, so that it will have the sum of all source bytes in the highest byte
|
||||
_Val = static_cast<_Ty>(_Val * static_cast<_Ty>(0x0101'0101'0101'0101ull));
|
||||
// Extract highest byte
|
||||
return static_cast<int>(_Val >> (_Digits - 8));
|
||||
}
|
||||
|
||||
#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
|
||||
&& !defined(__INTEL_COMPILER)
|
||||
#define _HAS_TZCNT_BSF_INTRINSICS 1
|
||||
#else // ^^^ intrinsics available / intrinsics unavailable vvv
|
||||
#define _HAS_TZCNT_BSF_INTRINSICS 0
|
||||
#endif // ^^^ intrinsics unavailable ^^^
|
||||
|
||||
#if _HAS_TZCNT_BSF_INTRINSICS
|
||||
extern "C" {
|
||||
extern int __isa_available;
|
||||
}
|
||||
|
||||
#ifdef __clang__
|
||||
#define _TZCNT_U32 __builtin_ia32_tzcnt_u32
|
||||
#define _TZCNT_U64 __builtin_ia32_tzcnt_u64
|
||||
#else // ^^^ __clang__ / !__clang__ vvv
|
||||
#define _TZCNT_U32 _tzcnt_u32
|
||||
#define _TZCNT_U64 _tzcnt_u64
|
||||
#endif // __clang__
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Countr_zero_tzcnt(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = numeric_limits<_Ty>::digits;
|
||||
constexpr _Ty _Max = (numeric_limits<_Ty>::max)();
|
||||
|
||||
if constexpr (_Digits <= 32) {
|
||||
// Intended widening to int. This operation means that a narrow 0 will widen
|
||||
// to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros
|
||||
// of the wider type.
|
||||
return static_cast<int>(_TZCNT_U32(static_cast<unsigned int>(~_Max | _Val)));
|
||||
} else {
|
||||
#ifdef _M_IX86
|
||||
const auto _Low = static_cast<unsigned int>(_Val);
|
||||
if (_Low == 0) {
|
||||
const unsigned int _High = _Val >> 32;
|
||||
return static_cast<int>(32 + _TZCNT_U32(_High));
|
||||
} else {
|
||||
return static_cast<int>(_TZCNT_U32(_Low));
|
||||
}
|
||||
#else // ^^^ _M_IX86 / !_M_IX86 vvv
|
||||
return static_cast<int>(_TZCNT_U64(_Val));
|
||||
#endif // _M_IX86
|
||||
}
|
||||
}
|
||||
|
||||
#undef _TZCNT_U32
|
||||
#undef _TZCNT_U64
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Countr_zero_bsf(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = numeric_limits<_Ty>::digits;
|
||||
constexpr _Ty _Max = (numeric_limits<_Ty>::max)();
|
||||
|
||||
unsigned long _Result;
|
||||
if constexpr (_Digits <= 32) {
|
||||
// Intended widening to int. This operation means that a narrow 0 will widen
|
||||
// to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros
|
||||
// of the wider type.
|
||||
if (!_BitScanForward(&_Result, static_cast<unsigned int>(~_Max | _Val))) {
|
||||
return _Digits;
|
||||
}
|
||||
} else {
|
||||
#ifdef _M_IX86
|
||||
const auto _Low = static_cast<unsigned int>(_Val);
|
||||
if (_BitScanForward(&_Result, _Low)) {
|
||||
return static_cast<int>(_Result);
|
||||
}
|
||||
|
||||
const unsigned int _High = _Val >> 32;
|
||||
if (!_BitScanForward(&_Result, _High)) {
|
||||
return _Digits;
|
||||
} else {
|
||||
return static_cast<int>(_Result + 32);
|
||||
}
|
||||
#else // ^^^ _M_IX86 / !_M_IX86 vvv
|
||||
if (!_BitScanForward64(&_Result, _Val)) {
|
||||
return _Digits;
|
||||
}
|
||||
#endif // _M_IX86
|
||||
}
|
||||
return static_cast<int>(_Result);
|
||||
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Checked_x86_x64_countr_zero(const _Ty _Val) noexcept {
|
||||
#ifdef __AVX2__
|
||||
return _Countr_zero_tzcnt(_Val);
|
||||
#else // __AVX2__
|
||||
const bool _Definitely_have_tzcnt = __isa_available >= __ISA_AVAILABLE_AVX2;
|
||||
if (_Definitely_have_tzcnt) {
|
||||
return _Countr_zero_tzcnt(_Val);
|
||||
} else {
|
||||
return _Countr_zero_bsf(_Val);
|
||||
}
|
||||
#endif // __AVX2__
|
||||
}
|
||||
|
||||
#endif // _HAS_TZCNT_BSF_INTRINSICS
|
||||
|
||||
#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \
|
||||
&& !defined(__INTEL_COMPILER)
|
||||
#define _HAS_POPCNT_INTRINSICS 1
|
||||
#else // ^^^ intrinsics available / intrinsics unavailable vvv
|
||||
#define _HAS_POPCNT_INTRINSICS 0
|
||||
#endif // ^^^ intrinsics unavailable ^^^
|
||||
|
||||
#if _HAS_POPCNT_INTRINSICS
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Unchecked_x86_x64_popcount(const _Ty _Val) noexcept {
|
||||
constexpr int _Digits = numeric_limits<_Ty>::digits;
|
||||
if constexpr (_Digits <= 16) {
|
||||
return static_cast<int>(__popcnt16(_Val));
|
||||
} else if constexpr (_Digits == 32) {
|
||||
return static_cast<int>(__popcnt(_Val));
|
||||
} else {
|
||||
#ifdef _M_IX86
|
||||
return static_cast<int>(__popcnt(_Val >> 32) + __popcnt(static_cast<unsigned int>(_Val)));
|
||||
#else // ^^^ _M_IX86 / !_M_IX86 vvv
|
||||
return static_cast<int>(__popcnt64(_Val));
|
||||
#endif // _M_IX86
|
||||
}
|
||||
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD int _Checked_x86_x64_popcount(const _Ty _Val) noexcept {
|
||||
#ifndef __AVX__
|
||||
const bool _Definitely_have_popcnt = __isa_available >= __ISA_AVAILABLE_SSE42;
|
||||
if (!_Definitely_have_popcnt) {
|
||||
return _Popcount_fallback(_Val);
|
||||
}
|
||||
#endif // !defined(__AVX__)
|
||||
return _Unchecked_x86_x64_popcount(_Val);
|
||||
}
|
||||
#endif // _HAS_POPCNT_INTRINSICS
|
||||
|
||||
#if _HAS_NEON_INTRINSICS
|
||||
// popcount for ARM64 built from NEON intrinsics.
// NOTE(review): presumably neon_cnt yields per-byte bit counts and neon_addv8 sums the eight bytes,
// leaving the total in lane 0 - confirm against the arm64_neon.h documentation.
_NODISCARD inline int _Arm64_popcount(const unsigned long long _Val) noexcept {
    return neon_addv8(neon_cnt(__uint64ToN64_v(_Val))).n8_i8[0];
}
|
||||
#endif // _HAS_NEON_INTRINSICS
|
||||
|
||||
template <class _Ty>
|
||||
constexpr bool _Is_standard_unsigned_integer =
|
||||
_Is_any_of_v<remove_cv_t<_Ty>, unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long>;
|
||||
|
||||
template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
|
||||
_NODISCARD _CONSTEXPR20 int _Countr_zero(const _Ty _Val) noexcept {
|
||||
#if _HAS_TZCNT_BSF_INTRINSICS
|
||||
#if _HAS_CXX20
|
||||
if (!_STD is_constant_evaluated())
|
||||
#endif // _HAS_CXX20
|
||||
{
|
||||
return _Checked_x86_x64_countr_zero(_Val);
|
||||
}
|
||||
#endif // _HAS_TZCNT_BSF_INTRINSICS
|
||||
return _Countr_zero_fallback(_Val);
|
||||
}
|
||||
|
||||
// Chooses a countr_zero implementation for _Ty once, then invokes _Callback with a lambda wrapping the
// chosen implementation and returns whatever _Callback returns.
// At run time (C++20, x86/x64 intrinsics available) the choice is TZCNT when AVX2 is guaranteed or
// reported by __isa_available, else _BitScanForward; in every other case it is the portable fallback.
template <class _Ty, class _Fn>
constexpr decltype(auto) _Select_countr_zero_impl(_Fn _Callback) {
    // TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining
#if _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20
    if (!_STD is_constant_evaluated()) {
#ifdef __AVX2__
        return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); });
#else // ^^^ AVX2 / not AVX2 vvv
        // _Stl_isa_available_avx2 has the same value as __ISA_AVAILABLE_AVX2; using it avoids depending
        // on <isa_availability.h>, which emits a non-reserved name (GH-3692).
        const bool _Definitely_have_tzcnt = __isa_available >= _Stl_isa_available_avx2;
        if (_Definitely_have_tzcnt) {
            return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); });
        } else {
            return _Callback([](_Ty _Val) { return _Countr_zero_bsf(_Val); });
        }
#endif // ^^^ not AVX2 ^^^
    }
#endif // ^^^ _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20 ^^^
    // C++17 constexpr gcd() calls this function, so it should be constexpr unless we detect runtime evaluation.
    return _Callback([](_Ty _Val) { return _Countr_zero_fallback(_Val); });
}
|
||||
|
||||
template <class _Ty, enable_if_t<_Is_standard_unsigned_integer<_Ty>, int> = 0>
|
||||
_NODISCARD _CONSTEXPR20 int _Popcount(const _Ty _Val) noexcept {
|
||||
#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS
|
||||
#if _HAS_CXX20
|
||||
if (!_STD is_constant_evaluated())
|
||||
#endif // _HAS_CXX20
|
||||
{
|
||||
#if _HAS_POPCNT_INTRINSICS
|
||||
return _Checked_x86_x64_popcount(_Val);
|
||||
#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv
|
||||
return _Arm64_popcount(_Val);
|
||||
#endif // ^^^ ARM64 intrinsics available ^^^
|
||||
}
|
||||
#endif // ^^^ any intrinsics available ^^^
|
||||
return _Popcount_fallback(_Val);
|
||||
}
|
||||
|
||||
template <class _Ty, class _Fn>
|
||||
_CONSTEXPR20 decltype(auto) _Select_popcount_impl(_Fn _Callback) {
|
||||
// TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining
|
||||
#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS
|
||||
#if _HAS_CXX20
|
||||
if (!_STD is_constant_evaluated())
|
||||
#endif // _HAS_CXX20
|
||||
{
|
||||
#if _HAS_POPCNT_INTRINSICS
|
||||
#ifndef __AVX__
|
||||
const bool _Definitely_have_popcnt = __isa_available >= __ISA_AVAILABLE_SSE42;
|
||||
if (!_Definitely_have_popcnt) {
|
||||
return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); });
|
||||
}
|
||||
#endif // !defined(__AVX__)
|
||||
return _Callback([](_Ty _Val) { return _Unchecked_x86_x64_popcount(_Val); });
|
||||
#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv
|
||||
return _Callback([](_Ty _Val) { return _Arm64_popcount(_Val); });
|
||||
#endif // ^^^ ARM64 intrinsics available ^^^
|
||||
}
|
||||
#endif // ^^^ any intrinsics available ^^^
|
||||
return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); });
|
||||
}
|
||||
|
||||
#undef _HAS_POPCNT_INTRINSICS
|
||||
#undef _HAS_TZCNT_BSF_INTRINSICS
|
||||
#undef _HAS_NEON_INTRINSICS
|
||||
|
||||
_STD_END
|
||||
#pragma pop_macro("new")
|
||||
_STL_RESTORE_CLANG_WARNINGS
|
||||
|
|
|
@ -11,9 +11,13 @@
|
|||
#include <xutility>
|
||||
|
||||
#if _HAS_CXX17
|
||||
#include <limits>
|
||||
#include <__msvc_bit_utils.hpp>
|
||||
#endif // _HAS_CXX17
|
||||
|
||||
#if _HAS_CXX20
|
||||
#include <cfloat>
|
||||
#endif // _HAS_CXX20
|
||||
|
||||
#pragma pack(push, _CRT_PACKING)
|
||||
#pragma warning(push, _STL_WARNING_LEVEL)
|
||||
#pragma warning(disable : _STL_DISABLED_WARNINGS)
|
||||
|
@ -648,6 +652,24 @@ _NODISCARD constexpr common_type_t<_Mt, _Nt> lcm(const _Mt _Mx, const _Nt _Nx) n
|
|||
#endif // _HAS_CXX17
|
||||
|
||||
#if _HAS_CXX20
|
||||
// Maximum finite value of a floating-point type, taken from the <cfloat> macros.
// The primary template is value-initialized; only the float, double, and long double
// specializations carry a meaningful value.
template <class _Flt>
inline constexpr _Flt _Floating_max{};

template <>
inline constexpr float _Floating_max<float> = FLT_MAX;

template <>
inline constexpr double _Floating_max<double> = DBL_MAX;

template <>
inline constexpr long double _Floating_max<long double> = LDBL_MAX;
|
||||
|
||||
// Minimum positive normalized value of a floating-point type, taken from the <cfloat> macros.
// The primary template is value-initialized; only the float, double, and long double
// specializations carry a meaningful value.
template <class _Flt>
inline constexpr _Flt _Floating_min{};

template <>
inline constexpr float _Floating_min<float> = FLT_MIN;

template <>
inline constexpr double _Floating_min<double> = DBL_MIN;

template <>
inline constexpr long double _Floating_min<long double> = LDBL_MIN;
|
||||
|
||||
_EXPORT_STD template <class _Ty, enable_if_t<is_arithmetic_v<_Ty> && !is_same_v<remove_cv_t<_Ty>, bool>, int> = 0>
|
||||
_NODISCARD constexpr _Ty midpoint(const _Ty _Val1, const _Ty _Val2) noexcept {
|
||||
if constexpr (is_floating_point_v<_Ty>) {
|
||||
|
@ -666,7 +688,7 @@ _NODISCARD constexpr _Ty midpoint(const _Ty _Val1, const _Ty _Val2) noexcept {
|
|||
}
|
||||
}
|
||||
|
||||
constexpr _Ty _High_limit = (numeric_limits<_Ty>::max)() / 2;
|
||||
constexpr _Ty _High_limit = _Floating_max<remove_cv_t<_Ty>> / 2;
|
||||
const auto _Val1_a = _Float_abs(_Val1);
|
||||
const auto _Val2_a = _Float_abs(_Val2);
|
||||
if (_Val1_a <= _High_limit && _Val2_a <= _High_limit) {
|
||||
|
@ -690,7 +712,7 @@ _NODISCARD constexpr _Ty midpoint(const _Ty _Val1, const _Ty _Val2) noexcept {
|
|||
|
||||
// In the default rounding mode this less than one ULP difference will always be rounded away, so under
|
||||
// /fp:fast we could avoid these tests if we had some means of detecting it in the caller.
|
||||
constexpr _Ty _Low_limit = (numeric_limits<_Ty>::min)() * 2;
|
||||
constexpr _Ty _Low_limit = _Floating_min<remove_cv_t<_Ty>> * 2;
|
||||
if (_Val1_a < _Low_limit) {
|
||||
return _Val1 + _Val2 / 2;
|
||||
}
|
||||
|
|
|
@ -2169,7 +2169,7 @@ _NODISCARD _Flt _Float_upper_bound(_Ty _Val) {
|
|||
constexpr auto _Mask = static_cast<_Ty>(-1) << (_Ty_digits - _Flt_digits);
|
||||
#ifdef _M_CEE_PURE
|
||||
constexpr auto _Ty_32or64_digits = numeric_limits<_Ty_32or64>::digits;
|
||||
const auto _Log_plus1 = _Ty_32or64_digits - _Countl_zero_fallback(static_cast<_Ty_32or64>(_Val | _Ty{1}));
|
||||
const auto _Log_plus1 = _Ty_32or64_digits - _Countl_zero_internal(static_cast<_Ty_32or64>(_Val | _Ty{1}));
|
||||
#else // _M_CEE_PURE
|
||||
const auto _Log_plus1 = _Bit_scan_reverse(static_cast<_Ty_32or64>(_Val | _Ty{1}));
|
||||
#endif // _M_CEE_PURE
|
||||
|
|
|
@ -21,6 +21,7 @@ _EMIT_STL_WARNING(STL4038, "The contents of <ranges> are available only with C++
|
|||
|
||||
#if _HAS_CXX23
|
||||
#include <array>
|
||||
#include <bit>
|
||||
#endif // _HAS_CXX23
|
||||
|
||||
#pragma pack(push, _CRT_PACKING)
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#define _VECTOR_
|
||||
#include <yvals_core.h>
|
||||
#if _STL_COMPILER_PREPROCESSOR
|
||||
#include <__msvc_bit_utils.hpp>
|
||||
#include <xmemory>
|
||||
|
||||
#if _HAS_CXX17
|
||||
|
|
|
@ -19,11 +19,24 @@
|
|||
#include <xfilesystem_abi.h>
|
||||
#endif // _HAS_CXX17
|
||||
|
||||
#if _HAS_CXX23
|
||||
#include <__msvc_print.hpp>
|
||||
#endif // _HAS_CXX23
|
||||
|
||||
// <__msvc_bit_utils.hpp> is included by <bit> and <limits>
|
||||
// <__msvc_iter_core.hpp> is included by <tuple>
|
||||
// <xkeycheck.h> should not be included outside of <yvals_core.h>
|
||||
// <xtr1common> is included by <cstddef>
|
||||
// <yvals_core.h> is included by every public core header
|
||||
|
||||
// Also test GH-3692 "Including <isa_availability.h> emits a non-reserved name"
|
||||
#include <isa_availability.h>
|
||||
|
||||
#define STATIC_ASSERT(...) static_assert(__VA_ARGS__, #__VA_ARGS__)
|
||||
|
||||
STATIC_ASSERT(std::_Stl_isa_available_sse42 == __ISA_AVAILABLE_SSE42);
|
||||
STATIC_ASSERT(std::_Stl_isa_available_avx2 == __ISA_AVAILABLE_AVX2);
|
||||
|
||||
#ifdef _YVALS
|
||||
#error Core headers should not include <yvals.h>.
|
||||
#endif
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#define nsec delete
|
||||
#define sec delete
|
||||
#define xtime delete
|
||||
#define xtime_get delete
|
||||
#define ISA_AVAILABILITY delete
|
||||
#define nsec delete
|
||||
#define sec delete
|
||||
#define xtime delete
|
||||
#define xtime_get delete
|
||||
|
||||
#include <__msvc_all_public_headers.hpp>
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <isa_availability.h>
|
||||
#include <ranges>
|
||||
|
||||
#include <Windows.h>
|
||||
|
|
Загрузка…
Ссылка в новой задаче