<complex>: Improve numerical accuracy of sqrt and log (#935)

* Fix undue overflow and underflow in complex sqrt

Modifies the scale factors in `_Fabs` (used by `sqrt`) such that:

- `_Fabs` doesn't underflow when the input is tiny.

- `sqrt` doesn't overflow when the input is huge.

* Improve accuracy of `log` when |z| is close to 1

When |z| is close to 1, compute log(|z|) as log1p(norm_minus_1(z)) / 2,
where norm_minus_1(z) = real(z) ^ 2 + imag(z) ^ 2 - 1 computed with
double width arithmetic to avoid catastrophic cancellation.

* Fix log(complex{1, tiny}) incorrectly returning -0 under FE_DOWNWARD

Co-authored-by: Curtis J Bezault <curtbezault@gmail.com>
Co-authored-by: Stephan T. Lavavej <stl@microsoft.com>
This commit is contained in:
statementreply 2020-11-10 07:24:43 +08:00 коммит произвёл GitHub
Родитель 51ccd93319
Коммит 9959929c77
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
13 изменённых файлов: 1426 добавлений и 46 удалений

Просмотреть файл

@ -10,9 +10,27 @@
#if _STL_COMPILER_PREPROCESSOR
#include <cmath>
#include <cstdint>
#include <limits>
#include <sstream>
#include <type_traits>
#include <xutility>
#include <ymath.h>
#ifdef _M_CEE_PURE
// no intrinsics for /clr:pure
#elif defined(__clang__)
// TRANSITION, not using FMA intrinsics for Clang yet
#elif defined(_M_IX86) || defined(_M_X64)
#define _FMP_USING_X86_X64_INTRINSICS
#include <emmintrin.h>
#include <isa_availability.h>
extern "C" int __isa_available;
extern "C" __m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d);
#elif defined(_M_ARM64)
#define _FMP_USING_ARM64_INTRINSICS
#include <arm64_neon.h>
#endif // ^^^ defined(_M_ARM64) ^^^
#pragma pack(push, _CRT_PACKING)
#pragma warning(push, _STL_WARNING_LEVEL)
#pragma warning(disable : _STL_DISABLED_WARNINGS)
@ -41,6 +59,296 @@ struct _C_ldouble_complex {
#define _IM 1
_STD_BEGIN
// implements multi-precision floating point arithmetic for numerical algorithms
#pragma float_control(precise, on, push)
namespace _Float_multi_prec {
// multi-precision floating point types
// _Fmp_t<_Ty, _Prec> holds one value as several components of type _Ty.
// The primary template is only declared here; the two-component form is specialized right below.
template <class _Ty, int _Prec>
struct _Fmp_t;
// double-width value: the high part is stored in _Val0 and the low part in _Val1
// NOTE: the helpers in this namespace build results with aggregate initialization {high, low},
// so the member order must not change
template <class _Ty>
struct _Fmp_t<_Ty, 2> {
static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point");
_Ty _Val0; // most significant numeric_limits<_Ty>::digits bits
_Ty _Val1; // least significant numeric_limits<_Ty>::digits bits
};
// addition
// 1x precision + 1x precision -> 2x precision
// Returns the rounded sum in _Val0 and the rounding error of that sum in _Val1.
// No ordering of |_Xval| and |_Yval| is assumed (this is the classic branch-free two-sum sequence).
// the result is exact when:
// 1) the result doesn't overflow
// 2) either underflow is gradual, or no internal underflow occurs
// 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
// 4) parameters and local variables do not retain extra intermediate precision
// 5) rounding mode is rounding to nearest
// violation of condition 3 or 5 could lead to relative error on the order of epsilon^2
// violation of other conditions could lead to worse results
template <class _Ty>
_NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
const _Ty _Sum0 = _Xval + _Yval; // rounded sum, becomes the high part
const _Ty _Ymod = _Sum0 - _Xval; // portion of _Yval that was absorbed into _Sum0
const _Ty _Xmod = _Sum0 - _Ymod; // portion of _Xval that was absorbed into _Sum0
const _Ty _Yerr = _Yval - _Ymod; // part of _Yval lost by rounding
const _Ty _Xerr = _Xval - _Xmod; // part of _Xval lost by rounding
return {_Sum0, _Xerr + _Yerr}; // low part is the total rounding error
}
// 1x precision + 1x precision -> 2x precision
// Cheaper variant of _Add_x2 that only recovers the error contributed by _Yval; this is sufficient
// when the requirement below holds (roughly: _Yval is not larger in magnitude than the last bit of _Xval allows).
// requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval) || _Xval == 0
// the result is exact when:
// 0) the requirement above is satisfied
// 1) no internal overflow occurs
// 2) either underflow is gradual, or no internal underflow occurs
// 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
// 4) parameters and local variables do not retain extra intermediate precision
// 5) rounding mode is rounding to nearest
// violation of condition 3 or 5 could lead to relative error on the order of epsilon^2
// violation of other conditions could lead to worse results
template <class _Ty>
_NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
const _Ty _Sum0 = _Xval + _Yval; // rounded sum, becomes the high part
const _Ty _Ymod = _Sum0 - _Xval; // portion of _Yval that was absorbed into _Sum0
const _Ty _Yerr = _Yval - _Ymod; // part of _Yval lost by rounding, becomes the low part
return {_Sum0, _Yerr};
}
// 1x precision + 2x precision -> 2x precision
// Adds a single-width value to a double-width value by applying the 1x + 1x overload twice.
// requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval._Val0) || _Xval == 0
template <class _Ty>
_NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
// first add the high part of _Yval, keeping the rounding error of that addition
const _Fmp_t<_Ty, 2> _Sum0 = _Add_small_x2(_Xval, _Yval._Val0);
// then merge that rounding error with the low part of _Yval and renormalize into {high, low}
return _Add_small_x2(_Sum0._Val0, _Sum0._Val1 + _Yval._Val1);
}
// 2x precision + 2x precision -> 1x precision
// Adds two double-width values and rounds the result to a single _Ty.
template <class _Ty>
_NODISCARD constexpr _Ty _Add_x1(const _Fmp_t<_Ty, 2>& _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
// sum of the two high parts together with its rounding error
const _Fmp_t<_Ty, 2> _Sum00 = _Add_x2(_Xval._Val0, _Yval._Val0);
// the small terms are accumulated first (innermost parentheses) and only then added to the high part
return _Sum00._Val0 + (_Sum00._Val1 + (_Xval._Val1 + _Yval._Val1));
}
// multiplication
// round to 26 significant bits, ties toward zero
_NODISCARD _CONSTEXPR_BIT_CAST double _High_half(const double _Val) noexcept {
    // Works directly on the bit pattern: adding 2^26 - 1 and then clearing the low 27 bits
    // rounds the magnitude to the nearest value with 27 trailing zero bits; an exact tie does not carry,
    // so it is rounded toward zero.
    constexpr unsigned long long _Round_bias = 0x3ff'ffffULL;
    constexpr unsigned long long _Keep_mask  = 0xffff'ffff'f800'0000ULL;
    return _Bit_cast<double>((_Bit_cast<unsigned long long>(_Val) + _Round_bias) & _Keep_mask);
}
// _Xval * _Xval - _Prod0
// Software fallback used when no fused multiply-add instruction is available.
// Splits _Xval into a 26-bit high part and the remaining low part, then evaluates
// (high + low)^2 - _Prod0 term by term.
// the result is exact when:
// 1) _Prod0 is _Xval^2 faithfully rounded
// 2) no internal overflow or underflow occurs
// violation of condition 1 could lead to relative error on the order of epsilon
_NODISCARD _CONSTEXPR_BIT_CAST double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept {
const double _Xhigh = _High_half(_Xval); // _Xval rounded to 26 significant bits
const double _Xlow = _Xval - _Xhigh; // remainder of _Xval
// high^2 - _Prod0 is formed first so that the large terms cancel, then the cross term and low^2 are added
return ((_Xhigh * _Xhigh - _Prod0) + 2.0 * _Xhigh * _Xlow) + _Xlow * _Xlow;
}
#ifdef _FMP_USING_X86_X64_INTRINSICS
// returns _Xval * _Xval - _Prod0, evaluated by one scalar fused multiply-subtract intrinsic
_NODISCARD inline double _Sqr_error_x86_x64_fma(const double _Xval, const double _Prod0) noexcept {
    const __m128d _Xreg  = _mm_set_sd(_Xval);
    const __m128d _Preg  = _mm_set_sd(_Prod0);
    const __m128d _Fused = _mm_fmsub_sd(_Xreg, _Xreg, _Preg);

    double _Error;
    _mm_store_sd(&_Error, _Fused);
    return _Error;
}
#endif // _FMP_USING_X86_X64_INTRINSICS
#ifdef _FMP_USING_ARM64_INTRINSICS
// returns _Xval * _Xval - _Prod0, evaluated as a fused multiply-add onto the negated product
_NODISCARD inline double _Sqr_error_arm64_neon(const double _Xval, const double _Prod0) noexcept {
    const float64x1_t _Xreg     = vld1_f64(&_Xval);
    const float64x1_t _Neg_prod = vneg_f64(vld1_f64(&_Prod0));
    const float64x1_t _Fused    = vfma_f64(_Neg_prod, _Xreg, _Xreg);

    double _Error;
    vst1_f64(&_Error, _Fused);
    return _Error;
}
#endif // _FMP_USING_ARM64_INTRINSICS
// square(1x precision) -> 2x precision
// the result is exact when no internal overflow or underflow occurs
_NODISCARD inline _Fmp_t<double, 2> _Sqr_x2(const double _Xval) noexcept {
    // high part: the ordinarily rounded square
    const double _Prod0 = _Xval * _Xval;

    // low part: the rounding error of _Prod0, obtained with the best method available on this target
#if defined(_FMP_USING_X86_X64_INTRINSICS)
#ifdef __AVX2__
    const double _Error = _Sqr_error_x86_x64_fma(_Xval, _Prod0);
#else // ^^^ defined(__AVX2__) / !defined(__AVX2__) vvv
    // decide at run time; only the selected branch of the conditional is evaluated
    const bool _Definitely_have_fma = __isa_available >= __ISA_AVAILABLE_AVX2;
    const double _Error =
        _Definitely_have_fma ? _Sqr_error_x86_x64_fma(_Xval, _Prod0) : _Sqr_error_fallback(_Xval, _Prod0);
#endif // ^^^ !defined(__AVX2__) ^^^
#elif defined(_FMP_USING_ARM64_INTRINSICS)
    // https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=vs-2019#base-requirements
    // Both floating-point and NEON support are presumed to be present in hardware.
    const double _Error = _Sqr_error_arm64_neon(_Xval, _Prod0);
#else // ^^^ defined(_FMP_USING_ARM64_INTRINSICS) / not using intrinsics vvv
    const double _Error = _Sqr_error_fallback(_Xval, _Prod0);
#endif // ^^^ not using intrinsics ^^^

    return {_Prod0, _Error};
}
} // namespace _Float_multi_prec
#pragma float_control(pop)
#undef _FMP_USING_X86_X64_INTRINSICS
#undef _FMP_USING_ARM64_INTRINSICS
#define _FMP ::std::_Float_multi_prec::
// implements numerical algorithms for <complex>
namespace _Math_algorithms {
// TRANSITION: sqrt() isn't constexpr
// The two thresholds below are therefore spelled out as literals; the formulas they were derived from are:
// _Hypot_leg_huge = _Ty{0.5} * _STD sqrt((_STD numeric_limits<_Ty>::max)());
// _Hypot_leg_tiny = _STD sqrt(_Ty{2.0} * (_STD numeric_limits<_Ty>::min)() / _STD numeric_limits<_Ty>::epsilon());
// _Log_hypot uses them to decide whether squaring the larger leg directly is safe.

// primary template: carries the value computed for double
template <class _Ty>
struct _Hypot_leg_huge_helper {
static constexpr _Ty value{6.703903964971298e+153};
};
// float needs its own, much smaller threshold
template <>
struct _Hypot_leg_huge_helper<float> {
static constexpr float value{9.2233715e+18f};
};
// upper bound (exclusive) for the larger leg
template <class _Ty>
_INLINE_VAR constexpr _Ty _Hypot_leg_huge = _Hypot_leg_huge_helper<_Ty>::value;
// primary template: carries the value computed for double
template <class _Ty>
struct _Hypot_leg_tiny_helper {
static constexpr _Ty value{1.4156865331029228e-146};
};
// float needs its own, much larger threshold
template <>
struct _Hypot_leg_tiny_helper<float> {
static constexpr float value{4.440892e-16f};
};
// lower bound (exclusive) for the larger leg
template <class _Ty>
_INLINE_VAR constexpr _Ty _Hypot_leg_tiny = _Hypot_leg_tiny_helper<_Ty>::value;
// Returns _Xval * _Xval + _Yval * _Yval - 1, computed with double-width intermediates so that the
// subtraction of 1 does not suffer catastrophic cancellation when the norm is close to 1.
template <class _Ty>
_NODISCARD _Ty _Norm_minus_one(const _Ty _Xval, const _Ty _Yval) noexcept {
    // requires |_Xval| >= |_Yval| and 0.5 <= |_Xval| < 2^12
    // returns _Xval * _Xval + _Yval * _Yval - 1
    const _FMP _Fmp_t<_Ty, 2> _Xsqr = _FMP _Sqr_x2(_Xval); // _Xval^2 as {high, low}
    const _FMP _Fmp_t<_Ty, 2> _Ysqr = _FMP _Sqr_x2(_Yval); // _Yval^2 as {high, low}
    // subtract 1 from the larger square first, while the cancellation can still be captured in the low part
    const _FMP _Fmp_t<_Ty, 2> _Xsqr_m1 = _FMP _Add_small_x2(_Ty{-1.0}, _Xsqr);
    // qualified like the other helper calls so that the lookup does not depend on ADL
    return _FMP _Add_x1(_Xsqr_m1, _Ysqr);
}
// float overload: returns _Xval * _Xval + _Yval * _Yval - 1 by evaluating in double and rounding once at the end
_NODISCARD inline float _Norm_minus_one(const float _Xval, const float _Yval) noexcept {
const auto _Dx = static_cast<double>(_Xval);
const auto _Dy = static_cast<double>(_Yval);
// 1 is subtracted from _Dx^2 before _Dy^2 is added; keep this grouping
return static_cast<float>((_Dx * _Dx - 1.0) + _Dy * _Dy);
}
// TRANSITION: CRT log1p can be inaccurate for tiny inputs under directed rounding modes
// Returns log(1 + _Xval).
// - NaN input: returns a NaN (signaling NaNs raise FE_INVALID).
// - _Xval <= -0.5 or _Xval >= 2: plain log(1 + _Xval).
// - |_Xval| < epsilon: two-term series, so that the current rounding mode is honored.
// - otherwise: log of the rounded sum, corrected by the rounding error of the sum.
// noexcept: nothing called here throws, and the function is called from the noexcept _Log_hypot.
template <class _Ty>
_NODISCARD _Ty _Logp1(const _Ty _Xval) noexcept { // returns log(1 + _Xval)
    static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point");

    if (_Is_nan(_Xval)) { // NaN
        return _Xval + _Xval; // raise FE_INVALID if _Xval is a signaling NaN
    }

    if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is moderately accurate
        if (_Xval == (numeric_limits<_Ty>::max)()) { // avoid overflow
            return _STD log(_Xval);
        }

        return _STD log(_Ty{1.0} + _Xval);
    }

    const _Ty _Xabs = _Float_abs(_Xval);
    if (_Xabs < numeric_limits<_Ty>::epsilon()) { // zero or tiny
        if (_Xval == _Ty{0.0}) {
            return _Xval; // returned unchanged, which also keeps the sign of a zero
        }

        // honor rounding mode, raise FE_INEXACT
        return _Xval - _Ty{0.5} * _Xval * _Xval;
    }

    // compute log(1 + _Xval) with fixup for small _Xval
    // _Xp1._Val0 is the rounded sum and _Xp1._Val1 its rounding error; log(s + e) ~= log(s) + e / s
    const _FMP _Fmp_t<_Ty, 2> _Xp1 = _FMP _Add_small_x2(_Ty{1.0}, _Xval);
    return _STD log(_Xp1._Val0) + _Xp1._Val1 / _Xp1._Val0;
}
// Returns log(hypot(_Xval, _Yval)), i.e. the logarithm of the magnitude of the complex number (_Xval, _Yval).
// Three strategies are used depending on the larger leg:
// - the smaller leg is zero: log of the larger leg
// - the larger leg is strictly between _Hypot_leg_tiny and _Hypot_leg_huge: work on the squared norm directly,
//   switching to _Logp1(_Norm_minus_one(...)) when the norm is near 1
// - otherwise: rescale by a power of 2 first and add the exponent times ln(2) back at the end
template <class _Ty>
_NODISCARD _Ty _Log_hypot(const _Ty _Xval, const _Ty _Yval) noexcept { // returns log(hypot(_Xval, _Yval))
static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point");
if (!_Is_finite(_Xval) || !_Is_finite(_Yval)) { // Inf or NaN
// raise FE_INVALID and return NaN if at least one of them is a signaling NaN
if (_Is_signaling_nan(_Xval) || _Is_signaling_nan(_Yval)) {
return _Xval + _Yval;
}
// return +Inf if at least one of them is an infinity, even when the other is a quiet NaN
if (_Is_inf(_Xval)) {
return _Float_abs(_Xval);
}
if (_Is_inf(_Yval)) {
return _Float_abs(_Yval);
}
// at least one of them is a quiet NaN, and the other is not an infinity
return _Xval + _Yval;
}
// from here on both inputs are finite; only their magnitudes matter
_Ty _Av = _Float_abs(_Xval);
_Ty _Bv = _Float_abs(_Yval);
if (_Av < _Bv) { // ensure that _Bv <= _Av
_STD swap(_Av, _Bv);
}
if (_Bv == 0) {
// the magnitude is exactly _Av; this also covers _Av == 0, where the result is log(0)
return _STD log(_Av);
}
if (_Hypot_leg_tiny<_Ty> < _Av && _Av < _Hypot_leg_huge<_Ty>) { // no overflow or harmful underflow
// bounds of the squared-norm interval in which the cancellation-free path is taken
constexpr _Ty _Norm_small = _Ty{0.5};
constexpr _Ty _Norm_big = _Ty{3.0};
const _Ty _Bv_sqr = _Bv * _Bv;
if (_Av == _Ty{1.0}) { // correctly return +0 when _Av == 1 and _Bv * _Bv underflows
// _Norm_minus_one(_Av, _Bv) could return -0 under FE_DOWNWARD rounding mode
// with _Av == 1 the squared norm minus one is simply _Bv^2
return _Logp1(_Bv_sqr) * _Ty{0.5};
}
const _Ty _Norm = _Av * _Av + _Bv_sqr;
if (_Norm_small < _Norm && _Norm < _Norm_big) { // avoid catastrophic cancellation
// log(|z|) == log1p(|z|^2 - 1) / 2
return _Logp1(_Norm_minus_one(_Av, _Bv)) * _Ty{0.5};
} else {
// log(|z|) == log(|z|^2) / 2
return _STD log(_Norm) * _Ty{0.5};
}
} else { // use 1 1/2 precision to preserve bits
// _Cm + _Cl together approximate ln(2); _Cm is the leading part (a multiple of 2^-15), _Cl the small correction
constexpr _Ty _Cm = _Ty{22713.0L / 32768.0L};
constexpr _Ty _Cl = _Ty{1.4286068203094172321214581765680755e-6L};
// scale both legs by the same power of 2, chosen from the exponent of the larger leg
const int _Exponent = _STD ilogb(_Av);
const _Ty _Av_scaled = _STD scalbn(_Av, -_Exponent);
const _Ty _Bv_scaled = _STD scalbn(_Bv, -_Exponent);
const _Ty _Bv_scaled_sqr = _Bv_scaled * _Bv_scaled;
const _Ty _Norm_scaled = _Av_scaled * _Av_scaled + _Bv_scaled_sqr;
const _Ty _Real_shifted = _STD log(_Norm_scaled) * _Ty{0.5};
// undo the scaling: add _Exponent * ln(2), small correction term first
return (_Real_shifted + _Exponent * _Cl) + _Exponent * _Cm;
}
}
} // namespace _Math_algorithms
#undef _FMP
using _Dcomplex_value = _CSTD _C_double_complex;
using _Fcomplex_value = _CSTD _C_float_complex;
using _Lcomplex_value = _CSTD _C_ldouble_complex;
@ -66,8 +374,12 @@ public:
return (numeric_limits<_Ty>::max)();
}
// Returns numeric_limits<_Ty>::min() when that value is positive, otherwise 0.
// The guard presumably exists for non floating-point _Ty, where min() is not a small positive number.
// numeric_limits<_Ty>::min is wrapped in parentheses, presumably to avoid expansion of a function-like min macro.
static constexpr _Ty _Flt_norm_min() {
return (numeric_limits<_Ty>::min)() > 0 ? (numeric_limits<_Ty>::min)() : 0;
}
static _Ty _Abs(_Ty _Left) {
return static_cast<_Ty>(_Left < 0 ? -_Left : _Left);
return static_cast<_Ty>(_Signbit(_Left) ? -_Left : _Left);
}
static _Ty _Cosh(_Ty _Left, _Ty _Right) { // return cosh(_Left) * _Right
@ -75,7 +387,7 @@ public:
}
static _Ty _Copysign(_Ty _Magnitude, _Ty _Sign) {
return static_cast<_Ty>(_Sign < 0 ? -_Abs(_Magnitude) : _Abs(_Magnitude));
return static_cast<_Ty>(_Signbit(_Sign) ? -_Abs(_Magnitude) : _Abs(_Magnitude));
}
static short _Exp(_Ty* _Pleft, _Ty _Right, short _Exponent) { // compute exp(*_Pleft) * _Right * 2 ^ _Exponent
@ -106,7 +418,7 @@ public:
}
static bool _Signbit(_Ty _Left) {
return _Left < 0;
return (_STD signbit)(static_cast<double>(_Left));
}
static _Ty _Sinh(_Ty _Left, _Ty _Right) { // return sinh(_Left) * _Right
@ -200,6 +512,10 @@ public:
return (numeric_limits<long double>::max)();
}
// Returns numeric_limits<long double>::min(), the smallest positive normalized long double.
static constexpr _Ty _Flt_norm_min() {
return (numeric_limits<long double>::min)();
}
static _Ty _Abs(_Ty _Left) {
// testing _Left < 0 would be incorrect when _Left is -0.0
return _CSTD fabsl(_Left);
@ -340,6 +656,10 @@ public:
return (numeric_limits<double>::max)();
}
// Returns numeric_limits<double>::min(), the smallest positive normalized double.
static constexpr _Ty _Flt_norm_min() {
return (numeric_limits<double>::min)();
}
static _Ty _Abs(_Ty _Left) {
// testing _Left < 0 would be incorrect when _Left is -0.0
return _CSTD fabs(_Left);
@ -475,6 +795,10 @@ public:
return (numeric_limits<float>::max)();
}
// Returns numeric_limits<float>::min(), the smallest positive normalized float.
static constexpr _Ty _Flt_norm_min() {
return (numeric_limits<float>::min)();
}
static _Ty _Abs(_Ty _Left) {
// testing _Left < 0 would be incorrect when _Left is -0.0
return _CSTD fabsf(_Left);
@ -1524,10 +1848,13 @@ _NODISCARD complex<_Ty> exp(const complex<_Ty>& _Left) {
// FUNCTION TEMPLATE _Fabs
template <class _Ty>
_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale factor
_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // Used by sqrt(), return magnitude and scale factor.
// Returns a non-zero even integer in *_Pexp when _Left is finite
// and non-zero.
// Returns 0 in *_Pexp when _Left is zero, infinity, or NaN.
*_Pexp = 0;
_Ty _Av = real(_Left);
_Ty _Bv = imag(_Left);
_Ty _Av = _Ctraits<_Ty>::_Abs(_STD real(_Left));
_Ty _Bv = _Ctraits<_Ty>::_Abs(_STD imag(_Left));
if (_Ctraits<_Ty>::_Isinf(_Av) || _Ctraits<_Ty>::_Isinf(_Bv)) {
return _Ctraits<_Ty>::_Infv(); // at least one component is INF
@ -1536,13 +1863,8 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale
} else if (_Ctraits<_Ty>::_Isnan(_Bv)) {
return _Bv; // imaginary component is NaN
} else { // neither component is NaN or INF
_Av = _Ctraits<_Ty>::_Abs(_Av);
_Bv = _Ctraits<_Ty>::_Abs(_Bv);
if (_Av < _Bv) { // ensure that |_Bv| <= |_Av|
_Ty _Tmp = _Av;
_Av = _Bv;
_Bv = _Tmp;
_STD swap(_Av, _Bv);
}
if (_Av == 0) {
@ -1550,16 +1872,28 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale
}
if (1 <= _Av) {
*_Pexp = 2;
_Av = _Av * static_cast<_Ty>(0.25);
_Bv = _Bv * static_cast<_Ty>(0.25);
*_Pexp = 4;
_Av = _Av * static_cast<_Ty>(0.0625);
_Bv = _Bv * static_cast<_Ty>(0.0625);
} else {
*_Pexp = -2;
_Av = _Av * 4;
_Bv = _Bv * 4;
constexpr _Ty _Flt_eps = _Ctraits<_Ty>::_Flt_eps();
// TRANSITION, workaround for non floating point _Ty
constexpr _Ty _Leg_tiny = _Flt_eps == 0 ? _Ty{0} : 2 * _Ctraits<_Ty>::_Flt_norm_min() / _Flt_eps;
if (_Av < _Leg_tiny) {
constexpr int _Exponent = -2 * numeric_limits<_Ty>::digits;
*_Pexp = _Exponent;
_Av = _Ctraits<_Ty>::ldexp(_Av, -_Exponent);
_Bv = _Ctraits<_Ty>::ldexp(_Bv, -_Exponent);
} else {
*_Pexp = -2;
_Av = _Av * 4;
_Bv = _Bv * 4;
}
}
_Ty _Tmp = _Av - _Bv;
const _Ty _Tmp = _Av - _Bv;
if (_Tmp == _Av) {
return _Av; // _Bv unimportant
} else if (_Bv < _Tmp) { // use simple approximation
@ -1579,33 +1913,21 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale
}
// FUNCTION TEMPLATE log
// Returns log(|_Left|). The generic overload evaluates in double and converts the result back to _Ty.
template <class _Ty>
_NODISCARD _Ty _Log_abs(const complex<_Ty>& _Left) noexcept { // for double, long double, and non floating point types
    const auto _Re = static_cast<double>(_STD real(_Left));
    const auto _Im = static_cast<double>(_STD imag(_Left));
    return static_cast<_Ty>(_Math_algorithms::_Log_hypot(_Re, _Im));
}

// float overload: evaluates directly in float
_NODISCARD inline float _Log_abs(const complex<float>& _Left) noexcept {
    const float _Re = _STD real(_Left);
    const float _Im = _STD imag(_Left);
    return _Math_algorithms::_Log_hypot(_Re, _Im);
}
template <class _Ty>
_NODISCARD complex<_Ty> log(const complex<_Ty>& _Left) {
_Ty _Theta = _Ctraits<_Ty>::atan2(imag(_Left), real(_Left)); // get phase
if (_Ctraits<_Ty>::_Isnan(_Theta)) {
return complex<_Ty>(_Theta, _Theta); // real or imag is NaN
} else { // use 1 1/2 precision to preserve bits
constexpr _Ty _Cm = static_cast<_Ty>(22713.0L / 32768.0L);
constexpr _Ty _Cl = static_cast<_Ty>(1.4286068203094172321214581765680755e-6L);
int _Leftexp;
_Ty _Rho = _Fabs(_Left, &_Leftexp); // get magnitude and scale factor
_Ty _Leftn = static_cast<_Ty>(_Leftexp);
_Ty _Real;
if (_Rho == 0) {
_Real = -_Ctraits<_Ty>::_Infv(); // log(0) == -INF
} else if (_Ctraits<_Ty>::_Isinf(_Rho)) {
_Real = _Rho; // log(INF) == INF
} else {
_Real = static_cast<_Ty>(_Ctraits<_Ty>::log(_Rho)); // These casts are TRANSITION, DevCom-1093507
_Real += static_cast<_Ty>(_Leftn * _Cl);
_Real += static_cast<_Ty>(_Leftn * _Cm);
}
return complex<_Ty>(_Real, _Theta);
}
const _Ty _Log_abs_v = _STD _Log_abs(_Left); // get logarithm of magnitude
const _Ty _Theta = _Ctraits<_Ty>::atan2(_STD imag(_Left), _STD real(_Left)); // get phase
return complex<_Ty>(_Log_abs_v, _Theta);
}
// FUNCTION TEMPLATE pow
@ -1678,7 +2000,7 @@ _NODISCARD complex<_Ty> sqrt(const complex<_Ty>& _Left) {
return complex<_Ty>(_Ctraits<_Ty>::_Infv(), _Im); // (any, +/-Inf)
} else if (_Ctraits<_Ty>::_Isnan(_Im)) {
if (_Re < 0) {
return complex<_Ty>(_Im, _Re); // (-Inf, NaN)
return complex<_Ty>(_Ctraits<_Ty>::_Abs(_Im), _Ctraits<_Ty>::_Copysign(_Re, _Im)); // (-Inf, NaN)
} else {
return _Left; // (+Inf, NaN)
}

Просмотреть файл

@ -6131,16 +6131,20 @@ struct _Float_traits {
// traits for double and long double:
using type = unsigned long long;
static constexpr type _Sign_mask = 0x8000'0000'0000'0000ULL;
static constexpr type _Magnitude_mask = 0x7fff'ffff'ffff'ffffULL;
static constexpr type _Exponent_mask = 0x7ff0'0000'0000'0000ULL;
static constexpr type _Quiet_nan_mask = 0x0008'0000'0000'0000ULL;
};
// traits for float: masks over the 32-bit representation
template <>
struct _Float_traits<float> {
using type = unsigned int; // integer type used to hold the bits of a float
static constexpr type _Sign_mask = 0x8000'0000U; // top bit
static constexpr type _Magnitude_mask = 0x7fff'ffffU; // every bit except the sign bit
static constexpr type _Exponent_mask = 0x7f80'0000U; // exponent field; also the bit pattern compared against for infinity
static constexpr type _Quiet_nan_mask = 0x0040'0000U; // highest significand bit, tested to tell quiet NaNs from signaling NaNs
};
// FUNCTION TEMPLATE _Float_abs_bits
@ -6156,12 +6160,36 @@ _NODISCARD _CONSTEXPR_BIT_CAST _Ty _Float_abs(const _Ty _Xx) { // constexpr floa
return _Bit_cast<_Ty>(_Float_abs_bits(_Xx));
}
// FUNCTION TEMPLATE _Float_copysign
// Combines the magnitude bits of _Magnitude with the sign bit of _Sign, purely on the bit patterns.
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
_NODISCARD _CONSTEXPR_BIT_CAST _Ty _Float_copysign(const _Ty _Magnitude, const _Ty _Sign) { // constexpr copysign()
    using _Traits    = _Float_traits<_Ty>;
    using _Bits_type = typename _Traits::type;

    const auto _Sign_only      = _Bit_cast<_Bits_type>(_Sign) & _Traits::_Sign_mask;
    const auto _Magnitude_only = _Float_abs_bits(_Magnitude);
    return _Bit_cast<_Ty>(_Magnitude_only | _Sign_only);
}
// FUNCTION TEMPLATE _Is_nan
// A value is NaN when its magnitude bits are strictly greater than the exponent mask.
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_nan(const _Ty _Xx) { // constexpr isnan()
    const auto _Magnitude_bits = _Float_abs_bits(_Xx);
    return _Float_traits<_Ty>::_Exponent_mask < _Magnitude_bits;
}
// FUNCTION TEMPLATE _Is_signaling_nan
// TRANSITION, workaround x86 ABI
// On x86 ABI, floating point by-value arguments and return values are passed in 80-bit x87 registers.
// When the value is a 32-bit or 64-bit signaling NaN, the conversion to/from 80-bit raises FE_INVALID
// and turns it into a quiet NaN. This behavior is undesirable if we want to test for signaling NaNs.
// (This is why the argument is taken by reference.)
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_signaling_nan(const _Ty& _Xx) { // returns true if input is a signaling NaN
    using _Traits = _Float_traits<_Ty>;

    const auto _Magnitude_bits = _Float_abs_bits(_Xx);
    if (_Magnitude_bits <= _Traits::_Exponent_mask) { // not a NaN at all
        return false;
    }

    // a NaN is signaling when its quiet bit is clear
    return (_Magnitude_bits & _Traits::_Quiet_nan_mask) == 0;
}
// FUNCTION TEMPLATE _Is_inf
// A value is an infinity when its magnitude bits are exactly the exponent mask.
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_inf(const _Ty _Xx) { // constexpr isinf()
    const auto _Magnitude_bits = _Float_abs_bits(_Xx);
    return _Float_traits<_Ty>::_Exponent_mask == _Magnitude_bits;
}
// FUNCTION TEMPLATE _Is_finite
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_finite(const _Ty _Xx) { // constexpr isfinite()
@ -6177,7 +6205,6 @@ struct _Nontrivial_dummy_type {
_STL_INTERNAL_STATIC_ASSERT(!is_trivially_default_constructible_v<_Nontrivial_dummy_type>);
_STD_END
#undef _CONSTEXPR_BIT_CAST
#pragma pop_macro("new")
_STL_RESTORE_CLANG_WARNINGS
#pragma warning(pop)

Просмотреть файл

@ -0,0 +1,61 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once
#ifdef FP_CONFIG_PRESET
#if FP_CONFIG_PRESET == 3
#define FP_PRESET_FAST 1
#else // ^^^ FP_CONFIG_PRESET == 3 / FP_CONFIG_PRESET != 3 vvv
#define FP_PRESET_FAST 0
#endif // ^^^ FP_CONFIG_PRESET != 3 ^^^
#endif // defined(FP_CONFIG_PRESET)
#ifdef FP_CONTRACT_MODE
#ifdef __clang__
#if FP_CONTRACT_MODE == 0
#pragma STDC FP_CONTRACT OFF
#elif FP_CONTRACT_MODE == 1 // ^^^ no floating point contraction / standard floating point contraction vvv
#pragma STDC FP_CONTRACT ON
#elif FP_CONTRACT_MODE == 2 // ^^^ standard floating point contraction / fast floating point contraction vvv
#pragma STDC FP_CONTRACT ON
#else // ^^^ fast floating point contraction / invalid FP_CONTRACT_MODE vvv
#error invalid FP_CONTRACT_MODE
#endif // ^^^ invalid FP_CONTRACT_MODE ^^^
#else // ^^^ clang / MSVC vvv
#if FP_CONTRACT_MODE == 0
#pragma fp_contract(off)
#elif FP_CONTRACT_MODE == 1 // ^^^ no floating point contraction / standard floating point contraction vvv
#pragma fp_contract(on)
#elif FP_CONTRACT_MODE == 2 // ^^^ standard floating point contraction / fast floating point contraction vvv
#pragma fp_contract(on)
#else // ^^^ fast floating point contraction / invalid FP_CONTRACT_MODE vvv
#error invalid FP_CONTRACT_MODE
#endif // ^^^ invalid FP_CONTRACT_MODE ^^^
#endif // ^^^ MSVC ^^^
#endif // defined(FP_CONTRACT_MODE)
#include <cassert>
#include <float.h>
// Configures the floating point environment for a test program.
// When WITH_FP_ABRUPT_UNDERFLOW is non-zero, the constructor sets the denormal control (_MCW_DN) to
// flush mode (_DN_FLUSH) via _controlfp_s; otherwise it does nothing.
// The type is neither copyable nor assignable.
struct fenv_initializer_t {
    fenv_initializer_t() {
#if WITH_FP_ABRUPT_UNDERFLOW
        {
            // [[maybe_unused]]: result is only read by assert, which expands to nothing under NDEBUG
            [[maybe_unused]] const errno_t result = _controlfp_s(nullptr, _DN_FLUSH, _MCW_DN);
            assert(result == 0);
        }
#endif // WITH_FP_ABRUPT_UNDERFLOW
    }

    ~fenv_initializer_t() = default;

    fenv_initializer_t(const fenv_initializer_t&) = delete;
    fenv_initializer_t& operator=(const fenv_initializer_t&) = delete;
};

// namespace-scope instance: its constructor applies the settings during static initialization
const fenv_initializer_t fenv_initializer{};

Просмотреть файл

@ -161,6 +161,7 @@ tests\GH_000625_vector_bool_optimization
tests\GH_000685_condition_variable_any
tests\GH_000690_overaligned_function
tests\GH_000890_pow_template
tests\GH_000935_complex_numerical_accuracy
tests\GH_000940_missing_valarray_copy
tests\GH_001001_random_rejection_rounding
tests\GH_001010_filesystem_error_encoding

Просмотреть файл

@ -0,0 +1,4 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
RUNALL_INCLUDE ..\floating_point_model_matrix.lst

Просмотреть файл

@ -0,0 +1,241 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once
#include <cassert>
#include <cfenv>
#include <cmath>
#include <float.h>
#include <type_traits>
#include <xutility>
namespace fputil {
// unsigned integer type used to hold the bit pattern of floating point type T
template <typename T>
using float_bits_t = typename _STD _Float_traits<T>::type;
// all bits except the sign bit
template <typename T>
_INLINE_VAR constexpr float_bits_t<T> magnitude_mask_v = _STD _Float_traits<T>::_Magnitude_mask;
// the exponent field
template <typename T>
_INLINE_VAR constexpr float_bits_t<T> exponent_mask_v = _STD _Float_traits<T>::_Exponent_mask;
// the significand field: magnitude bits that are not exponent bits
template <typename T>
_INLINE_VAR constexpr float_bits_t<T> significand_mask_v = magnitude_mask_v<T> & ~exponent_mask_v<T>;
// the sign bit
template <typename T>
_INLINE_VAR constexpr float_bits_t<T> sign_mask_v = _STD _Float_traits<T>::_Sign_mask;
// bit pattern of the smallest positive normal value: one past the all-ones significand
template <typename T>
_INLINE_VAR constexpr float_bits_t<T> norm_min_bits_v = significand_mask_v<T> + 1U;
// bit pattern of the largest finite value: one below the infinity pattern
template <typename T>
_INLINE_VAR constexpr float_bits_t<T> norm_max_bits_v = exponent_mask_v<T> - 1U;
// bit pattern of positive infinity
template <typename T>
_INLINE_VAR constexpr float_bits_t<T> infinity_bits_v = exponent_mask_v<T>;
// not affected by abrupt underflow
// true only when every magnitude bit of x is clear, i.e. x is +0.0 or -0.0
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
constexpr bool iszero(const T& x) {
    const auto magnitude_bits = _STD _Float_abs_bits(x);
    return magnitude_bits == 0;
}
// not affected by /fp:fast
// reads the sign bit straight from the bit pattern of x
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
constexpr bool signbit(const T& x) {
    return (std::_Bit_cast<float_bits_t<T>>(x) & sign_mask_v<T>) != 0;
}
// Rounding modes, with enumerator values equal to the corresponding <cfenv> FE_* macros so that they
// can be passed to std::fesetround after a static_cast.
enum class rounding_mode {
    to_nearest_ties_even = FE_TONEAREST,
    toward_zero          = FE_TOWARDZERO,
    toward_positive      = FE_UPWARD,
    toward_negative      = FE_DOWNWARD,
};

// Returns true for toward_zero, toward_positive and toward_negative; false for to_nearest_ties_even.
// Any other value trips the assertion (and yields false when assertions are disabled).
// Declared inline because this header may be included from several translation units.
inline bool is_directed_rounding_mode(const rounding_mode mode) {
    switch (mode) {
    case rounding_mode::to_nearest_ties_even:
        return false;

    case rounding_mode::toward_zero:
    case rounding_mode::toward_positive:
    case rounding_mode::toward_negative:
        return true;

    default:
        assert(false);
        return false;
    }
}
#if TEST_FP_ROUNDING

#ifdef __clang__
// TRANSITION, should be #pragma STDC FENV_ACCESS ON
#else // ^^^ clang / MSVC vvv
// TRANSITION, VSO-923474 -- should be #pragma STDC FENV_ACCESS ON
#pragma fenv_access(on)
#endif // ^^^ MSVC ^^^

// every rounding mode is exercised
constexpr rounding_mode all_rounding_modes[] = {
    rounding_mode::to_nearest_ties_even,
    rounding_mode::toward_zero,
    rounding_mode::toward_positive,
    rounding_mode::toward_negative,
};

// Scope guard: remembers the current rounding mode, switches to the requested one, and switches back
// when destroyed.
class rounding_guard {
public:
    explicit rounding_guard(const rounding_mode mode) : saved_mode{static_cast<rounding_mode>(std::fegetround())} {
        const int set_status = std::fesetround(static_cast<int>(mode));
        assert(set_status == 0);
    }

    rounding_guard(const rounding_guard&) = delete;
    rounding_guard& operator=(const rounding_guard&) = delete;

    ~rounding_guard() {
        const int restore_status = std::fesetround(static_cast<int>(saved_mode));
        assert(restore_status == 0);
    }

private:
    rounding_mode saved_mode;
};

#else // ^^^ alternative rounding modes / default rounding mode only vvv

// only the default rounding mode is exercised
constexpr rounding_mode all_rounding_modes[] = {rounding_mode::to_nearest_ties_even};

// Same interface as the real guard, but the requested mode is ignored and nothing is changed.
class rounding_guard {
public:
    explicit rounding_guard(const rounding_mode) {}

    rounding_guard(const rounding_guard&) = delete;
    rounding_guard& operator=(const rounding_guard&) = delete;

    ~rounding_guard() = default;
};

#endif // ^^^ default rounding mode only ^^^
// compares whether two floating point values are equal
// all NaNs are equal, +0.0 and -0.0 are not equal
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
bool precise_equal(const T& actual, const T& expected) {
if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) {
return _STD _Is_nan(actual) == _STD _Is_nan(expected);
} else {
return actual == expected && fputil::signbit(actual) == fputil::signbit(expected);
}
}
namespace detail {
// 0x80...00 = zero, 0x80...01 = numeric_limits<T>::denorm_min(), 0x7f...ff = -numeric_limits<T>::denorm_min()
// Maps x to an unsigned integer centered on the sign mask: the magnitude bits are added for values
// without the sign bit and subtracted for values with it.
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
float_bits_t<T> offset_representation(const T& x) {
    const float_bits_t<T> magnitude = _STD _Float_abs_bits(x);
    if (fputil::signbit(x)) {
        return sign_mask_v<T> - magnitude;
    }

    return sign_mask_v<T> + magnitude;
}
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
float_bits_t<T> is_offset_value_subnormal_or_zero(const float_bits_t<T> offset_value) {
constexpr float_bits_t<T> positive_norm_min_offset = sign_mask_v<T> + norm_min_bits_v<T>;
constexpr float_bits_t<T> negative_norm_min_offset = sign_mask_v<T> - norm_min_bits_v<T>;
return negative_norm_min_offset < offset_value && offset_value < positive_norm_min_offset;
}
// number of ulps above zero, if we count [0, numeric_limits<T>::min()) as 1 ulp
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
double abrupt_underflow_ulp(const float_bits_t<T> offset_value) {
using bits_type = float_bits_t<T>;
constexpr bits_type offset_positive_norm_min = sign_mask_v<T> + norm_min_bits_v<T>;
constexpr bits_type offset_negative_norm_min = sign_mask_v<T> - norm_min_bits_v<T>;
if (offset_value >= offset_positive_norm_min) {
return 1.0 + (offset_value - offset_positive_norm_min);
} else if (offset_value <= offset_negative_norm_min) {
return -1.0 - (offset_negative_norm_min - offset_value);
} else if (offset_value >= sign_mask_v<T>) {
return static_cast<double>(offset_value - sign_mask_v<T>) / norm_min_bits_v<T>;
} else {
return -static_cast<double>(sign_mask_v<T> - offset_value) / norm_min_bits_v<T>;
}
}
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
bool is_within_ulp_tolerance(const T& actual, const T& expected, const int ulp_tolerance) {
if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) {
return _STD _Is_nan(actual) == _STD _Is_nan(expected);
}
if (_STD _Is_inf(expected)) {
return actual == expected;
}
if (fputil::signbit(actual) != fputil::signbit(expected)) {
return false;
}
using bits_type = float_bits_t<T>;
// compute ulp difference
const bits_type actual_offset = detail::offset_representation(actual);
const bits_type expected_offset = detail::offset_representation(expected);
const bits_type ulp_diff =
actual_offset < expected_offset ? expected_offset - actual_offset : actual_offset - expected_offset;
if (ulp_diff <= static_cast<unsigned int>(ulp_tolerance) && ulp_tolerance >= 0) {
return true;
}
#if WITH_FP_ABRUPT_UNDERFLOW
// handle abrupt underflow
if (detail::is_offset_value_subnormal_or_zero<T>(expected_offset)
|| detail::is_offset_value_subnormal_or_zero<T>(actual_offset)) {
const double adjusted_actual_ulp = detail::abrupt_underflow_ulp<T>(actual_offset);
const double adjusted_expected_ulp = detail::abrupt_underflow_ulp<T>(expected_offset);
const double adjusted_ulp_diff = std::abs(adjusted_actual_ulp - adjusted_expected_ulp);
if (adjusted_ulp_diff <= ulp_tolerance) {
return true;
}
}
#endif // WITH_FP_ABRUPT_UNDERFLOW
return false;
}
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
bool is_within_absolute_tolerance(const T& actual, const T& expected, const double absolute_tolerance) {
return _STD _Is_finite(actual) && _STD _Is_finite(expected)
&& std::abs(actual - expected) <= absolute_tolerance;
}
} // namespace detail
// returns whether floating point result is nearly equal to the expected value
// The checks are tried in order and the first success wins: exact equality (see precise_equal),
// then the ulp tolerance (skipped unless ulp_tolerance > 0), then the absolute tolerance
// (skipped unless absolute_tolerance > 0).
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
bool near_equal(
    const T& actual, const T& expected, const int ulp_tolerance = 1, const double absolute_tolerance = 0) {
    return precise_equal(actual, expected)
        || (ulp_tolerance > 0 && detail::is_within_ulp_tolerance(actual, expected, ulp_tolerance))
        || (absolute_tolerance > 0 && detail::is_within_absolute_tolerance(actual, expected, absolute_tolerance));
}
} // namespace fputil

Просмотреть файл

@ -0,0 +1,276 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once
#include <complex>
#include "floating_point_utils.hpp"
#include "test.hpp"
// Multiples of pi used as expected phase angles in the test tables.
// Each literal is written with double precision and converted to T by brace initialization.
// pi / 4
template <typename T>
constexpr T pi_over_4_v = T{0.7853981633974483};
// pi / 2
template <typename T>
constexpr T pi_over_2_v = T{1.5707963267948966};
// 3 * pi / 4
template <typename T>
constexpr T pi_3_over_4_v = T{2.356194490192345};
// pi
template <typename T>
constexpr T pi_v = T{3.141592653589793};
// Test vectors for log(complex<double>).
// Each entry is {input z, expected log(z)[, result exactness]}.
// The optional third initializer appears to be {real, imag}: a true flag makes the test driver compare that
// component of the result with an ULP tolerance of 0 instead of the mode-dependent tolerance.
// NOTE(review): entries that omit the third initializer rely on the defaults of complex_unary_test_case, which is
// declared outside this file -- presumably "not exact"; confirm against its definition.
// Values are written as hexadecimal floating literals so that every bit of the input and the expected result is
// spelled out explicitly.
constexpr complex_unary_test_case<double> log_double_cases[] = {
// normal cases
// (the same 3:4 ratio of real to imaginary part in all four quadrants, at three different magnitudes)
{{+0x1.8p+0, +0x1p+1}, {+0x1.d5240f0e0e078p-1, +0x1.dac670561bb4fp-1}},
{{+0x1.8p+0, -0x1p+1}, {+0x1.d5240f0e0e078p-1, -0x1.dac670561bb4fp-1}},
{{-0x1.8p+0, +0x1p+1}, {+0x1.d5240f0e0e078p-1, +0x1.1b6e192ebbe44p+1}},
{{-0x1.8p+0, -0x1p+1}, {+0x1.d5240f0e0e078p-1, -0x1.1b6e192ebbe44p+1}},
{{+0x1.8p-1, +0x1p+0}, {+0x1.c8ff7c79a9a22p-3, +0x1.dac670561bb4fp-1}},
{{+0x1.8p-1, -0x1p+0}, {+0x1.c8ff7c79a9a22p-3, -0x1.dac670561bb4fp-1}},
{{-0x1.8p-1, +0x1p+0}, {+0x1.c8ff7c79a9a22p-3, +0x1.1b6e192ebbe44p+1}},
{{-0x1.8p-1, -0x1p+0}, {+0x1.c8ff7c79a9a22p-3, -0x1.1b6e192ebbe44p+1}},
{{+0x1.8p-2, +0x1p-1}, {-0x1.e148a1a2726cep-2, +0x1.dac670561bb4fp-1}},
{{+0x1.8p-2, -0x1p-1}, {-0x1.e148a1a2726cep-2, -0x1.dac670561bb4fp-1}},
{{-0x1.8p-2, +0x1p-1}, {-0x1.e148a1a2726cep-2, +0x1.1b6e192ebbe44p+1}},
{{-0x1.8p-2, -0x1p-1}, {-0x1.e148a1a2726cep-2, -0x1.1b6e192ebbe44p+1}},
// special cases
// (inputs on the unit circle along the axes: the real part of the result is expected to be exactly zero)
{{+1.0, +0.0}, {0.0, +0.0}, {true, true}},
{{+1.0, -0.0}, {0.0, -0.0}, {true, true}},
{{+0.0, +1.0}, {0.0, +pi_over_2_v<double>}, {true, false}},
{{+0.0, -1.0}, {0.0, -pi_over_2_v<double>}, {true, false}},
{{-0.0, +1.0}, {0.0, +pi_over_2_v<double>}, {true, false}},
{{-0.0, -1.0}, {0.0, -pi_over_2_v<double>}, {true, false}},
{{-1.0, +0.0}, {0.0, +pi_v<double>}, {true, false}},
{{-1.0, -0.0}, {0.0, -pi_v<double>}, {true, false}},
// zero, infinity, and NaN inputs are only exercised when FP_PRESET_FAST is not set
#if !FP_PRESET_FAST
{{+0.0, +0.0}, {-double_inf, +0.0}, {true, true}},
{{+0.0, -0.0}, {-double_inf, -0.0}, {true, true}},
{{-0.0, +0.0}, {-double_inf, +pi_v<double>}, {true, false}},
{{-0.0, -0.0}, {-double_inf, -pi_v<double>}, {true, false}},
{{+double_inf, +0.0}, {+double_inf, +0.0}, {true, true}},
{{+double_inf, -0.0}, {+double_inf, -0.0}, {true, true}},
{{+double_inf, +1.0}, {+double_inf, +0.0}, {true, true}},
{{+double_inf, -1.0}, {+double_inf, -0.0}, {true, true}},
{{+double_inf, +double_inf}, {+double_inf, +pi_over_4_v<double>}, {true, false}},
{{+double_inf, -double_inf}, {+double_inf, -pi_over_4_v<double>}, {true, false}},
{{+1.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
{{+1.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
{{+0.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
{{+0.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
{{-0.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
{{-0.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
{{-1.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
{{-1.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
{{-double_inf, +double_inf}, {+double_inf, +pi_3_over_4_v<double>}, {true, false}},
{{-double_inf, -double_inf}, {+double_inf, -pi_3_over_4_v<double>}, {true, false}},
{{-double_inf, +1.0}, {+double_inf, +pi_v<double>}, {true, false}},
{{-double_inf, -1.0}, {+double_inf, -pi_v<double>}, {true, false}},
{{-double_inf, +0.0}, {+double_inf, +pi_v<double>}, {true, false}},
{{-double_inf, -0.0}, {+double_inf, -pi_v<double>}, {true, false}},
{{+double_inf, double_nan}, {+double_inf, double_nan}, {true, true}},
{{-double_inf, double_nan}, {+double_inf, double_nan}, {true, true}},
{{double_nan, +double_inf}, {+double_inf, double_nan}, {true, true}},
{{double_nan, -double_inf}, {+double_inf, double_nan}, {true, true}},
{{double_nan, +0.0}, {double_nan, double_nan}, {true, true}},
{{+0.0, double_nan}, {double_nan, double_nan}, {true, true}},
{{double_nan, double_nan}, {double_nan, double_nan}, {true, true}},
#endif // !FP_PRESET_FAST
// abs(z) overflows
// (0x1.fffffffffffffp+1023 is the largest finite double)
{{+0x1.fffffffffffffp+1023, +0x1.fffffffffffffp+1023}, {+0x1.63108c75a1936p+9, +0x1.921fb54442d18p-1}},
{{-0x1.bb67ae8584caap+1023, +0x1.0000000000000p+1023}, {+0x1.62e42fefa39efp+9, +0x1.4f1a6c638d03fp+1}},
{{+0x1.fffffffffffffp+1023, -0x0.0000000000001p-1022}, {+0x1.62e42fefa39efp+9, -0x0.0000000000000p-1022}},
// norm(z) overflows
{{-0x1.4e718d7d7625ap+664, -0x1.4e718d7d7625ap+665}, {+0x1.cd525d6474bb8p+8, -0x1.0468a8ace4df6p+1}},
{{+0x1.ca3d8e6d80cbbp+511, -0x1.57ae2ad22098cp+511}, {+0x1.6300e9ed15a44p+8, -0x1.4978fa3269ee1p-1}},
// cases that need subnormal inputs or results are skipped when WITH_FP_ABRUPT_UNDERFLOW is set
#if !WITH_FP_ABRUPT_UNDERFLOW
// abs(z) underflows
// (0x0.0000000000001p-1022 is the smallest positive subnormal double)
{{-0x0.0000000000001p-1022, +0x0.0000000000001p-1022}, {-0x1.740bf7c0d927cp+9, +0x1.2d97c7f3321d2p+1}},
{{+0x0.0000000000001p-1022, +0x0.8000000000000p-1022}, {-0x1.628b76e3a7b61p+9, +0x1.921fb54442d16p+0}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
// abs(z) close to underflow
{{+0x1.4p-1022, +0x1p-1022}, {-0x1.61f684c577299p+9, +0x1.5977a5103ea92p-1}},
// norm(z) underflows
{{+0x1.87e92154ef7acp-664, -0x1.87e92154ef7acp-665}, {-0x1.cbb65944f5e2bp+8, -0x1.dac670561bb4fp-2}},
{{-0x1.9be34ac46b18fp-513, -0x1.1297872d9cbb5p-512}, {-0x1.62991d5d62a5ep+8, -0x1.1b6e192ebbe44p+1}},
// z close to 1
// (real parts one ULP above and one ULP below 1.0)
{{+0x1.0000000000001p+0, -0.0}, {+0x1.fffffffffffffp-53, -0.0}, {false, true}},
{{+0x1.fffffffffffffp-1, +0.0}, {-0x1.0000000000000p-53, +0.0}, {false, true}},
#if !WITH_FP_ABRUPT_UNDERFLOW
{{+0x1.0000000000001p+0, -0x0.0000000000001p-1022}, {+0x1.fffffffffffffp-53, -0x0.0000000000001p-1022}},
{{+0x1.0000000000000p+0, +0x0.0000000000001p-1022}, {+0x0.0000000000000p-1022, +0x0.0000000000001p-1022}},
{{+0x1.fffffffffffffp-1, -0x0.0000000000001p-1022}, {-0x1.0000000000000p-53, -0x0.0000000000001p-1022}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
{{+0x1.0000000000001p+0, +0x1p-1022}, {+0x1.fffffffffffffp-53, +0x0.fffffffffffffp-1022}},
{{+0x1.0000000000000p+0, -0x1p-1022}, {+0x0.0000000000000p-1022, -0x1.0000000000000p-1022}},
{{+0x1.fffffffffffffp-1, +0x1p-1022}, {-0x1.0000000000000p-53, +0x1.0000000000001p-1022}},
{{+0x1.0000000000001p+0, -0x1p-52}, {+0x1.0000000000000p-52, -0x1.ffffffffffffep-53}},
{{+0x1.0000000000000p+0, +0x1p-52}, {+0x1.0000000000000p-105, +0x1.0000000000000p-52}},
{{+0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.0000000000000p-52}},
{{+0x1.fffffffffffffp-1, +0x1p-26}, {+0x1.0000000000000p-107, +0x1.0000000000000p-26}},
// z close to -1, i, or -i
{{-0x1.0000000000001p+0, -0x1p-52}, {+0x1.0000000000000p-52, -0x1.921fb54442d18p+1}},
{{-0x1.0000000000000p+0, +0x1p-52}, {+0x1.0000000000000p-105, +0x1.921fb54442d18p+1}},
{{-0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.921fb54442d18p+1}},
{{+0x1p-52, +0x1.0000000000001p+0}, {+0x1.0000000000000p-52, +0x1.921fb54442d17p+0}},
{{-0x1p-52, +0x1.0000000000000p+0}, {+0x1.0000000000000p-105, +0x1.921fb54442d19p+0}},
{{+0x1p-52, +0x1.fffffffffffffp-1}, {-0x1.ffffffffffffep-54, +0x1.921fb54442d17p+0}},
{{-0x1p-52, -0x1.0000000000001p+0}, {+0x1.0000000000000p-52, -0x1.921fb54442d19p+0}},
{{+0x1p-52, -0x1.0000000000000p+0}, {+0x1.0000000000000p-105, -0x1.921fb54442d17p+0}},
{{-0x1p-52, -0x1.fffffffffffffp-1}, {-0x1.ffffffffffffep-54, -0x1.921fb54442d19p+0}},
// abs(z) close to 1
{{+0x1.6a09e667f3bccp-1, +0x1.6a09e667f3bccp-1}, {-0x1.98d4d0da05571p-54, +0x1.921fb54442d18p-1}},
{{+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}, {+0x1.3b3efbf5e2229p-54, -0x1.921fb54442d18p-1}},
{{-0x1.3333333333333p-1, -0x1.999999999999ap-1}, {+0x1.999999999999ap-56, -0x1.1b6e192ebbe44p+1}},
{{-0x1.3333333333333p-1, +0x1.9999999999999p-1}, {-0x1.3333333333333p-54, +0x1.1b6e192ebbe44p+1}},
{{+0x1.69fbe76c8b439p-1, +0x1.69fbe76c8b439p-1}, {-0x1.3cb7c059d6699p-13, +0x1.921fb54442d18p-1}},
{{-0x1.d89d89d89d89ep-1, +0x1.89d89d89d89d6p-2}, {-0x1.3b13b13b13b0cp-57, +0x1.5f97315254857p+1}},
// control flow edge cases
// NOTE(review): these inputs presumably straddle branch thresholds inside the log implementation, which is not
// visible from this file -- confirm against <complex> before changing any of them.
{{+0x1p-1, +0x1.fffffffffffffp-2}, {-0x1.62e42fefa39f0p-2, +0x1.921fb54442d18p-1}},
{{+0x1p-1, +0x1.0000000000000p-1}, {-0x1.62e42fefa39efp-2, +0x1.921fb54442d18p-1}},
{{+0x1p-1, +0x1.0000000000001p-1}, {-0x1.62e42fefa39edp-2, +0x1.921fb54442d19p-1}},
{{+0x1p-1, +0x1.a887293fd6f33p+0}, {+0x1.193ea7aad0309p-1, +0x1.4727f6d4d118cp+0}},
{{+0x1p-1, +0x1.a887293fd6f34p+0}, {+0x1.193ea7aad030ap-1, +0x1.4727f6d4d118dp+0}},
{{+0x1p-1, +0x1.a887293fd6f35p+0}, {+0x1.193ea7aad030cp-1, +0x1.4727f6d4d118dp+0}},
{{+6.703903964971297e+153, +6e+153}, {+0x1.627e0d1e7a85dp+8, +0x1.75c8a07421461p-1}},
{{+6.703903964971298e+153, +6e+153}, {+0x1.627e0d1e7a85dp+8, +0x1.75c8a07421461p-1}},
{{+1e-154, +1.4156865331029228e-146}, {-0x1.4fd46e5c84953p+8, +0x1.921fb525ec2fcp+0}},
{{+1e-154, +1.415686533102923e-146}, {-0x1.4fd46e5c84953p+8, +0x1.921fb525ec2fcp+0}},
};
// Test vectors for log(complex<float>); the single-precision counterpart of the double table, section by section.
// Each entry is {input z, expected log(z)[, result exactness]}.
// The optional third initializer appears to be {real, imag}: a true flag makes the test driver compare that
// component of the result with an ULP tolerance of 0 instead of the mode-dependent tolerance.
// NOTE(review): entries that omit the third initializer rely on the defaults of complex_unary_test_case, which is
// declared outside this file -- presumably "not exact"; confirm against its definition.
constexpr complex_unary_test_case<float> log_float_cases[] = {
// normal cases
// (the same 3:4 ratio of real to imaginary part in all four quadrants, at three different magnitudes)
{{+0x1.8p+0F, +0x1p+1F}, {+0x1.d52410p-1F, +0x1.dac670p-1F}},
{{+0x1.8p+0F, -0x1p+1F}, {+0x1.d52410p-1F, -0x1.dac670p-1F}},
{{-0x1.8p+0F, +0x1p+1F}, {+0x1.d52410p-1F, +0x1.1b6e1ap+1F}},
{{-0x1.8p+0F, -0x1p+1F}, {+0x1.d52410p-1F, -0x1.1b6e1ap+1F}},
{{+0x1.8p-1F, +0x1p+0F}, {+0x1.c8ff7cp-3F, +0x1.dac670p-1F}},
{{+0x1.8p-1F, -0x1p+0F}, {+0x1.c8ff7cp-3F, -0x1.dac670p-1F}},
{{-0x1.8p-1F, +0x1p+0F}, {+0x1.c8ff7cp-3F, +0x1.1b6e1ap+1F}},
{{-0x1.8p-1F, -0x1p+0F}, {+0x1.c8ff7cp-3F, -0x1.1b6e1ap+1F}},
{{+0x1.8p-2F, +0x1p-1F}, {-0x1.e148a2p-2F, +0x1.dac670p-1F}},
{{+0x1.8p-2F, -0x1p-1F}, {-0x1.e148a2p-2F, -0x1.dac670p-1F}},
{{-0x1.8p-2F, +0x1p-1F}, {-0x1.e148a2p-2F, +0x1.1b6e1ap+1F}},
{{-0x1.8p-2F, -0x1p-1F}, {-0x1.e148a2p-2F, -0x1.1b6e1ap+1F}},
// special cases
// (inputs on the unit circle along the axes: the real part of the result is expected to be exactly zero)
{{+1.0F, +0.0F}, {0.0F, +0.0F}, {true, true}},
{{+1.0F, -0.0F}, {0.0F, -0.0F}, {true, true}},
{{+0.0F, +1.0F}, {0.0F, +pi_over_2_v<float>}, {true, false}},
{{+0.0F, -1.0F}, {0.0F, -pi_over_2_v<float>}, {true, false}},
{{-0.0F, +1.0F}, {0.0F, +pi_over_2_v<float>}, {true, false}},
{{-0.0F, -1.0F}, {0.0F, -pi_over_2_v<float>}, {true, false}},
{{-1.0F, +0.0F}, {0.0F, +pi_v<float>}, {true, false}},
{{-1.0F, -0.0F}, {0.0F, -pi_v<float>}, {true, false}},
// zero, infinity, and NaN inputs are only exercised when FP_PRESET_FAST is not set
#if !FP_PRESET_FAST
{{+0.0F, +0.0F}, {-float_inf, +0.0F}, {true, true}},
{{+0.0F, -0.0F}, {-float_inf, -0.0F}, {true, true}},
{{-0.0F, +0.0F}, {-float_inf, +pi_v<float>}, {true, false}},
{{-0.0F, -0.0F}, {-float_inf, -pi_v<float>}, {true, false}},
{{+float_inf, +0.0F}, {+float_inf, +0.0F}, {true, true}},
{{+float_inf, -0.0F}, {+float_inf, -0.0F}, {true, true}},
{{+float_inf, +1.0F}, {+float_inf, +0.0F}, {true, true}},
{{+float_inf, -1.0F}, {+float_inf, -0.0F}, {true, true}},
{{+float_inf, +float_inf}, {+float_inf, +pi_over_4_v<float>}, {true, false}},
{{+float_inf, -float_inf}, {+float_inf, -pi_over_4_v<float>}, {true, false}},
{{+1.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
{{+1.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
{{+0.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
{{+0.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
{{-0.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
{{-0.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
{{-1.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
{{-1.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
{{-float_inf, +float_inf}, {+float_inf, +pi_3_over_4_v<float>}, {true, false}},
{{-float_inf, -float_inf}, {+float_inf, -pi_3_over_4_v<float>}, {true, false}},
{{-float_inf, +1.0F}, {+float_inf, +pi_v<float>}, {true, false}},
{{-float_inf, -1.0F}, {+float_inf, -pi_v<float>}, {true, false}},
{{-float_inf, +0.0F}, {+float_inf, +pi_v<float>}, {true, false}},
{{-float_inf, -0.0F}, {+float_inf, -pi_v<float>}, {true, false}},
{{+float_inf, float_nan}, {+float_inf, float_nan}, {true, true}},
{{-float_inf, float_nan}, {+float_inf, float_nan}, {true, true}},
{{float_nan, +float_inf}, {+float_inf, float_nan}, {true, true}},
{{float_nan, -float_inf}, {+float_inf, float_nan}, {true, true}},
{{float_nan, +0.0F}, {float_nan, float_nan}, {true, true}},
{{+0.0F, float_nan}, {float_nan, float_nan}, {true, true}},
{{float_nan, float_nan}, {float_nan, float_nan}, {true, true}},
#endif // !FP_PRESET_FAST
// abs(z) overflows
// (0x1.fffffep+127F is the largest finite float)
{{+0x1.fffffep+127F, +0x1.fffffep+127F}, {+0x1.644714p+6F, +0x1.921fb6p-1F}},
{{-0x1.bb67aep+127F, +0x1.000000p+127F}, {+0x1.62e430p+6F, +0x1.4f1a6cp+1F}},
{{+0x1.fffffep+127F, -0x0.000002p-126F}, {+0x1.62e430p+6F, -0x0.000000p-126F}},
// norm(z) overflows
{{-0x1.08b2a2p+83F, -0x1.08b2a2p+84F}, {+0x1.d2f46cp+5F, -0x1.0468a8p+1F}},
{{+0x1.bc16d6p+63F, -0x1.4d1120p+63F}, {+0x1.6389c2p+5F, -0x1.4978fap-1F}},
// cases that need subnormal inputs or results are skipped when WITH_FP_ABRUPT_UNDERFLOW is set
#if !WITH_FP_ABRUPT_UNDERFLOW
// abs(z) underflows
// (0x0.000002p-126F is the smallest positive subnormal float)
{{-0x0.000002p-126F, +0x0.000002p-126F}, {-0x1.9bbabcp+6F, +0x1.2d97c8p+1F}},
{{+0x0.000002p-126F, +0x0.800000p-126F}, {-0x1.601e68p+6F, +0x1.921fb2p+0F}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
// abs(z) close to underflow
{{+0x1.4p-126F, +0x1p-126F}, {-0x1.5b76d6p+6F, +0x1.5977a6p-1F}},
// norm(z) underflows
{{+0x1.ef2d10p-83F, -0x1.ef2d10p-84F}, {-0x1.c6144ap+5F, -0x1.dac670p-2F}},
{{-0x1.622d70p-61F, -0x1.d83c94p-61F}, {-0x1.4b9280p+5F, -0x1.1b6e1ap+1F}},
// z close to 1
// (real parts one ULP above and one ULP below 1.0F)
{{+0x1.000002p+0F, -0.0F}, {+0x1.fffffep-24F, -0.0F}, {false, true}},
{{+0x1.fffffep-1F, +0.0F}, {-0x1.000000p-24F, +0.0F}, {false, true}},
#if !WITH_FP_ABRUPT_UNDERFLOW
{{+0x1.000002p+0F, -0x0.000002p-126F}, {+0x1.fffffep-24F, -0x0.000002p-126F}},
{{+0x1.000000p+0F, +0x0.000002p-126F}, {+0x0.000000p-126F, +0x0.000002p-126F}},
{{+0x1.fffffep-1F, -0x0.000002p-126F}, {-0x1.000000p-24F, -0x0.000002p-126F}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
{{+0x1.000002p+0F, +0x1.000000p-126F}, {+0x1.fffffep-24F, +0x0.fffffep-126F}},
{{+0x1.000000p+0F, -0x1.000000p-126F}, {+0x0.000000p+0F, -0x1.000000p-126F}},
{{+0x1.fffffep-1F, +0x1.000000p-126F}, {-0x1.000000p-24F, +0x1.000002p-126F}},
{{+0x1.000002p+0F, -0x1.000000p-23F}, {+0x1.000000p-23F, -0x1.fffffcp-24F}},
{{+0x1.000000p+0F, +0x1.000000p-23F}, {+0x1.000000p-47F, +0x1.000000p-23F}},
{{+0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.000000p-23F}},
{{+0x1.fffffep-1F, +0x1.6a09e6p-12F}, {-0x1.302ae0p-52F, +0x1.6a09e6p-12F}},
// z close to -1, i, or -i
{{-0x1.000002p+0F, -0x1.000000p-23F}, {+0x1.000000p-23F, -0x1.921fb4p+1F}},
{{-0x1.000000p+0F, +0x1.000000p-23F}, {+0x1.000000p-47F, +0x1.921fb4p+1F}},
{{-0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.921fb4p+1F}},
{{+0x1.000000p-23F, +0x1.000002p+0F}, {+0x1.000000p-23F, +0x1.921fb4p+0F}},
{{-0x1.000000p-23F, +0x1.000000p+0F}, {+0x1.000000p-47F, +0x1.921fb8p+0F}},
{{+0x1.000000p-23F, +0x1.fffffep-1F}, {-0x1.fffffcp-25F, +0x1.921fb4p+0F}},
{{-0x1.000000p-23F, -0x1.000002p+0F}, {+0x1.000000p-23F, -0x1.921fb8p+0F}},
{{+0x1.000000p-23F, -0x1.000000p+0F}, {+0x1.000000p-47F, -0x1.921fb4p+0F}},
{{-0x1.000000p-23F, -0x1.fffffep-1F}, {-0x1.fffffcp-25F, -0x1.921fb8p+0F}},
// abs(z) close to 1
{{+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}, {-0x1.26055cp-26F, +0x1.921fb6p-1F}},
{{+0x1.6a09e8p-1F, -0x1.6a09e8p-1F}, {+0x1.20888ep-24F, -0x1.921fb6p-1F}},
{{-0x1.333334p-1F, -0x1.99999ap-1F}, {+0x1.99999ap-26F, -0x1.1b6e1ap+1F}},
{{-0x1.333332p-1F, +0x1.99999ap-1F}, {-0x1.999998p-27F, +0x1.1b6e18p+1F}},
{{+0x1.69fbe8p-1F, +0x1.69fbe8p-1F}, {-0x1.3caab8p-13F, +0x1.921fb6p-1F}},
{{-0x1.d89d8ap-1F, +0x1.89d89ep-2F}, {+0x1.d89d8ap-28F, +0x1.5f9732p+1F}},
// control flow edge cases
// NOTE(review): these inputs presumably straddle branch thresholds inside the log implementation, which is not
// visible from this file -- confirm against <complex> before changing any of them.
{{+0x1p-1F, +0x1.fffffep-2F}, {-0x1.62e432p-2F, +0x1.921fb4p-1F}},
{{+0x1p-1F, +0x1.000000p-1F}, {-0x1.62e430p-2F, +0x1.921fb6p-1F}},
{{+0x1p-1F, +0x1.000002p-1F}, {-0x1.62e42cp-2F, +0x1.921fb8p-1F}},
{{+0x1p-1F, +0x1.a88728p+0F}, {+0x1.193ea6p-1F, +0x1.4727f6p+0F}},
{{+0x1p-1F, +0x1.a8872ap+0F}, {+0x1.193ea8p-1F, +0x1.4727f6p+0F}},
{{+0x1p-1F, +0x1.a8872cp+0F}, {+0x1.193eaap-1F, +0x1.4727f8p+0F}},
{{+9.223371e+18F, +9e+18F}, {+0x1.60059cp+5F, +0x1.8bd930p-1F}},
{{+9.2233715e+18F, +9e+18F}, {+0x1.60059cp+5F, +0x1.8bd930p-1F}},
{{+7e-20F, +4.440892e-16F}, {-0x1.1acdd6p+5F, +0x1.921560p+0F}},
{{+7e-20F, +4.4408926e-16F}, {-0x1.1acdd6p+5F, +0x1.921560p+0F}},
};

Просмотреть файл

@ -0,0 +1,249 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once
#include <complex>
#include "floating_point_utils.hpp"
#include "test.hpp"
// Test vectors for sqrt(complex<double>).
// Each entry is {input z, expected sqrt(z)[, result exactness]}.
// The optional third initializer appears to be {real, imag}: a true flag makes the test driver compare that
// component of the result with an ULP tolerance of 0 instead of the mode-dependent tolerance.
// NOTE(review): entries that omit the third initializer rely on the defaults of complex_unary_test_case, which is
// declared outside this file -- presumably "not exact"; confirm against its definition.
constexpr complex_unary_test_case<double> sqrt_double_cases[] = {
// normal cases
// (3 + 4i and its reflections, whose square roots are small integers, at three different magnitudes)
{{+0x3p-0, +0x4p-0}, {+0x2p-0, +0x1p-0}},
{{+0x3p-0, -0x4p-0}, {+0x2p-0, -0x1p-0}},
{{-0x3p-0, +0x4p-0}, {+0x1p-0, +0x2p-0}},
{{-0x3p-0, -0x4p-0}, {+0x1p-0, -0x2p-0}},
{{+0x3p-2, +0x4p-2}, {+0x2p-1, +0x1p-1}},
{{+0x3p-2, -0x4p-2}, {+0x2p-1, -0x1p-1}},
{{-0x3p-2, +0x4p-2}, {+0x1p-1, +0x2p-1}},
{{-0x3p-2, -0x4p-2}, {+0x1p-1, -0x2p-1}},
{{+0x3p-4, +0x4p-4}, {+0x2p-2, +0x1p-2}},
{{+0x3p-4, -0x4p-4}, {+0x2p-2, -0x1p-2}},
{{-0x3p-4, +0x4p-4}, {+0x1p-2, +0x2p-2}},
{{-0x3p-4, -0x4p-4}, {+0x1p-2, -0x2p-2}},
// special cases
// (signed zeros and unit inputs on the axes; the sign of a zero imaginary part carries through to the result)
{{+0.0, +0.0}, {+0.0, +0.0}, {true, true}},
{{+0.0, -0.0}, {+0.0, -0.0}, {true, true}},
{{-0.0, +0.0}, {+0.0, +0.0}, {true, true}},
{{-0.0, -0.0}, {+0.0, -0.0}, {true, true}},
{{+1.0, +0.0}, {+1.0, +0.0}, {false, true}},
{{+1.0, -0.0}, {+1.0, -0.0}, {false, true}},
{{-1.0, +0.0}, {+0.0, +1.0}, {true, false}},
{{-1.0, -0.0}, {+0.0, -1.0}, {true, false}},
{{+0.0, +1.0}, {+0x1.6a09e667f3bcdp-1, +0x1.6a09e667f3bcdp-1}},
{{+0.0, -1.0}, {+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}},
{{-0.0, +1.0}, {+0x1.6a09e667f3bcdp-1, +0x1.6a09e667f3bcdp-1}},
{{-0.0, -1.0}, {+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}},
// infinity and NaN inputs are only exercised when FP_PRESET_FAST is not set
#if !FP_PRESET_FAST
{{+double_inf, +0.0}, {+double_inf, +0.0}, {true, true}},
{{+double_inf, -0.0}, {+double_inf, -0.0}, {true, true}},
{{-double_inf, +0.0}, {+0.0, +double_inf}, {true, true}},
{{-double_inf, -0.0}, {+0.0, -double_inf}, {true, true}},
{{+double_inf, +1.0}, {+double_inf, +0.0}, {true, true}},
{{+double_inf, -1.0}, {+double_inf, -0.0}, {true, true}},
{{-double_inf, +1.0}, {+0.0, +double_inf}, {true, true}},
{{-double_inf, -1.0}, {+0.0, -double_inf}, {true, true}},
{{+double_inf, +double_inf}, {+double_inf, +double_inf}, {true, true}},
{{+double_inf, -double_inf}, {+double_inf, -double_inf}, {true, true}},
{{-double_inf, +double_inf}, {+double_inf, +double_inf}, {true, true}},
{{-double_inf, -double_inf}, {+double_inf, -double_inf}, {true, true}},
{{+1.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
{{+1.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
{{-1.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
{{-1.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
{{+0.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
{{+0.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
{{-0.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
{{-0.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
{{+double_inf, +double_nan}, {+double_inf, +double_nan}, {true, true}},
{{+double_inf, -double_nan}, {+double_inf, -double_nan}, {true, true}},
{{-double_inf, +double_nan}, {+double_nan, +double_inf}, {true, true}},
{{-double_inf, -double_nan}, {+double_nan, -double_inf}, {true, true}},
{{+double_nan, +double_inf}, {+double_inf, +double_inf}, {true, true}},
{{+double_nan, -double_inf}, {+double_inf, -double_inf}, {true, true}},
{{+double_nan, +0.0}, {+double_nan, +double_nan}, {true, true}},
{{+double_nan, -0.0}, {+double_nan, -double_nan}, {true, true}},
{{+0.0, +double_nan}, {+double_nan, +double_nan}, {true, true}},
{{+0.0, -double_nan}, {+double_nan, -double_nan}, {true, true}},
{{+double_nan, +double_nan}, {+double_nan, +double_nan}, {true, true}},
{{+double_nan, -double_nan}, {+double_nan, -double_nan}, {true, true}},
#endif // !FP_PRESET_FAST
// abs(z) overflows
// (0x1.fffffffffffffp+1023 is the largest finite double)
{{+0x1.fffffffffffffp+1023, +0x1.fffffffffffffp+1023}, {+0x1.19435caffa9f8p+512, +0x1.d203138f6c828p+510}},
{{-0x1.bb67ae8584caap+1023, +0x1.0000000000000p+1023}, {+0x1.0907dc1930691p+510, +0x1.ee8dd4748bf15p+511}},
{{+0x1.fffffffffffffp+1023, -0x0.0000000000001p-1022}, {+0x1.fffffffffffffp+511, -0x0.0000000000000p-1022}},
// norm(z) overflows
{{-0x1.4e718d7d7625ap+664, -0x1.4e718d7d7625ap+665}, {+0x1.cc1033be914a7p+331, -0x1.7432f2f528ea0p+332}},
{{+0x1.ca3d8e6d80cbbp+511, -0x1.57ae2ad22098cp+511}, {+0x1.00e0ed3ec75c3p+256, -0x1.56813c53b47afp+254}},
// cases that need subnormal inputs are skipped when WITH_FP_ABRUPT_UNDERFLOW is set
#if !WITH_FP_ABRUPT_UNDERFLOW
// abs(z) underflows
// (0x0.0000000000001p-1022 is the smallest positive subnormal double)
{{-0x0.0000000000001p-1022, +0x0.0000000000001p-1022}, {+0x1.d203138f6c828p-539, +0x1.19435caffa9f9p-537}},
{{+0x0.0000000000001p-1022, +0x0.8000000000000p-1022}, {+0x1.0000000000001p-512, +0x1.ffffffffffffep-513}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
// abs(z) close to underflow
{{+0x1.4p-1022, +0x1p-1022}, {+0x1.31a33f3eb2fd9p-511, +0x1.acd8ff10ebe7ep-513}},
// norm(z) underflows
{{+0x1.87e92154ef7acp-664, -0x1.87e92154ef7acp-665}, {+0x1.45f5e3f782563p-332, -0x1.33cb9c4327c54p-334}},
{{-0x1.9be34ac46b18fp-513, -0x1.1297872d9cbb5p-512}, {+0x1.09220ecd9c241p-257, -0x1.09220ecd9c241p-256}},
// control flow edge cases
// NOTE(review): these inputs presumably straddle branch thresholds and scaling limits inside the sqrt
// implementation, which is not visible from this file -- confirm against <complex> before changing any of them.
{{+0x1p-2, +0x1.fffffffffffffp-1}, {+0x1.99b96593b936dp-1, +0x1.3fe72a921c6f4p-1}},
{{+0x1p-2, +0x1.0000000000000p+0}, {+0x1.99b96593b936ep-1, +0x1.3fe72a921c6f4p-1}},
{{+0x1p-2, +0x1.0000000000001p+0}, {+0x1.99b96593b936ep-1, +0x1.3fe72a921c6f5p-1}},
{{+0x1p+0, +0x1p-54}, {+0x1.0000000000000p+0, +0x1.0000000000000p-55}},
{{+0x1p+0, +0x1p-53}, {+0x1.0000000000000p+0, +0x1.0000000000000p-54}},
{{+0x1p+0, +0x1p-52}, {+0x1.0000000000000p+0, +0x1.0000000000000p-53}},
{{+0x1p+0, +0x1.ffffffffffffep-2}, {+0x1.077225f1da572p+0, +0x1.f18773c56f720p-3}},
{{+0x1p+0, +0x1.fffffffffffffp-2}, {+0x1.077225f1da572p+0, +0x1.f18773c56f721p-3}},
{{+0x1p+0, +0x1.0000000000000p-1}, {+0x1.077225f1da572p+0, +0x1.f18773c56f721p-3}},
{{+0x1p+0, +0x1.0000000000001p-1}, {+0x1.077225f1da572p+0, +0x1.f18773c56f723p-3}},
{{+0x1.ffffffffffffep-970, +0x1.fffffffffffffp-970}, {+0x1.8dc42193d5c02p-485, +0x1.49852f983efddp-486}},
{{+0x1.fffffffffffffp-970, +0x1.0000000000000p-969}, {+0x1.8dc42193d5c02p-485, +0x1.49852f983efdep-486}},
{{+0x1.0000000000000p-969, +0x1.0000000000001p-969}, {+0x1.8dc42193d5c03p-485, +0x1.49852f983efdep-486}},
{{+0x1.ffffffffffffep-971, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88455p-485, +0x1.92826ef258d1bp-486}},
{{+0x1.fffffffffffffp-971, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
{{+0x1.0000000000000p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1cp-486}},
{{+0x1.fffffffffffffp-971, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
{{+0x1.0000000000000p-970, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
{{+0x1.0000000000001p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88457p-485, +0x1.92826ef258d1cp-486}},
{{+0x1.0000000000000p-970, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
{{+0x1.0000000000001p-970, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
{{+0x1.0000000000002p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88457p-485, +0x1.92826ef258d1cp-486}},
{{+0x1p-1022, +0x1.fffffffffffffp-970}, {+0x1.0000000000000p-485, +0x1.fffffffffffffp-486}},
{{+0x1p-1022, +0x1.0000000000000p-969}, {+0x1.0000000000000p-485, +0x1.0000000000000p-485}},
{{+0x1p-1022, +0x1.0000000000001p-969}, {+0x1.0000000000001p-485, +0x1.0000000000000p-485}},
#if !WITH_FP_ABRUPT_UNDERFLOW
{{+0x0.0000000000001p-1022, +0x1.fffffffffffffp-970}, {+0x1.0000000000000p-485, +0x1.fffffffffffffp-486}},
{{+0x0.0000000000001p-1022, +0x1.0000000000000p-969}, {+0x1.0000000000000p-485, +0x1.0000000000000p-485}},
{{+0x0.0000000000001p-1022, +0x1.0000000000001p-969}, {+0x1.0000000000001p-485, +0x1.0000000000000p-485}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
};
// Test vectors for sqrt(complex<float>); the single-precision counterpart of the double table, section by section.
// Each entry is {input z, expected sqrt(z)[, result exactness]}.
// The optional third initializer appears to be {real, imag}: a true flag makes the test driver compare that
// component of the result with an ULP tolerance of 0 instead of the mode-dependent tolerance.
// NOTE(review): entries that omit the third initializer rely on the defaults of complex_unary_test_case, which is
// declared outside this file -- presumably "not exact"; confirm against its definition.
// Every literal in this table carries the F suffix so that no entry depends on an implicit double-to-float
// conversion inside the braced initializers.
constexpr complex_unary_test_case<float> sqrt_float_cases[] = {
    // normal cases
    // (3 + 4i and its reflections, whose square roots are small integers, at three different magnitudes)
    {{+0x3p-0F, +0x4p-0F}, {+0x2p-0F, +0x1p-0F}},
    {{+0x3p-0F, -0x4p-0F}, {+0x2p-0F, -0x1p-0F}},
    {{-0x3p-0F, +0x4p-0F}, {+0x1p-0F, +0x2p-0F}},
    {{-0x3p-0F, -0x4p-0F}, {+0x1p-0F, -0x2p-0F}},
    {{+0x3p-2F, +0x4p-2F}, {+0x2p-1F, +0x1p-1F}},
    {{+0x3p-2F, -0x4p-2F}, {+0x2p-1F, -0x1p-1F}},
    {{-0x3p-2F, +0x4p-2F}, {+0x1p-1F, +0x2p-1F}},
    {{-0x3p-2F, -0x4p-2F}, {+0x1p-1F, -0x2p-1F}},
    {{+0x3p-4F, +0x4p-4F}, {+0x2p-2F, +0x1p-2F}},
    {{+0x3p-4F, -0x4p-4F}, {+0x2p-2F, -0x1p-2F}},
    {{-0x3p-4F, +0x4p-4F}, {+0x1p-2F, +0x2p-2F}},
    {{-0x3p-4F, -0x4p-4F}, {+0x1p-2F, -0x2p-2F}},

    // special cases
    // (signed zeros and unit inputs on the axes; the sign of a zero imaginary part carries through to the result)
    {{+0.0F, +0.0F}, {+0.0F, +0.0F}, {true, true}},
    {{+0.0F, -0.0F}, {+0.0F, -0.0F}, {true, true}},
    {{-0.0F, +0.0F}, {+0.0F, +0.0F}, {true, true}},
    {{-0.0F, -0.0F}, {+0.0F, -0.0F}, {true, true}},
    {{+1.0F, +0.0F}, {+1.0F, +0.0F}, {false, true}},
    {{+1.0F, -0.0F}, {+1.0F, -0.0F}, {false, true}},
    {{-1.0F, +0.0F}, {+0.0F, +1.0F}, {true, false}},
    {{-1.0F, -0.0F}, {+0.0F, -1.0F}, {true, false}},
    {{+0.0F, +1.0F}, {+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}},
    {{+0.0F, -1.0F}, {+0x1.6a09e6p-1F, -0x1.6a09e6p-1F}},
    {{-0.0F, +1.0F}, {+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}},
    {{-0.0F, -1.0F}, {+0x1.6a09e6p-1F, -0x1.6a09e6p-1F}},

// infinity and NaN inputs are only exercised when FP_PRESET_FAST is not set
#if !FP_PRESET_FAST
    {{+float_inf, +0.0F}, {+float_inf, +0.0F}, {true, true}},
    {{+float_inf, -0.0F}, {+float_inf, -0.0F}, {true, true}},
    {{-float_inf, +0.0F}, {+0.0F, +float_inf}, {true, true}},
    {{-float_inf, -0.0F}, {+0.0F, -float_inf}, {true, true}},
    {{+float_inf, +1.0F}, {+float_inf, +0.0F}, {true, true}},
    {{+float_inf, -1.0F}, {+float_inf, -0.0F}, {true, true}},
    {{-float_inf, +1.0F}, {+0.0F, +float_inf}, {true, true}},
    {{-float_inf, -1.0F}, {+0.0F, -float_inf}, {true, true}},
    {{+float_inf, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{+float_inf, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{-float_inf, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{-float_inf, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{+1.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{+1.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{-1.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{-1.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{+0.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{+0.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{-0.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{-0.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{+float_inf, +float_nan}, {+float_inf, +float_nan}, {true, true}},
    {{+float_inf, -float_nan}, {+float_inf, -float_nan}, {true, true}},
    {{-float_inf, +float_nan}, {+float_nan, +float_inf}, {true, true}},
    {{-float_inf, -float_nan}, {+float_nan, -float_inf}, {true, true}},
    {{+float_nan, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{+float_nan, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    // the zeros below are float literals, matching the rest of the table (they were previously double literals)
    {{+float_nan, +0.0F}, {+float_nan, +float_nan}, {true, true}},
    {{+float_nan, -0.0F}, {+float_nan, -float_nan}, {true, true}},
    {{+0.0F, +float_nan}, {+float_nan, +float_nan}, {true, true}},
    {{+0.0F, -float_nan}, {+float_nan, -float_nan}, {true, true}},
    {{+float_nan, +float_nan}, {+float_nan, +float_nan}, {true, true}},
    {{+float_nan, -float_nan}, {+float_nan, -float_nan}, {true, true}},
#endif // !FP_PRESET_FAST

    // abs(z) overflows
    // (0x1.fffffep+127F is the largest finite float)
    {{+0x1.fffffep+127F, +0x1.fffffep+127F}, {+0x1.19435cp+64F, +0x1.d20312p+62F}},
    {{-0x1.bb67aep+127F, +0x1.000000p+127F}, {+0x1.0907dcp+62F, +0x1.ee8dd4p+63F}},
    {{+0x1.fffffep+127F, -0x0.000002p-126F}, {+0x1.fffffep+63F, -0x0.000000p-126F}},

    // norm(z) overflows
    {{-0x1.08b2a2p+83F, -0x1.08b2a2p+84F}, {+0x1.216970p+41F, -0x1.d4473ap+41F}},
    {{+0x1.bc16d6p+63F, -0x1.4d1120p+63F}, {+0x1.f9c31ep+31F, -0x1.512cbep+30F}},

// cases that need subnormal inputs are skipped when WITH_FP_ABRUPT_UNDERFLOW is set
#if !WITH_FP_ABRUPT_UNDERFLOW
    // abs(z) underflows
    // (0x0.000002p-126F is the smallest positive subnormal float)
    {{-0x0.000002p-126F, +0x0.000002p-126F}, {+0x1.498530p-76F, +0x1.8dc422p-75F}},
    {{+0x0.000002p-126F, +0x0.800000p-126F}, {+0x1.000002p-64F, +0x1.fffffcp-65F}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW

    // abs(z) close to underflow
    {{+0x1.4p-126F, +0x1p-126F}, {+0x1.31a340p-63F, +0x1.acd900p-65F}},

    // norm(z) underflows
    {{+0x1.ef2d1p-83F, -0x1.ef2d10p-84F}, {+0x1.0314d8p-41F, -0x1.e9495ep-44F}},
    {{-0x1.622d7p-61F, -0x1.d83c94p-61F}, {+0x1.ebb76ep-32F, -0x1.ebb770p-31F}},

    // control flow edge cases
    // NOTE(review): these inputs presumably straddle branch thresholds and scaling limits inside the sqrt
    // implementation, which is not visible from this file -- confirm against <complex> before changing any of them.
    {{+0x1p-2F, +0x1.fffffep-1F}, {+0x1.99b964p-1F, +0x1.3fe72ap-1F}},
    {{+0x1p-2F, +0x1.000000p+0F}, {+0x1.99b966p-1F, +0x1.3fe72ap-1F}},
    {{+0x1p-2F, +0x1.000002p+0F}, {+0x1.99b966p-1F, +0x1.3fe72cp-1F}},
    {{+0x1p+0F, +0x1p-25F}, {+0x1.000000p+0F, +0x1.000000p-26F}},
    {{+0x1p+0F, +0x1p-24F}, {+0x1.000000p+0F, +0x1.000000p-25F}},
    {{+0x1p+0F, +0x1p-23F}, {+0x1.000000p+0F, +0x1.000000p-24F}},
    {{+0x1p+0F, +0x1.fffffcp-2F}, {+0x1.077226p+0F, +0x1.f18770p-3F}},
    {{+0x1p+0F, +0x1.fffffep-2F}, {+0x1.077226p+0F, +0x1.f18772p-3F}},
    {{+0x1p+0F, +0x1.000000p-1F}, {+0x1.077226p+0F, +0x1.f18774p-3F}},
    {{+0x1p+0F, +0x1.000002p-1F}, {+0x1.077226p+0F, +0x1.f18778p-3F}},
    {{+0x1.fffffcp-103F, +0x1.fffffep-103F}, {+0x1.19435cp-51F, +0x1.d20314p-53F}},
    {{+0x1.fffffep-103F, +0x1.000000p-102F}, {+0x1.19435cp-51F, +0x1.d20314p-53F}},
    {{+0x1.000000p-102F, +0x1.000002p-102F}, {+0x1.19435ep-51F, +0x1.d20316p-53F}},
    {{+0x1.fffffcp-104F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
    {{+0x1.fffffep-104F, +0x1.000000p-102F}, {+0x1.cc8532p-52F, +0x1.1c9e02p-52F}},
    {{+0x1.000000p-103F, +0x1.000002p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e02p-52F}},
    {{+0x1.fffffep-104F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
    {{+0x1.000000p-103F, +0x1.000000p-102F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
    {{+0x1.000002p-103F, +0x1.000002p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e02p-52F}},
    {{+0x1.000000p-103F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
    {{+0x1.000002p-103F, +0x1.000000p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e00p-52F}},
    {{+0x1.000004p-103F, +0x1.000002p-102F}, {+0x1.cc8536p-52F, +0x1.1c9e02p-52F}},
    {{+0x1.000000p-126F, +0x1.fffffep-103F}, {+0x1.6a09e6p-52F, +0x1.6a09e4p-52F}},
    {{+0x1.000000p-126F, +0x1.000000p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e6p-52F}},
    {{+0x1.000000p-126F, +0x1.000002p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e8p-52F}},
#if !WITH_FP_ABRUPT_UNDERFLOW
    {{+0x0.000002p-126F, +0x1.fffffep-103F}, {+0x1.6a09e6p-52F, +0x1.6a09e6p-52F}},
    {{+0x0.000002p-126F, +0x1.000000p-102F}, {+0x1.6a09e6p-52F, +0x1.6a09e6p-52F}},
    {{+0x0.000002p-126F, +0x1.000002p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e8p-52F}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
};

Просмотреть файл

@ -0,0 +1,125 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include <cassert>
#include <complex>
#include <limits>
#include "floating_point_utils.hpp"
#include "log_test_cases.hpp"
#include "sqrt_test_cases.hpp"
using namespace std;
using namespace fputil;
// Runs every sqrt test vector under the given rounding mode and asserts that each result matches the expected
// value: exactly for components flagged as exact, otherwise within the ULP tolerance chosen below.
// The double vectors are evaluated twice: once as complex<double> and once widened to complex<long double>.
void test_sqrt(const rounding_mode mode) {
#if FP_PRESET_FAST
    constexpr int ulp_tolerance = 4;
#else // ^^^ fp:fast / not fp:fast vvv
    const int ulp_tolerance = is_directed_rounding_mode(mode) ? 3 : 2;
#endif // ^^^ not fp:fast ^^^

    // compares both components of a result against a test vector; a component flagged as exact gets tolerance 0
    const auto matches_expected = [&](const auto& actual, const auto& vec) {
        const int real_tolerance = vec.result_exactness.real ? 0 : ulp_tolerance;
        const int imag_tolerance = vec.result_exactness.imag ? 0 : ulp_tolerance;

        if (!near_equal(actual.real(), vec.expected_result.real(), real_tolerance)) {
            return false;
        }

        return near_equal(actual.imag(), vec.expected_result.imag(), imag_tolerance);
    };

    // evaluates sqrt(z) while the guard for the requested rounding mode is alive
    const auto sqrt_in_mode = [&](const auto& z) {
        rounding_guard guard(mode);
        return sqrt(z);
    };

    // same, but converts the input to complex<long double> first (the conversion also happens under the guard)
    const auto long_double_sqrt_in_mode = [&](const auto& z) {
        rounding_guard guard(mode);
        return sqrt(static_cast<complex<long double>>(z));
    };

    for (const auto& vec : sqrt_double_cases) {
        const auto actual = sqrt_in_mode(vec.input);
        assert(matches_expected(actual, vec));
    }

    for (const auto& vec : sqrt_float_cases) {
        const auto actual = sqrt_in_mode(vec.input);
        assert(matches_expected(actual, vec));
    }

    for (const auto& vec : sqrt_double_cases) {
        const auto actual = long_double_sqrt_in_mode(vec.input);
        assert(matches_expected(static_cast<complex<double>>(actual), vec));
    }
}
// Runs every log test vector under the given rounding mode and asserts that each result matches the expected
// value: exactly for components flagged as exact, otherwise within the tolerances chosen below.
// The double vectors are evaluated twice: once as complex<double> and once widened to complex<long double>.
void test_log(const rounding_mode mode) {
#if FP_PRESET_FAST
    constexpr int ulp_tolerance = 4;
    // under /fp:fast, allow inaccurate real(log(z)) when |z| is close to 1
    constexpr double real_absolute_epsilon_tolerance = 4;
#else // ^^^ fp:fast / not fp:fast vvv
    const int ulp_tolerance                          = is_directed_rounding_mode(mode) ? 3 : 2;
    constexpr double real_absolute_epsilon_tolerance = 0;
#endif // ^^^ not fp:fast ^^^

    const auto matches_expected = [&](const auto& actual, const auto& vec) {
        using component_type = decltype(actual.real());

        // the absolute tolerance is expressed as a multiple of the machine epsilon of the component type
        constexpr auto machine_epsilon = static_cast<double>(numeric_limits<component_type>::epsilon());

        const int real_ulp_tolerance = vec.result_exactness.real ? 0 : ulp_tolerance;
        const int imag_ulp_tolerance = vec.result_exactness.imag ? 0 : ulp_tolerance;

        const double real_absolute_tolerance =
            vec.result_exactness.real ? 0.0 : real_absolute_epsilon_tolerance * machine_epsilon;

        // TRANSITION: under rounding toward negative mode, log(1.0) returns +0.0 on x86, -0.0 on x64
        const auto has_mod_exactly_one = [](const auto& z) {
            // no other complex<binary_floating_point> has mod of exactly 1
            const bool on_real_axis = abs(real(z)) == 1 && imag(z) == 0;
            return on_real_axis || (real(z) == 0 && abs(imag(z)) == 1);
        };

        if (mode == rounding_mode::toward_negative && has_mod_exactly_one(vec.input)) {
            // accept a zero of either sign (or a value within the absolute tolerance) for the real part
            if (!(abs(actual.real()) <= real_absolute_tolerance)) {
                return false;
            }

            return near_equal(actual.imag(), vec.expected_result.imag(), imag_ulp_tolerance);
        }

        if (!near_equal(actual.real(), vec.expected_result.real(), real_ulp_tolerance, real_absolute_tolerance)) {
            return false;
        }

        return near_equal(actual.imag(), vec.expected_result.imag(), imag_ulp_tolerance);
    };

    // evaluates log(z) while the guard for the requested rounding mode is alive
    const auto log_in_mode = [&](const auto& z) {
        rounding_guard guard(mode);
        return log(z);
    };

    // same, but converts the input to complex<long double> first (the conversion also happens under the guard)
    const auto long_double_log_in_mode = [&](const auto& z) {
        rounding_guard guard(mode);
        return log(static_cast<complex<long double>>(z));
    };

    for (const auto& vec : log_double_cases) {
        const auto actual = log_in_mode(vec.input);
        assert(matches_expected(actual, vec));
    }

    for (const auto& vec : log_float_cases) {
        const auto actual = log_in_mode(vec.input);
        assert(matches_expected(actual, vec));
    }

    for (const auto& vec : log_double_cases) {
        const auto actual = long_double_log_in_mode(vec.input);
        assert(matches_expected(static_cast<complex<double>>(actual), vec));
    }
}
// Runs the sqrt checks and then the log checks once for every entry of all_rounding_modes.
int main() {
    for (const auto& current_mode : all_rounding_modes) {
        test_sqrt(current_mode);
        test_log(current_mode);
    }

    return 0;
}

Просмотреть файл

@ -0,0 +1,27 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once
#include <complex>
#include <limits>
#include "floating_point_utils.hpp"
// Shorthands for positive infinity and quiet NaN in double and float precision.
constexpr double double_inf{std::numeric_limits<double>::infinity()};
constexpr double double_nan{std::numeric_limits<double>::quiet_NaN()};
constexpr float float_inf{std::numeric_limits<float>::infinity()};
constexpr float float_nan{std::numeric_limits<float>::quiet_NaN()};
// Per-component exactness flags for an expected complex result; both components default to "not exact".
struct complex_result_exactness {
    bool real{false}; // flag for the real part
    bool imag{false}; // flag for the imaginary part
};
// One test vector for a unary function on std::complex<T>: the argument, the expected value, and
// per-component exactness flags (value-initialized when omitted from the initializer).
template <class T>
struct complex_unary_test_case {
    std::complex<T> input; // argument of the function under test
    std::complex<T> expected_result; // reference value for that argument
    complex_result_exactness result_exactness = {};
};

Просмотреть файл

@ -0,0 +1,33 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# Test configuration matrix. Each RUNALL_CROSSLIST section lists alternative option sets;
# presumably one entry from every section is combined per configuration - confirm against the harness.
RUNALL_INCLUDE .\prefix.lst
RUNALL_CROSSLIST
# /FI force-includes fenv_prefix.hpp ahead of the test source.
PM_CL="/FIfenv_prefix.hpp"
RUNALL_CROSSLIST
# Warning, exception-handling, and language-standard options (single entry).
PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest"
RUNALL_CROSSLIST
# Optimization level and CRT flavor; the first three entries name no PM_COMPILER, the last three select clang-cl.
PM_CL="/Od /MDd"
PM_CL="/O2 /MD /permissive-"
PM_CL="/O2 /MT /GL"
# TRANSITION, -Wno-unused-command-line-argument is needed for the internal test harness
PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /Od /MTd"
PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /O2 /MT"
PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /O2 /MD /Oi-"
RUNALL_CROSSLIST
# Instruction-set selection: no /arch option, or /arch:IA32, /arch:AVX2, /arch:VFPv4.
# NOTE(review): the /arch entries are presumably only valid on some target architectures - confirm how
# the test scripts restrict them.
PM_CL=""
PM_CL="/arch:IA32"
PM_CL="/arch:AVX2"
PM_CL="/arch:VFPv4"
RUNALL_CROSSLIST
# Floating-point model (/fp:strict, /fp:precise, /fp:fast), each paired with FP_CONFIG_PRESET and
# TEST_FP_ROUNDING definitions (presumably consumed by fenv_prefix.hpp - confirm).
PM_CL="/fp:strict /DFP_CONFIG_PRESET=1 /DTEST_FP_ROUNDING=1"
PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=1"
PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=0"
PM_CL="/fp:fast /DFP_CONFIG_PRESET=3 /DTEST_FP_ROUNDING=0"
RUNALL_CROSSLIST
# WITH_FP_ABRUPT_UNDERFLOW=1 is paired with linking loosefpmath.obj; =0 links nothing extra.
PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=0"
PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=1" PM_LINK="loosefpmath.obj"
RUNALL_CROSSLIST
# Floating-point contraction mode forwarded via /clang:-ffp-contract=..., mirrored in FP_CONTRACT_MODE.
PM_CL="/DFP_CONTRACT_MODE=0 /clang:-ffp-contract=off"
PM_CL="/DFP_CONTRACT_MODE=1 /clang:-ffp-contract=on"
PM_CL="/DFP_CONTRACT_MODE=2 /clang:-ffp-contract=fast"

Просмотреть файл

@ -44,5 +44,13 @@ def getDefaultFeatures(config, litConfig):
if litConfig.target_arch.casefold() == 'x86'.casefold():
DEFAULT_FEATURES.append(Feature(name='edg'))
DEFAULT_FEATURES.append(Feature(name='arch_ia32'))
DEFAULT_FEATURES.append(Feature(name='arch_avx2'))
if litConfig.target_arch.casefold() == 'x64'.casefold():
DEFAULT_FEATURES.append(Feature(name='arch_avx2'))
if litConfig.target_arch.casefold() == 'arm'.casefold():
DEFAULT_FEATURES.append(Feature(name='arch_vfpv4'))
return DEFAULT_FEATURES

Просмотреть файл

@ -217,6 +217,12 @@ class STLTest(Test):
self.requires.append('clr') # TRANSITION, GH-797
elif flag[1:] == 'BE':
self.requires.append('edg') # available for x86, see features.py
elif flag[1:] == 'arch:AVX2':
self.requires.append('arch_avx2') # available for x86 and x64, see features.py
elif flag[1:] == 'arch:IA32':
self.requires.append('arch_ia32') # available for x86, see features.py
elif flag[1:] == 'arch:VFPv4':
self.requires.append('arch_vfpv4') # available for arm, see features.py
if not foundStd:
Feature('c++14').enableIn(self.config)