зеркало из https://github.com/microsoft/STL.git
<complex>: Improve numerical accuracy of sqrt and log (#935)
* Fix undue overflow and underflow in complex sqrt

  Modifies the scale factors in `_Fabs` (used by `sqrt`) such that:
  - `_Fabs` doesn't underflow when the input is tiny.
  - `sqrt` doesn't overflow when the input is huge.

* Improve accuracy of `log` when |z| is close to 1

  When |z| is close to 1, compute log(|z|) as log1p(norm_minus_1(z)) / 2, where norm_minus_1(z) = real(z) ^ 2 + imag(z) ^ 2 - 1 is computed with double-width arithmetic to avoid catastrophic cancellation.

* Fix log(complex{1, tiny}) incorrectly returning -0 under FE_DOWNWARD

Co-authored-by: Curtis J Bezault <curtbezault@gmail.com>
Co-authored-by: Stephan T. Lavavej <stl@microsoft.com>
This commit is contained in:
Родитель
51ccd93319
Коммит
9959929c77
412
stl/inc/complex
412
stl/inc/complex
|
@ -10,9 +10,27 @@
|
|||
#if _STL_COMPILER_PREPROCESSOR
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <sstream>
|
||||
#include <type_traits>
|
||||
#include <xutility>
|
||||
#include <ymath.h>
|
||||
|
||||
#ifdef _M_CEE_PURE
|
||||
// no intrinsics for /clr:pure
|
||||
#elif defined(__clang__)
|
||||
// TRANSITION, not using FMA intrinsics for Clang yet
|
||||
#elif defined(_M_IX86) || defined(_M_X64)
|
||||
#define _FMP_USING_X86_X64_INTRINSICS
|
||||
#include <emmintrin.h>
|
||||
#include <isa_availability.h>
|
||||
extern "C" int __isa_available;
|
||||
extern "C" __m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d);
|
||||
#elif defined(_M_ARM64)
|
||||
#define _FMP_USING_ARM64_INTRINSICS
|
||||
#include <arm64_neon.h>
|
||||
#endif // ^^^ defined(_M_ARM64) ^^^
|
||||
|
||||
#pragma pack(push, _CRT_PACKING)
|
||||
#pragma warning(push, _STL_WARNING_LEVEL)
|
||||
#pragma warning(disable : _STL_DISABLED_WARNINGS)
|
||||
|
@ -41,6 +59,296 @@ struct _C_ldouble_complex {
|
|||
#define _IM 1
|
||||
|
||||
_STD_BEGIN
|
||||
|
||||
// implements multi-precision floating point arithmetic for numerical algorithms
|
||||
#pragma float_control(precise, on, push)
|
||||
namespace _Float_multi_prec {
|
||||
// multi-precision floating point types
|
||||
template <class _Ty, int _Prec>
|
||||
struct _Fmp_t;
|
||||
|
||||
template <class _Ty>
|
||||
struct _Fmp_t<_Ty, 2> {
|
||||
static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point");
|
||||
_Ty _Val0; // most significant numeric_limits<_Ty>::precision bits
|
||||
_Ty _Val1; // least significant numeric_limits<_Ty>::precision bits
|
||||
};
|
||||
|
||||
// addition
|
||||
|
||||
// 1x precision + 1x precision -> 2x precision
|
||||
// the result is exact when:
|
||||
// 1) the result doesn't overflow
|
||||
// 2) either underflow is gradual, or no internal underflow occurs
|
||||
// 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
|
||||
// 4) parameters and local variables do not retain extra intermediate precision
|
||||
// 5) rounding mode is rounding to nearest
|
||||
// violation of condition 3 or 5 could lead to relative error on the order of epsilon^2
|
||||
// violation of other conditions could lead to worse results
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
|
||||
const _Ty _Sum0 = _Xval + _Yval;
|
||||
const _Ty _Ymod = _Sum0 - _Xval;
|
||||
const _Ty _Xmod = _Sum0 - _Ymod;
|
||||
const _Ty _Yerr = _Yval - _Ymod;
|
||||
const _Ty _Xerr = _Xval - _Xmod;
|
||||
return {_Sum0, _Xerr + _Yerr};
|
||||
}
|
||||
|
||||
// 1x precision + 1x precision -> 2x precision
|
||||
// requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval) || _Xval == 0
|
||||
// the result is exact when:
|
||||
// 0) the requirement above is satisfied
|
||||
// 1) no internal overflow occurs
|
||||
// 2) either underflow is gradual, or no internal underflow occurs
|
||||
// 3) intermediate precision is either the same as _Ty, or greater than twice the precision of _Ty
|
||||
// 4) parameters and local variables do not retain extra intermediate precision
|
||||
// 5) rounding mode is rounding to nearest
|
||||
// violation of condition 3 or 5 could lead to relative error on the order of epsilon^2
|
||||
// violation of other conditions could lead to worse results
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Ty _Yval) noexcept {
|
||||
const _Ty _Sum0 = _Xval + _Yval;
|
||||
const _Ty _Ymod = _Sum0 - _Xval;
|
||||
const _Ty _Yerr = _Yval - _Ymod;
|
||||
return {_Sum0, _Yerr};
|
||||
}
|
||||
|
||||
// 1x precision + 2x precision -> 2x precision
|
||||
// requires: exponent(_Xval) + countr_zero(significand(_Xval)) >= exponent(_Yval._Val0) || _Xval == 0
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr _Fmp_t<_Ty, 2> _Add_small_x2(const _Ty _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
|
||||
const _Fmp_t<_Ty, 2> _Sum0 = _Add_small_x2(_Xval, _Yval._Val0);
|
||||
return _Add_small_x2(_Sum0._Val0, _Sum0._Val1 + _Yval._Val1);
|
||||
}
|
||||
|
||||
// 2x precision + 2x precision -> 1x precision
|
||||
template <class _Ty>
|
||||
_NODISCARD constexpr _Ty _Add_x1(const _Fmp_t<_Ty, 2>& _Xval, const _Fmp_t<_Ty, 2>& _Yval) noexcept {
|
||||
const _Fmp_t<_Ty, 2> _Sum00 = _Add_x2(_Xval._Val0, _Yval._Val0);
|
||||
return _Sum00._Val0 + (_Sum00._Val1 + (_Xval._Val1 + _Yval._Val1));
|
||||
}
|
||||
|
||||
// multiplication
|
||||
|
||||
// round to 26 significant bits, ties toward zero
|
||||
_NODISCARD _CONSTEXPR_BIT_CAST double _High_half(const double _Val) noexcept {
    // Adding 2^26 - 1 and then clearing the low 27 bits rounds the representation to the nearest
    // value with 26 significant bits; an exact tie does not carry, so ties go toward zero.
    constexpr unsigned long long _Round_bias = 0x3ff'ffffULL;
    constexpr unsigned long long _Keep_mask  = 0xffff'ffff'f800'0000ULL;

    const unsigned long long _Raw = _Bit_cast<unsigned long long>(_Val);
    return _Bit_cast<double>((_Raw + _Round_bias) & _Keep_mask);
}
|
||||
|
||||
// _Xval * _Xval - _Prod0
|
||||
// the result is exact when:
|
||||
// 1) _Prod0 is _Xval^2 faithfully rounded
|
||||
// 2) no internal overflow or underflow occurs
|
||||
// violation of condition 1 could lead to relative error on the order of epsilon
|
||||
_NODISCARD _CONSTEXPR_BIT_CAST double _Sqr_error_fallback(const double _Xval, const double _Prod0) noexcept {
    // Software path for _Xval * _Xval - _Prod0: split _Xval into _Hi + _Lo and expand the square.
    // The grouping of the final expression is intentional; do not reassociate it.
    const double _Hi = _High_half(_Xval);
    const double _Lo = _Xval - _Hi;
    return ((_Hi * _Hi - _Prod0) + 2.0 * _Hi * _Lo) + _Lo * _Lo;
}
|
||||
|
||||
#ifdef _FMP_USING_X86_X64_INTRINSICS
|
||||
_NODISCARD inline double _Sqr_error_x86_x64_fma(const double _Xval, const double _Prod0) noexcept {
    // computes _Xval * _Xval - _Prod0 with one scalar fused multiply-subtract
    const __m128d _Vx    = _mm_set_sd(_Xval);
    const __m128d _Vprod = _mm_set_sd(_Prod0);

    double _Error;
    _mm_store_sd(&_Error, _mm_fmsub_sd(_Vx, _Vx, _Vprod));
    return _Error;
}
|
||||
#endif // _FMP_USING_X86_X64_INTRINSICS
|
||||
|
||||
#ifdef _FMP_USING_ARM64_INTRINSICS
|
||||
_NODISCARD inline double _Sqr_error_arm64_neon(const double _Xval, const double _Prod0) noexcept {
    // computes _Xval * _Xval - _Prod0 by feeding the negated product to a fused multiply-add as the addend
    const float64x1_t _Vx        = vld1_f64(&_Xval);
    const float64x1_t _Vneg_prod = vneg_f64(vld1_f64(&_Prod0));

    double _Error;
    vst1_f64(&_Error, vfma_f64(_Vneg_prod, _Vx, _Vx));
    return _Error;
}
|
||||
#endif // _FMP_USING_ARM64_INTRINSICS
|
||||
|
||||
// square(1x precision) -> 2x precision
|
||||
// the result is exact when no internal overflow or underflow occurs
|
||||
_NODISCARD inline _Fmp_t<double, 2> _Sqr_x2(const double _Xval) noexcept {
    // Returns {rounded square, error of the rounded square}. Only the way the error term is
    // obtained differs between targets.
    const double _Prod0 = _Xval * _Xval;

#if defined(_FMP_USING_X86_X64_INTRINSICS)

#ifdef __AVX2__
    // compiled for AVX2: the FMA path is used unconditionally
    const double _Error = _Sqr_error_x86_x64_fma(_Xval, _Prod0);
#else // ^^^ defined(__AVX2__) / !defined(__AVX2__) vvv
    // otherwise pick the FMA path at run time, only when the CPU reports at least AVX2
    const double _Error = __isa_available >= __ISA_AVAILABLE_AVX2 ? _Sqr_error_x86_x64_fma(_Xval, _Prod0)
                                                                  : _Sqr_error_fallback(_Xval, _Prod0);
#endif // ^^^ !defined(__AVX2__) ^^^

#elif defined(_FMP_USING_ARM64_INTRINSICS)
    // https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions?view=vs-2019#base-requirements
    // Both floating-point and NEON support are presumed to be present in hardware.
    const double _Error = _Sqr_error_arm64_neon(_Xval, _Prod0);
#else // ^^^ defined(_FMP_USING_ARM64_INTRINSICS) / not using intrinsics vvv
    const double _Error = _Sqr_error_fallback(_Xval, _Prod0);
#endif // ^^^ not using intrinsics ^^^

    return {_Prod0, _Error};
}
|
||||
} // namespace _Float_multi_prec
|
||||
#pragma float_control(pop)
|
||||
|
||||
#undef _FMP_USING_X86_X64_INTRINSICS
|
||||
#undef _FMP_USING_ARM64_INTRINSICS
|
||||
|
||||
#define _FMP ::std::_Float_multi_prec::
|
||||
|
||||
// implements numerical algorithms for <complex>
|
||||
namespace _Math_algorithms {
|
||||
// TRANSITION: sqrt() isn't constexpr
|
||||
// _Hypot_leg_huge = _Ty{0.5} * _STD sqrt((_STD numeric_limits<_Ty>::max)());
|
||||
// _Hypot_leg_tiny = _STD sqrt(_Ty{2.0} * (_STD numeric_limits<_Ty>::min)() / _STD numeric_limits<_Ty>::epsilon());
|
||||
template <class _Ty>
|
||||
struct _Hypot_leg_huge_helper {
|
||||
static constexpr _Ty value{6.703903964971298e+153};
|
||||
};
|
||||
template <>
|
||||
struct _Hypot_leg_huge_helper<float> {
|
||||
static constexpr float value{9.2233715e+18f};
|
||||
};
|
||||
template <class _Ty>
|
||||
_INLINE_VAR constexpr _Ty _Hypot_leg_huge = _Hypot_leg_huge_helper<_Ty>::value;
|
||||
|
||||
template <class _Ty>
|
||||
struct _Hypot_leg_tiny_helper {
|
||||
static constexpr _Ty value{1.4156865331029228e-146};
|
||||
};
|
||||
template <>
|
||||
struct _Hypot_leg_tiny_helper<float> {
|
||||
static constexpr float value{4.440892e-16f};
|
||||
};
|
||||
template <class _Ty>
|
||||
_INLINE_VAR constexpr _Ty _Hypot_leg_tiny = _Hypot_leg_tiny_helper<_Ty>::value;
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD _Ty _Norm_minus_one(const _Ty _Xval, const _Ty _Yval) noexcept {
|
||||
// requires |_Xval| >= |_Yval| and 0.5 <= |_Xval| < 2^12
|
||||
// returns _Xval * _Xval + _Yval * _Yval - 1
|
||||
const _FMP _Fmp_t<_Ty, 2> _Xsqr = _FMP _Sqr_x2(_Xval);
|
||||
const _FMP _Fmp_t<_Ty, 2> _Ysqr = _FMP _Sqr_x2(_Yval);
|
||||
const _FMP _Fmp_t<_Ty, 2> _Xsqr_m1 = _FMP _Add_small_x2(_Ty{-1.0}, _Xsqr);
|
||||
return _Add_x1(_Xsqr_m1, _Ysqr);
|
||||
}
|
||||
|
||||
_NODISCARD inline float _Norm_minus_one(const float _Xval, const float _Yval) noexcept {
    // float overload: evaluate _Xval * _Xval + _Yval * _Yval - 1 in double and narrow once at the end
    const double _Xwide = static_cast<double>(_Xval);
    const double _Ywide = static_cast<double>(_Yval);
    return static_cast<float>((_Xwide * _Xwide - 1.0) + _Ywide * _Ywide);
}
|
||||
|
||||
// TRANSITION: CRT log1p can be inaccurate for tiny inputs under directed rounding modes
|
||||
template <class _Ty>
|
||||
_NODISCARD _Ty _Logp1(const _Ty _Xval) { // returns log(1 + _Xval)
|
||||
static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point");
|
||||
|
||||
if (_Is_nan(_Xval)) { // NaN
|
||||
return _Xval + _Xval; // raise FE_INVALID if _Xval is a signaling NaN
|
||||
}
|
||||
|
||||
if (_Xval <= _Ty{-0.5} || _Ty{2.0} <= _Xval) { // naive formula is moderately accurate
|
||||
if (_Xval == (numeric_limits<_Ty>::max)()) { // avoid overflow
|
||||
return _STD log(_Xval);
|
||||
}
|
||||
|
||||
return _STD log(_Ty{1.0} + _Xval);
|
||||
}
|
||||
|
||||
const _Ty _Xabs = _Float_abs(_Xval);
|
||||
if (_Xabs < numeric_limits<_Ty>::epsilon()) { // zero or tiny
|
||||
if (_Xval == _Ty{0.0}) {
|
||||
return _Xval;
|
||||
}
|
||||
|
||||
// honor rounding mode, raise FE_INEXACT
|
||||
return _Xval - _Ty{0.5} * _Xval * _Xval;
|
||||
}
|
||||
|
||||
// compute log(1 + _Xval) with fixup for small _Xval
|
||||
const _FMP _Fmp_t<_Ty, 2> _Xp1 = _FMP _Add_small_x2(_Ty{1.0}, _Xval);
|
||||
return _STD log(_Xp1._Val0) + _Xp1._Val1 / _Xp1._Val0;
|
||||
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD _Ty _Log_hypot(const _Ty _Xval, const _Ty _Yval) noexcept { // returns log(hypot(_Xval, _Yval))
|
||||
static_assert(is_floating_point_v<_Ty>, "_Ty must be floating-point");
|
||||
|
||||
if (!_Is_finite(_Xval) || !_Is_finite(_Yval)) { // Inf or NaN
|
||||
// raise FE_INVALID and return NaN if at least one of them is a signaling NaN
|
||||
if (_Is_signaling_nan(_Xval) || _Is_signaling_nan(_Yval)) {
|
||||
return _Xval + _Yval;
|
||||
}
|
||||
|
||||
// return +Inf if at least one of them is an infinity, even when the other is a quiet NaN
|
||||
if (_Is_inf(_Xval)) {
|
||||
return _Float_abs(_Xval);
|
||||
}
|
||||
|
||||
if (_Is_inf(_Yval)) {
|
||||
return _Float_abs(_Yval);
|
||||
}
|
||||
|
||||
// at least one of them is a quiet NaN, and the other is not an infinity
|
||||
return _Xval + _Yval;
|
||||
}
|
||||
|
||||
_Ty _Av = _Float_abs(_Xval);
|
||||
_Ty _Bv = _Float_abs(_Yval);
|
||||
|
||||
if (_Av < _Bv) { // ensure that _Bv <= _Av
|
||||
_STD swap(_Av, _Bv);
|
||||
}
|
||||
|
||||
if (_Bv == 0) {
|
||||
return _STD log(_Av);
|
||||
}
|
||||
|
||||
if (_Hypot_leg_tiny<_Ty> < _Av && _Av < _Hypot_leg_huge<_Ty>) { // no overflow or harmful underflow
|
||||
constexpr _Ty _Norm_small = _Ty{0.5};
|
||||
constexpr _Ty _Norm_big = _Ty{3.0};
|
||||
|
||||
const _Ty _Bv_sqr = _Bv * _Bv;
|
||||
|
||||
if (_Av == _Ty{1.0}) { // correctly return +0 when _Av == 1 and _Bv * _Bv underflows
|
||||
// _Norm_minus_one(_Av, _Bv) could return -0 under FE_DOWNWARD rounding mode
|
||||
return _Logp1(_Bv_sqr) * _Ty{0.5};
|
||||
}
|
||||
|
||||
const _Ty _Norm = _Av * _Av + _Bv_sqr;
|
||||
|
||||
if (_Norm_small < _Norm && _Norm < _Norm_big) { // avoid catastrophic cancellation
|
||||
return _Logp1(_Norm_minus_one(_Av, _Bv)) * _Ty{0.5};
|
||||
} else {
|
||||
return _STD log(_Norm) * _Ty{0.5};
|
||||
}
|
||||
} else { // use 1 1/2 precision to preserve bits
|
||||
constexpr _Ty _Cm = _Ty{22713.0L / 32768.0L};
|
||||
constexpr _Ty _Cl = _Ty{1.4286068203094172321214581765680755e-6L};
|
||||
|
||||
const int _Exponent = _STD ilogb(_Av);
|
||||
const _Ty _Av_scaled = _STD scalbn(_Av, -_Exponent);
|
||||
const _Ty _Bv_scaled = _STD scalbn(_Bv, -_Exponent);
|
||||
const _Ty _Bv_scaled_sqr = _Bv_scaled * _Bv_scaled;
|
||||
const _Ty _Norm_scaled = _Av_scaled * _Av_scaled + _Bv_scaled_sqr;
|
||||
const _Ty _Real_shifted = _STD log(_Norm_scaled) * _Ty{0.5};
|
||||
return (_Real_shifted + _Exponent * _Cl) + _Exponent * _Cm;
|
||||
}
|
||||
}
|
||||
} // namespace _Math_algorithms
|
||||
|
||||
#undef _FMP
|
||||
|
||||
using _Dcomplex_value = _CSTD _C_double_complex;
|
||||
using _Fcomplex_value = _CSTD _C_float_complex;
|
||||
using _Lcomplex_value = _CSTD _C_ldouble_complex;
|
||||
|
@ -66,8 +374,12 @@ public:
|
|||
return (numeric_limits<_Ty>::max)();
|
||||
}
|
||||
|
||||
static constexpr _Ty _Flt_norm_min() {
    // numeric_limits<_Ty>::min() when it is positive, otherwise 0
    return (numeric_limits<_Ty>::min)() > 0 ? (numeric_limits<_Ty>::min)() : 0;
}
|
||||
|
||||
static _Ty _Abs(_Ty _Left) {
    // Returns the magnitude of _Left. The decision is delegated to _Signbit() instead of
    // comparing against 0, because `_Left < 0` is false for -0.0 and would return it unchanged.
    return static_cast<_Ty>(_Signbit(_Left) ? -_Left : _Left);
}
|
||||
|
||||
static _Ty _Cosh(_Ty _Left, _Ty _Right) { // return cosh(_Left) * _Right
|
||||
|
@ -75,7 +387,7 @@ public:
|
|||
}
|
||||
|
||||
static _Ty _Copysign(_Ty _Magnitude, _Ty _Sign) {
    // Returns |_Magnitude| carrying the sign of _Sign. The sign is read through _Signbit()
    // instead of `_Sign < 0`, which is false when _Sign is -0.0.
    return static_cast<_Ty>(_Signbit(_Sign) ? -_Abs(_Magnitude) : _Abs(_Magnitude));
}
|
||||
|
||||
static short _Exp(_Ty* _Pleft, _Ty _Right, short _Exponent) { // compute exp(*_Pleft) * _Right * 2 ^ _Exponent
|
||||
|
@ -106,7 +418,7 @@ public:
|
|||
}
|
||||
|
||||
static bool _Signbit(_Ty _Left) {
    // Reports the sign of _Left after converting it to double, so that a negative zero is
    // detected; a plain `_Left < 0` comparison cannot distinguish -0.0 from +0.0.
    return (_STD signbit)(static_cast<double>(_Left));
}
|
||||
|
||||
static _Ty _Sinh(_Ty _Left, _Ty _Right) { // return sinh(_Left) * _Right
|
||||
|
@ -200,6 +512,10 @@ public:
|
|||
return (numeric_limits<long double>::max)();
|
||||
}
|
||||
|
||||
static constexpr _Ty _Flt_norm_min() {
    // numeric_limits<long double>::min(); parenthesized so a function-like `min` macro can't expand
    return (numeric_limits<long double>::min)();
}
|
||||
|
||||
static _Ty _Abs(_Ty _Left) {
|
||||
// testing _Left < 0 would be incorrect when _Left is -0.0
|
||||
return _CSTD fabsl(_Left);
|
||||
|
@ -340,6 +656,10 @@ public:
|
|||
return (numeric_limits<double>::max)();
|
||||
}
|
||||
|
||||
static constexpr _Ty _Flt_norm_min() {
    // numeric_limits<double>::min(); parenthesized so a function-like `min` macro can't expand
    return (numeric_limits<double>::min)();
}
|
||||
|
||||
static _Ty _Abs(_Ty _Left) {
|
||||
// testing _Left < 0 would be incorrect when _Left is -0.0
|
||||
return _CSTD fabs(_Left);
|
||||
|
@ -475,6 +795,10 @@ public:
|
|||
return (numeric_limits<float>::max)();
|
||||
}
|
||||
|
||||
static constexpr _Ty _Flt_norm_min() {
    // numeric_limits<float>::min(); parenthesized so a function-like `min` macro can't expand
    return (numeric_limits<float>::min)();
}
|
||||
|
||||
static _Ty _Abs(_Ty _Left) {
|
||||
// testing _Left < 0 would be incorrect when _Left is -0.0
|
||||
return _CSTD fabsf(_Left);
|
||||
|
@ -1524,10 +1848,13 @@ _NODISCARD complex<_Ty> exp(const complex<_Ty>& _Left) {
|
|||
|
||||
// FUNCTION TEMPLATE _Fabs
|
||||
template <class _Ty>
|
||||
_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale factor
|
||||
_Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // Used by sqrt(), return magnitude and scale factor.
|
||||
// Returns a non-zero even integer in *_Pexp when _Left is finite
|
||||
// and non-zero.
|
||||
// Returns 0 in *_Pexp when _Left is zero, infinity, or NaN.
|
||||
*_Pexp = 0;
|
||||
_Ty _Av = real(_Left);
|
||||
_Ty _Bv = imag(_Left);
|
||||
_Ty _Av = _Ctraits<_Ty>::_Abs(_STD real(_Left));
|
||||
_Ty _Bv = _Ctraits<_Ty>::_Abs(_STD imag(_Left));
|
||||
|
||||
if (_Ctraits<_Ty>::_Isinf(_Av) || _Ctraits<_Ty>::_Isinf(_Bv)) {
|
||||
return _Ctraits<_Ty>::_Infv(); // at least one component is INF
|
||||
|
@ -1536,13 +1863,8 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale
|
|||
} else if (_Ctraits<_Ty>::_Isnan(_Bv)) {
|
||||
return _Bv; // imaginary component is NaN
|
||||
} else { // neither component is NaN or INF
|
||||
_Av = _Ctraits<_Ty>::_Abs(_Av);
|
||||
_Bv = _Ctraits<_Ty>::_Abs(_Bv);
|
||||
|
||||
if (_Av < _Bv) { // ensure that |_Bv| <= |_Av|
|
||||
_Ty _Tmp = _Av;
|
||||
_Av = _Bv;
|
||||
_Bv = _Tmp;
|
||||
_STD swap(_Av, _Bv);
|
||||
}
|
||||
|
||||
if (_Av == 0) {
|
||||
|
@ -1550,16 +1872,28 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale
|
|||
}
|
||||
|
||||
if (1 <= _Av) {
|
||||
*_Pexp = 2;
|
||||
_Av = _Av * static_cast<_Ty>(0.25);
|
||||
_Bv = _Bv * static_cast<_Ty>(0.25);
|
||||
*_Pexp = 4;
|
||||
_Av = _Av * static_cast<_Ty>(0.0625);
|
||||
_Bv = _Bv * static_cast<_Ty>(0.0625);
|
||||
} else {
|
||||
*_Pexp = -2;
|
||||
_Av = _Av * 4;
|
||||
_Bv = _Bv * 4;
|
||||
constexpr _Ty _Flt_eps = _Ctraits<_Ty>::_Flt_eps();
|
||||
// TRANSITION, workaround for non floating point _Ty
|
||||
constexpr _Ty _Leg_tiny = _Flt_eps == 0 ? _Ty{0} : 2 * _Ctraits<_Ty>::_Flt_norm_min() / _Flt_eps;
|
||||
|
||||
if (_Av < _Leg_tiny) {
|
||||
constexpr int _Exponent = -2 * numeric_limits<_Ty>::digits;
|
||||
|
||||
*_Pexp = _Exponent;
|
||||
_Av = _Ctraits<_Ty>::ldexp(_Av, -_Exponent);
|
||||
_Bv = _Ctraits<_Ty>::ldexp(_Bv, -_Exponent);
|
||||
} else {
|
||||
*_Pexp = -2;
|
||||
_Av = _Av * 4;
|
||||
_Bv = _Bv * 4;
|
||||
}
|
||||
}
|
||||
|
||||
_Ty _Tmp = _Av - _Bv;
|
||||
const _Ty _Tmp = _Av - _Bv;
|
||||
if (_Tmp == _Av) {
|
||||
return _Av; // _Bv unimportant
|
||||
} else if (_Bv < _Tmp) { // use simple approximation
|
||||
|
@ -1579,33 +1913,21 @@ _Ty _Fabs(const complex<_Ty>& _Left, int* _Pexp) { // return magnitude and scale
|
|||
}
|
||||
|
||||
// FUNCTION TEMPLATE log
|
||||
template <class _Ty>
|
||||
_NODISCARD _Ty _Log_abs(const complex<_Ty>& _Left) noexcept { // for double, long double, and non floating point types
|
||||
return static_cast<_Ty>(
|
||||
_Math_algorithms::_Log_hypot(static_cast<double>(_STD real(_Left)), static_cast<double>(_STD imag(_Left))));
|
||||
}
|
||||
|
||||
_NODISCARD inline float _Log_abs(const complex<float>& _Left) noexcept {
    // float overload: stays in single precision and calls _Log_hypot with float arguments
    const float _Re = _STD real(_Left);
    const float _Im = _STD imag(_Left);
    return _Math_algorithms::_Log_hypot(_Re, _Im);
}
|
||||
|
||||
template <class _Ty>
|
||||
_NODISCARD complex<_Ty> log(const complex<_Ty>& _Left) {
|
||||
_Ty _Theta = _Ctraits<_Ty>::atan2(imag(_Left), real(_Left)); // get phase
|
||||
|
||||
if (_Ctraits<_Ty>::_Isnan(_Theta)) {
|
||||
return complex<_Ty>(_Theta, _Theta); // real or imag is NaN
|
||||
} else { // use 1 1/2 precision to preserve bits
|
||||
constexpr _Ty _Cm = static_cast<_Ty>(22713.0L / 32768.0L);
|
||||
constexpr _Ty _Cl = static_cast<_Ty>(1.4286068203094172321214581765680755e-6L);
|
||||
int _Leftexp;
|
||||
_Ty _Rho = _Fabs(_Left, &_Leftexp); // get magnitude and scale factor
|
||||
|
||||
_Ty _Leftn = static_cast<_Ty>(_Leftexp);
|
||||
|
||||
_Ty _Real;
|
||||
if (_Rho == 0) {
|
||||
_Real = -_Ctraits<_Ty>::_Infv(); // log(0) == -INF
|
||||
} else if (_Ctraits<_Ty>::_Isinf(_Rho)) {
|
||||
_Real = _Rho; // log(INF) == INF
|
||||
} else {
|
||||
_Real = static_cast<_Ty>(_Ctraits<_Ty>::log(_Rho)); // These casts are TRANSITION, DevCom-1093507
|
||||
_Real += static_cast<_Ty>(_Leftn * _Cl);
|
||||
_Real += static_cast<_Ty>(_Leftn * _Cm);
|
||||
}
|
||||
|
||||
return complex<_Ty>(_Real, _Theta);
|
||||
}
|
||||
const _Ty _Log_abs_v = _STD _Log_abs(_Left); // get logarithm of magnitude
|
||||
const _Ty _Theta = _Ctraits<_Ty>::atan2(_STD imag(_Left), _STD real(_Left)); // get phase
|
||||
return complex<_Ty>(_Log_abs_v, _Theta);
|
||||
}
|
||||
|
||||
// FUNCTION TEMPLATE pow
|
||||
|
@ -1678,7 +2000,7 @@ _NODISCARD complex<_Ty> sqrt(const complex<_Ty>& _Left) {
|
|||
return complex<_Ty>(_Ctraits<_Ty>::_Infv(), _Im); // (any, +/-Inf)
|
||||
} else if (_Ctraits<_Ty>::_Isnan(_Im)) {
|
||||
if (_Re < 0) {
|
||||
return complex<_Ty>(_Im, _Re); // (-Inf, NaN)
|
||||
return complex<_Ty>(_Ctraits<_Ty>::_Abs(_Im), _Ctraits<_Ty>::_Copysign(_Re, _Im)); // (-Inf, NaN)
|
||||
} else {
|
||||
return _Left; // (+Inf, NaN)
|
||||
}
|
||||
|
|
|
@ -6131,16 +6131,20 @@ struct _Float_traits {
|
|||
// traits for double and long double:
|
||||
using type = unsigned long long;
|
||||
|
||||
static constexpr type _Sign_mask = 0x8000'0000'0000'0000ULL;
|
||||
static constexpr type _Magnitude_mask = 0x7fff'ffff'ffff'ffffULL;
|
||||
static constexpr type _Exponent_mask = 0x7ff0'0000'0000'0000ULL;
|
||||
static constexpr type _Quiet_nan_mask = 0x0008'0000'0000'0000ULL;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct _Float_traits<float> {
|
||||
using type = unsigned int;
|
||||
|
||||
static constexpr type _Sign_mask = 0x8000'0000U;
|
||||
static constexpr type _Magnitude_mask = 0x7fff'ffffU;
|
||||
static constexpr type _Exponent_mask = 0x7f80'0000U;
|
||||
static constexpr type _Quiet_nan_mask = 0x0040'0000U;
|
||||
};
|
||||
|
||||
// FUNCTION TEMPLATE _Float_abs_bits
|
||||
|
@ -6156,12 +6160,36 @@ _NODISCARD _CONSTEXPR_BIT_CAST _Ty _Float_abs(const _Ty _Xx) { // constexpr floa
|
|||
return _Bit_cast<_Ty>(_Float_abs_bits(_Xx));
|
||||
}
|
||||
|
||||
// FUNCTION TEMPLATE _Float_copysign
|
||||
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
|
||||
_NODISCARD _CONSTEXPR_BIT_CAST _Ty _Float_copysign(const _Ty _Magnitude, const _Ty _Sign) { // constexpr copysign()
|
||||
const auto _Signbit = _Bit_cast<typename _Float_traits<_Ty>::type>(_Sign) & _Float_traits<_Ty>::_Sign_mask;
|
||||
return _Bit_cast<_Ty>(_Float_abs_bits(_Magnitude) | _Signbit);
|
||||
}
|
||||
|
||||
// FUNCTION TEMPLATE _Is_nan
|
||||
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
|
||||
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_nan(const _Ty _Xx) { // constexpr isnan()
|
||||
return _Float_abs_bits(_Xx) > _Float_traits<_Ty>::_Exponent_mask;
|
||||
}
|
||||
|
||||
// FUNCTION TEMPLATE _Is_signaling_nan
|
||||
// TRANSITION, workaround x86 ABI
|
||||
// On x86 ABI, floating point by-value arguments and return values are passed in 80-bit x87 registers.
|
||||
// When the value is a 32-bit or 64-bit signaling NaN, the conversion to/from 80-bit raises FE_INVALID
|
||||
// and turns it into a quiet NaN. This behavior is undesirable if we want to test for signaling NaNs.
|
||||
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
|
||||
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_signaling_nan(const _Ty& _Xx) { // returns true if input is a signaling NaN
|
||||
const auto _Abs_bits = _Float_abs_bits(_Xx);
|
||||
return _Abs_bits > _Float_traits<_Ty>::_Exponent_mask && ((_Abs_bits & _Float_traits<_Ty>::_Quiet_nan_mask) == 0);
|
||||
}
|
||||
|
||||
// FUNCTION TEMPLATE _Is_inf
|
||||
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
|
||||
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_inf(const _Ty _Xx) { // constexpr isinf()
|
||||
return _Float_abs_bits(_Xx) == _Float_traits<_Ty>::_Exponent_mask;
|
||||
}
|
||||
|
||||
// FUNCTION TEMPLATE _Is_finite
|
||||
template <class _Ty, enable_if_t<is_floating_point_v<_Ty>, int> = 0>
|
||||
_NODISCARD _CONSTEXPR_BIT_CAST bool _Is_finite(const _Ty _Xx) { // constexpr isfinite()
|
||||
|
@ -6177,7 +6205,6 @@ struct _Nontrivial_dummy_type {
|
|||
_STL_INTERNAL_STATIC_ASSERT(!is_trivially_default_constructible_v<_Nontrivial_dummy_type>);
|
||||
|
||||
_STD_END
|
||||
#undef _CONSTEXPR_BIT_CAST
|
||||
#pragma pop_macro("new")
|
||||
_STL_RESTORE_CLANG_WARNINGS
|
||||
#pragma warning(pop)
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef FP_CONFIG_PRESET
|
||||
#if FP_CONFIG_PRESET == 3
|
||||
#define FP_PRESET_FAST 1
|
||||
#else // ^^^ FP_CONFIG_PRESET == 3 / FP_CONFIG_PRESET != 3 vvv
|
||||
#define FP_PRESET_FAST 0
|
||||
#endif // ^^^ FP_CONFIG_PRESET != 3 ^^^
|
||||
#endif // defined(FP_CONFIG_PRESET)
|
||||
|
||||
#ifdef FP_CONTRACT_MODE
|
||||
#ifdef __clang__
|
||||
|
||||
#if FP_CONTRACT_MODE == 0
|
||||
#pragma STDC FP_CONTRACT OFF
|
||||
#elif FP_CONTRACT_MODE == 1 // ^^^ no floating point contraction / standard floating point contraction vvv
|
||||
#pragma STDC FP_CONTRACT ON
|
||||
#elif FP_CONTRACT_MODE == 2 // ^^^ standard floating point contraction / fast floating point contraction vvv
|
||||
#pragma STDC FP_CONTRACT ON
|
||||
#else // ^^^ fast floating point contraction / invalid FP_CONTRACT_MODE vvv
|
||||
#error invalid FP_CONTRACT_MODE
|
||||
#endif // ^^^ invalid FP_CONTRACT_MODE ^^^
|
||||
|
||||
#else // ^^^ clang / MSVC vvv
|
||||
|
||||
#if FP_CONTRACT_MODE == 0
|
||||
#pragma fp_contract(off)
|
||||
#elif FP_CONTRACT_MODE == 1 // ^^^ no floating point contraction / standard floating point contraction vvv
|
||||
#pragma fp_contract(on)
|
||||
#elif FP_CONTRACT_MODE == 2 // ^^^ standard floating point contraction / fast floating point contraction vvv
|
||||
#pragma fp_contract(on)
|
||||
#else // ^^^ fast floating point contraction / invalid FP_CONTRACT_MODE vvv
|
||||
#error invalid FP_CONTRACT_MODE
|
||||
#endif // ^^^ invalid FP_CONTRACT_MODE ^^^
|
||||
|
||||
#endif // ^^^ MSVC ^^^
|
||||
#endif // defined(FP_CONTRACT_MODE)
|
||||
|
||||
#include <cassert>
|
||||
#include <float.h>
|
||||
|
||||
// Applies the floating-point environment settings selected by the test configuration.
// All of the work happens in the constructor; the type is neither copyable nor assignable.
struct fenv_initializer_t {
    fenv_initializer_t() {
#if WITH_FP_ABRUPT_UNDERFLOW
        // switch denormal handling to flush mode;
        // [[maybe_unused]] because `result` is only read by assert(), which vanishes under NDEBUG
        [[maybe_unused]] const errno_t result = _controlfp_s(nullptr, _DN_FLUSH, _MCW_DN);
        assert(result == 0);
#endif // WITH_FP_ABRUPT_UNDERFLOW
    }

    ~fenv_initializer_t() = default;

    fenv_initializer_t(const fenv_initializer_t&) = delete;
    fenv_initializer_t& operator=(const fenv_initializer_t&) = delete;
};
|
||||
|
||||
const fenv_initializer_t fenv_initializer{};
|
|
@ -161,6 +161,7 @@ tests\GH_000625_vector_bool_optimization
|
|||
tests\GH_000685_condition_variable_any
|
||||
tests\GH_000690_overaligned_function
|
||||
tests\GH_000890_pow_template
|
||||
tests\GH_000935_complex_numerical_accuracy
|
||||
tests\GH_000940_missing_valarray_copy
|
||||
tests\GH_001001_random_rejection_rounding
|
||||
tests\GH_001010_filesystem_error_encoding
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
RUNALL_INCLUDE ..\floating_point_model_matrix.lst
|
|
@ -0,0 +1,241 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <cfenv>
|
||||
#include <cmath>
|
||||
#include <float.h>
|
||||
#include <type_traits>
|
||||
#include <xutility>
|
||||
|
||||
namespace fputil {
|
||||
template <typename T>
|
||||
using float_bits_t = typename _STD _Float_traits<T>::type;
|
||||
|
||||
template <typename T>
|
||||
_INLINE_VAR constexpr float_bits_t<T> magnitude_mask_v = _STD _Float_traits<T>::_Magnitude_mask;
|
||||
|
||||
template <typename T>
|
||||
_INLINE_VAR constexpr float_bits_t<T> exponent_mask_v = _STD _Float_traits<T>::_Exponent_mask;
|
||||
|
||||
template <typename T>
|
||||
_INLINE_VAR constexpr float_bits_t<T> significand_mask_v = magnitude_mask_v<T> & ~exponent_mask_v<T>;
|
||||
|
||||
template <typename T>
|
||||
_INLINE_VAR constexpr float_bits_t<T> sign_mask_v = _STD _Float_traits<T>::_Sign_mask;
|
||||
|
||||
template <typename T>
|
||||
_INLINE_VAR constexpr float_bits_t<T> norm_min_bits_v = significand_mask_v<T> + 1U;
|
||||
|
||||
template <typename T>
|
||||
_INLINE_VAR constexpr float_bits_t<T> norm_max_bits_v = exponent_mask_v<T> - 1U;
|
||||
|
||||
template <typename T>
|
||||
_INLINE_VAR constexpr float_bits_t<T> infinity_bits_v = exponent_mask_v<T>;
|
||||
|
||||
// not affected by abrupt underflow
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
constexpr bool iszero(const T& x) {
|
||||
return _STD _Float_abs_bits(x) == 0;
|
||||
}
|
||||
|
||||
// not affected by /fp:fast
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
constexpr bool signbit(const T& x) {
|
||||
const auto bits = std::_Bit_cast<float_bits_t<T>>(x);
|
||||
return (bits & sign_mask_v<T>) != 0;
|
||||
}
|
||||
|
||||
// Rounding modes, with enumerator values equal to the corresponding <cfenv> macros so that they
// can be passed to fesetround() after a static_cast<int>.
enum class rounding_mode {
    to_nearest_ties_even = FE_TONEAREST,
    toward_zero          = FE_TOWARDZERO,
    toward_positive      = FE_UPWARD,
    toward_negative      = FE_DOWNWARD,
};

// Returns true for the three directed modes and false for round-to-nearest.
// An unknown value asserts in debug builds and is reported as "not directed".
// Declared inline because this header defines it at namespace scope and may be included from
// several translation units.
inline bool is_directed_rounding_mode(const rounding_mode mode) {
    switch (mode) {
    case rounding_mode::to_nearest_ties_even:
        return false;

    case rounding_mode::toward_zero:
    case rounding_mode::toward_positive:
    case rounding_mode::toward_negative:
        return true;

    default:
        assert(false);
        return false;
    }
}
|
||||
|
||||
#if TEST_FP_ROUNDING
|
||||
|
||||
#ifdef __clang__
|
||||
// TRANSITION, should be #pragma STDC FENV_ACCESS ON
|
||||
#else // ^^^ clang / MSVC vvv
|
||||
// TRANSITION, VSO-923474 -- should be #pragma STDC FENV_ACCESS ON
|
||||
#pragma fenv_access(on)
|
||||
#endif // ^^^ MSVC ^^^
|
||||
|
||||
constexpr rounding_mode all_rounding_modes[] = {
|
||||
rounding_mode::to_nearest_ties_even,
|
||||
rounding_mode::toward_zero,
|
||||
rounding_mode::toward_positive,
|
||||
rounding_mode::toward_negative,
|
||||
};
|
||||
|
||||
class rounding_guard {
|
||||
public:
|
||||
explicit rounding_guard(const rounding_mode mode) : old_mode{static_cast<rounding_mode>(std::fegetround())} {
|
||||
const int result = std::fesetround(static_cast<int>(mode));
|
||||
assert(result == 0);
|
||||
}
|
||||
|
||||
~rounding_guard() {
|
||||
const int result = std::fesetround(static_cast<int>(old_mode));
|
||||
assert(result == 0);
|
||||
}
|
||||
|
||||
rounding_guard(const rounding_guard&) = delete;
|
||||
rounding_guard& operator=(const rounding_guard&) = delete;
|
||||
|
||||
private:
|
||||
rounding_mode old_mode;
|
||||
};
|
||||
|
||||
#else // ^^^ alternative rounding modes / default rounding mode only vvv
|
||||
|
||||
// variant used when TEST_FP_ROUNDING is off: only the default rounding mode is listed
constexpr rounding_mode all_rounding_modes[] = {
    rounding_mode::to_nearest_ties_even,
};
|
||||
|
||||
class rounding_guard {
|
||||
public:
|
||||
explicit rounding_guard(const rounding_mode mode) {
|
||||
static_cast<void>(mode);
|
||||
}
|
||||
|
||||
~rounding_guard() = default;
|
||||
|
||||
rounding_guard(const rounding_guard&) = delete;
|
||||
rounding_guard& operator=(const rounding_guard&) = delete;
|
||||
};
|
||||
|
||||
#endif // ^^^ default rounding mode only ^^^
|
||||
|
||||
// compares whether two floating point values are equal
|
||||
// all NaNs are equal, +0.0 and -0.0 are not equal
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
bool precise_equal(const T& actual, const T& expected) {
|
||||
if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) {
|
||||
return _STD _Is_nan(actual) == _STD _Is_nan(expected);
|
||||
} else {
|
||||
return actual == expected && fputil::signbit(actual) == fputil::signbit(expected);
|
||||
}
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
// 0x80...00 = zero, 0x80...01 = numeric_limits<T>::denorm_min(), 0x7f...ff = -numeric_limits<T>::denorm_min()
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
float_bits_t<T> offset_representation(const T& x) {
|
||||
const float_bits_t<T> abs_bits = _STD _Float_abs_bits(x);
|
||||
return fputil::signbit(x) ? sign_mask_v<T> - abs_bits : sign_mask_v<T> + abs_bits;
|
||||
}
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
float_bits_t<T> is_offset_value_subnormal_or_zero(const float_bits_t<T> offset_value) {
|
||||
constexpr float_bits_t<T> positive_norm_min_offset = sign_mask_v<T> + norm_min_bits_v<T>;
|
||||
constexpr float_bits_t<T> negative_norm_min_offset = sign_mask_v<T> - norm_min_bits_v<T>;
|
||||
|
||||
return negative_norm_min_offset < offset_value && offset_value < positive_norm_min_offset;
|
||||
}
|
||||
|
||||
// number of ulps above zero, if we count [0, numeric_limits<T>::min()) as 1 ulp
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
double abrupt_underflow_ulp(const float_bits_t<T> offset_value) {
|
||||
using bits_type = float_bits_t<T>;
|
||||
|
||||
constexpr bits_type offset_positive_norm_min = sign_mask_v<T> + norm_min_bits_v<T>;
|
||||
constexpr bits_type offset_negative_norm_min = sign_mask_v<T> - norm_min_bits_v<T>;
|
||||
|
||||
if (offset_value >= offset_positive_norm_min) {
|
||||
return 1.0 + (offset_value - offset_positive_norm_min);
|
||||
} else if (offset_value <= offset_negative_norm_min) {
|
||||
return -1.0 - (offset_negative_norm_min - offset_value);
|
||||
} else if (offset_value >= sign_mask_v<T>) {
|
||||
return static_cast<double>(offset_value - sign_mask_v<T>) / norm_min_bits_v<T>;
|
||||
} else {
|
||||
return -static_cast<double>(sign_mask_v<T> - offset_value) / norm_min_bits_v<T>;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
bool is_within_ulp_tolerance(const T& actual, const T& expected, const int ulp_tolerance) {
|
||||
if (_STD _Is_nan(actual) || _STD _Is_nan(expected)) {
|
||||
return _STD _Is_nan(actual) == _STD _Is_nan(expected);
|
||||
}
|
||||
|
||||
if (_STD _Is_inf(expected)) {
|
||||
return actual == expected;
|
||||
}
|
||||
|
||||
if (fputil::signbit(actual) != fputil::signbit(expected)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
using bits_type = float_bits_t<T>;
|
||||
|
||||
// compute ulp difference
|
||||
const bits_type actual_offset = detail::offset_representation(actual);
|
||||
const bits_type expected_offset = detail::offset_representation(expected);
|
||||
const bits_type ulp_diff =
|
||||
actual_offset < expected_offset ? expected_offset - actual_offset : actual_offset - expected_offset;
|
||||
|
||||
if (ulp_diff <= static_cast<unsigned int>(ulp_tolerance) && ulp_tolerance >= 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
#if WITH_FP_ABRUPT_UNDERFLOW
|
||||
// handle abrupt underflow
|
||||
if (detail::is_offset_value_subnormal_or_zero<T>(expected_offset)
|
||||
|| detail::is_offset_value_subnormal_or_zero<T>(actual_offset)) {
|
||||
const double adjusted_actual_ulp = detail::abrupt_underflow_ulp<T>(actual_offset);
|
||||
const double adjusted_expected_ulp = detail::abrupt_underflow_ulp<T>(expected_offset);
|
||||
const double adjusted_ulp_diff = std::abs(adjusted_actual_ulp - adjusted_expected_ulp);
|
||||
|
||||
if (adjusted_ulp_diff <= ulp_tolerance) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif // WITH_FP_ABRUPT_UNDERFLOW
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
bool is_within_absolute_tolerance(const T& actual, const T& expected, const double absolute_tolerance) {
|
||||
return _STD _Is_finite(actual) && _STD _Is_finite(expected)
|
||||
&& std::abs(actual - expected) <= absolute_tolerance;
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
// returns whether floating point result is nearly equal to the expected value
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
bool near_equal(
|
||||
const T& actual, const T& expected, const int ulp_tolerance = 1, const double absolute_tolerance = 0) {
|
||||
if (precise_equal(actual, expected)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ulp_tolerance > 0 && detail::is_within_ulp_tolerance(actual, expected, ulp_tolerance)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (absolute_tolerance > 0 && detail::is_within_absolute_tolerance(actual, expected, absolute_tolerance)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
} // namespace fputil
|
|
@ -0,0 +1,276 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <complex>
|
||||
|
||||
#include "floating_point_utils.hpp"
|
||||
#include "test.hpp"
|
||||
|
||||
// multiples of pi, written as double-precision decimal literals and converted to T

// pi / 4
template <class T>
constexpr T pi_over_4_v = T{0.7853981633974483};

// pi / 2
template <class T>
constexpr T pi_over_2_v = T{1.5707963267948966};

// 3 * pi / 4
template <class T>
constexpr T pi_3_over_4_v = T{2.356194490192345};

// pi
template <class T>
constexpr T pi_v = T{3.141592653589793};
|
||||
|
||||
// test vectors for the complex logarithm in double precision
//
// layout of one entry: {input {re, im}}, {expected {re, im}}, and optionally a pair of bool flags
// NOTE(review): the flag pair is presumably "compare real part exactly, compare imaginary part exactly";
// confirm against the definition of complex_unary_test_case in test.hpp
constexpr complex_unary_test_case<double> log_double_cases[] = {
    // normal cases
    {{+0x1.8p+0, +0x1p+1}, {+0x1.d5240f0e0e078p-1, +0x1.dac670561bb4fp-1}},
    {{+0x1.8p+0, -0x1p+1}, {+0x1.d5240f0e0e078p-1, -0x1.dac670561bb4fp-1}},
    {{-0x1.8p+0, +0x1p+1}, {+0x1.d5240f0e0e078p-1, +0x1.1b6e192ebbe44p+1}},
    {{-0x1.8p+0, -0x1p+1}, {+0x1.d5240f0e0e078p-1, -0x1.1b6e192ebbe44p+1}},
    {{+0x1.8p-1, +0x1p+0}, {+0x1.c8ff7c79a9a22p-3, +0x1.dac670561bb4fp-1}},
    {{+0x1.8p-1, -0x1p+0}, {+0x1.c8ff7c79a9a22p-3, -0x1.dac670561bb4fp-1}},
    {{-0x1.8p-1, +0x1p+0}, {+0x1.c8ff7c79a9a22p-3, +0x1.1b6e192ebbe44p+1}},
    {{-0x1.8p-1, -0x1p+0}, {+0x1.c8ff7c79a9a22p-3, -0x1.1b6e192ebbe44p+1}},
    {{+0x1.8p-2, +0x1p-1}, {-0x1.e148a1a2726cep-2, +0x1.dac670561bb4fp-1}},
    {{+0x1.8p-2, -0x1p-1}, {-0x1.e148a1a2726cep-2, -0x1.dac670561bb4fp-1}},
    {{-0x1.8p-2, +0x1p-1}, {-0x1.e148a1a2726cep-2, +0x1.1b6e192ebbe44p+1}},
    {{-0x1.8p-2, -0x1p-1}, {-0x1.e148a1a2726cep-2, -0x1.1b6e192ebbe44p+1}},

    // special cases
    {{+1.0, +0.0}, {0.0, +0.0}, {true, true}},
    {{+1.0, -0.0}, {0.0, -0.0}, {true, true}},
    {{+0.0, +1.0}, {0.0, +pi_over_2_v<double>}, {true, false}},
    {{+0.0, -1.0}, {0.0, -pi_over_2_v<double>}, {true, false}},
    {{-0.0, +1.0}, {0.0, +pi_over_2_v<double>}, {true, false}},
    {{-0.0, -1.0}, {0.0, -pi_over_2_v<double>}, {true, false}},
    {{-1.0, +0.0}, {0.0, +pi_v<double>}, {true, false}},
    {{-1.0, -0.0}, {0.0, -pi_v<double>}, {true, false}},

#if !FP_PRESET_FAST
    // zero, infinite and NaN inputs; compiled out when FP_PRESET_FAST is nonzero
    {{+0.0, +0.0}, {-double_inf, +0.0}, {true, true}},
    {{+0.0, -0.0}, {-double_inf, -0.0}, {true, true}},
    {{-0.0, +0.0}, {-double_inf, +pi_v<double>}, {true, false}},
    {{-0.0, -0.0}, {-double_inf, -pi_v<double>}, {true, false}},
    {{+double_inf, +0.0}, {+double_inf, +0.0}, {true, true}},
    {{+double_inf, -0.0}, {+double_inf, -0.0}, {true, true}},
    {{+double_inf, +1.0}, {+double_inf, +0.0}, {true, true}},
    {{+double_inf, -1.0}, {+double_inf, -0.0}, {true, true}},
    {{+double_inf, +double_inf}, {+double_inf, +pi_over_4_v<double>}, {true, false}},
    {{+double_inf, -double_inf}, {+double_inf, -pi_over_4_v<double>}, {true, false}},
    {{+1.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
    {{+1.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
    {{+0.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
    {{+0.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
    {{-0.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
    {{-0.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
    {{-1.0, +double_inf}, {+double_inf, +pi_over_2_v<double>}, {true, false}},
    {{-1.0, -double_inf}, {+double_inf, -pi_over_2_v<double>}, {true, false}},
    {{-double_inf, +double_inf}, {+double_inf, +pi_3_over_4_v<double>}, {true, false}},
    {{-double_inf, -double_inf}, {+double_inf, -pi_3_over_4_v<double>}, {true, false}},
    {{-double_inf, +1.0}, {+double_inf, +pi_v<double>}, {true, false}},
    {{-double_inf, -1.0}, {+double_inf, -pi_v<double>}, {true, false}},
    {{-double_inf, +0.0}, {+double_inf, +pi_v<double>}, {true, false}},
    {{-double_inf, -0.0}, {+double_inf, -pi_v<double>}, {true, false}},
    {{+double_inf, double_nan}, {+double_inf, double_nan}, {true, true}},
    {{-double_inf, double_nan}, {+double_inf, double_nan}, {true, true}},
    {{double_nan, +double_inf}, {+double_inf, double_nan}, {true, true}},
    {{double_nan, -double_inf}, {+double_inf, double_nan}, {true, true}},
    {{double_nan, +0.0}, {double_nan, double_nan}, {true, true}},
    {{+0.0, double_nan}, {double_nan, double_nan}, {true, true}},
    {{double_nan, double_nan}, {double_nan, double_nan}, {true, true}},
#endif // !FP_PRESET_FAST

    // abs(z) overflows
    {{+0x1.fffffffffffffp+1023, +0x1.fffffffffffffp+1023}, {+0x1.63108c75a1936p+9, +0x1.921fb54442d18p-1}},
    {{-0x1.bb67ae8584caap+1023, +0x1.0000000000000p+1023}, {+0x1.62e42fefa39efp+9, +0x1.4f1a6c638d03fp+1}},
    {{+0x1.fffffffffffffp+1023, -0x0.0000000000001p-1022}, {+0x1.62e42fefa39efp+9, -0x0.0000000000000p-1022}},

    // norm(z) overflows
    {{-0x1.4e718d7d7625ap+664, -0x1.4e718d7d7625ap+665}, {+0x1.cd525d6474bb8p+8, -0x1.0468a8ace4df6p+1}},
    {{+0x1.ca3d8e6d80cbbp+511, -0x1.57ae2ad22098cp+511}, {+0x1.6300e9ed15a44p+8, -0x1.4978fa3269ee1p-1}},

#if !WITH_FP_ABRUPT_UNDERFLOW
    // abs(z) underflows
    {{-0x0.0000000000001p-1022, +0x0.0000000000001p-1022}, {-0x1.740bf7c0d927cp+9, +0x1.2d97c7f3321d2p+1}},
    {{+0x0.0000000000001p-1022, +0x0.8000000000000p-1022}, {-0x1.628b76e3a7b61p+9, +0x1.921fb54442d16p+0}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW

    // abs(z) close to underflow
    {{+0x1.4p-1022, +0x1p-1022}, {-0x1.61f684c577299p+9, +0x1.5977a5103ea92p-1}},

    // norm(z) underflows
    {{+0x1.87e92154ef7acp-664, -0x1.87e92154ef7acp-665}, {-0x1.cbb65944f5e2bp+8, -0x1.dac670561bb4fp-2}},
    {{-0x1.9be34ac46b18fp-513, -0x1.1297872d9cbb5p-512}, {-0x1.62991d5d62a5ep+8, -0x1.1b6e192ebbe44p+1}},

    // z close to 1
    {{+0x1.0000000000001p+0, -0.0}, {+0x1.fffffffffffffp-53, -0.0}, {false, true}},
    {{+0x1.fffffffffffffp-1, +0.0}, {-0x1.0000000000000p-53, +0.0}, {false, true}},
#if !WITH_FP_ABRUPT_UNDERFLOW
    {{+0x1.0000000000001p+0, -0x0.0000000000001p-1022}, {+0x1.fffffffffffffp-53, -0x0.0000000000001p-1022}},
    {{+0x1.0000000000000p+0, +0x0.0000000000001p-1022}, {+0x0.0000000000000p-1022, +0x0.0000000000001p-1022}},
    {{+0x1.fffffffffffffp-1, -0x0.0000000000001p-1022}, {-0x1.0000000000000p-53, -0x0.0000000000001p-1022}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
    {{+0x1.0000000000001p+0, +0x1p-1022}, {+0x1.fffffffffffffp-53, +0x0.fffffffffffffp-1022}},
    {{+0x1.0000000000000p+0, -0x1p-1022}, {+0x0.0000000000000p-1022, -0x1.0000000000000p-1022}},
    {{+0x1.fffffffffffffp-1, +0x1p-1022}, {-0x1.0000000000000p-53, +0x1.0000000000001p-1022}},
    {{+0x1.0000000000001p+0, -0x1p-52}, {+0x1.0000000000000p-52, -0x1.ffffffffffffep-53}},
    {{+0x1.0000000000000p+0, +0x1p-52}, {+0x1.0000000000000p-105, +0x1.0000000000000p-52}},
    {{+0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.0000000000000p-52}},
    {{+0x1.fffffffffffffp-1, +0x1p-26}, {+0x1.0000000000000p-107, +0x1.0000000000000p-26}},

    // z close to -1, i, or -i
    {{-0x1.0000000000001p+0, -0x1p-52}, {+0x1.0000000000000p-52, -0x1.921fb54442d18p+1}},
    {{-0x1.0000000000000p+0, +0x1p-52}, {+0x1.0000000000000p-105, +0x1.921fb54442d18p+1}},
    {{-0x1.fffffffffffffp-1, -0x1p-52}, {-0x1.ffffffffffffep-54, -0x1.921fb54442d18p+1}},
    {{+0x1p-52, +0x1.0000000000001p+0}, {+0x1.0000000000000p-52, +0x1.921fb54442d17p+0}},
    {{-0x1p-52, +0x1.0000000000000p+0}, {+0x1.0000000000000p-105, +0x1.921fb54442d19p+0}},
    {{+0x1p-52, +0x1.fffffffffffffp-1}, {-0x1.ffffffffffffep-54, +0x1.921fb54442d17p+0}},
    {{-0x1p-52, -0x1.0000000000001p+0}, {+0x1.0000000000000p-52, -0x1.921fb54442d19p+0}},
    {{+0x1p-52, -0x1.0000000000000p+0}, {+0x1.0000000000000p-105, -0x1.921fb54442d17p+0}},
    {{-0x1p-52, -0x1.fffffffffffffp-1}, {-0x1.ffffffffffffep-54, -0x1.921fb54442d19p+0}},

    // abs(z) close to 1
    {{+0x1.6a09e667f3bccp-1, +0x1.6a09e667f3bccp-1}, {-0x1.98d4d0da05571p-54, +0x1.921fb54442d18p-1}},
    {{+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}, {+0x1.3b3efbf5e2229p-54, -0x1.921fb54442d18p-1}},
    {{-0x1.3333333333333p-1, -0x1.999999999999ap-1}, {+0x1.999999999999ap-56, -0x1.1b6e192ebbe44p+1}},
    {{-0x1.3333333333333p-1, +0x1.9999999999999p-1}, {-0x1.3333333333333p-54, +0x1.1b6e192ebbe44p+1}},
    {{+0x1.69fbe76c8b439p-1, +0x1.69fbe76c8b439p-1}, {-0x1.3cb7c059d6699p-13, +0x1.921fb54442d18p-1}},
    {{-0x1.d89d89d89d89ep-1, +0x1.89d89d89d89d6p-2}, {-0x1.3b13b13b13b0cp-57, +0x1.5f97315254857p+1}},

    // control flow edge cases
    {{+0x1p-1, +0x1.fffffffffffffp-2}, {-0x1.62e42fefa39f0p-2, +0x1.921fb54442d18p-1}},
    {{+0x1p-1, +0x1.0000000000000p-1}, {-0x1.62e42fefa39efp-2, +0x1.921fb54442d18p-1}},
    {{+0x1p-1, +0x1.0000000000001p-1}, {-0x1.62e42fefa39edp-2, +0x1.921fb54442d19p-1}},
    {{+0x1p-1, +0x1.a887293fd6f33p+0}, {+0x1.193ea7aad0309p-1, +0x1.4727f6d4d118cp+0}},
    {{+0x1p-1, +0x1.a887293fd6f34p+0}, {+0x1.193ea7aad030ap-1, +0x1.4727f6d4d118dp+0}},
    {{+0x1p-1, +0x1.a887293fd6f35p+0}, {+0x1.193ea7aad030cp-1, +0x1.4727f6d4d118dp+0}},
    {{+6.703903964971297e+153, +6e+153}, {+0x1.627e0d1e7a85dp+8, +0x1.75c8a07421461p-1}},
    {{+6.703903964971298e+153, +6e+153}, {+0x1.627e0d1e7a85dp+8, +0x1.75c8a07421461p-1}},
    {{+1e-154, +1.4156865331029228e-146}, {-0x1.4fd46e5c84953p+8, +0x1.921fb525ec2fcp+0}},
    {{+1e-154, +1.415686533102923e-146}, {-0x1.4fd46e5c84953p+8, +0x1.921fb525ec2fcp+0}},
};
|
||||
|
||||
// test vectors for the complex logarithm in single precision
//
// layout of one entry: {input {re, im}}, {expected {re, im}}, and optionally a pair of bool flags
// NOTE(review): the flag pair is presumably "compare real part exactly, compare imaginary part exactly";
// confirm against the definition of complex_unary_test_case in test.hpp
constexpr complex_unary_test_case<float> log_float_cases[] = {
    // normal cases
    {{+0x1.8p+0F, +0x1p+1F}, {+0x1.d52410p-1F, +0x1.dac670p-1F}},
    {{+0x1.8p+0F, -0x1p+1F}, {+0x1.d52410p-1F, -0x1.dac670p-1F}},
    {{-0x1.8p+0F, +0x1p+1F}, {+0x1.d52410p-1F, +0x1.1b6e1ap+1F}},
    {{-0x1.8p+0F, -0x1p+1F}, {+0x1.d52410p-1F, -0x1.1b6e1ap+1F}},
    {{+0x1.8p-1F, +0x1p+0F}, {+0x1.c8ff7cp-3F, +0x1.dac670p-1F}},
    {{+0x1.8p-1F, -0x1p+0F}, {+0x1.c8ff7cp-3F, -0x1.dac670p-1F}},
    {{-0x1.8p-1F, +0x1p+0F}, {+0x1.c8ff7cp-3F, +0x1.1b6e1ap+1F}},
    {{-0x1.8p-1F, -0x1p+0F}, {+0x1.c8ff7cp-3F, -0x1.1b6e1ap+1F}},
    {{+0x1.8p-2F, +0x1p-1F}, {-0x1.e148a2p-2F, +0x1.dac670p-1F}},
    {{+0x1.8p-2F, -0x1p-1F}, {-0x1.e148a2p-2F, -0x1.dac670p-1F}},
    {{-0x1.8p-2F, +0x1p-1F}, {-0x1.e148a2p-2F, +0x1.1b6e1ap+1F}},
    {{-0x1.8p-2F, -0x1p-1F}, {-0x1.e148a2p-2F, -0x1.1b6e1ap+1F}},

    // special cases
    {{+1.0F, +0.0F}, {0.0F, +0.0F}, {true, true}},
    {{+1.0F, -0.0F}, {0.0F, -0.0F}, {true, true}},
    {{+0.0F, +1.0F}, {0.0F, +pi_over_2_v<float>}, {true, false}},
    {{+0.0F, -1.0F}, {0.0F, -pi_over_2_v<float>}, {true, false}},
    {{-0.0F, +1.0F}, {0.0F, +pi_over_2_v<float>}, {true, false}},
    {{-0.0F, -1.0F}, {0.0F, -pi_over_2_v<float>}, {true, false}},
    {{-1.0F, +0.0F}, {0.0F, +pi_v<float>}, {true, false}},
    {{-1.0F, -0.0F}, {0.0F, -pi_v<float>}, {true, false}},

#if !FP_PRESET_FAST
    // zero, infinite and NaN inputs; compiled out when FP_PRESET_FAST is nonzero
    {{+0.0F, +0.0F}, {-float_inf, +0.0F}, {true, true}},
    {{+0.0F, -0.0F}, {-float_inf, -0.0F}, {true, true}},
    {{-0.0F, +0.0F}, {-float_inf, +pi_v<float>}, {true, false}},
    {{-0.0F, -0.0F}, {-float_inf, -pi_v<float>}, {true, false}},
    {{+float_inf, +0.0F}, {+float_inf, +0.0F}, {true, true}},
    {{+float_inf, -0.0F}, {+float_inf, -0.0F}, {true, true}},
    {{+float_inf, +1.0F}, {+float_inf, +0.0F}, {true, true}},
    {{+float_inf, -1.0F}, {+float_inf, -0.0F}, {true, true}},
    {{+float_inf, +float_inf}, {+float_inf, +pi_over_4_v<float>}, {true, false}},
    {{+float_inf, -float_inf}, {+float_inf, -pi_over_4_v<float>}, {true, false}},
    {{+1.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
    {{+1.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
    {{+0.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
    {{+0.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
    {{-0.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
    {{-0.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
    {{-1.0F, +float_inf}, {+float_inf, +pi_over_2_v<float>}, {true, false}},
    {{-1.0F, -float_inf}, {+float_inf, -pi_over_2_v<float>}, {true, false}},
    {{-float_inf, +float_inf}, {+float_inf, +pi_3_over_4_v<float>}, {true, false}},
    {{-float_inf, -float_inf}, {+float_inf, -pi_3_over_4_v<float>}, {true, false}},
    {{-float_inf, +1.0F}, {+float_inf, +pi_v<float>}, {true, false}},
    {{-float_inf, -1.0F}, {+float_inf, -pi_v<float>}, {true, false}},
    {{-float_inf, +0.0F}, {+float_inf, +pi_v<float>}, {true, false}},
    {{-float_inf, -0.0F}, {+float_inf, -pi_v<float>}, {true, false}},
    {{+float_inf, float_nan}, {+float_inf, float_nan}, {true, true}},
    {{-float_inf, float_nan}, {+float_inf, float_nan}, {true, true}},
    {{float_nan, +float_inf}, {+float_inf, float_nan}, {true, true}},
    {{float_nan, -float_inf}, {+float_inf, float_nan}, {true, true}},
    {{float_nan, +0.0F}, {float_nan, float_nan}, {true, true}},
    {{+0.0F, float_nan}, {float_nan, float_nan}, {true, true}},
    {{float_nan, float_nan}, {float_nan, float_nan}, {true, true}},
#endif // !FP_PRESET_FAST

    // abs(z) overflows
    {{+0x1.fffffep+127F, +0x1.fffffep+127F}, {+0x1.644714p+6F, +0x1.921fb6p-1F}},
    {{-0x1.bb67aep+127F, +0x1.000000p+127F}, {+0x1.62e430p+6F, +0x1.4f1a6cp+1F}},
    {{+0x1.fffffep+127F, -0x0.000002p-126F}, {+0x1.62e430p+6F, -0x0.000000p-126F}},

    // norm(z) overflows
    {{-0x1.08b2a2p+83F, -0x1.08b2a2p+84F}, {+0x1.d2f46cp+5F, -0x1.0468a8p+1F}},
    {{+0x1.bc16d6p+63F, -0x1.4d1120p+63F}, {+0x1.6389c2p+5F, -0x1.4978fap-1F}},

#if !WITH_FP_ABRUPT_UNDERFLOW
    // abs(z) underflows
    {{-0x0.000002p-126F, +0x0.000002p-126F}, {-0x1.9bbabcp+6F, +0x1.2d97c8p+1F}},
    {{+0x0.000002p-126F, +0x0.800000p-126F}, {-0x1.601e68p+6F, +0x1.921fb2p+0F}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW

    // abs(z) close to underflow
    {{+0x1.4p-126F, +0x1p-126F}, {-0x1.5b76d6p+6F, +0x1.5977a6p-1F}},

    // norm(z) underflows
    {{+0x1.ef2d10p-83F, -0x1.ef2d10p-84F}, {-0x1.c6144ap+5F, -0x1.dac670p-2F}},
    {{-0x1.622d70p-61F, -0x1.d83c94p-61F}, {-0x1.4b9280p+5F, -0x1.1b6e1ap+1F}},

    // z close to 1
    {{+0x1.000002p+0F, -0.0F}, {+0x1.fffffep-24F, -0.0F}, {false, true}},
    {{+0x1.fffffep-1F, +0.0F}, {-0x1.000000p-24F, +0.0F}, {false, true}},
#if !WITH_FP_ABRUPT_UNDERFLOW
    {{+0x1.000002p+0F, -0x0.000002p-126F}, {+0x1.fffffep-24F, -0x0.000002p-126F}},
    {{+0x1.000000p+0F, +0x0.000002p-126F}, {+0x0.000000p-126F, +0x0.000002p-126F}},
    {{+0x1.fffffep-1F, -0x0.000002p-126F}, {-0x1.000000p-24F, -0x0.000002p-126F}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
    {{+0x1.000002p+0F, +0x1.000000p-126F}, {+0x1.fffffep-24F, +0x0.fffffep-126F}},
    {{+0x1.000000p+0F, -0x1.000000p-126F}, {+0x0.000000p+0F, -0x1.000000p-126F}},
    {{+0x1.fffffep-1F, +0x1.000000p-126F}, {-0x1.000000p-24F, +0x1.000002p-126F}},
    {{+0x1.000002p+0F, -0x1.000000p-23F}, {+0x1.000000p-23F, -0x1.fffffcp-24F}},
    {{+0x1.000000p+0F, +0x1.000000p-23F}, {+0x1.000000p-47F, +0x1.000000p-23F}},
    {{+0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.000000p-23F}},
    {{+0x1.fffffep-1F, +0x1.6a09e6p-12F}, {-0x1.302ae0p-52F, +0x1.6a09e6p-12F}},

    // z close to -1, i, or -i
    {{-0x1.000002p+0F, -0x1.000000p-23F}, {+0x1.000000p-23F, -0x1.921fb4p+1F}},
    {{-0x1.000000p+0F, +0x1.000000p-23F}, {+0x1.000000p-47F, +0x1.921fb4p+1F}},
    {{-0x1.fffffep-1F, -0x1.000000p-23F}, {-0x1.fffffcp-25F, -0x1.921fb4p+1F}},
    {{+0x1.000000p-23F, +0x1.000002p+0F}, {+0x1.000000p-23F, +0x1.921fb4p+0F}},
    {{-0x1.000000p-23F, +0x1.000000p+0F}, {+0x1.000000p-47F, +0x1.921fb8p+0F}},
    {{+0x1.000000p-23F, +0x1.fffffep-1F}, {-0x1.fffffcp-25F, +0x1.921fb4p+0F}},
    {{-0x1.000000p-23F, -0x1.000002p+0F}, {+0x1.000000p-23F, -0x1.921fb8p+0F}},
    {{+0x1.000000p-23F, -0x1.000000p+0F}, {+0x1.000000p-47F, -0x1.921fb4p+0F}},
    {{-0x1.000000p-23F, -0x1.fffffep-1F}, {-0x1.fffffcp-25F, -0x1.921fb8p+0F}},

    // abs(z) close to 1
    {{+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}, {-0x1.26055cp-26F, +0x1.921fb6p-1F}},
    {{+0x1.6a09e8p-1F, -0x1.6a09e8p-1F}, {+0x1.20888ep-24F, -0x1.921fb6p-1F}},
    {{-0x1.333334p-1F, -0x1.99999ap-1F}, {+0x1.99999ap-26F, -0x1.1b6e1ap+1F}},
    {{-0x1.333332p-1F, +0x1.99999ap-1F}, {-0x1.999998p-27F, +0x1.1b6e18p+1F}},
    {{+0x1.69fbe8p-1F, +0x1.69fbe8p-1F}, {-0x1.3caab8p-13F, +0x1.921fb6p-1F}},
    {{-0x1.d89d8ap-1F, +0x1.89d89ep-2F}, {+0x1.d89d8ap-28F, +0x1.5f9732p+1F}},

    // control flow edge cases
    {{+0x1p-1F, +0x1.fffffep-2F}, {-0x1.62e432p-2F, +0x1.921fb4p-1F}},
    {{+0x1p-1F, +0x1.000000p-1F}, {-0x1.62e430p-2F, +0x1.921fb6p-1F}},
    {{+0x1p-1F, +0x1.000002p-1F}, {-0x1.62e42cp-2F, +0x1.921fb8p-1F}},
    {{+0x1p-1F, +0x1.a88728p+0F}, {+0x1.193ea6p-1F, +0x1.4727f6p+0F}},
    {{+0x1p-1F, +0x1.a8872ap+0F}, {+0x1.193ea8p-1F, +0x1.4727f6p+0F}},
    {{+0x1p-1F, +0x1.a8872cp+0F}, {+0x1.193eaap-1F, +0x1.4727f8p+0F}},
    {{+9.223371e+18F, +9e+18F}, {+0x1.60059cp+5F, +0x1.8bd930p-1F}},
    {{+9.2233715e+18F, +9e+18F}, {+0x1.60059cp+5F, +0x1.8bd930p-1F}},
    {{+7e-20F, +4.440892e-16F}, {-0x1.1acdd6p+5F, +0x1.921560p+0F}},
    {{+7e-20F, +4.4408926e-16F}, {-0x1.1acdd6p+5F, +0x1.921560p+0F}},
};
|
|
@ -0,0 +1,249 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <complex>
|
||||
|
||||
#include "floating_point_utils.hpp"
|
||||
#include "test.hpp"
|
||||
|
||||
// test vectors for the complex square root in double precision
//
// layout of one entry: {input {re, im}}, {expected {re, im}}, and optionally a pair of bool flags
// NOTE(review): the flag pair is presumably "compare real part exactly, compare imaginary part exactly";
// confirm against the definition of complex_unary_test_case in test.hpp
constexpr complex_unary_test_case<double> sqrt_double_cases[] = {
    // normal cases
    {{+0x3p-0, +0x4p-0}, {+0x2p-0, +0x1p-0}},
    {{+0x3p-0, -0x4p-0}, {+0x2p-0, -0x1p-0}},
    {{-0x3p-0, +0x4p-0}, {+0x1p-0, +0x2p-0}},
    {{-0x3p-0, -0x4p-0}, {+0x1p-0, -0x2p-0}},
    {{+0x3p-2, +0x4p-2}, {+0x2p-1, +0x1p-1}},
    {{+0x3p-2, -0x4p-2}, {+0x2p-1, -0x1p-1}},
    {{-0x3p-2, +0x4p-2}, {+0x1p-1, +0x2p-1}},
    {{-0x3p-2, -0x4p-2}, {+0x1p-1, -0x2p-1}},
    {{+0x3p-4, +0x4p-4}, {+0x2p-2, +0x1p-2}},
    {{+0x3p-4, -0x4p-4}, {+0x2p-2, -0x1p-2}},
    {{-0x3p-4, +0x4p-4}, {+0x1p-2, +0x2p-2}},
    {{-0x3p-4, -0x4p-4}, {+0x1p-2, -0x2p-2}},

    // special cases
    {{+0.0, +0.0}, {+0.0, +0.0}, {true, true}},
    {{+0.0, -0.0}, {+0.0, -0.0}, {true, true}},
    {{-0.0, +0.0}, {+0.0, +0.0}, {true, true}},
    {{-0.0, -0.0}, {+0.0, -0.0}, {true, true}},
    {{+1.0, +0.0}, {+1.0, +0.0}, {false, true}},
    {{+1.0, -0.0}, {+1.0, -0.0}, {false, true}},
    {{-1.0, +0.0}, {+0.0, +1.0}, {true, false}},
    {{-1.0, -0.0}, {+0.0, -1.0}, {true, false}},
    {{+0.0, +1.0}, {+0x1.6a09e667f3bcdp-1, +0x1.6a09e667f3bcdp-1}},
    {{+0.0, -1.0}, {+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}},
    {{-0.0, +1.0}, {+0x1.6a09e667f3bcdp-1, +0x1.6a09e667f3bcdp-1}},
    {{-0.0, -1.0}, {+0x1.6a09e667f3bcdp-1, -0x1.6a09e667f3bcdp-1}},

#if !FP_PRESET_FAST
    // infinite and NaN inputs; compiled out when FP_PRESET_FAST is nonzero
    {{+double_inf, +0.0}, {+double_inf, +0.0}, {true, true}},
    {{+double_inf, -0.0}, {+double_inf, -0.0}, {true, true}},
    {{-double_inf, +0.0}, {+0.0, +double_inf}, {true, true}},
    {{-double_inf, -0.0}, {+0.0, -double_inf}, {true, true}},
    {{+double_inf, +1.0}, {+double_inf, +0.0}, {true, true}},
    {{+double_inf, -1.0}, {+double_inf, -0.0}, {true, true}},
    {{-double_inf, +1.0}, {+0.0, +double_inf}, {true, true}},
    {{-double_inf, -1.0}, {+0.0, -double_inf}, {true, true}},
    {{+double_inf, +double_inf}, {+double_inf, +double_inf}, {true, true}},
    {{+double_inf, -double_inf}, {+double_inf, -double_inf}, {true, true}},
    {{-double_inf, +double_inf}, {+double_inf, +double_inf}, {true, true}},
    {{-double_inf, -double_inf}, {+double_inf, -double_inf}, {true, true}},
    {{+1.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
    {{+1.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
    {{-1.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
    {{-1.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
    {{+0.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
    {{+0.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
    {{-0.0, +double_inf}, {+double_inf, +double_inf}, {true, true}},
    {{-0.0, -double_inf}, {+double_inf, -double_inf}, {true, true}},
    {{+double_inf, +double_nan}, {+double_inf, +double_nan}, {true, true}},
    {{+double_inf, -double_nan}, {+double_inf, -double_nan}, {true, true}},
    {{-double_inf, +double_nan}, {+double_nan, +double_inf}, {true, true}},
    {{-double_inf, -double_nan}, {+double_nan, -double_inf}, {true, true}},
    {{+double_nan, +double_inf}, {+double_inf, +double_inf}, {true, true}},
    {{+double_nan, -double_inf}, {+double_inf, -double_inf}, {true, true}},
    {{+double_nan, +0.0}, {+double_nan, +double_nan}, {true, true}},
    {{+double_nan, -0.0}, {+double_nan, -double_nan}, {true, true}},
    {{+0.0, +double_nan}, {+double_nan, +double_nan}, {true, true}},
    {{+0.0, -double_nan}, {+double_nan, -double_nan}, {true, true}},
    {{+double_nan, +double_nan}, {+double_nan, +double_nan}, {true, true}},
    {{+double_nan, -double_nan}, {+double_nan, -double_nan}, {true, true}},
#endif // !FP_PRESET_FAST

    // abs(z) overflows
    {{+0x1.fffffffffffffp+1023, +0x1.fffffffffffffp+1023}, {+0x1.19435caffa9f8p+512, +0x1.d203138f6c828p+510}},
    {{-0x1.bb67ae8584caap+1023, +0x1.0000000000000p+1023}, {+0x1.0907dc1930691p+510, +0x1.ee8dd4748bf15p+511}},
    {{+0x1.fffffffffffffp+1023, -0x0.0000000000001p-1022}, {+0x1.fffffffffffffp+511, -0x0.0000000000000p-1022}},

    // norm(z) overflows
    {{-0x1.4e718d7d7625ap+664, -0x1.4e718d7d7625ap+665}, {+0x1.cc1033be914a7p+331, -0x1.7432f2f528ea0p+332}},
    {{+0x1.ca3d8e6d80cbbp+511, -0x1.57ae2ad22098cp+511}, {+0x1.00e0ed3ec75c3p+256, -0x1.56813c53b47afp+254}},

#if !WITH_FP_ABRUPT_UNDERFLOW
    // abs(z) underflows
    {{-0x0.0000000000001p-1022, +0x0.0000000000001p-1022}, {+0x1.d203138f6c828p-539, +0x1.19435caffa9f9p-537}},
    {{+0x0.0000000000001p-1022, +0x0.8000000000000p-1022}, {+0x1.0000000000001p-512, +0x1.ffffffffffffep-513}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW

    // abs(z) close to underflow
    {{+0x1.4p-1022, +0x1p-1022}, {+0x1.31a33f3eb2fd9p-511, +0x1.acd8ff10ebe7ep-513}},

    // norm(z) underflows
    {{+0x1.87e92154ef7acp-664, -0x1.87e92154ef7acp-665}, {+0x1.45f5e3f782563p-332, -0x1.33cb9c4327c54p-334}},
    {{-0x1.9be34ac46b18fp-513, -0x1.1297872d9cbb5p-512}, {+0x1.09220ecd9c241p-257, -0x1.09220ecd9c241p-256}},

    // control flow edge cases
    {{+0x1p-2, +0x1.fffffffffffffp-1}, {+0x1.99b96593b936dp-1, +0x1.3fe72a921c6f4p-1}},
    {{+0x1p-2, +0x1.0000000000000p+0}, {+0x1.99b96593b936ep-1, +0x1.3fe72a921c6f4p-1}},
    {{+0x1p-2, +0x1.0000000000001p+0}, {+0x1.99b96593b936ep-1, +0x1.3fe72a921c6f5p-1}},
    {{+0x1p+0, +0x1p-54}, {+0x1.0000000000000p+0, +0x1.0000000000000p-55}},
    {{+0x1p+0, +0x1p-53}, {+0x1.0000000000000p+0, +0x1.0000000000000p-54}},
    {{+0x1p+0, +0x1p-52}, {+0x1.0000000000000p+0, +0x1.0000000000000p-53}},
    {{+0x1p+0, +0x1.ffffffffffffep-2}, {+0x1.077225f1da572p+0, +0x1.f18773c56f720p-3}},
    {{+0x1p+0, +0x1.fffffffffffffp-2}, {+0x1.077225f1da572p+0, +0x1.f18773c56f721p-3}},
    {{+0x1p+0, +0x1.0000000000000p-1}, {+0x1.077225f1da572p+0, +0x1.f18773c56f721p-3}},
    {{+0x1p+0, +0x1.0000000000001p-1}, {+0x1.077225f1da572p+0, +0x1.f18773c56f723p-3}},
    {{+0x1.ffffffffffffep-970, +0x1.fffffffffffffp-970}, {+0x1.8dc42193d5c02p-485, +0x1.49852f983efddp-486}},
    {{+0x1.fffffffffffffp-970, +0x1.0000000000000p-969}, {+0x1.8dc42193d5c02p-485, +0x1.49852f983efdep-486}},
    {{+0x1.0000000000000p-969, +0x1.0000000000001p-969}, {+0x1.8dc42193d5c03p-485, +0x1.49852f983efdep-486}},
    {{+0x1.ffffffffffffep-971, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88455p-485, +0x1.92826ef258d1bp-486}},
    {{+0x1.fffffffffffffp-971, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
    {{+0x1.0000000000000p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1cp-486}},
    {{+0x1.fffffffffffffp-971, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
    {{+0x1.0000000000000p-970, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
    {{+0x1.0000000000001p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88457p-485, +0x1.92826ef258d1cp-486}},
    {{+0x1.0000000000000p-970, +0x1.fffffffffffffp-970}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
    {{+0x1.0000000000001p-970, +0x1.0000000000000p-969}, {+0x1.45a3146a88456p-485, +0x1.92826ef258d1bp-486}},
    {{+0x1.0000000000002p-970, +0x1.0000000000001p-969}, {+0x1.45a3146a88457p-485, +0x1.92826ef258d1cp-486}},
    {{+0x1p-1022, +0x1.fffffffffffffp-970}, {+0x1.0000000000000p-485, +0x1.fffffffffffffp-486}},
    {{+0x1p-1022, +0x1.0000000000000p-969}, {+0x1.0000000000000p-485, +0x1.0000000000000p-485}},
    {{+0x1p-1022, +0x1.0000000000001p-969}, {+0x1.0000000000001p-485, +0x1.0000000000000p-485}},

#if !WITH_FP_ABRUPT_UNDERFLOW
    // same edge cases with a subnormal real part; compiled out when WITH_FP_ABRUPT_UNDERFLOW is nonzero
    {{+0x0.0000000000001p-1022, +0x1.fffffffffffffp-970}, {+0x1.0000000000000p-485, +0x1.fffffffffffffp-486}},
    {{+0x0.0000000000001p-1022, +0x1.0000000000000p-969}, {+0x1.0000000000000p-485, +0x1.0000000000000p-485}},
    {{+0x0.0000000000001p-1022, +0x1.0000000000001p-969}, {+0x1.0000000000001p-485, +0x1.0000000000000p-485}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
};
|
||||
|
||||
// Test vectors for sqrt(complex<float>).
//
// Each entry is {input, expected_result[, {real, imag}]}. The optional third
// member fills complex_result_exactness: a `true` flag makes the test compare
// that component with a tolerance of 0 ULP; when it is omitted both flags
// default to false and the regular ULP tolerance applies.
//
// Every literal in this table carries the F suffix so that no implicit
// double-to-float conversion takes place in a float table.
constexpr complex_unary_test_case<float> sqrt_float_cases[] = {
    // normal cases
    {{+0x3p-0F, +0x4p-0F}, {+0x2p-0F, +0x1p-0F}},
    {{+0x3p-0F, -0x4p-0F}, {+0x2p-0F, -0x1p-0F}},
    {{-0x3p-0F, +0x4p-0F}, {+0x1p-0F, +0x2p-0F}},
    {{-0x3p-0F, -0x4p-0F}, {+0x1p-0F, -0x2p-0F}},
    {{+0x3p-2F, +0x4p-2F}, {+0x2p-1F, +0x1p-1F}},
    {{+0x3p-2F, -0x4p-2F}, {+0x2p-1F, -0x1p-1F}},
    {{-0x3p-2F, +0x4p-2F}, {+0x1p-1F, +0x2p-1F}},
    {{-0x3p-2F, -0x4p-2F}, {+0x1p-1F, -0x2p-1F}},
    {{+0x3p-4F, +0x4p-4F}, {+0x2p-2F, +0x1p-2F}},
    {{+0x3p-4F, -0x4p-4F}, {+0x2p-2F, -0x1p-2F}},
    {{-0x3p-4F, +0x4p-4F}, {+0x1p-2F, +0x2p-2F}},
    {{-0x3p-4F, -0x4p-4F}, {+0x1p-2F, -0x2p-2F}},

    // special cases
    {{+0.0F, +0.0F}, {+0.0F, +0.0F}, {true, true}},
    {{+0.0F, -0.0F}, {+0.0F, -0.0F}, {true, true}},
    {{-0.0F, +0.0F}, {+0.0F, +0.0F}, {true, true}},
    {{-0.0F, -0.0F}, {+0.0F, -0.0F}, {true, true}},
    {{+1.0F, +0.0F}, {+1.0F, +0.0F}, {false, true}},
    {{+1.0F, -0.0F}, {+1.0F, -0.0F}, {false, true}},
    {{-1.0F, +0.0F}, {+0.0F, +1.0F}, {true, false}},
    {{-1.0F, -0.0F}, {+0.0F, -1.0F}, {true, false}},
    {{+0.0F, +1.0F}, {+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}},
    {{+0.0F, -1.0F}, {+0x1.6a09e6p-1F, -0x1.6a09e6p-1F}},
    {{-0.0F, +1.0F}, {+0x1.6a09e6p-1F, +0x1.6a09e6p-1F}},
    {{-0.0F, -1.0F}, {+0x1.6a09e6p-1F, -0x1.6a09e6p-1F}},

#if !FP_PRESET_FAST
    // infinities and NaNs are only exercised when not building with the fast preset
    {{+float_inf, +0.0F}, {+float_inf, +0.0F}, {true, true}},
    {{+float_inf, -0.0F}, {+float_inf, -0.0F}, {true, true}},
    {{-float_inf, +0.0F}, {+0.0F, +float_inf}, {true, true}},
    {{-float_inf, -0.0F}, {+0.0F, -float_inf}, {true, true}},
    {{+float_inf, +1.0F}, {+float_inf, +0.0F}, {true, true}},
    {{+float_inf, -1.0F}, {+float_inf, -0.0F}, {true, true}},
    {{-float_inf, +1.0F}, {+0.0F, +float_inf}, {true, true}},
    {{-float_inf, -1.0F}, {+0.0F, -float_inf}, {true, true}},
    {{+float_inf, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{+float_inf, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{-float_inf, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{-float_inf, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{+1.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{+1.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{-1.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{-1.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{+0.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{+0.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{-0.0F, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{-0.0F, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{+float_inf, +float_nan}, {+float_inf, +float_nan}, {true, true}},
    {{+float_inf, -float_nan}, {+float_inf, -float_nan}, {true, true}},
    {{-float_inf, +float_nan}, {+float_nan, +float_inf}, {true, true}},
    {{-float_inf, -float_nan}, {+float_nan, -float_inf}, {true, true}},
    {{+float_nan, +float_inf}, {+float_inf, +float_inf}, {true, true}},
    {{+float_nan, -float_inf}, {+float_inf, -float_inf}, {true, true}},
    {{+float_nan, +0.0F}, {+float_nan, +float_nan}, {true, true}},
    {{+float_nan, -0.0F}, {+float_nan, -float_nan}, {true, true}},
    {{+0.0F, +float_nan}, {+float_nan, +float_nan}, {true, true}},
    {{+0.0F, -float_nan}, {+float_nan, -float_nan}, {true, true}},
    {{+float_nan, +float_nan}, {+float_nan, +float_nan}, {true, true}},
    {{+float_nan, -float_nan}, {+float_nan, -float_nan}, {true, true}},
#endif // !FP_PRESET_FAST

    // abs(z) overflows
    {{+0x1.fffffep+127F, +0x1.fffffep+127F}, {+0x1.19435cp+64F, +0x1.d20312p+62F}},
    {{-0x1.bb67aep+127F, +0x1.000000p+127F}, {+0x1.0907dcp+62F, +0x1.ee8dd4p+63F}},
    {{+0x1.fffffep+127F, -0x0.000002p-126F}, {+0x1.fffffep+63F, -0x0.000000p-126F}},

    // norm(z) overflows
    {{-0x1.08b2a2p+83F, -0x1.08b2a2p+84F}, {+0x1.216970p+41F, -0x1.d4473ap+41F}},
    {{+0x1.bc16d6p+63F, -0x1.4d1120p+63F}, {+0x1.f9c31ep+31F, -0x1.512cbep+30F}},

#if !WITH_FP_ABRUPT_UNDERFLOW
    // abs(z) underflows
    {{-0x0.000002p-126F, +0x0.000002p-126F}, {+0x1.498530p-76F, +0x1.8dc422p-75F}},
    {{+0x0.000002p-126F, +0x0.800000p-126F}, {+0x1.000002p-64F, +0x1.fffffcp-65F}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW

    // abs(z) close to underflow
    {{+0x1.4p-126F, +0x1p-126F}, {+0x1.31a340p-63F, +0x1.acd900p-65F}},

    // norm(z) underflows
    {{+0x1.ef2d1p-83F, -0x1.ef2d10p-84F}, {+0x1.0314d8p-41F, -0x1.e9495ep-44F}},
    {{-0x1.622d7p-61F, -0x1.d83c94p-61F}, {+0x1.ebb76ep-32F, -0x1.ebb770p-31F}},

    // control flow edge cases
    {{+0x1p-2F, +0x1.fffffep-1F}, {+0x1.99b964p-1F, +0x1.3fe72ap-1F}},
    {{+0x1p-2F, +0x1.000000p+0F}, {+0x1.99b966p-1F, +0x1.3fe72ap-1F}},
    {{+0x1p-2F, +0x1.000002p+0F}, {+0x1.99b966p-1F, +0x1.3fe72cp-1F}},
    {{+0x1p+0F, +0x1p-25F}, {+0x1.000000p+0F, +0x1.000000p-26F}},
    {{+0x1p+0F, +0x1p-24F}, {+0x1.000000p+0F, +0x1.000000p-25F}},
    {{+0x1p+0F, +0x1p-23F}, {+0x1.000000p+0F, +0x1.000000p-24F}},
    {{+0x1p+0F, +0x1.fffffcp-2F}, {+0x1.077226p+0F, +0x1.f18770p-3F}},
    {{+0x1p+0F, +0x1.fffffep-2F}, {+0x1.077226p+0F, +0x1.f18772p-3F}},
    {{+0x1p+0F, +0x1.000000p-1F}, {+0x1.077226p+0F, +0x1.f18774p-3F}},
    {{+0x1p+0F, +0x1.000002p-1F}, {+0x1.077226p+0F, +0x1.f18778p-3F}},
    {{+0x1.fffffcp-103F, +0x1.fffffep-103F}, {+0x1.19435cp-51F, +0x1.d20314p-53F}},
    {{+0x1.fffffep-103F, +0x1.000000p-102F}, {+0x1.19435cp-51F, +0x1.d20314p-53F}},
    {{+0x1.000000p-102F, +0x1.000002p-102F}, {+0x1.19435ep-51F, +0x1.d20316p-53F}},
    {{+0x1.fffffcp-104F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
    {{+0x1.fffffep-104F, +0x1.000000p-102F}, {+0x1.cc8532p-52F, +0x1.1c9e02p-52F}},
    {{+0x1.000000p-103F, +0x1.000002p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e02p-52F}},
    {{+0x1.fffffep-104F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
    {{+0x1.000000p-103F, +0x1.000000p-102F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
    {{+0x1.000002p-103F, +0x1.000002p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e02p-52F}},
    {{+0x1.000000p-103F, +0x1.fffffep-103F}, {+0x1.cc8532p-52F, +0x1.1c9e00p-52F}},
    {{+0x1.000002p-103F, +0x1.000000p-102F}, {+0x1.cc8534p-52F, +0x1.1c9e00p-52F}},
    {{+0x1.000004p-103F, +0x1.000002p-102F}, {+0x1.cc8536p-52F, +0x1.1c9e02p-52F}},
    {{+0x1.000000p-126F, +0x1.fffffep-103F}, {+0x1.6a09e6p-52F, +0x1.6a09e4p-52F}},
    {{+0x1.000000p-126F, +0x1.000000p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e6p-52F}},
    {{+0x1.000000p-126F, +0x1.000002p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e8p-52F}},

#if !WITH_FP_ABRUPT_UNDERFLOW
    // same edge cases with a subnormal real part
    {{+0x0.000002p-126F, +0x1.fffffep-103F}, {+0x1.6a09e6p-52F, +0x1.6a09e6p-52F}},
    {{+0x0.000002p-126F, +0x1.000000p-102F}, {+0x1.6a09e6p-52F, +0x1.6a09e6p-52F}},
    {{+0x0.000002p-126F, +0x1.000002p-102F}, {+0x1.6a09e8p-52F, +0x1.6a09e8p-52F}},
#endif // !WITH_FP_ABRUPT_UNDERFLOW
};
|
|
@ -0,0 +1,125 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#include <cassert>
|
||||
#include <complex>
|
||||
#include <limits>
|
||||
|
||||
#include "floating_point_utils.hpp"
|
||||
#include "log_test_cases.hpp"
|
||||
#include "sqrt_test_cases.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace fputil;
|
||||
|
||||
void test_sqrt(const rounding_mode mode) {
|
||||
#if FP_PRESET_FAST
|
||||
constexpr int ulp_tolerance = 4;
|
||||
#else // ^^^ fp:fast / not fp:fast vvv
|
||||
const int ulp_tolerance = is_directed_rounding_mode(mode) ? 3 : 2;
|
||||
#endif // ^^^ not fp:fast ^^^
|
||||
|
||||
const auto check_result = [&](const auto& result, const auto& test_case) {
|
||||
const int case_real_ulp_tolerance = test_case.result_exactness.real ? 0 : ulp_tolerance;
|
||||
const int case_imag_ulp_tolerance = test_case.result_exactness.imag ? 0 : ulp_tolerance;
|
||||
return near_equal(result.real(), test_case.expected_result.real(), case_real_ulp_tolerance)
|
||||
&& near_equal(result.imag(), test_case.expected_result.imag(), case_imag_ulp_tolerance);
|
||||
};
|
||||
|
||||
for (const auto& c : sqrt_double_cases) {
|
||||
const auto result = [&] {
|
||||
rounding_guard guard(mode);
|
||||
return sqrt(c.input);
|
||||
}();
|
||||
|
||||
assert(check_result(result, c));
|
||||
}
|
||||
|
||||
for (const auto& c : sqrt_float_cases) {
|
||||
const auto result = [&] {
|
||||
rounding_guard guard(mode);
|
||||
return sqrt(c.input);
|
||||
}();
|
||||
|
||||
assert(check_result(result, c));
|
||||
}
|
||||
|
||||
for (const auto& c : sqrt_double_cases) {
|
||||
const auto result = [&] {
|
||||
rounding_guard guard(mode);
|
||||
return sqrt(static_cast<complex<long double>>(c.input));
|
||||
}();
|
||||
|
||||
assert(check_result(static_cast<complex<double>>(result), c));
|
||||
}
|
||||
}
|
||||
|
||||
void test_log(const rounding_mode mode) {
|
||||
#if FP_PRESET_FAST
|
||||
constexpr int ulp_tolerance = 4;
|
||||
// under /fp:fast, allow inaccurate real(log(z)) when |z| is close to 1
|
||||
constexpr double real_absolute_epsilon_tolerance = 4;
|
||||
#else // ^^^ fp:fast / not fp:fast vvv
|
||||
const int ulp_tolerance = is_directed_rounding_mode(mode) ? 3 : 2;
|
||||
constexpr double real_absolute_epsilon_tolerance = 0;
|
||||
#endif // ^^^ not fp:fast ^^^
|
||||
|
||||
const auto check_result = [&](const auto& result, const auto& test_case) {
|
||||
using Float = decltype(result.real());
|
||||
|
||||
constexpr auto epsilon = static_cast<double>(numeric_limits<Float>::epsilon());
|
||||
const int case_real_ulp_tolerance = test_case.result_exactness.real ? 0 : ulp_tolerance;
|
||||
const int case_imag_ulp_tolerance = test_case.result_exactness.imag ? 0 : ulp_tolerance;
|
||||
const double case_real_absolute_tolerance =
|
||||
test_case.result_exactness.real ? 0.0 : real_absolute_epsilon_tolerance * epsilon;
|
||||
|
||||
// TRANSITION: under rounding toward negative mode, log(1.0) returns +0.0 on x86, -0.0 on x64
|
||||
const auto is_mod_exactly_one = [](const auto& z) {
|
||||
// no other complex<binary_floating_point> has mod of exactly 1
|
||||
return (abs(real(z)) == 1 && imag(z) == 0) || (real(z) == 0 && abs(imag(z)) == 1);
|
||||
};
|
||||
|
||||
if (mode == rounding_mode::toward_negative && is_mod_exactly_one(test_case.input)) {
|
||||
return abs(result.real()) <= case_real_absolute_tolerance
|
||||
&& near_equal(result.imag(), test_case.expected_result.imag(), case_imag_ulp_tolerance);
|
||||
}
|
||||
|
||||
return near_equal(result.real(), test_case.expected_result.real(), case_real_ulp_tolerance,
|
||||
case_real_absolute_tolerance)
|
||||
&& near_equal(result.imag(), test_case.expected_result.imag(), case_imag_ulp_tolerance);
|
||||
};
|
||||
|
||||
for (const auto& c : log_double_cases) {
|
||||
const auto result = [&] {
|
||||
rounding_guard guard(mode);
|
||||
return log(c.input);
|
||||
}();
|
||||
|
||||
assert(check_result(result, c));
|
||||
}
|
||||
|
||||
for (const auto& c : log_float_cases) {
|
||||
const auto result = [&] {
|
||||
rounding_guard guard(mode);
|
||||
return log(c.input);
|
||||
}();
|
||||
|
||||
assert(check_result(result, c));
|
||||
}
|
||||
|
||||
for (const auto& c : log_double_cases) {
|
||||
const auto result = [&] {
|
||||
rounding_guard guard(mode);
|
||||
return log(static_cast<complex<long double>>(c.input));
|
||||
}();
|
||||
|
||||
assert(check_result(static_cast<complex<double>>(result), c));
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
for (const auto& mode : all_rounding_modes) {
|
||||
test_sqrt(mode);
|
||||
test_log(mode);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright (c) Microsoft Corporation.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <complex>
|
||||
#include <limits>
|
||||
|
||||
#include "floating_point_utils.hpp"
|
||||
|
||||
// Shorthand names for the infinity and quiet-NaN values of double and float.
constexpr double double_inf{std::numeric_limits<double>::infinity()};
constexpr double double_nan{std::numeric_limits<double>::quiet_NaN()};

constexpr float float_inf{std::numeric_limits<float>::infinity()};
constexpr float float_nan{std::numeric_limits<float>::quiet_NaN()};
|
||||
|
||||
// Per-component exactness flags for an expected complex result.
// Both flags default to false.
struct complex_result_exactness {
    bool real{false}; // flag for the real component
    bool imag{false}; // flag for the imaginary component
};
|
||||
|
||||
template <typename T>
|
||||
struct complex_unary_test_case {
|
||||
std::complex<T> input;
|
||||
std::complex<T> expected_result;
|
||||
complex_result_exactness result_exactness{};
|
||||
};
|
|
@ -0,0 +1,33 @@
|
|||
# Copyright (c) Microsoft Corporation.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
|
||||
RUNALL_INCLUDE .\prefix.lst
|
||||
RUNALL_CROSSLIST
|
||||
PM_CL="/FIfenv_prefix.hpp"
|
||||
RUNALL_CROSSLIST
|
||||
PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest"
|
||||
RUNALL_CROSSLIST
|
||||
PM_CL="/Od /MDd"
|
||||
PM_CL="/O2 /MD /permissive-"
|
||||
PM_CL="/O2 /MT /GL"
|
||||
# TRANSITION, -Wno-unused-command-line-argument is needed for the internal test harness
|
||||
PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /Od /MTd"
|
||||
PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /O2 /MT"
|
||||
PM_COMPILER="clang-cl" PM_CL="-fno-ms-compatibility -fno-delayed-template-parsing -Wno-unused-command-line-argument /O2 /MD /Oi-"
|
||||
RUNALL_CROSSLIST
|
||||
PM_CL=""
|
||||
PM_CL="/arch:IA32"
|
||||
PM_CL="/arch:AVX2"
|
||||
PM_CL="/arch:VFPv4"
|
||||
RUNALL_CROSSLIST
|
||||
PM_CL="/fp:strict /DFP_CONFIG_PRESET=1 /DTEST_FP_ROUNDING=1"
|
||||
PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=1"
|
||||
PM_CL="/fp:precise /DFP_CONFIG_PRESET=2 /DTEST_FP_ROUNDING=0"
|
||||
PM_CL="/fp:fast /DFP_CONFIG_PRESET=3 /DTEST_FP_ROUNDING=0"
|
||||
RUNALL_CROSSLIST
|
||||
PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=0"
|
||||
PM_CL="/DWITH_FP_ABRUPT_UNDERFLOW=1" PM_LINK="loosefpmath.obj"
|
||||
RUNALL_CROSSLIST
|
||||
PM_CL="/DFP_CONTRACT_MODE=0 /clang:-ffp-contract=off"
|
||||
PM_CL="/DFP_CONTRACT_MODE=1 /clang:-ffp-contract=on"
|
||||
PM_CL="/DFP_CONTRACT_MODE=2 /clang:-ffp-contract=fast"
|
|
@ -44,5 +44,13 @@ def getDefaultFeatures(config, litConfig):
|
|||
|
||||
if litConfig.target_arch.casefold() == 'x86'.casefold():
|
||||
DEFAULT_FEATURES.append(Feature(name='edg'))
|
||||
DEFAULT_FEATURES.append(Feature(name='arch_ia32'))
|
||||
DEFAULT_FEATURES.append(Feature(name='arch_avx2'))
|
||||
|
||||
if litConfig.target_arch.casefold() == 'x64'.casefold():
|
||||
DEFAULT_FEATURES.append(Feature(name='arch_avx2'))
|
||||
|
||||
if litConfig.target_arch.casefold() == 'arm'.casefold():
|
||||
DEFAULT_FEATURES.append(Feature(name='arch_vfpv4'))
|
||||
|
||||
return DEFAULT_FEATURES
|
||||
|
|
|
@ -217,6 +217,12 @@ class STLTest(Test):
|
|||
self.requires.append('clr') # TRANSITION, GH-797
|
||||
elif flag[1:] == 'BE':
|
||||
self.requires.append('edg') # available for x86, see features.py
|
||||
elif flag[1:] == 'arch:AVX2':
|
||||
self.requires.append('arch_avx2') # available for x86 and x64, see features.py
|
||||
elif flag[1:] == 'arch:IA32':
|
||||
self.requires.append('arch_ia32') # available for x86, see features.py
|
||||
elif flag[1:] == 'arch:VFPv4':
|
||||
self.requires.append('arch_vfpv4') # available for arm, see features.py
|
||||
|
||||
if not foundStd:
|
||||
Feature('c++14').enableIn(self.config)
|
||||
|
|
Загрузка…
Ссылка в новой задаче