remove unused and confusing float16 constants (#21999)
### Description Remove unused and confusing special constants in MLFloat16 and BFloat16 types. ### Motivation and Context While looking at adding a specialization for std::numeric_limits for the 16-bit floating point types, I found that there are various special constants in those types that are confusing or just wrong. MLFLoat16::Epsilon is not an epsilon at all, but approximates "e". Looks like a copy-paste bug. BFloat16::Epsilon does not correspond to `numeric_limits::epsilon()`, nor even to the C# Float.Epsilon. Instead, it corresponds to `numeric_limits::min()` which was really confusing to me. The "MinValue" constants does correspond to the C# `Float.MinValue` constant, but this is C++ so it would be better renamed to "LowestValue" since it corresponds to `numeric_limits::lowest()`. As it was unused except for some unit tests I have replaced it with the equivalent `MaxValue.Negate()` here. There's also an unused `kSignaling_NaNBits` constant which is just wrong (has the same value as `kPositiveInfinityBits` instead of a NaN).
This commit is contained in:
Родитель
970ebc2ccf
Коммит
605a84ffc9
|
@ -45,8 +45,6 @@ struct MLFloat16 : onnxruntime_float16::Float16Impl<MLFloat16> {
|
|||
static const MLFloat16 NegativeNaN;
|
||||
static const MLFloat16 Infinity;
|
||||
static const MLFloat16 NegativeInfinity;
|
||||
static const MLFloat16 Epsilon;
|
||||
static const MLFloat16 MinValue;
|
||||
static const MLFloat16 MaxValue;
|
||||
static const MLFloat16 Zero;
|
||||
static const MLFloat16 One;
|
||||
|
@ -181,8 +179,6 @@ struct BFloat16 : onnxruntime_float16::BFloat16Impl<BFloat16> {
|
|||
static const BFloat16 NegativeNaN;
|
||||
static const BFloat16 Infinity;
|
||||
static const BFloat16 NegativeInfinity;
|
||||
static const BFloat16 Epsilon;
|
||||
static const BFloat16 MinValue;
|
||||
static const BFloat16 MaxValue;
|
||||
static const BFloat16 Zero;
|
||||
static const BFloat16 One;
|
||||
|
|
|
@ -75,8 +75,6 @@ struct Float16Impl {
|
|||
static constexpr uint16_t kNegativeInfinityBits = 0xFC00U;
|
||||
static constexpr uint16_t kPositiveQNaNBits = 0x7E00U;
|
||||
static constexpr uint16_t kNegativeQNaNBits = 0xFE00U;
|
||||
static constexpr uint16_t kEpsilonBits = 0x4170U;
|
||||
static constexpr uint16_t kMinValueBits = 0xFBFFU; // Minimum normal number
|
||||
static constexpr uint16_t kMaxValueBits = 0x7BFFU; // Largest normal number
|
||||
static constexpr uint16_t kOneBits = 0x3C00U;
|
||||
static constexpr uint16_t kMinusOneBits = 0xBC00U;
|
||||
|
@ -364,9 +362,6 @@ struct BFloat16Impl {
|
|||
static constexpr uint16_t kNegativeInfinityBits = 0xFF80U;
|
||||
static constexpr uint16_t kPositiveQNaNBits = 0x7FC1U;
|
||||
static constexpr uint16_t kNegativeQNaNBits = 0xFFC1U;
|
||||
static constexpr uint16_t kSignaling_NaNBits = 0x7F80U;
|
||||
static constexpr uint16_t kEpsilonBits = 0x0080U;
|
||||
static constexpr uint16_t kMinValueBits = 0xFF7FU;
|
||||
static constexpr uint16_t kMaxValueBits = 0x7F7FU;
|
||||
static constexpr uint16_t kRoundToNearest = 0x7FFFU;
|
||||
static constexpr uint16_t kOneBits = 0x3F80U;
|
||||
|
|
|
@ -36,8 +36,6 @@ const MLFloat16 MLFloat16::NaN(MLFloat16::FromBits(MLFloat16::kPositiveQNaNBits)
|
|||
const MLFloat16 MLFloat16::NegativeNaN(MLFloat16::FromBits(MLFloat16::kNegativeQNaNBits));
|
||||
const MLFloat16 MLFloat16::Infinity(MLFloat16::FromBits(MLFloat16::kPositiveInfinityBits));
|
||||
const MLFloat16 MLFloat16::NegativeInfinity(MLFloat16::FromBits(MLFloat16::kNegativeInfinityBits));
|
||||
const MLFloat16 MLFloat16::Epsilon(MLFloat16::FromBits(MLFloat16::kEpsilonBits));
|
||||
const MLFloat16 MLFloat16::MinValue(MLFloat16::FromBits(MLFloat16::kMinValueBits));
|
||||
const MLFloat16 MLFloat16::MaxValue(MLFloat16::FromBits(MLFloat16::kMaxValueBits));
|
||||
const MLFloat16 MLFloat16::Zero(MLFloat16::FromBits(0));
|
||||
const MLFloat16 MLFloat16::One(MLFloat16::FromBits(MLFloat16::kOneBits));
|
||||
|
@ -47,8 +45,6 @@ const BFloat16 BFloat16::NaN(BFloat16::FromBits(BFloat16::kPositiveQNaNBits));
|
|||
const BFloat16 BFloat16::NegativeNaN(BFloat16::FromBits(BFloat16::kNegativeQNaNBits));
|
||||
const BFloat16 BFloat16::Infinity(BFloat16::FromBits(BFloat16::kPositiveInfinityBits));
|
||||
const BFloat16 BFloat16::NegativeInfinity(BFloat16::FromBits(BFloat16::kNegativeInfinityBits));
|
||||
const BFloat16 BFloat16::Epsilon(BFloat16::FromBits(BFloat16::kEpsilonBits));
|
||||
const BFloat16 BFloat16::MinValue(BFloat16::FromBits(BFloat16::kMinValueBits));
|
||||
const BFloat16 BFloat16::MaxValue(BFloat16::FromBits(BFloat16::kMaxValueBits));
|
||||
const BFloat16 BFloat16::Zero(BFloat16::FromBits(0));
|
||||
const BFloat16 BFloat16::One(BFloat16::FromBits(BFloat16::kOneBits));
|
||||
|
|
|
@ -480,7 +480,7 @@ TEST_F(DataTypeTest, MLFloat16Comparision) {
|
|||
const MLFloat16 right = MLFloat16(66.66f);
|
||||
const MLFloat16 right_same = MLFloat16(66.66f);
|
||||
|
||||
EXPECT_TRUE(MLFloat16::Epsilon < right);
|
||||
EXPECT_TRUE(MLFloat16::One < right);
|
||||
|
||||
EXPECT_EQ(left, left_same);
|
||||
EXPECT_NE(left, left_same.Negate());
|
||||
|
@ -513,14 +513,14 @@ TEST_F(DataTypeTest, MLFloat16NaNComparision) {
|
|||
|
||||
EXPECT_FALSE(MLFloat16::MaxValue < MLFloat16::NaN);
|
||||
EXPECT_FALSE(MLFloat16::MaxValue == MLFloat16::NaN);
|
||||
EXPECT_FALSE(MLFloat16::MinValue < MLFloat16::NaN);
|
||||
EXPECT_FALSE(MLFloat16::MaxValue.Negate() < MLFloat16::NaN);
|
||||
EXPECT_FALSE(MLFloat16::NaN < MLFloat16::MaxValue);
|
||||
|
||||
EXPECT_TRUE(MLFloat16::MinValue < MLFloat16::MaxValue);
|
||||
EXPECT_TRUE(MLFloat16::Zero < MLFloat16::MaxValue);
|
||||
}
|
||||
|
||||
TEST_F(DataTypeTest, MLFloat16Infinity) {
|
||||
EXPECT_FALSE(MLFloat16::MinValue.IsInfinity());
|
||||
EXPECT_FALSE(MLFloat16::MaxValue.Negate().IsInfinity());
|
||||
EXPECT_FALSE(MLFloat16::MaxValue.IsInfinity());
|
||||
EXPECT_TRUE(MLFloat16::MaxValue.IsFinite());
|
||||
|
||||
|
@ -625,7 +625,7 @@ TEST_F(DataTypeTest, BFloat16Comparision) {
|
|||
const BFloat16 right = BFloat16(66.66f);
|
||||
const BFloat16 right_same = BFloat16(66.66f);
|
||||
|
||||
EXPECT_TRUE(BFloat16::Epsilon < right);
|
||||
EXPECT_TRUE(BFloat16::One < right);
|
||||
|
||||
EXPECT_EQ(left, left_same);
|
||||
EXPECT_NE(left, left_same.Negate());
|
||||
|
@ -657,14 +657,14 @@ TEST_F(DataTypeTest, BFloat16NaNComparision) {
|
|||
|
||||
EXPECT_FALSE(BFloat16::MaxValue < BFloat16::NaN);
|
||||
EXPECT_FALSE(BFloat16::MaxValue == BFloat16::NaN);
|
||||
EXPECT_FALSE(BFloat16::MinValue < BFloat16::NaN);
|
||||
EXPECT_FALSE(BFloat16::MaxValue.Negate() < BFloat16::NaN);
|
||||
EXPECT_FALSE(BFloat16::NaN < BFloat16::MaxValue);
|
||||
|
||||
EXPECT_TRUE(BFloat16::MinValue < BFloat16::MaxValue);
|
||||
EXPECT_TRUE(BFloat16::Zero < BFloat16::MaxValue);
|
||||
}
|
||||
|
||||
TEST_F(DataTypeTest, BFloat16Infinity) {
|
||||
EXPECT_FALSE(BFloat16::MinValue.IsInfinity());
|
||||
EXPECT_FALSE(BFloat16::MaxValue.Negate().IsInfinity());
|
||||
EXPECT_FALSE(BFloat16::MaxValue.IsInfinity());
|
||||
EXPECT_TRUE(BFloat16::MaxValue.IsFinite());
|
||||
|
||||
|
|
|
@ -370,11 +370,10 @@ TEST(CPPApi, Float16Zeros) {
|
|||
}
|
||||
|
||||
namespace {
|
||||
const auto EpsilonFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kEpsilonBits);
|
||||
const auto NaNFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kPositiveQNaNBits);
|
||||
const auto MinValueFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kMinValueBits);
|
||||
const auto MaxValueFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kMaxValueBits);
|
||||
const auto InfinityFl16 = Ort::Float16_t::FromBits(Ort::Float16_t::kPositiveInfinityBits);
|
||||
const auto ZeroFl16 = Ort::Float16_t::FromBits(0x0000);
|
||||
} // namespace
|
||||
|
||||
TEST(CPPApi, Float16Comparision) {
|
||||
|
@ -383,8 +382,6 @@ TEST(CPPApi, Float16Comparision) {
|
|||
const auto right = Ort::Float16_t(66.66f);
|
||||
const auto right_same = Ort::Float16_t(66.66f);
|
||||
|
||||
EXPECT_LT(EpsilonFl16, right);
|
||||
|
||||
EXPECT_EQ(left, left_same);
|
||||
EXPECT_NE(left, left_same.Negate());
|
||||
|
||||
|
@ -416,13 +413,12 @@ TEST(CPPApi, Float16NaNComparision) {
|
|||
|
||||
EXPECT_FALSE(MaxValueFl16 < NaNFl16);
|
||||
EXPECT_FALSE(MaxValueFl16 == NaNFl16);
|
||||
EXPECT_FALSE(NaNFl16 < MinValueFl16);
|
||||
EXPECT_FALSE(NaNFl16 < MaxValueFl16);
|
||||
|
||||
EXPECT_LT(MinValueFl16, MaxValueFl16);
|
||||
EXPECT_LT(ZeroFl16, MaxValueFl16);
|
||||
}
|
||||
|
||||
TEST(CPPApi, Float16Infinity) {
|
||||
EXPECT_FALSE(MinValueFl16.IsInfinity());
|
||||
EXPECT_FALSE(MaxValueFl16.IsInfinity());
|
||||
EXPECT_TRUE(MaxValueFl16.IsFinite());
|
||||
|
||||
|
@ -530,11 +526,10 @@ TEST(CPPApi, BFloat16Zeros) {
|
|||
}
|
||||
|
||||
namespace {
|
||||
const auto EpsilonBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kEpsilonBits);
|
||||
const auto NaNBfl15 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveQNaNBits);
|
||||
const auto MinValueBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kMinValueBits);
|
||||
const auto NaNBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveQNaNBits);
|
||||
const auto MaxValueBfl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kMaxValueBits);
|
||||
const auto InfinityBFl16 = Ort::BFloat16_t::FromBits(Ort::BFloat16_t::kPositiveInfinityBits);
|
||||
const auto ZeroBfl16 = Ort::BFloat16_t::FromBits(0x0000);
|
||||
} // namespace
|
||||
|
||||
TEST(CPPApi, BFloat16Comparision) {
|
||||
|
@ -543,8 +538,6 @@ TEST(CPPApi, BFloat16Comparision) {
|
|||
const auto right = Ort::BFloat16_t(66.66f);
|
||||
const auto right_same = Ort::BFloat16_t(66.66f);
|
||||
|
||||
EXPECT_LT(EpsilonBfl16, right);
|
||||
|
||||
EXPECT_EQ(left, left_same);
|
||||
EXPECT_NE(left, left_same.Negate());
|
||||
|
||||
|
@ -561,7 +554,7 @@ TEST(CPPApi, BFloat16TestNAN) {
|
|||
EXPECT_TRUE(fp16NANFromSingle.IsNaN());
|
||||
|
||||
// NaN are not equal to each other
|
||||
EXPECT_NE(NaNBfl15, fp16NANFromSingle);
|
||||
EXPECT_NE(NaNBfl16, fp16NANFromSingle);
|
||||
|
||||
const float NanFromBFloat16 = fp16NANFromSingle.ToFloat();
|
||||
EXPECT_TRUE(std::isnan(NanFromBFloat16));
|
||||
|
@ -570,19 +563,18 @@ TEST(CPPApi, BFloat16TestNAN) {
|
|||
}
|
||||
|
||||
TEST(CPPApi, BFloat16NaNComparision) {
|
||||
EXPECT_FALSE(NaNBfl15 < NaNBfl15);
|
||||
EXPECT_TRUE(NaNBfl15 != NaNBfl15);
|
||||
EXPECT_FALSE(NaNBfl15 == NaNBfl15);
|
||||
EXPECT_FALSE(NaNBfl16 < NaNBfl16);
|
||||
EXPECT_TRUE(NaNBfl16 != NaNBfl16);
|
||||
EXPECT_FALSE(NaNBfl16 == NaNBfl16);
|
||||
|
||||
EXPECT_FALSE(MaxValueBfl16 < NaNBfl15);
|
||||
EXPECT_FALSE(MaxValueBfl16 == NaNBfl15);
|
||||
EXPECT_FALSE(NaNBfl15 < MinValueBfl16);
|
||||
EXPECT_FALSE(MaxValueBfl16 < NaNBfl16);
|
||||
EXPECT_FALSE(MaxValueBfl16 == NaNBfl16);
|
||||
EXPECT_FALSE(NaNBfl16 < MaxValueBfl16);
|
||||
|
||||
EXPECT_LT(MinValueBfl16, MaxValueBfl16);
|
||||
EXPECT_LT(ZeroBfl16, MaxValueBfl16);
|
||||
}
|
||||
|
||||
TEST(CPPApi, BFloat16Infinity) {
|
||||
EXPECT_FALSE(MinValueBfl16.IsInfinity());
|
||||
EXPECT_FALSE(MaxValueBfl16.IsInfinity());
|
||||
EXPECT_TRUE(MaxValueBfl16.IsFinite());
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче