Fix DiscreteChar.Complement() & Simplify ranges operations (#192)

After the recent refactoring that removed `ProbabilityOutsideRanges`, `DiscreteChar.Complement()`
started to work incorrectly when ranges were adjacent, i.e. when a range started exactly where the previous one ended.

For example, `DiscreteChar.PointMass('\0').Complement()` was equal to the uniform distribution, i.e. it still included the `\0` character.
This commit is contained in:
Ivan Korostelev 2019-11-05 14:42:28 +00:00 коммит произвёл GitHub
Родитель 1f54bc32f7
Коммит fe41d89eba
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 47 добавлений и 2 удалений

Просмотреть файл

@ -1537,8 +1537,9 @@ namespace Microsoft.ML.Probabilistic.Distributions
if (range.StartInclusive != prevEnd)
{
builder.AddRange(new CharRange(prevEnd, range.StartInclusive, Weight.One));
prevEnd = range.EndExclusive;
}
prevEnd = range.EndExclusive;
}
if (prevEnd != CharRangeEndExclusive)
@ -2099,4 +2100,4 @@ namespace Microsoft.ML.Probabilistic.Distributions
#endregion
}
}
}
}

Просмотреть файл

@ -91,6 +91,50 @@ namespace Microsoft.ML.Probabilistic.Tests
Assert.True(maxDiff < 0.01);
}
/// <summary>
/// Tests that <c>DiscreteChar.Complement()</c> yields a partial uniform distribution
/// that does not overlap with the original one and, together with it, covers every character.
/// </summary>
[Fact]
[Trait("Category", "StringInference")]
public void ComplementWorks()
{
    // Checks all complement invariants for a single distribution.
    void CheckComplement(DiscreteChar original)
    {
        // Partial uniform version of the original: same support, equal weights.
        var originalSupport = original.Clone();
        originalSupport.SetToPartialUniform();

        var complemented = original.Complement();

        // The complement must always be partial uniform.
        bool complementIsPartialUniform = complemented.IsPartialUniform();
        Assert.True(complementIsPartialUniform);

        // The original and its complement must not share any characters.
        double overlapWithOriginal = original.GetLogAverageOf(complemented);
        Assert.True(double.IsNegativeInfinity(overlapWithOriginal));
        double overlapWithSupport = originalSupport.GetLogAverageOf(complemented);
        Assert.True(double.IsNegativeInfinity(overlapWithSupport));

        // The union of the original and its complement covers the whole character range.
        var union = default(DiscreteChar);
        union.SetToSum(1, original, 1, complemented);
        union.SetToPartialUniform();
        bool unionIsUniform = union.IsUniform();
        Assert.True(unionIsUniform);

        // Complementing twice must give back the same set of characters.
        var doubleComplement = complemented.Complement();
        Assert.Equal(originalSupport, doubleComplement);
    }

    // Point masses at the lower boundary, in the middle and at the upper boundary.
    CheckComplement(DiscreteChar.PointMass('\0'));
    CheckComplement(DiscreteChar.PointMass('a'));
    CheckComplement(DiscreteChar.PointMass(char.MaxValue));

    // A mixture of 'a' and 'b' with different weights.
    var pointA = DiscreteChar.PointMass('a');
    var pointB = DiscreteChar.PointMass('b');
    var mixture = default(DiscreteChar);
    mixture.SetToSum(1, pointA, 2, pointB);

    // The mixture must be stored as 2 ranges that directly follow one another.
    Assert.Equal(2, mixture.Ranges.Count);
    CheckComplement(mixture);
}
/// <summary>
/// Tests the support of a character distribution.
/// </summary>