Due to a refactoring mistake, `sampleProb / prob * intervalLength`
turned into `sampleProb / (prob * intervalLength)`, which is obviously incorrect.

Fixed that and added a rudimentary test for `DiscreteChar.Sample()`.
This commit is contained in:
Ivan Korostelev 2019-05-28 15:52:38 +01:00 коммит произвёл GitHub
Родитель 5b67978a61
Коммит 70ae46cd79
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 28 добавлений и 1 удалений

Просмотреть файл

@ -859,7 +859,7 @@ namespace Microsoft.ML.Probabilistic.Distributions
sampleProb -= prob.Value;
if (sampleProb < 0)
{
return (char)(interval.StartInclusive - sampleProb / (prob * intervalLength).Value);
return (char)(interval.StartInclusive - sampleProb / interval.Probability.Value);
}
}

Просмотреть файл

@ -2,6 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using Microsoft.ML.Probabilistic.Math;
namespace Microsoft.ML.Probabilistic.Tests
{
using System;
@ -64,6 +66,31 @@ namespace Microsoft.ML.Probabilistic.Tests
Assert.True(false);
}
/// <summary>
/// Draws many samples from a character distribution built with
/// <c>DiscreteChar.UniformInRanges("aj")</c> and checks that the empirical
/// frequency of every character stays within 0.01 of the uniform value.
/// </summary>
[Fact]
[Trait("Category", "StringInference")]
public void SampleFromUniformCharDistribution()
{
    // The distribution is expected to cover 10 characters, starting at 'a'.
    const int alphabetSize = 10;
    const int sampleCount = 100000;

    // Restart the random number generator with a fixed seed so that the test is reproducible.
    Rand.Restart(7);

    var charDistribution = DiscreteChar.UniformInRanges("aj");
    var counts = Vector.Zero(alphabetSize);

    // Tally how many times each character is drawn; the bucket is the offset from 'a'.
    for (var drawn = 0; drawn < sampleCount; ++drawn)
    {
        var sampledChar = charDistribution.Sample();
        var bucket = sampledChar - 'a';
        counts[bucket] += 1;
    }

    // Turn raw counts into relative frequencies and compare them with the uniform target.
    var frequencies = counts * (1.0 / sampleCount);
    var expected = Vector.Constant(alphabetSize, 1.0 / alphabetSize);

    Assert.True(frequencies.MaxDiff(expected) < 0.01);
}
/// <summary>
/// Tests the support of a character distribution.
/// </summary>