зеркало из https://github.com/dotnet/infer.git
QuantileEstimator.GetProbLessThan uses empirical cdf
This commit is contained in:
Родитель
c966b93213
Коммит
0114565ad3
|
@ -111,7 +111,7 @@ namespace Microsoft.ML.Probabilistic.Distributions
|
|||
/// <param name="upperItem"></param>
|
||||
/// <param name="n"></param>
|
||||
/// <returns></returns>
|
||||
internal static double GetQuantile(double probability, long lowerIndex, double lowerItem, double upperItem, long n)
|
||||
internal static double GetQuantile(double probability, double lowerIndex, double lowerItem, double upperItem, long n)
|
||||
{
|
||||
if (n == 1) throw new ArgumentOutOfRangeException("n == 1");
|
||||
double pos = MMath.LargestDoubleProduct(n - 1, probability);
|
||||
|
|
|
@ -53,6 +53,13 @@ namespace Microsoft.ML.Probabilistic.Distributions
|
|||
[DataMember]
|
||||
public readonly double MaximumError;
|
||||
|
||||
/// <summary>
|
||||
/// 0 = point mass on each data point (i/n)
|
||||
/// 1 = interpolate (i/n + (i+1)/n)/2 = (i+0.5)/n
|
||||
/// 2 = interpolate i/(n-1)
|
||||
/// </summary>
|
||||
private static int InterpolationType = 0;
|
||||
|
||||
/// <summary>
|
||||
/// Creates a new QuantileEstimator.
|
||||
/// </summary>
|
||||
|
@ -79,15 +86,32 @@ namespace Microsoft.ML.Probabilistic.Distributions
|
|||
{
|
||||
double lowerItem;
|
||||
double upperItem;
|
||||
long lowerIndex;
|
||||
long lowerRank;
|
||||
long lowerWeight, minLowerWeight, upperWeight, minUpperWeight;
|
||||
long itemCount;
|
||||
GetAdjacentItems(x, out lowerItem, out upperItem, out lowerIndex, out lowerWeight, out upperWeight, out minLowerWeight, out minUpperWeight, out itemCount);
|
||||
if (lowerIndex < 0) return 0;
|
||||
if (x == lowerItem) return (double)(lowerIndex - lowerWeight + 1) / (itemCount - 1);
|
||||
// interpolate between the ranks of lowerItem and upperItem
|
||||
double frac = (x - lowerItem) / (upperItem - lowerItem);
|
||||
return (lowerIndex + frac) / (itemCount - 1);
|
||||
GetAdjacentItems(x, out lowerItem, out upperItem, out lowerRank, out lowerWeight, out upperWeight, out minLowerWeight, out minUpperWeight, out itemCount);
|
||||
if (lowerRank == 0) return 0;
|
||||
if (lowerRank == itemCount) return 1;
|
||||
if (InterpolationType == 0)
|
||||
{
|
||||
return (double)lowerRank / itemCount;
|
||||
}
|
||||
else if (InterpolationType == 1)
|
||||
{
|
||||
// interpolate between the ranks of lowerItem and upperItem
|
||||
// lowerItem has rank (lowerRank - 0.5) from above
|
||||
// upperItem has rank (lowerRank + 0.5) from below
|
||||
double frac = (x - lowerItem) / (upperItem - lowerItem);
|
||||
return (lowerRank - 0.5 + frac) / itemCount;
|
||||
}
|
||||
else
|
||||
{
|
||||
// interpolate between the ranks of lowerItem and upperItem
|
||||
// lowerItem has rank (lowerRank - 1)/(itemCount-1) from above
|
||||
// upperItem has rank lowerRank/(itemCount-1) from below
|
||||
double frac = (x - lowerItem) / (upperItem - lowerItem);
|
||||
return (lowerRank - 1 + frac) / (itemCount - 1);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
@ -113,6 +137,7 @@ namespace Microsoft.ML.Probabilistic.Distributions
|
|||
}
|
||||
}
|
||||
if (probability == 1.0) return MMath.NextDouble(upperBound);
|
||||
if (probability == 0.0) return lowerBound;
|
||||
if (double.IsPositiveInfinity(lowerBound)) throw new Exception("QuantileEstimator has no data");
|
||||
if (lowerBound == upperBound) return upperBound;
|
||||
// use bisection
|
||||
|
@ -121,32 +146,83 @@ namespace Microsoft.ML.Probabilistic.Distributions
|
|||
double x = (lowerBound + upperBound) / 2;
|
||||
double lowerItem;
|
||||
double upperItem;
|
||||
long lowerIndex;
|
||||
long lowerRank;
|
||||
long lowerWeight, minLowerWeight, upperWeight, minUpperWeight;
|
||||
long itemCount;
|
||||
GetAdjacentItems(x, out lowerItem, out upperItem, out lowerIndex, out lowerWeight, out upperWeight, out minLowerWeight, out minUpperWeight, out itemCount);
|
||||
double scaledProbability = MMath.LargestDoubleProduct(itemCount - 1, probability);
|
||||
// probability of lowerItem ranges from (lowerIndex - lowerWeight + 1) / (itemCount - 1)
|
||||
// to lowerIndex / (itemCount - 1).
|
||||
if ((scaledProbability >= lowerIndex - lowerWeight + 1) && (scaledProbability < lowerIndex))
|
||||
return lowerItem;
|
||||
// probability of upperItem ranges from (lowerIndex + 1) / (itemCount - 1)
|
||||
// to (lowerIndex + upperWeight) / (itemCount - 1)
|
||||
if ((scaledProbability >= lowerIndex + 1) && (scaledProbability <= lowerIndex + upperWeight))
|
||||
return upperItem;
|
||||
// solve for frac in (lowerIndex + frac) / (itemCount - 1) == probability
|
||||
double frac = scaledProbability - lowerIndex;
|
||||
if (frac < 0)
|
||||
GetAdjacentItems(x, out lowerItem, out upperItem, out lowerRank, out lowerWeight, out upperWeight, out minLowerWeight, out minUpperWeight, out itemCount);
|
||||
if (InterpolationType == 0)
|
||||
{
|
||||
upperBound = x;
|
||||
double probabilityLessThanLowerItem = (double)(lowerRank - lowerWeight) / itemCount;
|
||||
if (probability < probabilityLessThanLowerItem)
|
||||
{
|
||||
upperBound = lowerItem;
|
||||
}
|
||||
else
|
||||
{
|
||||
double probabilityLessThanUpperItem = (double)lowerRank / itemCount;
|
||||
if (probability < probabilityLessThanUpperItem) return lowerItem;
|
||||
double probabilityLessThanOrEqualUpperItem = (double)(lowerRank + upperWeight) / itemCount;
|
||||
if (probability < probabilityLessThanOrEqualUpperItem) return upperItem;
|
||||
lowerBound = upperItem;
|
||||
}
|
||||
}
|
||||
else if (frac > 1)
|
||||
else if (InterpolationType == 1)
|
||||
{
|
||||
lowerBound = x;
|
||||
// Find frac such that (lowerRank - 0.5 + frac) / itemCount == probability
|
||||
double scaledProbability = MMath.LargestDoubleProduct(itemCount, probability);
|
||||
if (scaledProbability < 0.5) return lowerBound;
|
||||
if (scaledProbability >= itemCount - 0.5) return upperBound;
|
||||
// probability of lowerItem ranges from (lowerRank-lowerWeight+0.5) / itemCount
|
||||
// to (lowerRank - 0.5) / itemCount
|
||||
//if (scaledProbability == lowerRank - lowerWeight + 0.5) return lowerItem;
|
||||
if ((scaledProbability > lowerRank - lowerWeight + 0.5) && (scaledProbability < lowerRank - 0.5))
|
||||
return lowerItem;
|
||||
// probability of upperItem ranges from (lowerRank + 0.5) / itemCount
|
||||
// to (lowerRank + upperWeight - 0.5) / itemCount
|
||||
if (scaledProbability == lowerRank+0.5) return upperItem;
|
||||
if ((scaledProbability > lowerRank+0.5) && (scaledProbability < lowerRank + upperWeight - 0.5))
|
||||
return upperItem;
|
||||
double frac = scaledProbability - (lowerRank - 0.5);
|
||||
if (frac < 0)
|
||||
{
|
||||
upperBound = x;
|
||||
}
|
||||
else if (frac > 1)
|
||||
{
|
||||
lowerBound = x;
|
||||
}
|
||||
else
|
||||
{
|
||||
return OuterQuantiles.GetQuantile(probability, lowerRank - 0.5, lowerItem, upperItem, itemCount+1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return OuterQuantiles.GetQuantile(probability, lowerIndex, lowerItem, upperItem, itemCount);
|
||||
double scaledProbability = MMath.LargestDoubleProduct(itemCount - 1, probability);
|
||||
// probability of lowerItem ranges from (lowerRank-lowerWeight) / (itemCount - 1)
|
||||
// to (lowerRank - 1) / (itemCount - 1).
|
||||
if (scaledProbability == lowerRank - lowerWeight) return lowerItem;
|
||||
if ((scaledProbability > lowerRank - lowerWeight) && (scaledProbability < lowerRank - 1))
|
||||
return lowerItem;
|
||||
// probability of upperItem ranges from lowerRank / (itemCount - 1)
|
||||
// to (lowerRank + upperWeight-1) / (itemCount - 1)
|
||||
if (scaledProbability == lowerRank) return upperItem;
|
||||
if ((scaledProbability > lowerRank) && (scaledProbability < lowerRank + upperWeight - 1))
|
||||
return upperItem;
|
||||
// solve for frac in (lowerRank-1 + frac) / (itemCount - 1) == probability
|
||||
double frac = scaledProbability - (lowerRank - 1);
|
||||
if (frac < 0)
|
||||
{
|
||||
upperBound = x;
|
||||
}
|
||||
else if (frac > 1)
|
||||
{
|
||||
lowerBound = x;
|
||||
}
|
||||
else
|
||||
{
|
||||
return OuterQuantiles.GetQuantile(probability, lowerRank - 1, lowerItem, upperItem, itemCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -255,17 +331,17 @@ namespace Microsoft.ML.Probabilistic.Distributions
|
|||
///
|
||||
/// </summary>
|
||||
/// <param name="x"></param>
|
||||
/// <param name="lowerItem">The largest item that is less than or equal to x.</param>
|
||||
/// <param name="upperItem">The smallest item that is greater than x.</param>
|
||||
/// <param name="lowerIndex"></param>
|
||||
/// <param name="lowerWeight">The total weight of items equal to lowerItem, excluding the item with lowest weight.</param>
|
||||
/// <param name="lowerItem">The largest item that is less than x.</param>
|
||||
/// <param name="upperItem">The smallest item that is greater than or equal to x.</param>
|
||||
/// <param name="lowerRank">The total weight of items less than x.</param>
|
||||
/// <param name="lowerWeight">The total weight of items equal to lowerItem.</param>
|
||||
/// <param name="upperWeight">The total weight of items equal to upperItem.</param>
|
||||
/// <param name="minLowerWeight">The smallest weight of items equal to lowerItem.</param>
|
||||
/// <param name="minUpperWeight">The smallest weight of items equal to upperItem.</param>
|
||||
/// <param name="itemCount"></param>
|
||||
private void GetAdjacentItems(double x, out double lowerItem, out double upperItem, out long lowerIndex, out long lowerWeight, out long upperWeight, out long minLowerWeight, out long minUpperWeight, out long itemCount)
|
||||
private void GetAdjacentItems(double x, out double lowerItem, out double upperItem, out long lowerRank, out long lowerWeight, out long upperWeight, out long minLowerWeight, out long minUpperWeight, out long itemCount)
|
||||
{
|
||||
long lowerRank = 0;
|
||||
lowerRank = 0;
|
||||
long weight = (1L << lowestBufferHeight);
|
||||
itemCount = 0;
|
||||
lowerItem = double.NegativeInfinity;
|
||||
|
@ -282,7 +358,7 @@ namespace Microsoft.ML.Probabilistic.Distributions
|
|||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
double item = buffer[i];
|
||||
if (item <= x)
|
||||
if (item < x)
|
||||
{
|
||||
lowerRank += weight;
|
||||
if (item > lowerItem)
|
||||
|
@ -316,7 +392,6 @@ namespace Microsoft.ML.Probabilistic.Distributions
|
|||
if (bufferIndex == lowestBufferIndex) break;
|
||||
}
|
||||
if (itemCount == 0) throw new Exception("QuantileEstimator has no data");
|
||||
lowerIndex = lowerRank - 1;
|
||||
}
|
||||
|
||||
private void AddToBuffer(int bufferIndex, double item)
|
||||
|
|
|
@ -165,10 +165,10 @@ namespace Microsoft.ML.Probabilistic.Tests
|
|||
CheckGetQuantile(inner, inner, (int)Math.Ceiling(100.0/8), (int)Math.Floor(100.0*7/8));
|
||||
var est = new QuantileEstimator(0.01);
|
||||
est.AddRange(x);
|
||||
Assert.Equal(0.25, est.GetProbLessThan(middle));
|
||||
Assert.Equal(est.GetQuantile(0.3), middle);
|
||||
Assert.Equal(est.GetQuantile(0.5), middle);
|
||||
Assert.Equal(est.GetQuantile(0.7), middle);
|
||||
// InterpolationType==1 returns NextDouble(middle)
|
||||
Assert.Equal(est.GetQuantile(0.7), middle, 1e-15);
|
||||
CheckGetQuantile(est, est);
|
||||
}
|
||||
|
||||
|
@ -199,8 +199,6 @@ namespace Microsoft.ML.Probabilistic.Tests
|
|||
Assert.Equal(0.0, est.GetProbLessThan(first));
|
||||
Assert.Equal(first, est.GetQuantile(0.0));
|
||||
Assert.Equal(0.5, est.GetProbLessThan(between));
|
||||
Assert.Equal(between, est.GetQuantile(0.5));
|
||||
Assert.Equal(2.0 / 3, est.GetProbLessThan(second));
|
||||
Assert.Equal(second, est.GetQuantile(2.0 / 3));
|
||||
Assert.Equal(1.0, est.GetProbLessThan(next));
|
||||
Assert.Equal(next, est.GetQuantile(1.0));
|
||||
|
@ -278,12 +276,10 @@ namespace Microsoft.ML.Probabilistic.Tests
|
|||
[Fact]
|
||||
public void QuantileEstimatorTest()
|
||||
{
|
||||
foreach (int n in new[] { 20, 10000 })
|
||||
foreach(double maximumError in new[] { 0.05, 0.01, 0.005, 0.001 })
|
||||
{
|
||||
QuantileEstimator(0.05, n);
|
||||
QuantileEstimator(0.01, n);
|
||||
QuantileEstimator(0.005, n);
|
||||
QuantileEstimator(0.001, n);
|
||||
int n = (int)(2.0 / maximumError);
|
||||
QuantileEstimator(maximumError, n);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -302,7 +298,7 @@ namespace Microsoft.ML.Probabilistic.Tests
|
|||
x.Add(sample);
|
||||
est.Add(sample);
|
||||
}
|
||||
CheckProbLessThan(est, x, (n < 40) ? 0 : maximumError);
|
||||
CheckProbLessThan(est, x, maximumError);
|
||||
}
|
||||
|
||||
private void CheckProbLessThan(CanGetProbLessThan canGetProbLessThan, List<double> x, double maximumError)
|
||||
|
@ -324,7 +320,7 @@ namespace Microsoft.ML.Probabilistic.Tests
|
|||
maxError = System.Math.Max(maxError, error);
|
||||
}
|
||||
Console.WriteLine($"max rank error = {maxError}");
|
||||
Assert.True(maxError <= 2 * maximumError);
|
||||
Assert.True(maxError <= maximumError);
|
||||
}
|
||||
|
||||
[Fact]
|
||||
|
|
Загрузка…
Ссылка в новой задаче