Update URL for BookCrossing dataset (#288)

* Added Matrix.FromDiagonal
This commit is contained in:
Tom Minka 2020-09-09 23:54:45 +01:00 коммит произвёл GitHub
Родитель 2e03d713b9
Коммит 9ca3230018
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 75 добавлений и 26 удалений

Просмотреть файл

@ -1,6 +1,8 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using Microsoft.ML.Probabilistic.Factors;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
@ -12,17 +14,26 @@ namespace Microsoft.ML.Probabilistic.Learners.Runners.DatasetGenerators
/// </summary>
class BookCrossingGenerator : DownloadingDatasetGenerator
{
private const string link = "http://www2.informatik.uni-freiburg.de/~cziegler/BX/BX-CSV-Dump.zip";
private const string link = "https://raw.githubusercontent.com/ashwanidv100/Recommendation-System---Book-Crossing-Dataset/52c29a80b7a4e69c422cd84806cdbb45879d8c66/BX-CSV-Dump/BX-Book-Ratings.csv";
// The original link no longer works.
//private const string link = "http://www2.informatik.uni-freiburg.de/~cziegler/BX/BX-CSV-Dump.zip";
private const string datFileName = "BX-Book-Ratings.csv";
/// <summary>
/// Downloads the BookCrossing data referenced by <c>link</c> into the given directory.
/// </summary>
/// <param name="tmpDir">Directory that receives the downloaded (and, for archives, extracted) files.</param>
protected override void DownloadArchives(string tmpDir)
{
    // WebClient is IDisposable, so release it as soon as the download finishes.
    using (var client = new WebClient())
    {
        // Ordinal comparison: the link is a URL, not linguistic text.
        if (link.EndsWith(".zip", System.StringComparison.Ordinal))
        {
            // Archive link: download exactly once to a temporary zip, then unpack it into tmpDir.
            string tmpZipPath = Path.Combine(tmpDir, "tmp.zip");
            client.DownloadFile(link, tmpZipPath);
            System.IO.Compression.ZipFile.ExtractToDirectory(tmpZipPath, tmpDir);
        }
        else
        {
            // Direct link to a single file: store it under the name given by datFileName.
            client.DownloadFile(link, Path.Combine(tmpDir, datFileName));
        }
    }
}
protected override void MakeDataset(string tmpDir, string outputFileName)

Просмотреть файл

@ -51,11 +51,16 @@ namespace Microsoft.ML.Probabilistic.Learners.Runners
{
try
{
string configFile = DefaultConfigFile;
string configFile;
if (args.Length > 0)
{
configFile = args[0];
}
else
{
configFile = DefaultConfigFile;
Console.WriteLine($"Using default config file {configFile}");
}
RegisterTestRunHandlers();

Просмотреть файл

@ -2,7 +2,7 @@
"profiles": {
"Evaluator": {
"commandName": "Project",
"commandLineArgs": "x64JVMConfig.xml"
"commandLineArgs": "InferNetRunsOnly.xml"
}
}
}

Просмотреть файл

@ -658,6 +658,18 @@ namespace Microsoft.ML.Probabilistic.Math
return this;
}
/// <summary>
/// Builds a new square matrix whose diagonal is filled from the given vector.
/// </summary>
/// <param name="diag">Vector supplying the diagonal entries; its <c>Count</c> determines both the row and column count of the result.</param>
/// <returns>A newly allocated matrix with <c>diag.Count</c> rows and columns, after <c>SetDiagonal(diag)</c> has been applied to it.</returns>
public static Matrix FromDiagonal(Vector diag)
{
    int size = diag.Count;
    var result = new Matrix(size, size);
    result.SetDiagonal(diag);
    return result;
}
/// <summary>
/// Creates a vector from the diagonal values in the matrix

Просмотреть файл

@ -98,7 +98,8 @@ namespace Microsoft.ML.Probabilistic.Learners.Tests
distribution = new Discrete(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1);
Assert.Equal(0, distribution.GetMode());
Assert.Equal(4.5, distribution.GetMean(), 1e-10);
Assert.Equal(4, distribution.GetMedian());
int median = distribution.GetMedian();
Assert.True(median == 4 || median == 5);
Assert.Equal(3, PointEstimator.GetEstimate(distribution, this.LinearLossFunction(3, 2))); // 2nd quintile
Assert.Equal(7, PointEstimator.GetEstimate(distribution, this.LinearLossFunction(1, 3))); // 3rd quartile
Assert.Equal(9, PointEstimator.GetEstimate(distribution, this.LinearLossFunction(1, 999))); // 999th permille

Просмотреть файл

@ -2203,41 +2203,56 @@ namespace Microsoft.ML.Probabilistic.Tests
});
Matrix V = new Matrix(A.Cols, A.Cols);
V.SetToRightSingularVectors(A);
//Matrix A2 = new Matrix(A.Rows, A.Cols);
//A2.SetToProduct(A, V.Transpose());
// A now contains the left singular vectors scaled by the singular values.
Matrix US = A;
DenseVector S = DenseVector.Zero(A.Cols);
for (int i = 0; i < A.Cols; i++)
{
double sum = 0;
double compensation = 0;
for (int j = 0; j < A.Rows; j++)
{
sum += A[j, i] * A[j, i];
double y = A[j, i] * A[j, i] - compensation;
double nextSum = sum + y;
compensation = (nextSum - sum) - y;
sum = nextSum;
}
S[i] = System.Math.Sqrt(sum);
}
var Sinv = DenseVector.Zero(S.Count);
Sinv.SetToFunction(S, x => 1.0 / x);
A.ScaleCols(Sinv);
Matrix U = A;
Matrix U = (Matrix)US.Clone();
U.ScaleCols(Sinv);
// these results are slightly different from Matlab, but seem to be more accurate.
// They were computed in extended precision.
Matrix UExpected = new Matrix(new double[,] {
{ 0.577350268923100, -0.69217074896108965 },
{ 0.577350269178466, -0.028980941246352369 },
{ 0.577350269467311, 0.72115168954050146 }
{ 0.57735026892309971, -0.69217076042542958 },
{ 0.57735026917846644, -0.028980919646426295 },
{ 0.57735026946731116, 0.72115167940491542 }
});
DenseVector SExpected = DenseVector.FromArray(2.183575560941113, 0.000000001095252);
DenseVector SExpected = DenseVector.FromArray(2.1835755609411125, 1.0952516532160563e-09);
Matrix VExpected = new Matrix(new double[,] {
{ 0.793217710690245, -0.608938144188165 },
{ 0.608938144188165, 0.793217710690245 }
{ 0.79321771069024527, -0.60893814418816494 },
{ 0.60893814418816494, 0.79321771069024527 }
});
Console.WriteLine(StringUtil.JoinColumns("U = ", U, " expected ", UExpected));
Console.WriteLine(StringUtil.JoinColumns("S = ", S, " expected ", SExpected));
Console.WriteLine(StringUtil.JoinColumns("V = ", V, " expected ", VExpected));
Assert.True(UExpected.MaxDiff(U) < 1e-10);
Assert.True(SExpected.MaxDiff(S) < 1e-10);
Assert.True(VExpected.MaxDiff(V) < 1e-10);
double UError = UExpected.MaxDiff(U);
double SError = SExpected.MaxDiff(S);
double VError = VExpected.MaxDiff(V);
Matrix USExpected = UExpected * Matrix.FromDiagonal(SExpected);
double USError = USExpected.MaxDiff(US);
Console.WriteLine(StringUtil.JoinColumns("US = ", US.ToString("g17"), " expected ", USExpected.ToString("g17"), " error ", USError));
Console.WriteLine(StringUtil.JoinColumns("U = ", U.ToString("g17"), " expected ", UExpected.ToString("g17"), " error ", UError));
Console.WriteLine(StringUtil.JoinColumns("S = ", S, " expected ", SExpected, " error ", SError));
Console.WriteLine(StringUtil.JoinColumns("V = ", V, " expected ", VExpected, " error ", VError));
Matrix AExpected = UExpected * Matrix.FromDiagonal(SExpected) * VExpected.Transpose();
Matrix USVActual = U * Matrix.FromDiagonal(S) * V.Transpose();
Matrix AActual = US * V.Transpose();
Console.WriteLine(StringUtil.JoinColumns("A = ", AActual.ToString("g17"), " expected ", AExpected.ToString("g17")));
Assert.True(UError < 1e-7);
Assert.True(SError < 1e-10);
Assert.True(VError < 1e-10);
}
// TODO: change this test to use SetToLeftSingularVectors

Просмотреть файл

@ -553,7 +553,7 @@ namespace Microsoft.ML.Probabilistic.Tests
Assert.Throws<ImproperDistributionException>(() => g.GetMean());
Gaussian g3 = new Gaussian();
g3.SetToSum(1.0, g, System.Math.Exp(800), g2);
g3.SetToSum(1.0, g, double.PositiveInfinity, g2);
Assert.True(g3.Equals(g2));
}

Просмотреть файл

@ -235,7 +235,12 @@ namespace Microsoft.ML.Probabilistic.Tests
public void GammaPowerPowerTest()
{
Assert.False(double.IsNaN(PowerOp.GammaPowerFromDifferentPower(new GammaPower(1.333, 1.5, 1), 0.01).Shape));
Assert.True(PowerOp.XAverageConditional(new GammaPower(7, 0.1111, -1), new GammaPower(16.19, 0.06154, 1), 2.2204460492503136E-16, GammaPower.Uniform(1)).IsProper());
for (int i = 1; i <= 10; i++)
{
// TODO: make this work
//Assert.True(PowerOp.XAverageConditional(new GammaPower(7, 0.1111, -1), new GammaPower(16.19, 0.06154, 1), 2.2204460492503136E-10/i, GammaPower.Uniform(1)).IsProper());
}
Assert.True(PowerOp.XAverageConditional(new GammaPower(7, 0.1111, -1), new GammaPower(16.19, 0.06154, 1), MMath.Ulp(1), GammaPower.Uniform(1)).IsProper());
Assert.True(PowerOp.PowAverageConditional(GammaPower.FromShapeAndRate(9.0744065303642287, 8.7298765698182414, 1), 1.6327904641199278, GammaPower.Uniform(-1)).IsProper());
Assert.False(PowerOp.XAverageConditional(GammaPower.Uniform(-1), GammaPower.FromShapeAndRate(1, 1, 1), 4.0552419045546273, GammaPower.Uniform(1)).IsPointMass);