From 44660297b4238a4f3e843bd071f5e8b214bdd38a Mon Sep 17 00:00:00 2001 From: Xiaoyun Zhang Date: Tue, 2 Feb 2021 17:22:42 -0800 Subject: [PATCH] Make ColumnInference serializable (#5611) * upgrade to 3.1 * write inline data using invariantCulture * make column inference serializable * add test json * add approvaltests --- .../API/ColumnInference.cs | 18 +++-- src/Microsoft.ML.AutoML/Assembly.cs | 2 +- src/Microsoft.ML.CodeGenerator/Assembly.cs | 2 +- ...result_should_be_serializable.approved.txt | 66 +++++++++++++++++++ .../ColumnInferenceTests.cs | 18 +++++ .../Microsoft.ML.AutoML.Tests.csproj | 21 +----- .../TestData/wiki-column-inference.json | 66 +++++++++++++++++++ 7 files changed, 167 insertions(+), 26 deletions(-) create mode 100644 test/Microsoft.ML.AutoML.Tests/ApprovalTests/ColumnInferenceTests.Wiki_column_inference_result_should_be_serializable.approved.txt create mode 100644 test/Microsoft.ML.AutoML.Tests/TestData/wiki-column-inference.json diff --git a/src/Microsoft.ML.AutoML/API/ColumnInference.cs b/src/Microsoft.ML.AutoML/API/ColumnInference.cs index ab2539b94..a2d697da9 100644 --- a/src/Microsoft.ML.AutoML/API/ColumnInference.cs +++ b/src/Microsoft.ML.AutoML/API/ColumnInference.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.Collections.ObjectModel; using Microsoft.ML.Data; +using Newtonsoft.Json; namespace Microsoft.ML.AutoML { @@ -20,6 +21,7 @@ namespace Microsoft.ML.AutoML /// Can be used to instantiate a new to load /// data into an . /// + [JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)] public TextLoader.Options TextLoaderOptions { get; internal set; } /// @@ -31,6 +33,7 @@ namespace Microsoft.ML.AutoML /// See /// for example. 
/// + [JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)] public ColumnInformation ColumnInformation { get; internal set; } } @@ -90,31 +93,36 @@ namespace Microsoft.ML.AutoML /// /// Categorical data columns should generally be columns that contain a small number of unique values. /// - public ICollection CategoricalColumnNames { get; } + [JsonProperty] + public ICollection CategoricalColumnNames { get; private set; } /// /// The dataset columns that are numeric. /// /// The default value is a new, empty . - public ICollection NumericColumnNames { get; } + [JsonProperty] + public ICollection NumericColumnNames { get; private set; } /// /// The dataset columns that are text. /// /// The default value is a new, empty . - public ICollection TextColumnNames { get; } + [JsonProperty] + public ICollection TextColumnNames { get; private set; } /// /// The dataset columns that AutoML should ignore. /// /// The default value is a new, empty . - public ICollection IgnoredColumnNames { get; } + [JsonProperty] + public ICollection IgnoredColumnNames { get; private set; } /// /// The dataset columns that are image paths. /// /// The default value is a new, empty . 
- public ICollection ImagePathColumnNames { get; } + [JsonProperty] + public ICollection ImagePathColumnNames { get; private set; } public ColumnInformation() { diff --git a/src/Microsoft.ML.AutoML/Assembly.cs b/src/Microsoft.ML.AutoML/Assembly.cs index 1fd879e42..110e01af7 100644 --- a/src/Microsoft.ML.AutoML/Assembly.cs +++ b/src/Microsoft.ML.AutoML/Assembly.cs @@ -13,5 +13,5 @@ using System.Runtime.CompilerServices; [assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] [assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] [assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Gpu, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] -[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Test, 
PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] +[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] diff --git a/src/Microsoft.ML.CodeGenerator/Assembly.cs b/src/Microsoft.ML.CodeGenerator/Assembly.cs index 9b9e70ea8..71b6b77d8 100644 --- a/src/Microsoft.ML.CodeGenerator/Assembly.cs +++ b/src/Microsoft.ML.CodeGenerator/Assembly.cs @@ -10,4 +10,4 @@ using System.Runtime.CompilerServices; [assembly: InternalsVisibleTo("mlnet.Tests, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")] [assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] [assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Gpu, 
PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] -[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Test, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] +[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")] diff --git a/test/Microsoft.ML.AutoML.Tests/ApprovalTests/ColumnInferenceTests.Wiki_column_inference_result_should_be_serializable.approved.txt b/test/Microsoft.ML.AutoML.Tests/ApprovalTests/ColumnInferenceTests.Wiki_column_inference_result_should_be_serializable.approved.txt new file mode 100644 index 000000000..79c9e98a0 --- /dev/null +++ b/test/Microsoft.ML.AutoML.Tests/ApprovalTests/ColumnInferenceTests.Wiki_column_inference_result_should_be_serializable.approved.txt @@ -0,0 +1,66 @@ +{ + "TextLoaderOptions": { + "AllowQuoting": true, + "AllowSparse": false, + "InputSize": null, + "Separators": [ + "\t" + ], + "DecimalMarker": ".", + "Columns": [ + { + "Name": "Sentiment", + "Source": [ + { + "Min": 0, + "Max": 0, + "AutoEnd": false, + "VariableEnd": false, + "AllOther": false, + "ForceVector": false + } + ], + "KeyCount": 
null, + "DataKind": 11 + }, + { + "Name": "SentimentText", + "Source": [ + { + "Min": 1, + "Max": 1, + "AutoEnd": false, + "VariableEnd": false, + "AllOther": false, + "ForceVector": false + } + ], + "KeyCount": null, + "DataKind": 11 + } + ], + "TrimWhitespace": false, + "HasHeader": true, + "UseThreads": true, + "ReadMultilines": false, + "HeaderFile": null, + "MaxRows": null, + "EscapeChar": "\"", + "MissingRealsAsNaNs": false + }, + "ColumnInformation": { + "LabelColumnName": "Sentiment", + "UserIdColumnName": null, + "GroupIdColumnName": null, + "ItemIdColumnName": null, + "ExampleWeightColumnName": null, + "SamplingKeyColumnName": null, + "CategoricalColumnNames": [], + "NumericColumnNames": [], + "TextColumnNames": [ + "SentimentText" + ], + "IgnoredColumnNames": [], + "ImagePathColumnNames": [] + } +} \ No newline at end of file diff --git a/test/Microsoft.ML.AutoML.Tests/ColumnInferenceTests.cs b/test/Microsoft.ML.AutoML.Tests/ColumnInferenceTests.cs index f7b7c2f2f..63ada4d49 100644 --- a/test/Microsoft.ML.AutoML.Tests/ColumnInferenceTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/ColumnInferenceTests.cs @@ -2,9 +2,13 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using ApprovalTests; +using ApprovalTests.Namers; +using ApprovalTests.Reporters; using FluentAssertions; using Microsoft.ML.Data; using Microsoft.ML.TestFramework; +using Newtonsoft.Json; using Xunit; using Xunit.Abstractions; @@ -222,5 +226,19 @@ namespace Microsoft.ML.AutoML.Test Assert.Equal("description", result.ColumnInformation.TextColumnNames.First()); Assert.Equal("animal", result.ColumnInformation.CategoricalColumnNames.First()); } + + [Fact] + [UseReporter(typeof(DiffReporter))] + [UseApprovalSubdirectory("ApprovalTests")] + public void Wiki_column_inference_result_should_be_serializable() + { + var wiki = Path.Combine("TestData", "wiki-column-inference.json"); + using (var stream = new StreamReader(wiki)) + { + var json = stream.ReadToEnd(); + var 
columnInferenceResults = JsonConvert.DeserializeObject<ColumnInferenceResults>(json); // typed round-trip: exercises ColumnInferenceResults itself rather than an untyped JObject + Approvals.Verify(JsonConvert.SerializeObject(columnInferenceResults, Formatting.Indented)); + } + } } } diff --git a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj index 2800f5599..fad409f5f 100644 --- a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj +++ b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj @@ -7,30 +7,13 @@ + - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - + PreserveNewest diff --git a/test/Microsoft.ML.AutoML.Tests/TestData/wiki-column-inference.json b/test/Microsoft.ML.AutoML.Tests/TestData/wiki-column-inference.json new file mode 100644 index 000000000..79c9e98a0 --- /dev/null +++ b/test/Microsoft.ML.AutoML.Tests/TestData/wiki-column-inference.json @@ -0,0 +1,66 @@ +{ + "TextLoaderOptions": { + "AllowQuoting": true, + "AllowSparse": false, + "InputSize": null, + "Separators": [ + "\t" + ], + "DecimalMarker": ".", + "Columns": [ + { + "Name": "Sentiment", + "Source": [ + { + "Min": 0, + "Max": 0, + "AutoEnd": false, + "VariableEnd": false, + "AllOther": false, + "ForceVector": false + } + ], + "KeyCount": null, + "DataKind": 11 + }, + { + "Name": "SentimentText", + "Source": [ + { + "Min": 1, + "Max": 1, + "AutoEnd": false, + "VariableEnd": false, + "AllOther": false, + "ForceVector": false + } + ], + "KeyCount": null, + "DataKind": 11 + } + ], + "TrimWhitespace": false, + "HasHeader": true, + "UseThreads": true, + "ReadMultilines": false, + "HeaderFile": null, + "MaxRows": null, + "EscapeChar": "\"", + "MissingRealsAsNaNs": false + }, + "ColumnInformation": { + "LabelColumnName": "Sentiment", + "UserIdColumnName": null, + "GroupIdColumnName": null, + "ItemIdColumnName": null, + "ExampleWeightColumnName": null, + "SamplingKeyColumnName": null, + "CategoricalColumnNames": [], + 
"NumericColumnNames": [], + "TextColumnNames": [ + "SentimentText" + ], + "IgnoredColumnNames": [], + "ImagePathColumnNames": [] + } +} \ No newline at end of file