Make ColumnInference serializable (#5611)

* upgrade to 3.1

* write inline data using invariantCulture

* make column inference serializable

* add test json

* add approvaltests
This commit is contained in:
Xiaoyun Zhang 2021-02-02 17:22:42 -08:00 коммит произвёл GitHub
Родитель 9e789473ce
Коммит 44660297b4
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 167 добавлений и 26 удалений

Просмотреть файл

@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.Collections.ObjectModel;
using Microsoft.ML.Data;
using Newtonsoft.Json;
namespace Microsoft.ML.AutoML
{
@ -20,6 +21,7 @@ namespace Microsoft.ML.AutoML
/// Can be used to instantiate a new <see cref="TextLoader" /> to load
/// data into an <see cref="IDataView" />.
/// </remarks>
[JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)]
public TextLoader.Options TextLoaderOptions { get; internal set; }
/// <summary>
@ -31,6 +33,7 @@ namespace Microsoft.ML.AutoML
/// See <see cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
/// for example.</para>
/// </remarks>
[JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)]
public ColumnInformation ColumnInformation { get; internal set; }
}
@ -90,31 +93,36 @@ namespace Microsoft.ML.AutoML
/// <remarks>
/// Categorical data columns should generally be columns that contain a small number of unique values.
/// </remarks>
public ICollection<string> CategoricalColumnNames { get; }
[JsonProperty]
public ICollection<string> CategoricalColumnNames { get; private set; }
/// <summary>
/// The dataset columns that are numeric.
/// </summary>
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
public ICollection<string> NumericColumnNames { get; }
[JsonProperty]
public ICollection<string> NumericColumnNames { get; private set; }
/// <summary>
/// The dataset columns that are text.
/// </summary>
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
public ICollection<string> TextColumnNames { get; }
[JsonProperty]
public ICollection<string> TextColumnNames { get; private set; }
/// <summary>
/// The dataset columns that AutoML should ignore.
/// </summary>
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
public ICollection<string> IgnoredColumnNames { get; }
[JsonProperty]
public ICollection<string> IgnoredColumnNames { get; private set; }
/// <summary>
/// The dataset columns that are image paths.
/// </summary>
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
public ICollection<string> ImagePathColumnNames { get; }
[JsonProperty]
public ICollection<string> ImagePathColumnNames { get; private set; }
public ColumnInformation()
{

Просмотреть файл

@ -13,5 +13,5 @@ using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Gpu, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Test, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]

Просмотреть файл

@ -10,4 +10,4 @@ using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("mlnet.Tests, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Gpu, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Test, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]

Просмотреть файл

@ -0,0 +1,66 @@
{
"TextLoaderOptions": {
"AllowQuoting": true,
"AllowSparse": false,
"InputSize": null,
"Separators": [
"\t"
],
"DecimalMarker": ".",
"Columns": [
{
"Name": "Sentiment",
"Source": [
{
"Min": 0,
"Max": 0,
"AutoEnd": false,
"VariableEnd": false,
"AllOther": false,
"ForceVector": false
}
],
"KeyCount": null,
"DataKind": 11
},
{
"Name": "SentimentText",
"Source": [
{
"Min": 1,
"Max": 1,
"AutoEnd": false,
"VariableEnd": false,
"AllOther": false,
"ForceVector": false
}
],
"KeyCount": null,
"DataKind": 11
}
],
"TrimWhitespace": false,
"HasHeader": true,
"UseThreads": true,
"ReadMultilines": false,
"HeaderFile": null,
"MaxRows": null,
"EscapeChar": "\"",
"MissingRealsAsNaNs": false
},
"ColumnInformation": {
"LabelColumnName": "Sentiment",
"UserIdColumnName": null,
"GroupIdColumnName": null,
"ItemIdColumnName": null,
"ExampleWeightColumnName": null,
"SamplingKeyColumnName": null,
"CategoricalColumnNames": [],
"NumericColumnNames": [],
"TextColumnNames": [
"SentimentText"
],
"IgnoredColumnNames": [],
"ImagePathColumnNames": []
}
}

Просмотреть файл

@ -2,9 +2,13 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using ApprovalTests;
using ApprovalTests.Namers;
using ApprovalTests.Reporters;
using FluentAssertions;
using Microsoft.ML.Data;
using Microsoft.ML.TestFramework;
using Newtonsoft.Json;
using Xunit;
using Xunit.Abstractions;
@ -222,5 +226,19 @@ namespace Microsoft.ML.AutoML.Test
Assert.Equal("description", result.ColumnInformation.TextColumnNames.First());
Assert.Equal("animal", result.ColumnInformation.CategoricalColumnNames.First());
}
/// <summary>
/// Deserializes the stored wiki column-inference JSON into
/// <see cref="ColumnInferenceResults"/>, serializes it back with indented
/// formatting, and verifies the result against the approved snapshot.
/// </summary>
[Fact]
[UseReporter(typeof(DiffReporter))]
[UseApprovalSubdirectory("ApprovalTests")]
public void Wiki_column_inference_result_should_be_serializable()
{
    // Relative path: resolved against the test process's working directory.
    string fixturePath = Path.Combine("TestData", "wiki-column-inference.json");
    string originalJson = File.ReadAllText(fixturePath);

    ColumnInferenceResults roundTripped =
        JsonConvert.DeserializeObject<ColumnInferenceResults>(originalJson);
    string reserialized = JsonConvert.SerializeObject(roundTripped, Formatting.Indented);

    Approvals.Verify(reserialized);
}
}
}

Просмотреть файл

@ -7,30 +7,13 @@
</ItemGroup>
<ItemGroup>
<PackageReference Include="ApprovalTests" Version="5.2.4" />
<PackageReference Include="FluentAssertions" Version="5.10.3" />
<PackageReference Include="SciSharp.TensorFlow.Redist" Version="$(TensorFlowVersion)" />
</ItemGroup>
<ItemGroup>
<None Update="TestData\DatasetWithNewlineBetweenQuotes.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\DatasetWithDefaultColumnNames.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\DatasetWithoutHeader.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\NameColumnIsOnlyFeatureDataset.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\BinaryDatasetWithBoolColumn.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\DatasetWithEmptyColumn.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TestData\TrivialMulticlassDataset.txt">
<None Update="TestData\**\*">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="xunit.runner.json">

Просмотреть файл

@ -0,0 +1,66 @@
{
"TextLoaderOptions": {
"AllowQuoting": true,
"AllowSparse": false,
"InputSize": null,
"Separators": [
"\t"
],
"DecimalMarker": ".",
"Columns": [
{
"Name": "Sentiment",
"Source": [
{
"Min": 0,
"Max": 0,
"AutoEnd": false,
"VariableEnd": false,
"AllOther": false,
"ForceVector": false
}
],
"KeyCount": null,
"DataKind": 11
},
{
"Name": "SentimentText",
"Source": [
{
"Min": 1,
"Max": 1,
"AutoEnd": false,
"VariableEnd": false,
"AllOther": false,
"ForceVector": false
}
],
"KeyCount": null,
"DataKind": 11
}
],
"TrimWhitespace": false,
"HasHeader": true,
"UseThreads": true,
"ReadMultilines": false,
"HeaderFile": null,
"MaxRows": null,
"EscapeChar": "\"",
"MissingRealsAsNaNs": false
},
"ColumnInformation": {
"LabelColumnName": "Sentiment",
"UserIdColumnName": null,
"GroupIdColumnName": null,
"ItemIdColumnName": null,
"ExampleWeightColumnName": null,
"SamplingKeyColumnName": null,
"CategoricalColumnNames": [],
"NumericColumnNames": [],
"TextColumnNames": [
"SentimentText"
],
"IgnoredColumnNames": [],
"ImagePathColumnNames": []
}
}