Make ColumnInference serializable (#5611)
* upgrade to 3.1 * write inline data using invariantCulture * make column inference serializable * add test json * add approvaltests
This commit is contained in:
Родитель
9e789473ce
Коммит
44660297b4
|
@ -5,6 +5,7 @@
|
|||
using System.Collections.Generic;
|
||||
using System.Collections.ObjectModel;
|
||||
using Microsoft.ML.Data;
|
||||
using Newtonsoft.Json;
|
||||
|
||||
namespace Microsoft.ML.AutoML
|
||||
{
|
||||
|
@ -20,6 +21,7 @@ namespace Microsoft.ML.AutoML
|
|||
/// Can be used to instantiate a new <see cref="TextLoader" /> to load
|
||||
/// data into an <see cref="IDataView" />.
|
||||
/// </remarks>
|
||||
[JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)]
|
||||
public TextLoader.Options TextLoaderOptions { get; internal set; }
|
||||
|
||||
/// <summary>
|
||||
|
@ -31,6 +33,7 @@ namespace Microsoft.ML.AutoML
|
|||
/// See <see cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
|
||||
/// for example.</para>
|
||||
/// </remarks>
|
||||
[JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)]
|
||||
public ColumnInformation ColumnInformation { get; internal set; }
|
||||
}
|
||||
|
||||
|
@ -90,31 +93,36 @@ namespace Microsoft.ML.AutoML
|
|||
/// <remarks>
|
||||
/// Categorical data columns should generally be columns that contain a small number of unique values.
|
||||
/// </remarks>
|
||||
public ICollection<string> CategoricalColumnNames { get; }
|
||||
[JsonProperty]
|
||||
public ICollection<string> CategoricalColumnNames { get; private set; }
|
||||
|
||||
/// <summary>
|
||||
/// The dataset columns that are numeric.
|
||||
/// </summary>
|
||||
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
|
||||
public ICollection<string> NumericColumnNames { get; }
|
||||
[JsonProperty]
|
||||
public ICollection<string> NumericColumnNames { get; private set; }
|
||||
|
||||
/// <summary>
|
||||
/// The dataset columns that are text.
|
||||
/// </summary>
|
||||
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
|
||||
public ICollection<string> TextColumnNames { get; }
|
||||
[JsonProperty]
|
||||
public ICollection<string> TextColumnNames { get; private set; }
|
||||
|
||||
/// <summary>
|
||||
/// The dataset columns that AutoML should ignore.
|
||||
/// </summary>
|
||||
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
|
||||
public ICollection<string> IgnoredColumnNames { get; }
|
||||
[JsonProperty]
|
||||
public ICollection<string> IgnoredColumnNames { get; private set; }
|
||||
|
||||
/// <summary>
|
||||
/// The dataset columns that are image paths.
|
||||
/// </summary>
|
||||
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
|
||||
public ICollection<string> ImagePathColumnNames { get; }
|
||||
[JsonProperty]
|
||||
public ICollection<string> ImagePathColumnNames { get; private set; }
|
||||
|
||||
public ColumnInformation()
|
||||
{
|
||||
|
|
|
@ -13,5 +13,5 @@ using System.Runtime.CompilerServices;
|
|||
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
|
||||
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
|
||||
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Gpu, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
|
||||
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Test, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
|
||||
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
|
||||
|
||||
|
|
|
@ -10,4 +10,4 @@ using System.Runtime.CompilerServices;
|
|||
[assembly: InternalsVisibleTo("mlnet.Tests, PublicKey=00240000048000009400000006020000002400005253413100040000010001004b86c4cb78549b34bab61a3b1800e23bfeb5b3ec390074041536a7e3cbd97f5f04cf0f857155a8928eaa29ebfd11cfbbad3ba70efea7bda3226c6a8d370a4cd303f714486b6ebc225985a638471e6ef571cc92a4613c00b8fa65d61ccee0cbe5f36330c9a01f4183559f1bef24cc2917c6d913e3a541333a1d05d9bed22b38cb")]
|
||||
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
|
||||
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Gpu, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
|
||||
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Test, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
|
||||
[assembly: InternalsVisibleTo("Microsoft.ML.ModelBuilder.AutoMLService.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010007d1fa57c4aed9f0a32e84aa0faefd0de9e8fd6aec8f87fb03766c834c99921eb23be79ad9d5dcc1dd9ad236132102900b723cf980957fc4e177108fc607774f29e8320e92ea05ece4e821c0a5efe8f1645c4c0c93c1ab99285d622caa652c1dfad63d745d6f2de5f17e5eaf0fc4963d261c8a12436518206dc093344d5ad293")]
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
{
|
||||
"TextLoaderOptions": {
|
||||
"AllowQuoting": true,
|
||||
"AllowSparse": false,
|
||||
"InputSize": null,
|
||||
"Separators": [
|
||||
"\t"
|
||||
],
|
||||
"DecimalMarker": ".",
|
||||
"Columns": [
|
||||
{
|
||||
"Name": "Sentiment",
|
||||
"Source": [
|
||||
{
|
||||
"Min": 0,
|
||||
"Max": 0,
|
||||
"AutoEnd": false,
|
||||
"VariableEnd": false,
|
||||
"AllOther": false,
|
||||
"ForceVector": false
|
||||
}
|
||||
],
|
||||
"KeyCount": null,
|
||||
"DataKind": 11
|
||||
},
|
||||
{
|
||||
"Name": "SentimentText",
|
||||
"Source": [
|
||||
{
|
||||
"Min": 1,
|
||||
"Max": 1,
|
||||
"AutoEnd": false,
|
||||
"VariableEnd": false,
|
||||
"AllOther": false,
|
||||
"ForceVector": false
|
||||
}
|
||||
],
|
||||
"KeyCount": null,
|
||||
"DataKind": 11
|
||||
}
|
||||
],
|
||||
"TrimWhitespace": false,
|
||||
"HasHeader": true,
|
||||
"UseThreads": true,
|
||||
"ReadMultilines": false,
|
||||
"HeaderFile": null,
|
||||
"MaxRows": null,
|
||||
"EscapeChar": "\"",
|
||||
"MissingRealsAsNaNs": false
|
||||
},
|
||||
"ColumnInformation": {
|
||||
"LabelColumnName": "Sentiment",
|
||||
"UserIdColumnName": null,
|
||||
"GroupIdColumnName": null,
|
||||
"ItemIdColumnName": null,
|
||||
"ExampleWeightColumnName": null,
|
||||
"SamplingKeyColumnName": null,
|
||||
"CategoricalColumnNames": [],
|
||||
"NumericColumnNames": [],
|
||||
"TextColumnNames": [
|
||||
"SentimentText"
|
||||
],
|
||||
"IgnoredColumnNames": [],
|
||||
"ImagePathColumnNames": []
|
||||
}
|
||||
}
|
|
@ -2,9 +2,13 @@
|
|||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using ApprovalTests;
|
||||
using ApprovalTests.Namers;
|
||||
using ApprovalTests.Reporters;
|
||||
using FluentAssertions;
|
||||
using Microsoft.ML.Data;
|
||||
using Microsoft.ML.TestFramework;
|
||||
using Newtonsoft.Json;
|
||||
using Xunit;
|
||||
using Xunit.Abstractions;
|
||||
|
||||
|
@ -222,5 +226,19 @@ namespace Microsoft.ML.AutoML.Test
|
|||
Assert.Equal("description", result.ColumnInformation.TextColumnNames.First());
|
||||
Assert.Equal("animal", result.ColumnInformation.CategoricalColumnNames.First());
|
||||
}
|
||||
|
||||
[Fact]
[UseReporter(typeof(DiffReporter))]
[UseApprovalSubdirectory("ApprovalTests")]
public void Wiki_column_inference_result_should_be_serializable()
{
    // Round-trip check: read the JSON fixture, deserialize it into
    // ColumnInferenceResults, serialize it back as indented JSON and pass
    // the resulting text to Approvals.Verify.
    var fixturePath = Path.Combine("TestData", "wiki-column-inference.json");

    string fixtureJson;
    using (var reader = new StreamReader(fixturePath))
    {
        fixtureJson = reader.ReadToEnd();
    }

    var inferenceResults = JsonConvert.DeserializeObject<ColumnInferenceResults>(fixtureJson);
    var roundTrippedJson = JsonConvert.SerializeObject(inferenceResults, Formatting.Indented);

    Approvals.Verify(roundTrippedJson);
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,30 +7,13 @@
|
|||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="ApprovalTests" Version="5.2.4" />
|
||||
<PackageReference Include="FluentAssertions" Version="5.10.3" />
|
||||
<PackageReference Include="SciSharp.TensorFlow.Redist" Version="$(TensorFlowVersion)" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Update="TestData\DatasetWithNewlineBetweenQuotes.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="TestData\DatasetWithDefaultColumnNames.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="TestData\DatasetWithoutHeader.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="TestData\NameColumnIsOnlyFeatureDataset.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="TestData\BinaryDatasetWithBoolColumn.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="TestData\DatasetWithEmptyColumn.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="TestData\TrivialMulticlassDataset.txt">
|
||||
<None Update="TestData\**\*">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="xunit.runner.json">
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
{
|
||||
"TextLoaderOptions": {
|
||||
"AllowQuoting": true,
|
||||
"AllowSparse": false,
|
||||
"InputSize": null,
|
||||
"Separators": [
|
||||
"\t"
|
||||
],
|
||||
"DecimalMarker": ".",
|
||||
"Columns": [
|
||||
{
|
||||
"Name": "Sentiment",
|
||||
"Source": [
|
||||
{
|
||||
"Min": 0,
|
||||
"Max": 0,
|
||||
"AutoEnd": false,
|
||||
"VariableEnd": false,
|
||||
"AllOther": false,
|
||||
"ForceVector": false
|
||||
}
|
||||
],
|
||||
"KeyCount": null,
|
||||
"DataKind": 11
|
||||
},
|
||||
{
|
||||
"Name": "SentimentText",
|
||||
"Source": [
|
||||
{
|
||||
"Min": 1,
|
||||
"Max": 1,
|
||||
"AutoEnd": false,
|
||||
"VariableEnd": false,
|
||||
"AllOther": false,
|
||||
"ForceVector": false
|
||||
}
|
||||
],
|
||||
"KeyCount": null,
|
||||
"DataKind": 11
|
||||
}
|
||||
],
|
||||
"TrimWhitespace": false,
|
||||
"HasHeader": true,
|
||||
"UseThreads": true,
|
||||
"ReadMultilines": false,
|
||||
"HeaderFile": null,
|
||||
"MaxRows": null,
|
||||
"EscapeChar": "\"",
|
||||
"MissingRealsAsNaNs": false
|
||||
},
|
||||
"ColumnInformation": {
|
||||
"LabelColumnName": "Sentiment",
|
||||
"UserIdColumnName": null,
|
||||
"GroupIdColumnName": null,
|
||||
"ItemIdColumnName": null,
|
||||
"ExampleWeightColumnName": null,
|
||||
"SamplingKeyColumnName": null,
|
||||
"CategoricalColumnNames": [],
|
||||
"NumericColumnNames": [],
|
||||
"TextColumnNames": [
|
||||
"SentimentText"
|
||||
],
|
||||
"IgnoredColumnNames": [],
|
||||
"ImagePathColumnNames": []
|
||||
}
|
||||
}
|
Загрузка…
Ссылка в новой задаче