add hasHeader to ColumnInference (#4922)
This commit is contained in:
Родитель
ae1b709947
Коммит
e5a19af589
|
@ -36,12 +36,12 @@ namespace Microsoft.ML.AutoML
|
|||
}
|
||||
|
||||
/// <summary>
/// Infers column information for the text file at <paramref name="path"/>.
/// Samples the file, infers how each line splits into columns, infers the type of
/// each column, and then delegates to the <c>InferColumns</c> overload that accepts
/// the precomputed split and type inference results.
/// </summary>
/// <param name="context">The <c>MLContext</c> forwarded to every inference step.</param>
/// <param name="path">Path of the text file to sample and inspect.</param>
/// <param name="columnInfo">
/// Column purposes supplied by the caller; its <c>LabelColumnName</c> is passed to type inference.
/// </param>
/// <param name="separatorChar">Column separator forwarded to split inference (NOTE(review): null presumably means "detect it" - confirm against InferSplit).</param>
/// <param name="allowQuotedStrings">Forwarded unchanged to split inference.</param>
/// <param name="supportSparse">Forwarded unchanged to split inference.</param>
/// <param name="trimWhitespace">Forwarded unchanged to the delegated overload.</param>
/// <param name="groupColumns">Forwarded unchanged to the delegated overload.</param>
/// <param name="hasHeader">
/// Whether the file's first line is a header row. Forwarded to both type inference and the
/// delegated overload. Defaults to <c>true</c> so callers that omit it keep treating the
/// first line as a header.
/// </param>
/// <returns>The result produced by the delegated <c>InferColumns</c> overload.</returns>
public static ColumnInferenceResults InferColumns(MLContext context, string path, ColumnInformation columnInfo,
    char? separatorChar, bool? allowQuotedStrings, bool? supportSparse, bool trimWhitespace, bool groupColumns, bool hasHeader = true)
{
    var sample = TextFileSample.CreateFromFullFile(path);
    var splitInference = InferSplit(context, sample, separatorChar, allowQuotedStrings, supportSparse);

    // Use the caller's hasHeader for BOTH steps; a mismatch would make type inference and
    // the final column construction disagree about whether line one is data.
    var typeInference = InferColumnTypes(context, sample, splitInference, hasHeader, null, columnInfo.LabelColumnName);
    return InferColumns(context, path, columnInfo, hasHeader, splitInference, typeInference, trimWhitespace, groupColumns);
}
|
||||
|
||||
public static ColumnInferenceResults InferColumns(MLContext context, string path, ColumnInformation columnInfo, bool hasHeader,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using Microsoft.ML.Data;
|
||||
|
@ -91,6 +92,35 @@ namespace Microsoft.ML.AutoML.Test
|
|||
Assert.Equal(DefaultColumnNames.Label, result.ColumnInformation.LabelColumnName);
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs column inference on a comma-separated fixture that has no header row and checks
/// the number of inferred loader columns, the data kind of the label / user / item /
/// ignored columns, and the categorical / text column name collections.
/// </summary>
[Fact]
public void InferDatasetWithoutHeader()
{
    var mlContext = new MLContext(1);
    var datasetPath = Path.Combine("TestData", "DatasetWithoutHeader.txt");

    // Columns are referred to as col0, col1, ... because the file carries no names of its own.
    var requestedColumns = new ColumnInformation()
    {
        LabelColumnName = "col0",
        UserIdColumnName = "col1",
        ItemIdColumnName = "col2",
    };
    requestedColumns.IgnoredColumnNames.Add("col4");

    var result = ColumnInferenceApi.InferColumns(
        mlContext,
        datasetPath,
        requestedColumns,
        separatorChar: ',',
        allowQuotedStrings: null,
        supportSparse: null,
        trimWhitespace: false,
        groupColumns: false,
        hasHeader: false);

    var loaderColumns = result.TextLoaderOptions.Columns;
    Assert.Equal(6, loaderColumns.Count());

    var labelColumn = loaderColumns.First(c => c.Name == "col0");
    var userColumn = loaderColumns.First(c => c.Name == "col1");
    var itemColumn = loaderColumns.First(c => c.Name == "col2");
    var ignoreColumn = loaderColumns.First(c => c.Name == "col4");

    Assert.Equal(DataKind.String, labelColumn.DataKind);
    Assert.Equal(DataKind.Single, userColumn.DataKind);
    Assert.Equal(DataKind.Single, itemColumn.DataKind);
    Assert.Equal(DataKind.Single, ignoreColumn.DataKind);

    var inferredInfo = result.ColumnInformation;
    Assert.Single(inferredInfo.CategoricalColumnNames);
    Assert.Empty(inferredInfo.TextColumnNames);
}
|
||||
|
||||
[Fact]
|
||||
public void WhereNameColumnIsOnlyFeature()
|
||||
{
|
||||
|
|
|
@ -14,6 +14,9 @@
|
|||
<None Update="TestData\DatasetWithDefaultColumnNames.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="TestData\DatasetWithoutHeader.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="TestData\NameColumnIsOnlyFeatureDataset.txt">
|
||||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
|
||||
</None>
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
CMT,1271,3.8,CRD,17.5,T
|
||||
CMT,474,1.5,CRD,8,T
|
||||
CMT,637,1.4,CRD,8.5,T
|
||||
CMT,181,0.6,CSH,4.5,T
|
||||
CMT,661,1.1,CRD,8.5,T
|
||||
CMT,935,9.6,CSH,27.5,T
|
||||
CMT,869,2.3,CRD,11.5,T
|
||||
CMT,454,1.4,CRD,7.5,T
|
||||
CMT,366,1.5,CSH,7.5,T
|
||||
VTS,1140,5.61,CSH,18.5,F
|
||||
VTS,120,0.67,CSH,4,F
|
||||
VTS,240,1.7,CRD,6.5,F
|
||||
VTS,660,2.52,CRD,10.5,F
|
||||
VTS,420,0.82,CSH,6,F
|
||||
VTS,420,1.04,CRD,6.5,F
|
||||
VTS,2280,18,CRD,52,F
|
||||
VTS,360,1.2,CRD,6.5,F
|
||||
VTS,660,2.22,CSH,10,F
|
||||
VTS,840,3.29,CSH,12.5,F
|
||||
VTS,540,1.85,CRD,8.5,F
|
Загрузка…
Ссылка в новой задаче