541 строка
26 KiB
C#
541 строка
26 KiB
C#
// Licensed to the .NET Foundation under one or more agreements.
|
|
// The .NET Foundation licenses this file to you under the MIT license.
|
|
// See the LICENSE file in the project root for more information.
|
|
|
|
using System;
|
|
using System.IO;
|
|
using Microsoft.ML.Data;
|
|
using Microsoft.ML.RunTests;
|
|
using Microsoft.ML.TestFrameworkCommon;
|
|
using Microsoft.ML.Transforms;
|
|
using Xunit;
|
|
using Xunit.Abstractions;
|
|
|
|
namespace Microsoft.ML.Tests
|
|
{
|
|
public sealed class SvmLightTests : TestDataPipeBase
|
|
{
|
|
/// <summary>
/// Creates the test class, passing the test output helper on to the base class.
/// </summary>
/// <param name="output">Output helper handed to the base class constructor.</param>
public SvmLightTests(ITestOutputHelper output)
    : base(output)
{
}
|
|
|
|
#pragma warning disable 0649 // Members look unused to the compiler, but they are consumed through reflection.
/// <summary>
/// Row type used to build the expected data views in these tests. A schema definition is
/// created from this type via <c>SchemaDefinition.Create(typeof(SvmLightOutput))</c>, so the
/// field declarations are kept in their original order.
/// </summary>
private sealed class SvmLightOutput
{
    // Label of the example.
    public float Label;

    // Weight of the example.
    public float Weight;

    // Group id of the example, declared as a key type; defaults to ulong.MaxValue
    // when a test does not set it.
    [KeyType(ulong.MaxValue - 1)]
    public ulong GroupId = ulong.MaxValue;

    // Trailing comment text of the example, if any.
    public ReadOnlyMemory<char> Comment;

    // Feature vector; each test overrides the column type to fix the vector size.
    public VBuffer<float> Features;
}
#pragma warning restore 0649
|
|
|
|
/// <summary>
/// Writes the given lines to a test output file and returns that file's path.
/// </summary>
/// <param name="name">Suffix appended to <c>TestName</c> to form the file name.</param>
/// <param name="data">Lines written to the file, one array element per line.</param>
/// <returns>The path obtained from <c>DeleteOutputPath</c> for the written file.</returns>
private string CreateDataset(string name, string[] data)
{
    string datasetPath = DeleteOutputPath(TestName + name);
    File.WriteAllLines(datasetPath, data);
    return datasetPath;
}
|
|
|
|
/// <summary>
/// Loads an SVM-light file and compares it against <paramref name="expectedData"/>, then saves
/// the expected data in SVM-light format, reloads it and compares again.
/// </summary>
/// <param name="path">Path of the SVM-light file to load.</param>
/// <param name="savingPath">Path the expected data is saved to before being reloaded.</param>
/// <param name="inputSize">Input size passed to the loader for both loads.</param>
/// <param name="expectedInputSize">Expected value count of the loaded "Features" column.</param>
/// <param name="zeroBased">Whether feature indices are zero-based, for loading and saving.</param>
/// <param name="expectedData">Data view the loaded data must match.</param>
/// <param name="numberOfRows">Optional row count passed to the first load only.</param>
private void TestSvmLight(string path, string savingPath, int inputSize, int expectedInputSize, bool zeroBased, IDataView expectedData, long? numberOfRows = null)
{
    var data = ML.Data.LoadFromSvmLightFile(path, inputSize: inputSize, zeroBased: zeroBased, numberOfRows: numberOfRows);

    // Assert.Equal reports both the expected and the actual size when the check fails.
    Assert.Equal(expectedInputSize, data.Schema["Features"].Type.GetValueCount());

    CheckSameValues(data, expectedData, checkId: false);

    // Save, reload and compare dataviews again.
    using (var stream = File.Create(savingPath))
    {
        ML.Data.SaveInSvmLightFormat(expectedData, stream, zeroBasedIndexing: zeroBased, exampleWeightColumnName: "Weight");
    }

    data = ML.Data.LoadFromSvmLightFile(savingPath, inputSize: inputSize, zeroBased: zeroBased);

    // The "Comment" column is dropped from both sides before this second comparison.
    CheckSameValues(ColumnSelectingTransformer.CreateDrop(Env, data, "Comment"),
        ColumnSelectingTransformer.CreateDrop(Env, expectedData, "Comment"), checkId: false);

    Done();
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderAndSaverWithSpecifiedInputSize()
{
    // An explicit input size of 5 is given with one-based indices, so the feature with
    // index 6 is expected to be dropped. The blank line and the full-line comment must
    // not produce rows, and the "2:3" inside the trailing comment must not be read as
    // a feature.
    string[] lines =
    {
        "1\t1:3\t4:6",
        " -1 cost:5\t2:4 \t4:7\t6:-1 ",
        "",
        "1\t5:-2 # A comment! 2:3",
        "# What a nice full line comment",
        "1 cost:0.5\t2:3.14159",
    };
    var inputPath = CreateDataset("-data.txt", lines);

    var schema = SchemaDefinition.Create(typeof(SvmLightOutput));
    schema["Features"].ColumnType = new VectorDataViewType(NumberDataViewType.Single, 5);

    SvmLightOutput[] expectedRows =
    {
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(5, 2, new[] { 3f, 6f }, new[] { 0, 3 }) },
        new SvmLightOutput { Label = -1, Weight = 5, Features = new VBuffer<float>(5, 2, new[] { 4f, 7f }, new[] { 1, 3 }) },
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(5, 1, new[] { -2f }, new[] { 4 }), Comment = " A comment! 2:3".AsMemory() },
        new SvmLightOutput { Label = 1, Weight = 0.5f, Features = new VBuffer<float>(5, 1, new[] { 3.14159f }, new[] { 1 }) },
    };
    var expectedData = ML.Data.LoadFromEnumerable(expectedRows, schema);

    var outputPath = DeleteOutputPath(TestName + "-saved-data.txt");
    TestSvmLight(inputPath, outputPath, inputSize: 5, expectedInputSize: 5, zeroBased: false, expectedData: expectedData);
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderAndSaverWithSpecifiedInputSizeZeroBased()
{
    // With an explicit input size of 5 and zero-based feature indices, neither index 5
    // nor index 6 fits in the vector, so both are expected to be absent.
    string[] lines =
    {
        "1\t1:3\t4:6",
        " -1 cost:5\t2:4 \t4:7\t6:-1 ",
        "",
        "1\t5:-2 # A comment! 2:3",
        "# What a nice full line comment",
        "1 cost:0.5\t2:3.14159",
    };
    var inputPath = CreateDataset("-data.txt", lines);

    var schema = SchemaDefinition.Create(typeof(SvmLightOutput));
    schema["Features"].ColumnType = new VectorDataViewType(NumberDataViewType.Single, 5);

    SvmLightOutput[] expectedRows =
    {
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(5, 2, new[] { 3f, 6f }, new[] { 1, 4 }) },
        new SvmLightOutput { Label = -1, Weight = 5, Features = new VBuffer<float>(5, 2, new[] { 4f, 7f }, new[] { 2, 4 }) },
        // The only feature on this line has index 5, so the expected vector is empty.
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(5, 0, new float[0], new int[0]), Comment = " A comment! 2:3".AsMemory() },
        new SvmLightOutput { Label = 1, Weight = 0.5f, Features = new VBuffer<float>(5, 1, new[] { 3.14159f }, new[] { 2 }) },
    };
    var expectedData = ML.Data.LoadFromEnumerable(expectedRows, schema);

    var outputPath = DeleteOutputPath(TestName + "-saved-data.txt");
    TestSvmLight(inputPath, outputPath, inputSize: 5, expectedInputSize: 5, zeroBased: true, expectedData: expectedData);
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderAndSaverAutoDetectInputSize()
{
    // No input size is specified (0 is passed), so the size is determined automatically.
    // The "6" feature should be included and the feature vector should have length 6.
    string[] lines =
    {
        "1\t1:3\t4:6",
        " -1 cost:5\t2:4 \t4:7\t6:-1 ",
        "",
        "1\t5:-2 # A comment! 2:3",
        "# What a nice full line comment",
        "1 cost:0.5\t2:3.14159",
    };
    var inputPath = CreateDataset("-data.txt", lines);

    var schema = SchemaDefinition.Create(typeof(SvmLightOutput));
    schema["Features"].ColumnType = new VectorDataViewType(NumberDataViewType.Single, 6);

    SvmLightOutput[] expectedRows =
    {
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(6, 2, new[] { 3f, 6f }, new[] { 0, 3 }) },
        new SvmLightOutput { Label = -1, Weight = 5, Features = new VBuffer<float>(6, 3, new[] { 4f, 7f, -1f }, new[] { 1, 3, 5 }) },
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(6, 1, new[] { -2f }, new[] { 4 }), Comment = " A comment! 2:3".AsMemory() },
        new SvmLightOutput { Label = 1, Weight = 0.5f, Features = new VBuffer<float>(6, 1, new[] { 3.14159f }, new[] { 1 }) },
    };
    var expectedData = ML.Data.LoadFromEnumerable(expectedRows, schema);

    var outputPath = DeleteOutputPath(TestName + "-saved-data.txt");
    TestSvmLight(inputPath, outputPath, inputSize: 0, expectedInputSize: 6, zeroBased: false, expectedData: expectedData);
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderAndSaverWithTermMapping()
{
    // Test with a term mapping, instead of the actual SVM^light format that
    // requires positive integers. Also check that qid works here.
    var path = CreateDataset("-data.txt", new string[] {
        "1 qid:1 aurora:3.14159 beachwood:123",
        "-1 qid:5 beachwood:345 chagrin:-21",
    });

    var model = ML.Data.CreateSvmLightLoaderWithFeatureNames(dataSample: new MultiFileSource(path));
    var data = model.Load(path);
    Assert.Equal(3, data.Schema["Features"].Type.GetValueCount());

    // The expected schema carries the three feature names as slot names on "Features".
    var schemaDef = SchemaDefinition.Create(typeof(SvmLightOutput));
    schemaDef["Features"].ColumnType = new VectorDataViewType(NumberDataViewType.Single, 3);
    schemaDef["Features"].AddAnnotation(
        AnnotationUtils.Kinds.SlotNames, new VBuffer<ReadOnlyMemory<char>>(3, new[] { "aurora".AsMemory(), "beachwood".AsMemory(), "chagrin".AsMemory() }),
        new VectorDataViewType(TextDataViewType.Instance, 3));
    var expectedData = ML.Data.LoadFromEnumerable(new SvmLightOutput[]
    {
        new SvmLightOutput() { Label = 1, Weight = 1, GroupId = 1, Features = new VBuffer<float>(3, 2, new[] { 3.14159f, 123f }, new[] { 0, 1 }) },
        new SvmLightOutput() { Label = -1, Weight = 1, GroupId = 5, Features = new VBuffer<float>(3, 2, new[] { 345f, -21f }, new[] { 1, 2 }) },
    }, schemaDef);
    CheckSameValues(data, expectedData, checkId: false);
    TestCommon.CheckSameSchemas(data.Schema, expectedData.Schema);

    // Save, reload and compare dataviews again.
    var outputPath = DeleteOutputPath(TestName + "-saved-data.txt");
    using (var stream = File.Create(outputPath))
    {
        ML.Data.SaveInSvmLightFormat(expectedData, stream, zeroBasedIndexing: true, rowGroupColumnName: "GroupId");
    }
    data = ML.Data.LoadFromSvmLightFile(outputPath, zeroBased: true);
    CheckSameValues(data, expectedData, checkId: false);

    // We reload the model, but on a new set of data. The "euclid" key should be
    // ignored as it would not have been detected by the term transform.
    path = CreateDataset("-data2.txt", new string[] {
        "-1 aurora:1 chagrin:2",
        "1 chagrin:3 euclid:4"
    });
    data = model.Load(path);
    Assert.Equal(3, data.Schema["Features"].Type.GetValueCount());

    expectedData = ML.Data.LoadFromEnumerable(new SvmLightOutput[]
    {
        new SvmLightOutput() { Label = -1, Weight = 1, Features = new VBuffer<float>(3, 2, new[] { 1f, 2f }, new[] { 0, 2 }) },
        new SvmLightOutput() { Label = 1, Weight = 1, Features = new VBuffer<float>(3, 1, new[] { 3f }, new[] { 2 }) },
    }, schemaDef);
    CheckSameValues(data, expectedData, checkId: false);

    // Save, reload and compare dataviews again.
    outputPath = DeleteOutputPath(TestName + "-saved-data2.txt");
    using (var stream = File.Create(outputPath))
    {
        ML.Data.SaveInSvmLightFormat(expectedData, stream);
    }
    data = ML.Data.LoadFromSvmLightFile(outputPath);
    CheckSameValues(data, expectedData, checkId: false);

    // Every other test in this class that relies on CheckSameValues finishes with Done().
    // NOTE(review): presumably Done() is what surfaces failures recorded by
    // CheckSameValues - confirm against the test base class.
    Done();
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderAndSaverWithTermMappingWithEmptyName()
{
    // The line contains a feature whose name is empty (":3"). Only the named feature
    // "aurora" is expected to end up in the feature vector.
    var path = CreateDataset("-data.txt", new string[] { "1 aurora:2 :3" });
    var data = ML.Data.LoadFromSvmLightFileWithFeatureNames(path);
    Assert.Equal(1, data.Schema["Features"].Type.GetValueCount());

    var schemaDef = SchemaDefinition.Create(typeof(SvmLightOutput));
    schemaDef["Features"].ColumnType = new VectorDataViewType(NumberDataViewType.Single, 1);
    var expectedData = ML.Data.LoadFromEnumerable(new SvmLightOutput[]
    {
        new SvmLightOutput() { Label = 1, Weight = 1, Features = new VBuffer<float>(1, 1, new[] { 2f }, new[] { 0 }) },
    }, schemaDef);
    CheckSameValues(data, expectedData, checkId: false);

    // Save, reload and compare dataviews again. The output file name is prefixed with
    // TestName, like every other output file in this class, so it cannot collide with
    // a file written by another test.
    var outputPath = DeleteOutputPath(TestName + "-saved-data.txt");
    using (var stream = File.Create(outputPath))
    {
        ML.Data.SaveInSvmLightFormat(expectedData, stream, zeroBasedIndexing: true);
    }
    data = ML.Data.LoadFromSvmLightFile(outputPath, zeroBased: true);
    CheckSameValues(data, expectedData, checkId: false);

    Done();
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderNoDuplicateKeys()
{
    // The second line names the "chagrin" feature twice; reading the features is
    // expected to fail with a duplicate-key error.
    string[] lines =
    {
        "-1 aurora:1 chagrin:2",
        "1 chagrin:3 euclid:4 chagrin:5"
    };
    var inputPath = CreateDataset("-data.txt", lines);

    var thrown = Assert.Throws<InvalidOperationException>(() =>
    {
        var loaded = ML.Data.LoadFromSvmLightFileWithFeatureNames(inputPath);
        using (var cursor = loaded.GetRowCursor(loaded.Schema))
        {
            var readFeatures = cursor.GetGetter<VBuffer<float>>(loaded.Schema["Features"]);
            VBuffer<float> features = default;
            while (cursor.MoveNext())
            {
                readFeatures(ref features);
            }
        }
    });

    // The message of interest is carried by the inner exception.
    Assert.Contains("Duplicate keys found in dataset", thrown.InnerException.Message);
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderBadLabel()
{
    // Both lines start with a label that is not a number ("q" and "-1a"); every row
    // that is read must report NaN as its label.
    string[] lines =
    {
        "q\t1:3\t4:6",
        " -1a cost:5\t2:4 \t4:7\t6:-1 ",
    };
    var inputPath = CreateDataset("-data.txt", lines);

    var loaded = ML.Data.LoadFromSvmLightFile(inputPath);
    using (var cursor = loaded.GetRowCursor(loaded.Schema["Label"]))
    {
        var readLabel = cursor.GetGetter<float>(loaded.Schema["Label"]);
        float value = default;
        while (cursor.MoveNext())
        {
            readLabel(ref value);
            Assert.True(float.IsNaN(value));
        }
    }
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderMissingGroupId()
{
    // The line carries "qid:-3"; the group id read back for it is expected to be 0.
    var path = CreateDataset("-data.txt", new string[] {
        "1\tqid:-3\t1:3\t4:6",
    });
    var data = ML.Data.LoadFromSvmLightFile(path);
    using (var curs = data.GetRowCursor(data.Schema["GroupId"]))
    {
        var getter = curs.GetGetter<ulong>(data.Schema["GroupId"]);
        ulong group = default;
        while (curs.MoveNext())
        {
            getter(ref group);
            // Assert.Equal reports the actual group id when the check fails.
            Assert.Equal(0UL, group);
        }
    }
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderBadFeature()
{
    // The last line has a feature value that cannot be parsed ("4:hello"). The loader
    // should ignore that value, so the expected row only keeps the "3:2" feature.
    string[] lines =
    {
        "1\t1:3\t4:6",
        " -1 cost:5\t2:4 \t4:7\t6:-1 ",
        "",
        "1\t5:-2 # A comment! 2:3",
        "# What a nice full line comment",
        "1 cost:0.5\t2:3.14159",
        "-1 3:2 4:hello"
    };
    var inputPath = CreateDataset("-data.txt", lines);

    var schema = SchemaDefinition.Create(typeof(SvmLightOutput));
    schema["Features"].ColumnType = new VectorDataViewType(NumberDataViewType.Single, 6);

    SvmLightOutput[] expectedRows =
    {
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(6, 2, new[] { 3f, 6f }, new[] { 0, 3 }) },
        new SvmLightOutput { Label = -1, Weight = 5, Features = new VBuffer<float>(6, 3, new[] { 4f, 7f, -1f }, new[] { 1, 3, 5 }) },
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(6, 1, new[] { -2f }, new[] { 4 }), Comment = " A comment! 2:3".AsMemory() },
        new SvmLightOutput { Label = 1, Weight = 0.5f, Features = new VBuffer<float>(6, 1, new[] { 3.14159f }, new[] { 1 }) },
        new SvmLightOutput { Label = -1, Weight = 1, Features = new VBuffer<float>(6, 1, new[] { 2f }, new[] { 2 }) },
    };
    var expectedData = ML.Data.LoadFromEnumerable(expectedRows, schema);

    var outputPath = DeleteOutputPath(TestName + "-saved-data.txt");
    TestSvmLight(inputPath, outputPath, inputSize: 0, expectedInputSize: 6, zeroBased: false, expectedData: expectedData);
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderNoColon()
{
    // The token "1;3" has no colon separator. The row is expected to contain only the
    // well-formed "4:6" feature, in a vector of length 4.
    var path = CreateDataset("-data.txt", new string[] {
        "1\t1;3\t4:6",
    });
    var data = ML.Data.LoadFromSvmLightFile(path);
    using (var curs = data.GetRowCursor(data.Schema["Features"]))
    {
        var getter = curs.GetGetter<VBuffer<float>>(data.Schema["Features"]);
        VBuffer<float> features = default;
        while (curs.MoveNext())
        {
            getter(ref features);

            // Assert.Equal reports the offending value when a check fails.
            Assert.Equal(4, features.Length);
            Assert.Equal(1, features.GetValues().Length);
            Assert.Equal(1, features.GetIndices().Length);
            Assert.Equal(6f, features.GetValues()[0]);
            Assert.Equal(3, features.GetIndices()[0]);
        }
    }
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderBadIndex()
{
    // Reads the "Features" column of every row, which is where the index errors
    // checked below are raised.
    void ReadAllFeatures(IDataView view)
    {
        using (var cursor = view.GetRowCursor(view.Schema["Features"]))
        {
            var readFeatures = cursor.GetGetter<VBuffer<float>>(view.Schema["Features"]);
            VBuffer<float> features = default;
            while (cursor.MoveNext())
            {
                readFeatures(ref features);
            }
        }
    }

    // 0 index in 1-based parsing.
    var zeroIndexPath = CreateDataset("-data.txt", new[] {
        "1\t0:3\t4:6",
    });
    var data = ML.Data.LoadFromSvmLightFile(zeroIndexPath);
    var ex = Assert.Throws<InvalidOperationException>(() => ReadAllFeatures(data));
    Assert.Contains("Encountered 0 index while parsing a 1-based dataset", ex.InnerException.Message);

    // Negative index.
    // NOTE(review): the original comment said "0-based parsing", but no zeroBased
    // argument is passed to the loader here - confirm which mode was intended.
    var negativeIndexPath = CreateDataset("-data1.txt", new[] {
        "1\t-1:3\t4:6",
    });
    data = ML.Data.LoadFromSvmLightFile(negativeIndexPath);
    ex = Assert.Throws<InvalidOperationException>(() => ReadAllFeatures(data));
    Assert.Contains("Encountered non-parsable index '-1' while parsing dataset", ex.InnerException.Message);

    // Non-parsable index next to a valid one.
    var textIndexPath = CreateDataset("-data2.txt", new[] {
        "1\ta:3\t4:6",
    });
    data = ML.Data.LoadFromSvmLightFile(textIndexPath);
    ex = Assert.Throws<InvalidOperationException>(() => ReadAllFeatures(data));
    Assert.Contains("Encountered non-parsable index 'a' while parsing dataset", ex.InnerException.Message);

    // Only non-parsable indices: here the load call itself is expected to throw, and
    // the message is on the exception itself rather than on an inner exception.
    var onlyTextIndicesPath = CreateDataset("-data3.txt", new[] {
        "1\ta:3\tb:6",
    });
    ex = Assert.Throws<InvalidOperationException>(() => ML.Data.LoadFromSvmLightFile(onlyTextIndicesPath));
    Assert.Contains("No int parsable keys found during key transform inference", ex.Message);
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderMultiStreamSourceSpecialCases()
{
    // Case 1: two files with one row each are loaded through a single loader; exactly
    // two rows are expected.
    var path1 = CreateDataset("-data1.txt", new string[] {
        "1\t1:3\t4:6",
    });
    var path2 = CreateDataset("-data2.txt", new string[] {
        "1\t1:3\t4:6",
    });
    var loader = ML.Data.CreateSvmLightLoader(inputSize: 4);
    var data = loader.Load(new MultiFileSource(path1, path2));
    using (var curs = data.GetRowCursor(data.Schema["Features"]))
    {
        var getter = curs.GetGetter<VBuffer<float>>(data.Schema["Features"]);
        VBuffer<float> features = default;

        // Make sure the cursor really is on a row before reading from it; the result
        // of MoveNext was previously ignored.
        Assert.True(curs.MoveNext());
        getter(ref features);
        Assert.True(curs.MoveNext());
        getter(ref features);
        Assert.False(curs.MoveNext());
    }

    // Case 2: a source built from a null path is expected to yield no rows.
    loader = ML.Data.CreateSvmLightLoader(inputSize: 3);
    data = loader.Load(new MultiFileSource(null));
    using (var curs = data.GetRowCursor())
    {
        Assert.False(curs.MoveNext());
    }
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderNoDataSample()
{
    // Creating a loader with neither a feature count nor a data sample must fail.
    var noSizeError = Assert.Throws<InvalidOperationException>(() => ML.Data.CreateSvmLightLoader());
    Assert.Contains("If the number of features is not specified, a dataset must be provided to infer it.", noSizeError.Message);

    // The feature-names variant must fail as well when no data sample is given.
    var noSampleError = Assert.Throws<InvalidOperationException>(() => ML.Data.CreateSvmLightLoaderWithFeatureNames());
    Assert.Contains("To use the text feature names option, a dataset must be provided", noSampleError.Message);
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderAndSaverTrainOnSubsetOfRows()
{
    // The loader is called with numberOfRows: 4 and no explicit input size. The last
    // line uses feature index 11, yet the expected vector length is 6 and the expected
    // last row does not contain that feature.
    string[] lines =
    {
        "1\t1:3\t4:6",
        " -1 cost:5\t2:4 \t4:7\t6:-1 ",
        "",
        "1\t5:-2 # A comment! 2:3",
        "1 cost:0.5\t2:3.14159",
        "-1 2:5 11:0.34"
    };
    var inputPath = CreateDataset("-data.txt", lines);

    var schema = SchemaDefinition.Create(typeof(SvmLightOutput));
    schema["Features"].ColumnType = new VectorDataViewType(NumberDataViewType.Single, 6);

    SvmLightOutput[] expectedRows =
    {
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(6, 2, new[] { 3f, 6f }, new[] { 0, 3 }) },
        new SvmLightOutput { Label = -1, Weight = 5, Features = new VBuffer<float>(6, 3, new[] { 4f, 7f, -1f }, new[] { 1, 3, 5 }) },
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(6, 1, new[] { -2f }, new[] { 4 }), Comment = " A comment! 2:3".AsMemory() },
        new SvmLightOutput { Label = 1, Weight = 0.5f, Features = new VBuffer<float>(6, 1, new[] { 3.14159f }, new[] { 1 }) },
        new SvmLightOutput { Label = -1, Weight = 1, Features = new VBuffer<float>(6, 1, new[] { 5f }, new[] { 1 }) }
    };
    var expectedData = ML.Data.LoadFromEnumerable(expectedRows, schema);

    var outputPath = DeleteOutputPath(TestName + "-saved-data.txt");
    TestSvmLight(inputPath, outputPath, inputSize: 0, expectedInputSize: 6, zeroBased: false, expectedData: expectedData, numberOfRows: 4);
}
|
|
|
|
[Fact]
public void TestSvmLightLoaderLongIndex()
{
    // The last line uses a feature index of int.MaxValue + 2, which does not fit in an
    // int. The expected vector length is still 6 and the expected last row does not
    // contain that feature.
    string[] lines =
    {
        "1\t1:3\t4:6",
        " -1 cost:5\t2:4 \t4:7\t6:-1 ",
        "",
        "1\t5:-2 # A comment! 2:3",
        "1 cost:0.5\t2:3.14159",
        $"-1 2:5 {(long)int.MaxValue + 2}:0.34"
    };
    var inputPath = CreateDataset("-data.txt", lines);

    var schema = SchemaDefinition.Create(typeof(SvmLightOutput));
    schema["Features"].ColumnType = new VectorDataViewType(NumberDataViewType.Single, 6);

    SvmLightOutput[] expectedRows =
    {
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(6, 2, new[] { 3f, 6f }, new[] { 0, 3 }) },
        new SvmLightOutput { Label = -1, Weight = 5, Features = new VBuffer<float>(6, 3, new[] { 4f, 7f, -1f }, new[] { 1, 3, 5 }) },
        new SvmLightOutput { Label = 1, Weight = 1, Features = new VBuffer<float>(6, 1, new[] { -2f }, new[] { 4 }), Comment = " A comment! 2:3".AsMemory() },
        new SvmLightOutput { Label = 1, Weight = 0.5f, Features = new VBuffer<float>(6, 1, new[] { 3.14159f }, new[] { 1 }) },
        new SvmLightOutput { Label = -1, Weight = 1, Features = new VBuffer<float>(6, 1, new[] { 5f }, new[] { 1 }) }
    };
    var expectedData = ML.Data.LoadFromEnumerable(expectedRows, schema);

    var outputPath = DeleteOutputPath(TestName + "-saved-data.txt");
    TestSvmLight(inputPath, outputPath, inputSize: 0, expectedInputSize: 6, zeroBased: false, expectedData: expectedData);
}
|
|
|
|
[Fact]
public void TestSvmLightSaverBadInputSchema()
{
    // The data to save has a single float column named "Column", so each save below
    // refers to at least one column name that is not present in the data.
    var textLoader = ML.Data.CreateTextLoader(new[] { new TextLoader.Column("Column", DataKind.Single, 0) });

    // Creates the output file, runs the given save against its stream and returns the
    // InvalidOperationException the save is required to throw.
    InvalidOperationException SaveExpectingFailure(string fileSuffix, Action<Stream> save)
    {
        return Assert.Throws<InvalidOperationException>(() =>
        {
            var outputPath = DeleteOutputPath(TestName + fileSuffix);
            using (var output = new FileStream(outputPath, FileMode.Create))
            {
                save(output);
            }
        });
    }

    // Default column names: the label column is missing.
    var error = SaveExpectingFailure("-no-label.txt", output =>
        ML.Data.SaveInSvmLightFormat(textLoader.Load(new MultiFileSource(null)), output));
    Assert.Contains("Column Label not found in data", error.Message);

    // Label resolves, the default features column does not.
    error = SaveExpectingFailure("-no-features.txt", output =>
        ML.Data.SaveInSvmLightFormat(textLoader.Load(new MultiFileSource(null)), output, labelColumnName: "Column"));
    Assert.Contains("Column Features not found in data", error.Message);

    // Label and features resolve, the row group column does not.
    error = SaveExpectingFailure("-no-group.txt", output =>
        ML.Data.SaveInSvmLightFormat(textLoader.Load(new MultiFileSource(null)), output, labelColumnName: "Column", featureColumnName: "Column", rowGroupColumnName: "Group"));
    Assert.Contains("Column Group not found in data", error.Message);

    // Label and features resolve, the example weight column does not.
    error = SaveExpectingFailure("-no-weight.txt", output =>
        ML.Data.SaveInSvmLightFormat(textLoader.Load(new MultiFileSource(null)), output, labelColumnName: "Column", featureColumnName: "Column", exampleWeightColumnName: "Weight"));
    Assert.Contains("Column Weight not found in data", error.Message);
}
|
|
}
|
|
}
|