// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using FluentAssertions;
using Microsoft.Data.Analysis;
using Microsoft.ML.AutoML;
using Microsoft.ML.Fairlearn.AutoML;
using Microsoft.ML.TestFramework.Attributes;
using Xunit;
using Xunit.Abstractions;

namespace Microsoft.ML.Fairlearn.Tests
{
    /// <summary>
    /// Tests for the Fairlearn grid-search helpers: lambda search-space generation
    /// and the AutoML experiment configured with a Fairlearn binary classification metric.
    /// </summary>
    public class GridSearchTest
    {
        // xUnit output sink; AutoML log messages are forwarded to it so they show up in test results.
        private readonly ITestOutputHelper _output;

        /// <summary>
        /// Creates the test class with the xUnit-provided output helper.
        /// </summary>
        /// <param name="output">Output helper used to write log messages for the running test.</param>
        public GridSearchTest(ITestOutputHelper output)
        {
            _output = output;
        }

        /// <summary>
        /// Checks that the generated lambda search space contains one positive and one negative
        /// key for each value of the sensitive feature ("a" and "b") in the dummy dataset.
        /// </summary>
        [Fact]
        public void Generate_binary_classification_lambda_search_space_test()
        {
            var moment = new UtilityParity();
            var dataset = CreateDummyDataset();

            // Direct cast instead of `as`: if the column is ever not a string column, fail here
            // with a clear InvalidCastException rather than handing null to LoadData.
            var sensitiveFeature = (StringDataFrameColumn)dataset["sensitiveFeature"];
            moment.LoadData(dataset, dataset["y_true"], sensitiveFeature);

            var searchSpace = Utilities.GenerateBinaryClassificationLambdaSearchSpace(moment, 5);

            searchSpace.Keys.Should().BeEquivalentTo("a_pos", "a_neg", "b_pos", "b_neg");
        }

        /// <summary>
        /// Builds a ten-row data frame with a numeric feature "X", boolean columns "y_true" and
        /// "y_pred", and a string column "sensitiveFeature" taking the values "a" and "b".
        /// </summary>
        /// <returns>The populated data frame.</returns>
        private DataFrame CreateDummyDataset()
        {
            var df = new DataFrame();
            df["X"] = DataFrameColumn.Create("X", new[] { 0f, 1, 2, 3, 4, 5, 6, 7, 8, 9 });
            df["y_true"] = DataFrameColumn.Create("y_true", new[] { true, true, true, true, true, true, true, false, false, false });
            df["y_pred"] = DataFrameColumn.Create("y_pred", new[] { true, true, true, true, false, false, false, true, false, false });
            df["sensitiveFeature"] = DataFrameColumn.Create("sensitiveFeature", new[] { "a", "b", "a", "a", "b", "a", "b", "b", "a", "b" });

            return df;
        }

        /// <summary>
        /// Builds the 52-row grid-search dataset. Per the original author's note, the data is
        /// generated to be identical to the data used in Binary_Classification.ipynb from the
        /// Fairlearn repository on GitHub.
        /// </summary>
        /// <returns>
        /// A data frame with columns "score_feature" (float), "y" (bool) and "sensitiveFeature" (string).
        /// </returns>
        private DataFrame CreateGridSearchDataset()
        {
            // The first 31 rows belong to sensitive group "2", the remaining 21 rows to group "3".
            const int firstGroupCount = 31;
            const int secondGroupCount = 21;

            var scoreFeature = new float[firstGroupCount + secondGroupCount];

            // Group "2": scores 0/30, 1/30, ..., 30/30.
            for (int i = 0; i < firstGroupCount; i++)
            {
                scoreFeature[i] = (i * 1.0f) / 30;
            }

            // Group "3": scores 0/20, 1/20, ..., 20/20, stored right after the first group.
            for (int j = 0; j < secondGroupCount; j++)
            {
                scoreFeature[firstGroupCount + j] = (j * 1.0f) / 20;
            }

            var df = new DataFrame();
            df["score_feature"] = DataFrameColumn.Create("score_feature", scoreFeature);
            df["y"] = DataFrameColumn.Create("y", new[]
            {
                // Group "2": 7 negative labels followed by 24 positive labels.
                false, false, false, false, false, false, false,
                true, true, true, true, true, true, true, true,
                true, true, true, true, true, true, true, true,
                true, true, true, true, true, true, true, true,
                // Group "3": 14 negative labels followed by 7 positive labels.
                false, false, false, false, false, false, false,
                false, false, false, false, false, false, false,
                true, true, true, true, true, true, true
            });
            df["sensitiveFeature"] = DataFrameColumn.Create("sensitiveFeature", new[]
            {
                // 31 rows of group "2".
                "2", "2", "2", "2", "2", "2", "2", "2",
                "2", "2", "2", "2", "2", "2", "2", "2",
                "2", "2", "2", "2", "2", "2", "2", "2",
                "2", "2", "2", "2", "2", "2", "2",
                // 21 rows of group "3".
                "3", "3", "3", "3", "3", "3", "3",
                "3", "3", "3", "3", "3", "3", "3",
                "3", "3", "3", "3", "3", "3", "3"
            });

            return df;
        }

        /// <summary>
        /// Runs the grid-search scenario from the "Grid search for Binary Classification.ipynb"
        /// notebook through an AutoML experiment and checks that the best metric is at least 0.4.
        /// </summary>
        [Fact(Skip = "Currently flaky on non x86/x64 devices. Disabling until we figure it out. See https://github.com/dotnet/machinelearning/issues/6684")]
        public void TestGridSearchTrialRunner2()
        {
            var context = new MLContext();

            // Forward only the AutoML experiment's log messages to the test output.
            context.Log += (o, e) =>
            {
                if (e.Source == "AutoMLExperiment")
                {
                    _output.WriteLine(e.Message);
                }
            };

            var experiment = context.Auto().CreateExperiment();
            var df = CreateGridSearchDataset();
            var shuffledDataset = context.Data.ShuffleRows(df);
            var trainTestSplit = context.Data.TrainTestSplit(shuffledDataset, 0.2);

            var pipeline = context.Transforms.Categorical.OneHotHashEncoding("sensitiveFeature_encode", "sensitiveFeature")
                .Append(context.Transforms.Concatenate("Features", "sensitiveFeature_encode", "score_feature"))
                .Append(context.Auto().BinaryClassification(labelColumnName: "y", exampleWeightColumnName: "signedWeight"));

            // NOTE(review): the original carried a trailing "//100" next to the 10-second budget,
            // presumably an earlier or alternative training time - confirm before changing.
            experiment.SetPipeline(pipeline)
                .SetDataset(trainTestSplit)
                .SetBinaryClassificationMetricWithFairLearn("y", "PredictedLabel", "sensitiveFeature", "signedWeight")
                .SetTrainingTimeInSeconds(10);

            var bestResult = experiment.Run();

            bestResult.Metric.Should().BeGreaterOrEqualTo(0.4);
        }
    }
}