* DateTimeTransformer is done.

ToStringTransformer is done.

CategoryImputer is done.

TimeSeriesImputer is done.

RobustScaler is done.

Adding in samples and documentation. General code cleanup. Made the RowToRowMapperTransform create a new mapper if possible for each cursor.

* only files needed for initial project checkin

* removed samples, fixed nuget

* changed dependencies to nightly ML.NET

* removed old TODO:

* changes from PR comments

* Fixes based on PR comments

* fixed rebase error

* Added line removed by rebase

Added line in Directory.Build.Props that was removed during the rebase process. The line already exists in master and shouldn't be removed.
This commit is contained in:
Michael Sharp 2019-12-02 19:11:33 -08:00 коммит произвёл GitHub
Родитель b4c845b1fc
Коммит 8d20cdd16e
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
11 изменённых файлов: 351 добавлений и 4 удалений

Просмотреть файл

@ -271,6 +271,14 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.Samples
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Samples.GPU", "docs\samples\Microsoft.ML.Samples.GPU\Microsoft.ML.Samples.GPU.csproj", "{3C8F910B-7F23-4D25-B521-6D5AC9570ADD}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Featurizers", "src\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.csproj", "{E2DD0721-5B0F-4606-8182-4C7EFB834518}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.Featurizers", "Microsoft.ML.Featurizers", "{1BA5C784-52E8-4A87-8525-26B2452F2882}"
ProjectSection(SolutionItems) = preProject
pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.nupkgproj = pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.nupkgproj
pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.symbols.nupkgproj = pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.symbols.nupkgproj
EndProjectSection
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeGenerator", "src\Microsoft.ML.CodeGenerator\Microsoft.ML.CodeGenerator.csproj", "{56CB0850-7341-4D71-9AE4-9EFC472D93DD}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeGenerator.Tests", "test\Microsoft.ML.CodeGenerator.Tests\Microsoft.ML.CodeGenerator.Tests.csproj", "{46CC5637-3DDF-4100-93FC-44BB87B2DB81}"
@ -1763,6 +1771,30 @@ Global
{C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
{C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU
{C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.ActiveCfg = Debug|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.Build.0 = Debug|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.ActiveCfg = Debug-netcoreapp3_0|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.Build.0 = Debug-netcoreapp3_0|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.ActiveCfg = Debug-netcoreapp3_0|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.Build.0 = Debug-netcoreapp3_0|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.ActiveCfg = Debug-netfx|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.Build.0 = Debug-netfx|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.Build.0 = Release|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.ActiveCfg = Release|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.Build.0 = Release|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.ActiveCfg = Release-netcoreapp3_0|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.Build.0 = Release-netcoreapp3_0|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.ActiveCfg = Release-netcoreapp3_0|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.Build.0 = Release-netcoreapp3_0|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@ -1857,6 +1889,8 @@ Global
{C8DB58DC-6434-4431-A81F-263D86E2A5F3} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
{C91F81E3-B900-4968-A6DF-F53B515E97E1} = {BF66A305-DF10-47E4-8D81-42049B149D2B}
{027DBA48-85B6-46F1-9487-0B49B5057FC0} = {C91F81E3-B900-4968-A6DF-F53B515E97E1}
{E2DD0721-5B0F-4606-8182-4C7EFB834518} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{1BA5C784-52E8-4A87-8525-26B2452F2882} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}

Просмотреть файл

@ -11,6 +11,7 @@
<ItemGroup>
<ProjectReference Include="..\..\..\src\Microsoft.ML.Vision\Microsoft.ML.Vision.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.LightGbm\Microsoft.ML.LightGbm.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.Mkl.Components\Microsoft.ML.Mkl.Components.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />

Просмотреть файл

@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk" DefaultTargets="Pack">
<!-- Packaging project: its default target is Pack, so building it produces the NuGet package. -->
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<PackageDescription>ML.NET featurizers with native code implementation</PackageDescription>
</PropertyGroup>
<ItemGroup>
<!-- Dependencies declared for the package: the Microsoft.ML package project and the Microsoft.MLFeaturizers package. -->
<!-- NOTE(review): the Microsoft.MLFeaturizers version here should match the one referenced by src\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.csproj - confirm. -->
<ProjectReference Include="../Microsoft.ML/Microsoft.ML.nupkgproj" />
<PackageReference Include="Microsoft.MLFeaturizers" Version="0.3.5" />
</ItemGroup>
</Project>

Просмотреть файл

@ -0,0 +1,5 @@
<Project DefaultTargets="Pack">
<!-- Reuses the whole definition of Microsoft.ML.Featurizers.nupkgproj; presumably this is the symbols package project - confirm against the file name. -->
<Import Project="Microsoft.ML.Featurizers.nupkgproj" />
</Project>

Просмотреть файл

@ -40,9 +40,8 @@ using Microsoft.ML;
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Vision" + PublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TimeSeries" + PublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Transforms" + PublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.AutoML" + PublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Featurizers" + PublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.MetaLinearLearner" + InternalPublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "TreeVisualizer" + InternalPublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "TMSNlearnPrediction" + InternalPublicKey.Value)]

Просмотреть файл

@ -966,6 +966,14 @@ namespace Microsoft.ML.Internal.Utilities
return meth;
}
/// <summary>
/// Builds the closed generic method for <paramref name="func"/> using <paramref name="genArgs"/> and
/// verifies that the resulting method still returns <typeparamref name="TRet"/>.
/// </summary>
/// <param name="genArgs">Type arguments handed to the array-based <c>MarshalActionInvokeCheckAndCreate</c> overload.</param>
/// <param name="func">Delegate whose underlying generic method is re-instantiated.</param>
/// <returns>The closed generic <see cref="MethodInfo"/>.</returns>
private static MethodInfo MarshalInvokeCheckAndCreate<TRet>(Type[] genArgs, Delegate func)
{
    MethodInfo closedMethod = MarshalActionInvokeCheckAndCreate(genArgs, func);
    if (closedMethod.ReturnType == typeof(TRet))
        return closedMethod;

    // The return type changed when the type arguments were substituted, so the result could not be cast to TRet.
    throw Contracts.ExceptParam(nameof(func), "Cannot be generic on return type");
}
// REVIEW: n-argument versions? The multi-column re-application problem?
// Think about how to address these.
@ -1092,6 +1100,28 @@ namespace Microsoft.ML.Internal.Utilities
return (TRet)meth.Invoke(func.Target, new object[] { arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10 });
}
/// <summary>
/// Variant of <see cref="MarshalInvoke{TRet}"/> for a one-argument delegate whose generic method is
/// re-instantiated over all the types listed in <paramref name="genArgs"/>.
/// </summary>
/// <param name="func">Delegate bound to the generic method to re-instantiate.</param>
/// <param name="genArgs">Type arguments to substitute for the method's generic parameters.</param>
/// <param name="arg1">Argument forwarded to the invoked method.</param>
/// <returns>The value produced by the invoked method, cast to <typeparamref name="TRet"/>.</returns>
public static TRet MarshalInvoke<TArg1, TRet>(
    Func<TArg1, TRet> func,
    Type[] genArgs, TArg1 arg1)
{
    MethodInfo closedMethod = MarshalInvokeCheckAndCreate<TRet>(genArgs, func);
    object[] invokeArgs = { arg1 };
    object result = closedMethod.Invoke(func.Target, invokeArgs);
    return (TRet)result;
}
/// <summary>
/// Variant of <see cref="MarshalInvoke{TRet}"/> for a two-argument delegate whose generic method is
/// re-instantiated over all the types listed in <paramref name="genArgs"/>.
/// </summary>
/// <param name="func">Delegate bound to the generic method to re-instantiate.</param>
/// <param name="genArgs">Type arguments to substitute for the method's generic parameters.</param>
/// <param name="arg1">First argument forwarded to the invoked method.</param>
/// <param name="arg2">Second argument forwarded to the invoked method.</param>
/// <returns>The value produced by the invoked method, cast to <typeparamref name="TRet"/>.</returns>
public static TRet MarshalInvoke<TArg1, TArg2, TRet>(
    Func<TArg1, TArg2, TRet> func,
    Type[] genArgs, TArg1 arg1, TArg2 arg2)
{
    MethodInfo closedMethod = MarshalInvokeCheckAndCreate<TRet>(genArgs, func);
    object[] invokeArgs = { arg1, arg2 };
    object result = closedMethod.Invoke(func.Target, invokeArgs);
    return (TRet)result;
}
private static MethodInfo MarshalActionInvokeCheckAndCreate(Type genArg, Delegate func)
{
Contracts.CheckValue(genArg, nameof(genArg));
@ -1104,6 +1134,18 @@ namespace Microsoft.ML.Internal.Utilities
return meth;
}
/// <summary>
/// Validates that <paramref name="func"/> wraps a generic method with as many generic parameters as
/// there are entries in <paramref name="typeArguments"/>, then closes that method over those types.
/// </summary>
/// <param name="typeArguments">Types substituted for the generic parameters of the method definition.</param>
/// <param name="func">Delegate whose method is re-instantiated.</param>
/// <returns>The generic method definition of <paramref name="func"/> closed over <paramref name="typeArguments"/>.</returns>
private static MethodInfo MarshalActionInvokeCheckAndCreate(Type[] typeArguments, Delegate func)
{
    Contracts.CheckValue(typeArguments, nameof(typeArguments));
    Contracts.CheckValue(func, nameof(func));

    MethodInfo boundMethod = func.GetMethodInfo();
    Contracts.CheckParam(boundMethod.IsGenericMethod, nameof(func), "Should be generic but is not");

    int genericArity = boundMethod.GetGenericArguments().Length;
    Contracts.CheckParam(genericArity == typeArguments.Length, nameof(func),
        "Method should have exactly the same number of generic type parameters as list passed in but it does not.");

    // Go back to the open definition before substituting, so the delegate's current type arguments are discarded.
    return boundMethod.GetGenericMethodDefinition().MakeGenericMethod(typeArguments);
}
/// <summary>
/// This is akin to <see cref="MarshalInvoke{TRet}(Func{TRet}, Type)"/>, except applied to
/// <see cref="Action"/> instead of <see cref="Func{TRet}"/>.

Просмотреть файл

@ -44,9 +44,8 @@ using Microsoft.ML;
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet101" + PublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet18" + PublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet50" + PublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Experimental" + PublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Featurizers" + PublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Internal.MetaLinearLearner" + InternalPublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "TMSNlearnPrediction" + InternalPublicKey.Value)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.CntkWrapper" + InternalPublicKey.Value)]

Просмотреть файл

@ -0,0 +1,223 @@
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Security;
using System.Text;
using Microsoft.Win32.SafeHandles;
namespace Microsoft.ML.Featurizers
{
// Single-byte result code with three states.
// NOTE(review): presumably mirrors the native library's fit result, and the member names suggest
// "training finished", "feed more data" and "restart the pass, then feed more data" - confirm against the native header.
internal enum FitResult : byte
{
Complete = 1,
Continue = 2,
ResetAndContinue = 3
}
// Type identifiers copied from the native code implementation. Not every type listed here is currently supported.
internal enum TypeId : uint
{
    // Enumeration values are described by the native code as having the following format:
    //
    //   0xVTTTXXXX
    //     ^^^^^^^^
    //     ||  |- Id
    //     ||- Number of trailing types
    //     |- Has trailing types
    //
    String = 1,
    SByte = 2,
    Short = 3,
    Int = 4,
    Long = 5,
    Byte = 6,
    UShort = 7,
    UInt = 8,
    ULong = 9,
    Float16 = 10,
    Float32 = 11,
    Double = 12,
    Complex64 = 13,
    Complex128 = 14,
    BFloat16 = 15,
    Bool = 16,
    Timepoint = 17,
    Duration = 18,
    LastStaticValue = 19,

    // The following values have N number of trailing types.
    //
    // The parentheses only spell out the grouping the compiler already applies ('+' binds tighter than '|').
    // NOTE(review): with this grouping Tensor and SparseTensor both evaluate to 0x1015, Tabular and Nullable both
    // evaluate to 0x1017, and the 0x1001/0x1002 prefix does not land in the high digits shown in the layout
    // diagram above. The values are left untouched because they have to agree with the native library -
    // confirm against the native header before changing them.
    Tensor = 0x1001 | (LastStaticValue + 1),
    SparseTensor = 0x1001 | (LastStaticValue + 2),
    Tabular = 0x1001 | (LastStaticValue + 3),
    Nullable = 0x1001 | (LastStaticValue + 4),
    Vector = 0x1001 | (LastStaticValue + 5),
    MapId = 0x1002 | (LastStaticValue + 6)
}
// A struct mirroring the native struct.
// It is used to pass binary data between ML.NET and the native code.
// Sequential layout with Pack = 1 keeps the two fields in declaration order without padding between them.
[StructLayout(LayoutKind.Sequential, Pack = 1)]
internal unsafe struct NativeBinaryArchiveData
{
// Pointer to the first byte of the data. NOTE(review): who owns and frees this buffer is not visible here - confirm.
public byte* Data;
// Size of the data, pointer-sized; presumably a byte count - confirm against the native definition.
public IntPtr DataSize;
}
#region SafeHandles
// Owns a native error-info handle that was returned to ML.NET and releases it through the
// "DestroyErrorInfo" export of the Featurizers library. Handles equal to 0 or -1 are treated as invalid
// by the base class and are never passed to the native release function.
internal class ErrorInfoSafeHandle : SafeHandleZeroOrMinusOneIsInvalid
{
    public ErrorInfoSafeHandle(IntPtr handle) : base(ownsHandle: true)
    {
        SetHandle(handle);
    }

    // Returns whatever the native release function reports.
    protected override bool ReleaseHandle() => DestroyErrorInfo(handle);

    // NOTE(review): the return value uses the default bool marshalling; if the native function returns a
    // one-byte C++ bool, a [return: MarshalAs(UnmanagedType.I1)] would be needed - confirm against the native signature.
    [SuppressUnmanagedCodeSecurity]
    [DllImport("Featurizers", EntryPoint = "DestroyErrorInfo", CallingConvention = CallingConvention.Cdecl)]
    private static extern bool DestroyErrorInfo(IntPtr error);
}
// Safe handle that frees the memory for error strings returned from the native code to ML.NET.
// The native release function needs the string length as well as the pointer, so the length is
// captured at construction time. Handles equal to 0 or -1 are treated as invalid by the base class
// and are never passed to the native release function.
internal class ErrorInfoStringSafeHandle : SafeHandleZeroOrMinusOneIsInvalid
{
    [DllImport("Featurizers", EntryPoint = "DestroyErrorInfoString", CallingConvention = CallingConvention.Cdecl), SuppressUnmanagedCodeSecurity]
    private static extern bool DestroyErrorInfoString(IntPtr errorString, IntPtr errorStringSize);

    // Only assigned in the constructor; readonly to match SaveDataSafeHandle._dataSize.
    private readonly IntPtr _length;

    public ErrorInfoStringSafeHandle(IntPtr handle, IntPtr length) : base(true)
    {
        SetHandle(handle);
        _length = length;
    }

    // Returns whatever the native release function reports.
    protected override bool ReleaseHandle()
    {
        return DestroyErrorInfoString(handle, _length);
    }
}
// Signature of the native function that frees transformed data. It receives the data pointer and its
// size, and can hand back an error handle through the out parameter.
internal delegate bool DestroyTransformedDataNative(IntPtr output, IntPtr outputSize, out IntPtr errorHandle);

// Safe handle that frees the memory for the transformed data by calling the destroy delegate supplied
// at construction with the handle and the recorded data size.
internal class TransformedDataSafeHandle : SafeHandleZeroOrMinusOneIsInvalid
{
    // Both fields are only assigned in the constructor.
    private readonly DestroyTransformedDataNative _destroyTransformedData;
    private readonly IntPtr _dataSize;

    public TransformedDataSafeHandle(IntPtr handle, IntPtr dataSize, DestroyTransformedDataNative destroyCppTransformerEstimator) : base(true)
    {
        SetHandle(handle);
        _dataSize = dataSize;
        _destroyTransformedData = destroyCppTransformerEstimator;
    }

    protected override bool ReleaseHandle()
    {
        // The error handle is discarded: there shouldn't ever be one here.
        // NOTE(review): if the native call does report an error, its error handle is not released by this method.
        return _destroyTransformedData(handle, _dataSize, out _);
    }
}
// Signature of the native function that frees an estimator or transformer. It can hand back an error
// handle through the out parameter.
internal delegate bool DestroyNativeTransformerEstimator(IntPtr estimator, out IntPtr errorHandle);

// Safe handle that frees the memory for a native estimator or transformer by calling the destroy
// delegate supplied at construction.
internal class TransformerEstimatorSafeHandle : SafeHandleZeroOrMinusOneIsInvalid
{
    // Only assigned in the constructor.
    private readonly DestroyNativeTransformerEstimator _destroyNativeTransformerEstimator;

    public TransformerEstimatorSafeHandle(IntPtr handle, DestroyNativeTransformerEstimator destroyNativeTransformerEstimator) : base(true)
    {
        SetHandle(handle);
        _destroyNativeTransformerEstimator = destroyNativeTransformerEstimator;
    }

    protected override bool ReleaseHandle()
    {
        // The error handle is discarded: there shouldn't ever be one here.
        // NOTE(review): if the native call does report an error, its error handle is not released by this method.
        return _destroyNativeTransformerEstimator(handle, out _);
    }
}
// Delegate shape matching the native "DestroyTransformerSaveData" export: buffer pointer, buffer size,
// and an error handle returned through the out parameter.
internal delegate bool DestroyTransformerSaveData(IntPtr buffer, IntPtr bufferSize, out IntPtr errorHandle);

// Owns the native buffer that holds the saved internal state of a native transformer and releases it
// through the Featurizers library's "DestroyTransformerSaveData" export, passing the recorded buffer size.
internal class SaveDataSafeHandle : SafeHandleZeroOrMinusOneIsInvalid
{
    private readonly IntPtr _dataSize;

    public SaveDataSafeHandle(IntPtr handle, IntPtr dataSize) : base(ownsHandle: true)
    {
        _dataSize = dataSize;
        SetHandle(handle);
    }

    protected override bool ReleaseHandle()
    {
        // The error handle is discarded: there shouldn't ever be one here.
        bool released = DestroyTransformerSaveDataNative(handle, _dataSize, out _);
        return released;
    }

    [SuppressUnmanagedCodeSecurity]
    [DllImport("Featurizers", EntryPoint = "DestroyTransformerSaveData", CallingConvention = CallingConvention.Cdecl)]
    private static extern bool DestroyTransformerSaveDataNative(IntPtr buffer, IntPtr bufferSize, out IntPtr error);
}
#endregion
// Static extension class with common methods used in multiple featurizers.
internal static class CommonExtensions
{
    // Message returned when the native library cannot provide the text of an error.
    private const string UnknownNativeErrorMessage = "Unable to retrieve the error details from the native Featurizers library.";

    [DllImport("Featurizers", EntryPoint = "GetErrorInfoString", CallingConvention = CallingConvention.Cdecl), SuppressUnmanagedCodeSecurity]
    private static extern bool GetErrorInfoString(IntPtr error, out IntPtr errorHandleString, out IntPtr errorHandleStringSize);

    /// <summary>
    /// Reads the UTF-8 message attached to a native error handle and releases both the error handle
    /// and the native string buffer before returning.
    /// </summary>
    /// <param name="errorHandle">Error handle produced by the native library.</param>
    /// <returns>
    /// The decoded error message, or a generic message when the handle is invalid or the native
    /// library does not supply a message.
    /// </returns>
    internal static string GetErrorDetailsAndFreeNativeMemory(IntPtr errorHandle)
    {
        using (var error = new ErrorInfoSafeHandle(errorHandle))
        {
            // Nothing to query (and nothing to free) for a 0 / -1 handle.
            if (error.IsInvalid)
                return UnknownNativeErrorMessage;

            bool succeeded = GetErrorInfoString(errorHandle, out IntPtr errorHandleString, out IntPtr errorHandleStringSize);

            // Wrap the string before inspecting the result so that a buffer handed back by the native
            // code is released on every path.
            using (var errorString = new ErrorInfoStringSafeHandle(errorHandleString, errorHandleStringSize))
            {
                // Marshal.Copy throws ArgumentNullException on a null source pointer, which would hide
                // the real failure, so report a generic message instead.
                if (!succeeded || errorString.IsInvalid)
                    return UnknownNativeErrorMessage;

                byte[] buffer = new byte[errorHandleStringSize.ToInt32()];
                Marshal.Copy(errorHandleString, buffer, 0, buffer.Length);
                return Encoding.UTF8.GetString(buffer);
            }
        }
    }

    /// <summary>
    /// Maps a managed type to the <see cref="TypeId"/> value used for it by the native code.
    /// </summary>
    /// <param name="type">Managed type to translate.</param>
    /// <returns>The matching <see cref="TypeId"/>.</returns>
    /// <exception cref="InvalidOperationException">The type has no mapping.</exception>
    internal static TypeId GetNativeTypeIdFromType(this Type type)
    {
        if (type == typeof(sbyte))
            return TypeId.SByte;
        if (type == typeof(short))
            return TypeId.Short;
        if (type == typeof(int))
            return TypeId.Int;
        if (type == typeof(long))
            return TypeId.Long;
        if (type == typeof(byte))
            return TypeId.Byte;
        if (type == typeof(ushort))
            return TypeId.UShort;
        if (type == typeof(uint))
            return TypeId.UInt;
        if (type == typeof(ulong))
            return TypeId.ULong;
        if (type == typeof(float))
            return TypeId.Float32;
        if (type == typeof(double))
            return TypeId.Double;
        if (type == typeof(bool))
            return TypeId.Bool;
        // Text is mapped from ReadOnlyMemory<char>, not from System.String.
        if (type == typeof(ReadOnlyMemory<char>))
            return TypeId.String;

        throw new InvalidOperationException($"Unsupported type {type}");
    }
}
}

Просмотреть файл

@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <TargetFramework>netstandard2.0</TargetFramework>
    <IncludeInPackage>Microsoft.ML.Featurizers</IncludeInPackage>
    <!-- Required because the interop code declares unsafe structs with raw pointers. -->
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>
  <ItemGroup>
    <!-- Keep this version identical to the Microsoft.MLFeaturizers dependency declared in
         pkg\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.nupkgproj, so the assembly is built
         against the same native package that consumers of the NuGet package receive. -->
    <PackageReference Include="Microsoft.MLFeaturizers" Version="0.3.5" />
  </ItemGroup>
  <ItemGroup>
    <ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
    <ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
  </ItemGroup>
</Project>

Просмотреть файл

@ -0,0 +1,11 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Runtime.CompilerServices;
using Microsoft.ML;
// Grant access to this assembly's internal members to the Microsoft.ML.Tests assembly (identified with
// PublicKey.TestValue) and to Microsoft.ML.EntryPoints (identified with PublicKey.Value).
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Tests" + PublicKey.TestValue)]
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.EntryPoints" + PublicKey.Value)]
// NOTE(review): WantsToBeBestFriends is declared elsewhere in the project; presumably it opts this
// assembly into ML.NET's "best friend" internal-access checks - confirm.
[assembly: WantsToBeBestFriends]

Просмотреть файл

@ -11,6 +11,7 @@
<ProjectReference Include="..\..\src\Microsoft.ML.Ensemble\Microsoft.ML.Ensemble.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.EntryPoints\Microsoft.ML.EntryPoints.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Experimental\Microsoft.ML.Experimental.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Featurizers\Microsoft.ML.Featurizers.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.LightGbm\Microsoft.ML.LightGbm.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Mkl.Components\Microsoft.ML.Mkl.Components.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.ImageAnalytics\Microsoft.ML.ImageAnalytics.csproj" />