Frank Seide 2017-01-22 18:50:15 -08:00
Parents: 2413087902 d15bec5a5b
Commit: 29bcb50ba8
375 changed files, 10,456 additions and 3,011 deletions

.gitignore (vendored)

@@ -249,6 +249,7 @@ Examples/Image/DataSets/grocery/testImages/
Examples/Image/DataSets/grocery/*.txt
Examples/Image/PretrainedModels/*.model
Examples/Image/FeatureExtraction/*.txt
Examples/Image/GettingStarted/Output/
Tests/EndToEndTests/CNTKv2Python/Examples/layerOutput.txt
Tutorials/HelloWorld-LogisticRegression/LR.txt.p
Tutorials/HelloWorld-LogisticRegression/Models/


@@ -1451,7 +1451,37 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PerformanceProfilerDll", "S
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CNTKLibraryManagedExamplesTest", "Tests\EndToEndTests\EvalClientTests\CNTKLibraryManagedExamplesTest\CNTKLibraryManagedExamplesTest.csproj", "{3500A847-E024-4E7D-92DD-CC587C17460B}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CNTKLibraryCSEvalExamplesTest", "Tests\EndToEndTests\EvalClientTests\CNTKLibraryCSEvalExamplesTest\CNTKLibraryCSEvalExamplesTest.csproj", "{3500A847-E024-4E7D-92DD-CC587C17460B}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "GoogLeNet", "GoogLeNet", "{789B4AB8-40F1-4A37-823A-BC20D80C8BF1}"
ProjectSection(SolutionItems) = preProject
Examples\Image\Classification\GoogLeNet\README.md = Examples\Image\Classification\GoogLeNet\README.md
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "BN-Inception", "BN-Inception", "{CE223840-1DEE-4849-B530-F06BEE05BAA8}"
ProjectSection(SolutionItems) = preProject
Examples\Image\Classification\GoogLeNet\BN-Inception\README.md = Examples\Image\Classification\GoogLeNet\BN-Inception\README.md
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "InceptionV3", "InceptionV3", "{824766FA-759A-4466-9C39-13200D2D3159}"
ProjectSection(SolutionItems) = preProject
Examples\Image\Classification\GoogLeNet\InceptionV3\README.md = Examples\Image\Classification\GoogLeNet\InceptionV3\README.md
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "BrainScript", "BrainScript", "{BD07C9F3-B10C-4C21-82BC-4F249B65DDFE}"
ProjectSection(SolutionItems) = preProject
Examples\Image\Classification\GoogLeNet\InceptionV3\BrainScript\InceptionBlocks.bs = Examples\Image\Classification\GoogLeNet\InceptionV3\BrainScript\InceptionBlocks.bs
Examples\Image\Classification\GoogLeNet\InceptionV3\BrainScript\InceptionV3.bs = Examples\Image\Classification\GoogLeNet\InceptionV3\BrainScript\InceptionV3.bs
Examples\Image\Classification\GoogLeNet\InceptionV3\BrainScript\InceptionV3.cntk = Examples\Image\Classification\GoogLeNet\InceptionV3\BrainScript\InceptionV3.cntk
Examples\Image\Classification\GoogLeNet\InceptionV3\BrainScript\README.md = Examples\Image\Classification\GoogLeNet\InceptionV3\BrainScript\README.md
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "BrainScript", "BrainScript", "{5CC403B9-2405-4FFB-A73B-DAE0DC986C76}"
ProjectSection(SolutionItems) = preProject
Examples\Image\Classification\GoogLeNet\BN-Inception\BrainScript\BN-Inception.bs = Examples\Image\Classification\GoogLeNet\BN-Inception\BrainScript\BN-Inception.bs
Examples\Image\Classification\GoogLeNet\BN-Inception\BrainScript\BN-Inception.cntk = Examples\Image\Classification\GoogLeNet\BN-Inception\BrainScript\BN-Inception.cntk
Examples\Image\Classification\GoogLeNet\BN-Inception\BrainScript\InceptionLayers.bs = Examples\Image\Classification\GoogLeNet\BN-Inception\BrainScript\InceptionLayers.bs
EndProjectSection
EndProject
Project("{888888A0-9F3D-457C-B088-3A5042F75D52}") = "PythonExamples", "Examples\PythonExamples.pyproj", "{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01}"
EndProject
@@ -2114,6 +2144,11 @@ Global
{CB4566F1-6C8F-4270-83EE-F6AED84EBB2B} = {39C3C8CA-9A8A-4733-ADBB-3E19D0F52528}
{4B442D34-641A-4B37-9A4B-D18DBE28A979} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{3500A847-E024-4E7D-92DD-CC587C17460B} = {05E45AF7-C069-4057-BC16-0A532D068CE4}
{789B4AB8-40F1-4A37-823A-BC20D80C8BF1} = {151202CF-C2E4-47A6-A31C-CE039D698519}
{CE223840-1DEE-4849-B530-F06BEE05BAA8} = {789B4AB8-40F1-4A37-823A-BC20D80C8BF1}
{824766FA-759A-4466-9C39-13200D2D3159} = {789B4AB8-40F1-4A37-823A-BC20D80C8BF1}
{BD07C9F3-B10C-4C21-82BC-4F249B65DDFE} = {824766FA-759A-4466-9C39-13200D2D3159}
{5CC403B9-2405-4FFB-A73B-DAE0DC986C76} = {CE223840-1DEE-4849-B530-F06BEE05BAA8}
{292FF4EE-D9DD-4BA7-85F7-6A22148D1E01} = {47755F2E-D674-4175-9E38-8EA053455072}
EndGlobalSection
EndGlobal


@@ -2,7 +2,7 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// CPPEvalV2Client.cpp : Sample application shows how to evaluate a model using CNTK V2 API.
// CNTKLibraryCPPEvalExamples.cpp : Sample application shows how to evaluate a model using CNTK V2 API.
//
#include <stdio.h>


@@ -7,14 +7,14 @@
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<ClCompile Include="CPPEvalV2Client.cpp" />
<ClCompile Include="CNTKLibraryCPPEvalExamples.cpp" />
<ClCompile Include="EvalMultithreads.cpp" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{D771A06D-CC25-4582-B5CD-D2A4782BB005}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>CPPEvalV2Client</RootNamespace>
<ProjectName>CPPEvalV2Client</ProjectName>
<RootNamespace>CNTKLibraryCPPEvalExamples</RootNamespace>
<ProjectName>CNTKLibraryCPPEvalExamples</ProjectName>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">


@@ -15,7 +15,7 @@
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="CPPEvalV2Client.cpp">
<ClCompile Include="CNTKLibraryCPPEvalExamples.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="EvalMultithreads.cpp">


@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5"/>
</startup>
</configuration>


@@ -0,0 +1,75 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">x64</Platform>
<ProjectGuid>{8AAD7322-10B1-48C3-9BC7-005A7910C5E6}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>CNTKLibraryCSEvalExamples</RootNamespace>
<AssemblyName>CNTKLibraryCSEvalCPUOnlyExamples</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<NuGetPackageImportStamp>
</NuGetPackageImportStamp>
<TargetFrameworkProfile />
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>$(SolutionDir)..\..\$(Platform)\CNTKLibraryCSEvalCPUOnlyExamples.$(Configuration)\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<WarningLevel>4</WarningLevel>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>$(SolutionDir)..\..\$(Platform)\CNTKLibraryCSEvalCPUOnlyExamples.$(Configuration)\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<WarningLevel>4</WarningLevel>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
</PropertyGroup>
<ItemGroup>
<Reference Include="CNTKLibraryManaged-2.0, Version=1.0.0.0, Culture=neutral, processorArchitecture=AMD64">
<HintPath>..\packages\CNTK.CPUOnly.2.0-beta9\lib\net45\x64\CNTKLibraryManaged-2.0.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Drawing" />
</ItemGroup>
<ItemGroup>
<Compile Include="CntkBitmapExtensions.cs" />
<Compile Include="CNTKLibraryCSEvalExamples.cs" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
<None Include="packages.config" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<Import Project="..\packages\CNTK.CPUOnly.2.0-beta9\build\net45\CNTK.CPUOnly.targets" Condition="Exists('..\packages\CNTK.CPUOnly.2.0-beta9\build\net45\CNTK.CPUOnly.targets')" />
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\CNTK.CPUOnly.2.0-beta9\build\net45\CNTK.CPUOnly.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\CNTK.CPUOnly.2.0-beta9\build\net45\CNTK.CPUOnly.targets'))" />
</Target>
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>


@@ -2,7 +2,7 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// program.cs -- Example for using C# Eval V2 API.
// CNTKLibraryCSEvalExamples.cs -- Examples for using CNTK Library C# Eval API.
//
using System;
@@ -10,13 +10,11 @@ using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using CNTK;
namespace CNTKLibraryManagedExampleTest
namespace CNTKLibraryCSEvalExamples
{
public class Program
public class CNTKLibraryManagedExamples
{
//
// The example shows
@@ -26,13 +24,13 @@ namespace CNTKLibraryManagedExampleTest
// - how to evaluate a model.
// - how to retrieve evaluation result and retrieve output data in dense format.
//
static void EvaluationSingleImage(DeviceDescriptor device)
public static void EvaluationSingleImage(DeviceDescriptor device)
{
const string outputName = "Plus2060";
var inputDataMap = new Dictionary<Variable, Value>();
// Load the model.
Function modelFunc = Function.LoadModel("z.model");
Function modelFunc = Function.LoadModel("z.model", device);
// Get output variable based on name
Variable outputVar = modelFunc.Outputs.Where(variable => string.Equals(variable.Name, outputName)).Single();
@@ -85,13 +83,13 @@ namespace CNTKLibraryManagedExampleTest
// - how to evaluate a model.
// - how to retrieve evaluation result and retrieve output data in dense format.
//
static void EvaluationBatchOfImages(DeviceDescriptor device)
public static void EvaluationBatchOfImages(DeviceDescriptor device)
{
const string outputName = "Plus2060";
var inputDataMap = new Dictionary<Variable, Value>();
// Load the model.
Function modelFunc = Function.LoadModel("z.model");
Function modelFunc = Function.LoadModel("z.model", device);
// Get output variable based on name
Variable outputVar = modelFunc.Outputs.Where(variable => string.Equals(variable.Name, outputName)).Single();
@@ -157,12 +155,12 @@ namespace CNTKLibraryManagedExampleTest
// - how to evaluate a model.
// - how to retrieve evaluation result and retrieve output data in the one-hot vector format.
//
static void EvaluationSingleSequenceUsingOneHot(DeviceDescriptor device)
public static void EvaluationSingleSequenceUsingOneHot(DeviceDescriptor device)
{
var vocabToIndex = buildVocabIndex("ATIS.vocab");
var indexToVocab = buildInvVocabIndex("ATIS.label");
Function myFunc = Function.LoadModel("atis.model");
Function myFunc = Function.LoadModel("atis.model", device);
Console.WriteLine("Evaluate single sequence using one-hot vector");
@@ -237,12 +235,12 @@ namespace CNTKLibraryManagedExampleTest
// - how to evaluate a model.
// - how to retrieve evaluation result and retrieve output data in the one-hot vector format.
//
static void EvaluationBatchOfSequencesUsingOneHot(DeviceDescriptor device)
public static void EvaluationBatchOfSequencesUsingOneHot(DeviceDescriptor device)
{
var vocabToIndex = buildVocabIndex("ATIS.vocab");
var indexToVocab = buildInvVocabIndex("ATIS.label");
Function myFunc = Function.LoadModel("atis.model");
Function myFunc = Function.LoadModel("atis.model", device);
Console.WriteLine("Evaluate batch of sequences with variable length using one-hot vector");
@@ -367,22 +365,5 @@ namespace CNTKLibraryManagedExampleTest
{
return File.ReadAllLines(filePath);
}
static void Main(string[] args)
{
Console.WriteLine("======== Evaluate model using C# ========");
EvaluationSingleImage(DeviceDescriptor.CPUDevice);
EvaluationBatchOfImages(DeviceDescriptor.CPUDevice);
//TODO: Add examples with OneHot.
//EvaluationSingleSequenceUsingOneHot(DeviceDescriptor.CPUDevice);
//EvaluationBatchOfSequencesUsingOneHot(DeviceDescriptor.CPUDevice);
// TODO: using GPU.
//EvaluationSingleImage(DeviceDescriptor.GPUDevice(0));
//EvaluationBatchOfImages(DeviceDescriptor.GPUDevice(0));
Console.WriteLine("======== Evaluation completes. ========");
}
}
}


@@ -9,12 +9,11 @@ using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.Linq;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
namespace CNTK
namespace CNTKLibraryCSEvalExamples
{
public static class BitmapExtensions
public static class CntkBitmapExtensions
{
/// <summary>
/// Resizes an image


@@ -0,0 +1,25 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// Program.cs -- Example for using CNTK Library C# Eval CPUOnly Nuget Package.
//
using System;
using CNTK;
namespace CNTKLibraryCSEvalExamples
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("======== Evaluate model using C# CPUOnly Build ========");
CNTKLibraryManagedExamples.EvaluationSingleImage(DeviceDescriptor.CPUDevice);
CNTKLibraryManagedExamples.EvaluationBatchOfImages(DeviceDescriptor.CPUDevice);
Console.WriteLine("======== Evaluation completes. ========");
}
}
}


@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("CNTKLibraryCSEvalCPUOnlyExamples")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("CNTKLibraryCSEvalCPUOnlyExamples")]
[assembly: AssemblyCopyright("Copyright © 2017")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("8aad7322-10b1-48c3-9bc7-005a7910c5e6")]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]


@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="CNTK.CPUOnly" version="2.0-beta9" targetFramework="net45" />
</packages>


@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5"/>
</startup>
</configuration>


@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="14.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">x64</Platform>
<ProjectGuid>{307E5BAC-DA03-45D2-ADEC-FE6620090109}</ProjectGuid>
<OutputType>Exe</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>CNTKLibraryCSEvalExamples</RootNamespace>
<AssemblyName>CNTKLibraryCSEvalGPUExamples</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<TargetFrameworkProfile />
<NuGetPackageImportStamp>
</NuGetPackageImportStamp>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Debug|x64'">
<DebugSymbols>true</DebugSymbols>
<OutputPath>$(SolutionDir)..\..\$(Platform)\CNTKLibraryCSEvalGPUExamples.$(Configuration)\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<WarningLevel>4</WarningLevel>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<DebugType>full</DebugType>
<PlatformTarget>x64</PlatformTarget>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)' == 'Release|x64'">
<OutputPath>$(SolutionDir)..\..\$(Platform)\CNTKLibraryCSEvalGPUExamples.$(Configuration)\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<WarningLevel>4</WarningLevel>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Optimize>true</Optimize>
<DebugType>pdbonly</DebugType>
<PlatformTarget>x64</PlatformTarget>
<ErrorReport>prompt</ErrorReport>
<CodeAnalysisRuleSet>MinimumRecommendedRules.ruleset</CodeAnalysisRuleSet>
</PropertyGroup>
<ItemGroup>
<Reference Include="CNTKLibraryManaged-2.0, Version=1.0.0.0, Culture=neutral, processorArchitecture=AMD64">
<HintPath>..\packages\CNTK.GPU.2.0-beta9\lib\net45\x64\CNTKLibraryManaged-2.0.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Drawing" />
</ItemGroup>
<ItemGroup>
<Compile Include="..\CNTKLibraryCSEvalCPUOnlyExamples\CntkBitmapExtensions.cs">
<Link>CntkBitmapExtensions.cs</Link>
</Compile>
<Compile Include="..\CNTKLibraryCSEvalCPUOnlyExamples\CNTKLibraryCSEvalExamples.cs">
<Link>CNTKLibraryCSEvalExamples.cs</Link>
</Compile>
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
<None Include="packages.config" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<Import Project="..\packages\CNTK.GPU.2.0-beta9\build\net45\CNTK.GPU.targets" Condition="Exists('..\packages\CNTK.GPU.2.0-beta9\build\net45\CNTK.GPU.targets')" />
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\CNTK.GPU.2.0-beta9\build\net45\CNTK.GPU.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\CNTK.GPU.2.0-beta9\build\net45\CNTK.GPU.targets'))" />
</Target>
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
</Target>
<Target Name="AfterBuild">
</Target>
-->
</Project>


@@ -0,0 +1,25 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// Program.cs -- Example for using CNTK Library C# Eval GPU Nuget Package.
//
using System;
using CNTK;
namespace CNTKLibraryCSEvalExamples
{
class Program
{
static void Main(string[] args)
{
Console.WriteLine("======== Evaluate model using C# GPU Build ========");
CNTKLibraryManagedExamples.EvaluationSingleImage(DeviceDescriptor.GPUDevice(0));
CNTKLibraryManagedExamples.EvaluationBatchOfImages(DeviceDescriptor.GPUDevice(0));
Console.WriteLine("======== Evaluation completes. ========");
}
}
}


@@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("CSharpEvalGPUExamples")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("CSharpEvalGPUExamples")]
[assembly: AssemblyCopyright("Copyright © 2017")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("307e5bac-da03-45d2-adec-fe6620090109")]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]


@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="CNTK.GPU" version="2.0-beta9" targetFramework="net45" />
</packages>


@@ -0,0 +1,33 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.25420.1
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CNTKLibraryCPPEvalExamples", "CNTKLibraryCPPEvalExamples\CNTKLibraryCPPEvalExamples.vcxproj", "{D771A06D-CC25-4582-B5CD-D2A4782BB005}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CNTKLibraryCSEvalCPUOnlyExamples", "CNTKLibraryCSEvalCPUOnlyExamples\CNTKLibraryCSEvalCPUOnlyExamples.csproj", "{8AAD7322-10B1-48C3-9BC7-005A7910C5E6}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CNTKLibraryCSEvalGPUExamples", "CNTKLibraryCSEvalGPUExamples\CNTKLibraryCSEvalGPUExamples.csproj", "{307E5BAC-DA03-45D2-ADEC-FE6620090109}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Release|x64 = Release|x64
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{D771A06D-CC25-4582-B5CD-D2A4782BB005}.Debug|x64.ActiveCfg = Release|x64
{D771A06D-CC25-4582-B5CD-D2A4782BB005}.Release|x64.ActiveCfg = Release|x64
{D771A06D-CC25-4582-B5CD-D2A4782BB005}.Release|x64.Build.0 = Release|x64
{8AAD7322-10B1-48C3-9BC7-005A7910C5E6}.Debug|x64.ActiveCfg = Debug|x64
{8AAD7322-10B1-48C3-9BC7-005A7910C5E6}.Debug|x64.Build.0 = Debug|x64
{8AAD7322-10B1-48C3-9BC7-005A7910C5E6}.Release|x64.ActiveCfg = Release|x64
{8AAD7322-10B1-48C3-9BC7-005A7910C5E6}.Release|x64.Build.0 = Release|x64
{307E5BAC-DA03-45D2-ADEC-FE6620090109}.Debug|x64.ActiveCfg = Debug|x64
{307E5BAC-DA03-45D2-ADEC-FE6620090109}.Debug|x64.Build.0 = Debug|x64
{307E5BAC-DA03-45D2-ADEC-FE6620090109}.Release|x64.ActiveCfg = Release|x64
{307E5BAC-DA03-45D2-ADEC-FE6620090109}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal


@@ -50,7 +50,7 @@
</PropertyGroup>
<ItemGroup>
<Reference Include="EvalWrapper, Version=0.0.0.0, Culture=neutral, PublicKeyToken=52681d72504348ec, processorArchitecture=AMD64">
<HintPath>..\packages\Microsoft.Research.CNTK.CpuEval-mkl.2.0-beta7\lib\net45\x64\EvalWrapper.dll</HintPath>
<HintPath>..\packages\Microsoft.Research.CNTK.CpuEval-mkl.2.0-beta9\lib\net45\x64\EvalWrapper.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System" />
@@ -59,7 +59,9 @@
<Reference Include="System.Drawing" />
</ItemGroup>
<ItemGroup>
<Compile Include="CntkBitmapExtensions.cs" />
<Compile Include="..\CNTKLibraryCSEvalCPUOnlyExamples\CntkBitmapExtensions.cs">
<Link>CntkBitmapExtensions.cs</Link>
</Compile>
<Compile Include="ModelEvaluator.cs" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
@@ -86,11 +88,11 @@
</BootstrapperPackage>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<Import Project="..\packages\Microsoft.Research.CNTK.CpuEval-mkl.2.0-beta7\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets" Condition="Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.2.0-beta7\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" />
<Import Project="..\packages\Microsoft.Research.CNTK.CpuEval-mkl.2.0-beta9\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets" Condition="Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.2.0-beta9\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" />
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.2.0-beta7\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Research.CNTK.CpuEval-mkl.2.0-beta7\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets'))" />
<Error Condition="!Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.2.0-beta9\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Research.CNTK.CpuEval-mkl.2.0-beta9\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets'))" />
</Target>
</Project>
</Project>


@@ -1,214 +0,0 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// CntkBitmapExtensions.cs -- extension methods for transforming images used in CNTK.
//
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.Linq;
using System.Runtime.InteropServices;
using System.Threading.Tasks;
namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
{
public static class CntkBitmapExtensions
{
/// <summary>
/// Resizes an image
/// </summary>
/// <param name="image">The image to resize</param>
/// <param name="width">New width in pixels</param>
/// <param name="height">New height in pixesl</param>
/// <param name="useHighQuality">Resize quality</param>
/// <returns>The resized image</returns>
public static Bitmap Resize(this Bitmap image, int width, int height, bool useHighQuality)
{
var newImg = new Bitmap(width, height);
newImg.SetResolution(image.HorizontalResolution, image.VerticalResolution);
using (var g = Graphics.FromImage(newImg))
{
g.CompositingMode = System.Drawing.Drawing2D.CompositingMode.SourceCopy;
if (useHighQuality)
{
g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.HighQualityBicubic;
g.CompositingQuality = System.Drawing.Drawing2D.CompositingQuality.HighQuality;
g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;
g.PixelOffsetMode = System.Drawing.Drawing2D.PixelOffsetMode.HighQuality;
}
else
{
g.InterpolationMode = System.Drawing.Drawing2D.InterpolationMode.Default;
g.CompositingQuality = System.Drawing.Drawing2D.CompositingQuality.Default;
g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.Default;
g.PixelOffsetMode = System.Drawing.Drawing2D.PixelOffsetMode.Default;
}
var attributes = new ImageAttributes();
attributes.SetWrapMode(System.Drawing.Drawing2D.WrapMode.TileFlipXY);
g.DrawImage(image, new Rectangle(0, 0, width, height), 0, 0, image.Width, image.Height, GraphicsUnit.Pixel, attributes);
}
return newImg;
}
/// <summary>
/// Extracts image pixels in CHW
/// </summary>
/// <param name="image">The bitmap image to extract features from</param>
/// <returns>A list of pixels in HWC order</returns>
public static List<float> ExtractCHW(this Bitmap image)
{
var features = new List<float>(image.Width * image.Height * 3);
for (int c = 0; c < 3; c++)
{
for (int h = 0; h < image.Height; h++)
{
for (int w = 0; w < image.Width; w++)
{
var pixel = image.GetPixel(w, h);
float v = c == 0 ? pixel.B : c == 1 ? pixel.G : pixel.R;
features.Add(v);
}
}
}
return features;
}
/// <summary>
/// Extracts image pixels in CHW using parallelization
/// </summary>
/// <param name="image">The bitmap image to extract features from</param>
/// <returns>A list of pixels in CHW order</returns>
public static List<float> ParallelExtractCHW(this Bitmap image)
{
// We use local variables to avoid contention on the image object across the multiple threads.
int channelStride = image.Width * image.Height;
int imageWidth = image.Width;
int imageHeight = image.Height;
var features = new byte[imageWidth * imageHeight * 3];
var bitmapData = image.LockBits(new Rectangle(0, 0, imageWidth, imageHeight), ImageLockMode.ReadOnly, image.PixelFormat);
IntPtr ptr = bitmapData.Scan0;
int bytes = Math.Abs(bitmapData.Stride) * bitmapData.Height;
byte[] rgbValues = new byte[bytes];
int stride = bitmapData.Stride;
// Copy the RGB values into the array.
System.Runtime.InteropServices.Marshal.Copy(ptr, rgbValues, 0, bytes);
// The mapping depends on the pixel format
// The mapPixel lambda will return the right color channel for the desired pixel
Func<int, int, int, int> mapPixel = GetPixelMapper(image.PixelFormat, stride);
// Averaged over a large number of images, these loops here execute fastest
// when doing Parallel.For only over c, but not over h and w.
Parallel.For(0, 3, (int c) =>
{
for (int h = 0; h < imageHeight; h++)
{
for (int w = 0; w < imageWidth; w++)
{
features[channelStride * c + imageWidth * h + w] = rgbValues[mapPixel(h, w, c)];
}
}
});
image.UnlockBits(bitmapData);
return features.Select(b => (float)b).ToList();
}
/// <summary>
/// Extracts image pixels in HWC
/// </summary>
/// <param name="image">The bitmap image to extract features from</param>
/// <returns>A list of pixels in HWC order</returns>
public static List<float> ExtractHWC(this Bitmap image)
{
var features = new List<float>(image.Width * image.Height * 3);
for (int w = 0; w < image.Width; w++)
{
for (int h = 0; h < image.Height; h++)
{
for (int c = 0; c < 3; c++)
{
var pixel = image.GetPixel(w, h);
float v = c == 0 ? pixel.B : c == 1 ? pixel.G : pixel.R;
features.Add(v);
}
}
}
return features;
}
/// <summary>
/// Extracts image pixels in HWC using multiple threads
/// </summary>
/// <param name="image">The bitmap image to extract features from</param>
/// <returns>A list of pixels in HWC order</returns>
public static List<float> ParallelExtractHWC(this Bitmap image)
{
int heightStride = image.Width * 3;
int widthStride = image.Height * 3;
int imageWidth = image.Width;
int imageHeight = image.Height;
var features = new byte[image.Width * image.Height * 3];
var bitmapData = image.LockBits(new Rectangle(0, 0, image.Width, image.Height), ImageLockMode.ReadOnly, image.PixelFormat);
IntPtr ptr = bitmapData.Scan0;
int bytes = Math.Abs(bitmapData.Stride) * bitmapData.Height;
byte[] rgbValues = new byte[bytes];
int stride = bitmapData.Stride;
// Copy the RGB values into the array.
System.Runtime.InteropServices.Marshal.Copy(ptr, rgbValues, 0, bytes);
// The mapping depends on the pixel format
// The mapPixel lambda will return the right color channel for the desired pixel
Func<int, int, int, int> mapPixel = GetPixelMapper(image.PixelFormat, stride);
Parallel.For(0, 3, (int c) =>
{
for (int h = 0; h < imageHeight; h++)
{
for (int w = 0; w < imageWidth; w++)
{
features[w * widthStride + h * 3 + c] = rgbValues[mapPixel(h, w, c)];
};
};
});
image.UnlockBits(bitmapData);
return features.Select(b => (float)b).ToList();
}
/// <summary>
/// Returns a function for extracting the R-G-B values properly from an image based on its pixel format
/// </summary>
/// <param name="pixelFormat">The image's pixel format</param>
/// <param name="heightStride">The stride (row byte count)</param>
/// <returns>A function with signature (height, width, channel) returning the corresponding color value</returns>
private static Func<int, int, int, int> GetPixelMapper(PixelFormat pixelFormat, int heightStride)
{
switch (pixelFormat)
{
case PixelFormat.Format32bppArgb:
return (h, w, c) => h * heightStride + w * 4 + c; // bytes are B-G-R-A
case PixelFormat.Format24bppRgb:
default:
return (h, w, c) => h * heightStride + w * 3 + c; // bytes are B-G-R
}
}
}
}


@@ -14,7 +14,7 @@ using System.Linq;
using System.Linq.Expressions;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.MSR.CNTK.Extensibility.Managed;
using CNTKLibraryCSEvalExamples;
namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
{


@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="Microsoft.Research.CNTK.CpuEval-mkl" version="2.0-beta7" targetFramework="net45" />
</packages>
<package id="Microsoft.Research.CNTK.CpuEval-mkl" version="2.0-beta9" targetFramework="net45" />
</packages>


@@ -124,9 +124,9 @@ Train = {
transforms = (
{
type = "Crop"
cropType = "random"
cropRatio = 0.88671875
jitterType = "uniRatio"
cropType = "RandomSide"
sideRatio = 0.88671875
jitterType = "UniRatio"
}:{
type = "Scale"
width = 227
@@ -167,7 +167,7 @@ Test = {
{
type = "Crop"
cropType = "center"
cropRatio = 0.88671875
sideRatio = 0.88671875
}:{
type = "Scale"
width = 227


@@ -94,7 +94,7 @@ TrainConvNet = {
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }


@@ -72,7 +72,7 @@ TrainConvNet = {
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }


@@ -0,0 +1,166 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from __future__ import print_function
import os
import math
import numpy as np
import cntk
import _cntk_py
# Paths relative to current python file.
abs_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(abs_path, "..", "..", "..", "DataSets", "CIFAR-10")
model_path = os.path.join(abs_path, "Models")
# model dimensions
image_height = 32
image_width = 32
num_channels = 3 # RGB
num_classes = 10
# Define the reader for both training and evaluation action.
def create_reader(map_file, mean_file, is_training):
if not os.path.exists(map_file) or not os.path.exists(mean_file):
raise RuntimeError("File '%s' or '%s' does not exist. Please run install_cifar10.py from DataSets/CIFAR-10 to fetch them" %
(map_file, mean_file))
# transformation pipeline for the features has jitter/crop only when training
transforms = []
if is_training:
transforms += [
cntk.io.ImageDeserializer.crop(crop_type='RandomSide', side_ratio=0.8, jitter_type='uniRatio') # train uses jitter
]
transforms += [
cntk.io.ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
cntk.io.ImageDeserializer.mean(mean_file)
]
# deserializer
return cntk.io.MinibatchSource(cntk.io.ImageDeserializer(map_file, cntk.io.StreamDefs(
features = cntk.io.StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = cntk.io.StreamDef(field='label', shape=num_classes))), # and second as 'label'
randomize=is_training)
# Local Response Normalization layer. See Section 3.3 of the paper:
# https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
# The mathematical equation is:
# b_{x,y}^i=a_{x,y}^i/(k+\alpha\sum_{j=max(0,i-n)}^{min(N-1, i+n)}(a_{x,y}^j)^2)^\beta
# where a_{x,y}^i is the activity of a neuron computed by applying kernel i at position (x,y),
# N is the total number of kernels, and n is half the normalization width.
def LRN(k, n, alpha, beta):
x = cntk.blocks.Placeholder(name='lrn_arg')
x2 = cntk.ops.square(x)
# reshape to insert a fake singleton reduction dimension after the 3rd axis (channel axis). Note that the Python axis order is the reverse of BrainScript's.
x2s = cntk.ops.reshape(x2, (1, cntk.InferredDimension), 0, 1)
W = cntk.ops.constant(alpha/(2*n+1), (1,2*n+1,1,1), name='W')
# 3D convolution with a filter that is larger than 1 only in the 3rd axis; it does not reduce, since the reduction dimension is the fake singleton of size 1
y = cntk.ops.convolution (W, x2s)
# reshape back to remove the fake singleton reduction dimension
b = cntk.ops.reshape(y, cntk.InferredDimension, 0, 2)
den = cntk.ops.exp(beta * cntk.ops.log(k + b))
apply_x = cntk.ops.element_divide(x, den)
return cntk.blocks.Block(apply_x, 'LRN')
# Train and evaluate the network.
def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_epochs = 80):
_cntk_py.set_computation_network_trace_level(1)
# Input variables denoting the features and label data
input_var = cntk.ops.input_variable((num_channels, image_height, image_width))
label_var = cntk.ops.input_variable((num_classes))
# apply model to input
scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)
with cntk.layers.default_options (activation=cntk.ops.relu, pad=True):
z = cntk.models.Sequential([
cntk.models.LayerStack(2, lambda : [
cntk.layers.Convolution((3,3), 64),
cntk.layers.Convolution((3,3), 64),
LRN (1.0, 4, 0.001, 0.75),
cntk.layers.MaxPooling((3,3), (2,2))
]),
cntk.models.LayerStack(2, lambda i: [
cntk.layers.Dense([256,128][i]),
cntk.layers.Dropout(0.5)
]),
cntk.layers.Dense(num_classes, activation=None)
])(scaled_input)
# loss and metric
ce = cntk.ops.cross_entropy_with_softmax(z, label_var)
pe = cntk.ops.classification_error(z, label_var)
# training config
minibatch_size = 64
# Set learning parameters
lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
lr_schedule = cntk.learning_rate_schedule(lr_per_sample, unit=cntk.learner.UnitType.sample, epoch_size=epoch_size)
mm_time_constant = [0]*20 + [600]*20 + [1200]
mm_schedule = cntk.learner.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
l2_reg_weight = 0.002
# trainer object
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
unit_gain = True,
l2_regularization_weight = l2_reg_weight)
trainer = cntk.Trainer(z, ce, pe, learner)
# define mapping from reader streams to network inputs
input_map = {
input_var: reader_train.streams.features,
label_var: reader_train.streams.labels
}
cntk.utils.log_number_of_parameters(z) ; print()
progress_printer = cntk.utils.ProgressPrinter(tag='Training')
# perform model training
for epoch in range(max_epochs): # loop over epochs
sample_count = 0
while sample_count < epoch_size: # loop over minibatches in the epoch
data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
trainer.train_minibatch(data) # update model with it
sample_count += trainer.previous_minibatch_sample_count # count samples processed so far
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
progress_printer.epoch_summary(with_metric=True)
z.save_model(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))
### Evaluation action
epoch_size = 10000
minibatch_size = 16
# process minibatches and evaluate the model
metric_numer = 0
metric_denom = 0
sample_count = 0
minibatch_index = 0
while sample_count < epoch_size:
current_minibatch = min(minibatch_size, epoch_size - sample_count)
# Fetch the next test minibatch.
data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
# Evaluate the minibatch and accumulate the error metric.
metric_numer += trainer.test_minibatch(data) * current_minibatch
metric_denom += current_minibatch
# Keep track of the number of samples processed so far.
sample_count += data[label_var].num_samples
minibatch_index += 1
print("")
print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
print("")
return metric_numer/metric_denom
if __name__=='__main__':
reader_train = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True)
reader_test = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)
convnetlrn_cifar10_dataaug(reader_train, reader_test)


@@ -73,7 +73,8 @@ def convnet_cifar10(debug_output=False):
l2_reg_weight = 0.002
# Instantiate the trainer object to drive the model training
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight = l2_reg_weight)
learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
l2_regularization_weight = l2_reg_weight)
trainer = cntk.Trainer(z, (ce, pe), learner)
# define mapping from reader streams to network inputs


@@ -46,17 +46,17 @@ def create_reader(map_file, mean_file, is_training):
transforms = []
if is_training:
transforms += [
ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter
cntk.io.ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter
]
transforms += [
ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
ImageDeserializer.mean(mean_file)
]
# deserializer
return MinibatchSource(ImageDeserializer(map_file, StreamDefs(
features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = StreamDef(field='label', shape=num_classes) # and second as 'label'
)), randomize=is_training, epoch_size = INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP)
return cntk.io.MinibatchSource(cntk.io.ImageDeserializer(map_file, cntk.io.StreamDefs(
features = cntk.io.StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = cntk.io.StreamDef(field='label', shape=num_classes))), # and second as 'label'
randomize=is_training, epoch_size = INFINITELY_REPEAT if is_training else FULL_DATA_SWEEP)
########################
# define the model #


@@ -12,6 +12,9 @@ import numpy as np
import cntk
import _cntk_py
from cntk.utils import *
from cntk.distributed import data_parallel_distributed_learner, Communicator
# default Paths relative to current python file.
abs_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(abs_path, "..", "..", "..", "DataSets", "CIFAR-10")
@@ -24,8 +27,8 @@ image_width = 32
num_channels = 3 # RGB
num_classes = 10
# Define the reader for both training and evaluation action.
def create_reader(map_file, mean_file, train, total_number_of_samples, distributed_after=cntk.io.INFINITE_SAMPLES):
# Create a minibatch source.
def create_image_mb_source(map_file, mean_file, train, total_number_of_samples):
if not os.path.exists(map_file) or not os.path.exists(mean_file):
raise RuntimeError("File '%s' or '%s' does not exist. Please run install_cifar10.py from DataSets/CIFAR-10 to fetch them" %
(map_file, mean_file))
@@ -34,7 +37,7 @@ def create_reader(map_file, mean_file, train, total_number_of_samples, distribut
transforms = []
if train:
transforms += [
cntk.io.ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter
cntk.io.ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter
]
transforms += [
@@ -48,19 +51,17 @@ def create_reader(map_file, mean_file, train, total_number_of_samples, distribut
features = cntk.io.StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = cntk.io.StreamDef(field='label', shape=num_classes))), # and second as 'label'
epoch_size=total_number_of_samples,
multithreaded_deserializer = False, # turn off omp as CIFAR-10 is not heavy for deserializer
distributed_after = distributed_after)
multithreaded_deserializer = True)
# Train and evaluate the network.
def convnet_cifar10_dataaug(create_train_reader, test_reader, create_dist_learner, max_epochs=80, log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False):
_cntk_py.set_computation_network_trace_level(0)
# Create the network.
def create_conv_network():
# Input variables denoting the features and label data
input_var = cntk.ops.input_variable((num_channels, image_height, image_width))
feature_var = cntk.ops.input_variable((num_channels, image_height, image_width))
label_var = cntk.ops.input_variable((num_classes))
# apply model to input
scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)
scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), feature_var)
with cntk.layers.default_options(activation=cntk.ops.relu, pad=True):
z = cntk.models.Sequential([
@@ -80,55 +81,49 @@ def convnet_cifar10_dataaug(create_train_reader, test_reader, create_dist_learne
ce = cntk.ops.cross_entropy_with_softmax(z, label_var)
pe = cntk.ops.classification_error(z, label_var)
# training config
epoch_size = 50000 # for now we manually specify epoch size
minibatch_size = 64
cntk.utils.log_number_of_parameters(z) ; print()
return {
'feature': feature_var,
'label': label_var,
'ce' : ce,
'pe' : pe,
'output': z
}
# Create trainer
def create_trainer(network, epoch_size, num_quantization_bits):
# Set learning parameters
lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
lr_schedule = cntk.learning_rate_schedule(lr_per_sample, unit=cntk.learner.UnitType.sample, epoch_size=epoch_size)
mm_time_constant = [0]*20 + [600]*20 + [1200]
mm_schedule = cntk.learner.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
l2_reg_weight = 0.002
# Create learner
learner = data_parallel_distributed_learner(
cntk.learner.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight),
num_quantization_bits=num_quantization_bits,
distributed_after=0)
# trainer object
learner = create_dist_learner(
cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight))
# Create trainer
return cntk.Trainer(network['output'], network['ce'], network['pe'], learner)
trainer = cntk.Trainer(z, ce, pe, learner)
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_printer, epoch_size):
total_number_of_samples = max_epochs * epoch_size
train_reader = create_train_reader(total_number_of_samples)
# define mapping from reader streams to network inputs
# define mapping from input streams to network inputs
input_map = {
input_var: train_reader.streams.features,
label_var: train_reader.streams.labels
network['feature']: train_source.streams.features,
network['label']: train_source.streams.labels
}
cntk.utils.log_number_of_parameters(z) ; print()
progress_printer = cntk.utils.ProgressPrinter(
freq=num_mbs_per_log,
tag='Training',
log_to_file=log_to_file,
distributed_learner=learner,
gen_heartbeat=gen_heartbeat,
num_epochs=max_epochs)
training_session = cntk.training_session(train_source, trainer,
cntk.minibatch_size_schedule(64), progress_printer, input_map, "ConvNet_CIFAR10_DataAug_", epoch_size)
training_session.train()
# perform model training
updated=True
epoch=0
while updated:
data = train_reader.next_minibatch(minibatch_size, input_map=input_map) # fetch minibatch.
updated = trainer.train_minibatch(data) # update model with it
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
current_epoch = int(trainer.total_number_of_samples_seen/epoch_size)
if epoch != current_epoch:
progress_printer.epoch_summary(with_metric=True)
epoch = current_epoch
trainer.save_checkpoint(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))
### TODO: Stay tuned for an upcoming simpler EvalSession API for test/validation.
### Evaluation action
minibatch_size = 16
@@ -139,9 +134,9 @@ def convnet_cifar10_dataaug(create_train_reader, test_reader, create_dist_learne
minibatch_index = 0
while True:
data = test_reader.next_minibatch(minibatch_size, input_map=input_map)
data = test_source.next_minibatch(minibatch_size, input_map=input_map)
if not data: break
local_mb_samples=data[label_var].num_samples
local_mb_samples=data[network['label']].num_samples
metric_numer += trainer.test_minibatch(data) * local_mb_samples
metric_denom += local_mb_samples
minibatch_index += 1
@@ -156,7 +151,28 @@ def convnet_cifar10_dataaug(create_train_reader, test_reader, create_dist_learne
return metric_numer/metric_denom
# Train and evaluate the network.
def convnet_cifar10_dataaug(train_data, test_data, mean_data, num_quantization_bits=32, epoch_size = 50000, max_epochs=80, log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False):
_cntk_py.set_computation_network_trace_level(0)
progress_printer = ProgressPrinter(
freq=num_mbs_per_log,
tag='Training',
log_to_file=log_to_file,
rank=Communicator.rank(),
gen_heartbeat=gen_heartbeat,
num_epochs=max_epochs)
network = create_conv_network()
trainer = create_trainer(network, epoch_size, num_quantization_bits)
train_source = create_image_mb_source(train_data, mean_data, train=True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, mean_data, train=False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
train_and_test(network, trainer, train_source, test_source, progress_printer, epoch_size)
if __name__=='__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-datadir', help='only interested in changes to that file');
@@ -174,21 +190,9 @@ if __name__=='__main__':
if args['outputdir'] != None:
model_path = args['outputdir'] + "/models"
distributed_after_samples = 0
num_quantization_bits = 32
create_dist_learner = \
lambda learner: cntk.distributed.data_parallel_distributed_learner(learner,
num_quantization_bits=num_quantization_bits,
distributed_after=distributed_after_samples)
mean=os.path.join(data_path, 'CIFAR-10_mean.xml')
mean_data=os.path.join(data_path, 'CIFAR-10_mean.xml')
train_data=os.path.join(data_path, 'train_map.txt')
test_data=os.path.join(data_path, 'test_map.txt')
create_train_reader = lambda data_size: create_reader(train_data, mean, True, data_size, distributed_after_samples)
test_reader = create_reader(test_data, mean, False, cntk.io.FULL_DATA_SWEEP)
convnet_cifar10_dataaug(create_train_reader, test_reader, create_dist_learner, log_to_file=log_dir, num_mbs_per_log=10, gen_heartbeat=False)
cntk.distributed.Communicator.finalize()
convnet_cifar10_dataaug(train_data, test_data, mean_data, num_quantization_bits=32, max_epochs=80, log_to_file=log_dir, num_mbs_per_log=10)
Communicator.finalize()


@@ -37,9 +37,19 @@ Run the example from the current folder using:
We use a fixed crop ratio of `0.8` and scale the image to `32x32` pixels for training. Since all training images are pre-padded to `40x40` pixels, we effectively perform only a translation transform, without scaling. The error rate of the network on test data is around `14%`, which is a lot better than the previous model.
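As a rough sketch, these training transforms correspond to the reader code shown elsewhere in this commit (the `40x40` pre-padding is done during dataset preparation, not by these transforms):

```python
import cntk

# Sketch of the train-time transforms, not the shipped script:
# side_ratio=0.8 of a 40x40 pre-padded image gives a 32x32 crop, so after
# scaling (back) to 32x32 the random crop acts as a pure translation.
transforms = [
    cntk.io.ImageDeserializer.crop(crop_type='RandomSide', side_ratio=0.8,
                                   jitter_type='uniRatio'),
    cntk.io.ImageDeserializer.scale(width=32, height=32, channels=3,
                                    interpolations='linear')
]
```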
### ConvNetLRN_CIFAR10_DataAug.py
The fourth example adds local response normalization (LRN) to the previous example. LRN was used in [AlexNet](../../AlexNet), but it has since lost popularity. We implement LRN with a 3D convolution.
Run the example from the current folder using:
`python ConvNetLRN_CIFAR10_DataAug.py`
All settings are identical to the previous example. The accuracy of the network on test data is slightly better (`0.1-0.2%`) than in the previous example.
### ConvNet_CIFAR10_DataAug_Distributed.py
The fourth example uses the same CNN as ConvNet_CIFAR10_DataAug.py, but it adds support for distributed training with simple aggregation. For a reference on distributed training, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines).
The fifth example uses the same CNN as ConvNet_CIFAR10_DataAug.py, but it adds support for distributed training with simple aggregation. For a reference on distributed training, please check [here](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines).
Note that [this example](./ConvNet_CIFAR10_DataAug_Distributed.py) supports a CPU-only build.
`mpiexec -n <#workers> python ConvNet_CIFAR10_DataAug_Distributed.py`
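For orientation, here is a minimal sketch of the distributed wiring this example uses, with names taken from the Python diff earlier in this commit; `z`, `ce`, `pe`, `lr_schedule`, and `mm_schedule` are assumed to be defined as in the non-distributed script:

```python
import cntk
from cntk.distributed import data_parallel_distributed_learner, Communicator

# Wrap the local learner so gradients are aggregated across MPI workers
# (a sketch under the assumptions above, not the shipped script).
learner = data_parallel_distributed_learner(
    cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule),
    num_quantization_bits=32,  # 32 bits = plain aggregation, no 1-bit SGD
    distributed_after=0)       # distributed training from the first sample
trainer = cntk.Trainer(z, ce, pe, learner)
# ... training loop as usual; every MPI worker must call finalize() on exit:
Communicator.finalize()
```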


@@ -0,0 +1,47 @@
#
# BN-Inception network definition
# Details are in https://arxiv.org/pdf/1502.03167v3.pdf
#
BN_Inception(input, labelDim, bnScale, bnTimeConst) =
{
# 224 x 224 x 3
conv1 = ConvBNReLULayer{64, (7:7), (2:2), true, bnScale, bnTimeConst}(input)
# 112 x 112 x 64
pool1 = MaxPoolingLayer{(3:3), stride = (2:2), pad = true}(conv1)
# 56 x 56 x 64
conv2a = ConvBNReLULayer{64, (1:1), (1:1), true, bnScale, bnTimeConst}(pool1)
# 56 x 56 x 64
conv2b = ConvBNReLULayer{192, (3:3), (1:1), true, bnScale, bnTimeConst}(conv2a)
# 56 x 56 x 192
pool2 = MaxPoolingLayer{(3:3), stride = (2:2), pad = true}(conv2b)
# Inception Blocks
# 28 x 28 x 192
inception3a = InceptionWithAvgPoolLayer{64, 64, 64, 64, 96, 32, bnScale, bnTimeConst}(pool2)
# 28 x 28 x 256
inception3b = InceptionWithAvgPoolLayer{64, 64, 96, 64, 96, 64, bnScale, bnTimeConst}(inception3a)
# 28 x 28 x 320
inception3c = InceptionPassThroughLayer{0, 128, 160, 64, 96, 0, bnScale, bnTimeConst}(inception3b)
# 14 x 14 x 576
inception4a = InceptionWithAvgPoolLayer{224, 64, 96, 96, 128, 128, bnScale, bnTimeConst}(inception3c)
# 14 x 14 x 576
inception4b = InceptionWithAvgPoolLayer{192, 96, 128, 96, 128, 128, bnScale, bnTimeConst}(inception4a)
# 14 x 14 x 576
inception4c = InceptionWithAvgPoolLayer{160, 128, 160, 128, 160, 128, bnScale, bnTimeConst}(inception4b)
# 14 x 14 x 576
inception4d = InceptionWithAvgPoolLayer{96, 128, 192, 160, 192, 128, bnScale, bnTimeConst}(inception4c)
# 14 x 14 x 576
inception4e = InceptionPassThroughLayer{0, 128, 192, 192, 256, 0, bnScale, bnTimeConst}(inception4d)
# 7 x 7 x 1024
inception5a = InceptionWithAvgPoolLayer{352, 192, 320, 160, 224, 128, bnScale, bnTimeConst}(inception4e)
# 7 x 7 x 1024
inception5b = InceptionWithMaxPoolLayer{352, 192, 320, 192, 224, 128, bnScale, bnTimeConst}(inception5a)
# Global Average
# 7 x 7 x 1024
pool3 = AveragePoolingLayer{(7:7)}(inception5b)
# 1 x 1 x 1024
z = LinearLayer{labelDim, init = 'heNormal'}(pool3)
}

View file

@ -0,0 +1,153 @@
#
# BN-Inception network
# Details are in https://arxiv.org/pdf/1502.03167v3.pdf
#
RootDir = "."
ParentDir = ".."
ConfigDir = "$RootDir$"
DataDir = "$ParentDir$/Data"
OutputDir = "$ParentDir$/Output"
ModelDir = "$OutputDir$/Models"
MeanDir = "$ConfigDir$"
stderr = "$OutputDir$/BN-Inception"
precision = "float"
deviceId = "Auto"
command = Train:Eval
parallelTrain = "true"
traceLevel = 1
numMBsToShowResult = 100
###################
# TRAINING CONFIG #
###################
Train = [
action = "train"
modelPath = "$ModelDir$/BN-Inception"
BrainScriptNetworkBuilder = {
include "InceptionLayers.bs"
include "BN-Inception.bs"
imageShape = 224:224:3 # image dimensions
labelDim = 1000 # number of distinct labels
bnScale = 1
bnTimeConst = 4096
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
model = BN_Inception(features, labelDim, bnScale, bnTimeConst)
z = model.z
# connect to system
ce = CrossEntropyWithSoftmax (labels, z)
errs = ClassificationError (labels, z)
top5Errs = ClassificationError (labels, z, topN = 5)
# define special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs : top5Errs)
outputNodes = (z)
}
SGD = [
epochSize = 0
minibatchSize = 256 # 8 GPUs
learningRatesPerMB = 3.6*2:3.384
momentumPerMB = 0.9
maxEpochs = 300
gradUpdateType = "None"
L2RegWeight = 0.0001
numMBsToShowResult = 100
autoAdjust = [
autoAdjustLR = "adjustAfterEpoch"
reduceLearnRateIfImproveLessThan = 1000
learnRateAdjustInterval = 2
learnRateDecreaseFactor = 0.94
loadBestModel = false
]
ParallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = "true"
parallelizationStartEpoch = 1
DataParallelSGD = [
gradientBits = 32
]
]
]
reader = [
readerType = "ImageReader"
file = "$DataDir$/train_map.txt"
randomize = "Auto"
features = [
width = 224
height = 224
channels = 3
cropType = "Random"
cropRatio = 0.46666:0.875
jitterType = "UniRatio"
meanFile = "$MeanDir$/ImageNet1K_mean.xml"
]
labels = [
labelDim = 1000
]
]
cvreader = [
readerType = "ImageReader"
file = "$DataDir$/val_map.txt"
randomize = "None"
features = [
width = 224
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
meanFile = "$MeanDir$/ImageNet1K_mean.xml"
]
labels = [
labelDim = 1000
]
]
]
Eval = [
action = "test"
modelPath = "$ModelDir$/BN-Inception"
evalNodeNames = errs:top5Errs # also test top-5 error rate
minibatchSize = 256
reader = [
readerType = "ImageReader"
file = "$DataDir$/val_map.txt"
randomize = "None"
features = [
width = 224
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
meanFile = "$MeanDir$/ImageNet1K_mean.xml"
]
labels = [
labelDim = 1000
]
]
]
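Reading the `SGD` block above: the explicit `learningRatesPerMB` gives 3.6 for two epochs and 3.384 thereafter, and `autoAdjust` then cuts the rate by a factor of 0.94 every 2 epochs (the `reduceLearnRateIfImproveLessThan = 1000` threshold makes the reduction effectively unconditional). A quick Python sketch of the resulting per-epoch rates, under that reading:

```python
# Approximate the per-epoch learning rate produced by this config.
rate, rates = 3.384, [3.6, 3.6]        # learningRatesPerMB = 3.6*2:3.384
while len(rates) < 300:                # maxEpochs = 300
    rates.extend([rate, rate])         # learnRateAdjustInterval = 2
    rate *= 0.94                       # learnRateDecreaseFactor = 0.94
print([round(r, 5) for r in rates[:6]])
# [3.6, 3.6, 3.384, 3.384, 3.18096, 3.18096]
```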

View file

@ -0,0 +1,88 @@
#
# BN-Inception network components
# Details are in https://arxiv.org/pdf/1502.03167v3.pdf
#
ConvBNReLULayer {outChannels, kernel, stride, pad, bnScale, bnTimeConst} = Sequential(
ConvolutionalLayer{outChannels, kernel, init = 'heNormal', stride = stride, pad = pad, bias = false} :
BatchNormalizationLayer{spatialRank = 2, normalizationTimeConstant = bnTimeConst, initialScale = bnScale} :
ReLU
)
InceptionWithAvgPoolLayer {num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnScale, bnTimeConst} = {
apply(x) = {
# 1x1 Convolution
branch1x1 = ConvBNReLULayer{num1x1, (1:1), (1:1), true, bnScale, bnTimeConst}(x)
# 3x3 Convolution
branch3x3 = Sequential(
ConvBNReLULayer{num3x3r, (1:1), (1:1), true, bnScale, bnTimeConst} :
ConvBNReLULayer{num3x3, (3:3), (1:1), true, bnScale, bnTimeConst}
) (x)
# Double 3x3 Convolution
branch3x3dbl = Sequential(
ConvBNReLULayer{num3x3dblr, (1:1), (1:1), true, bnScale, bnTimeConst} :
ConvBNReLULayer{num3x3dbl, (3:3), (1:1), true, bnScale, bnTimeConst} :
ConvBNReLULayer{num3x3dbl, (3:3), (1:1), true, bnScale, bnTimeConst}
) (x)
# Average Pooling
branch_pool = Sequential(
AveragePoolingLayer{(3:3), stride = (1:1), pad = true} :
ConvBNReLULayer{numPool, (1:1), (1:1), true, bnScale, bnTimeConst}
) (x)
out = Splice((branch1x1:branch3x3:branch3x3dbl:branch_pool), axis=3)
}.out
}.apply
InceptionWithMaxPoolLayer {num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnScale, bnTimeConst} = {
apply(x) = {
# 1x1 Convolution
branch1x1 = ConvBNReLULayer{num1x1, (1:1), (1:1), true, bnScale, bnTimeConst}(x)
# 3x3 Convolution
branch3x3 = Sequential(
ConvBNReLULayer{num3x3r, (1:1), (1:1), true, bnScale, bnTimeConst} :
ConvBNReLULayer{num3x3, (3:3), (1:1), true, bnScale, bnTimeConst}
) (x)
# Double 3x3 Convolution
branch3x3dbl = Sequential(
ConvBNReLULayer{num3x3dblr, (1:1), (1:1), true, bnScale, bnTimeConst} :
ConvBNReLULayer{num3x3dbl, (3:3), (1:1), true, bnScale, bnTimeConst} :
ConvBNReLULayer{num3x3dbl, (3:3), (1:1), true, bnScale, bnTimeConst}
) (x)
# Max Pooling
branch_pool = Sequential(
MaxPoolingLayer{(3:3), stride=(1:1), pad=true} :
ConvBNReLULayer{numPool, (1:1), (1:1), true, bnScale, bnTimeConst}
) (x)
out = Splice((branch1x1:branch3x3:branch3x3dbl:branch_pool), axis=3)
}.out
}.apply
InceptionPassThroughLayer {num1x1, num3x3r, num3x3, num3x3dblr, num3x3dbl, numPool, bnScale, bnTimeConst} = {
apply(x) = {
# 3x3 Convolution
branch3x3 = Sequential(
ConvBNReLULayer{num3x3r, (1:1), (1:1), true, bnScale, bnTimeConst} :
ConvBNReLULayer{num3x3, (3:3), (2:2), true, bnScale, bnTimeConst}
) (x)
# Double 3x3 Convolution
branch3x3dbl = Sequential(
ConvBNReLULayer{num3x3dblr, (1:1), (1:1), true, bnScale, bnTimeConst} :
ConvBNReLULayer{num3x3dbl, (3:3), (1:1), true, bnScale, bnTimeConst} :
ConvBNReLULayer{num3x3dbl, (3:3), (2:2), true, bnScale, bnTimeConst}
) (x)
# Max Pooling
branch_pool = MaxPoolingLayer{(3:3), stride=(2:2), pad=true}(x)
out = Splice((branch3x3:branch3x3dbl:branch_pool), axis=3)
}.out
}.apply
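Since every inception layer splices its branches along the channel axis (`axis=3`), the output depth is just the sum of the branch depths, which is how the `# 28 x 28 x 256`-style comments in BN-Inception.bs can be verified. A small check in Python:

```python
# Output depth of an inception block = sum of its spliced branch depths.
def inception_out_channels(*branch_depths):
    return sum(branch_depths)

# inception3a{64, 64, 64, 64, 96, 32}: branches end with 64, 64, 96, 32 maps.
assert inception_out_channels(64, 64, 96, 32) == 256    # "28 x 28 x 256"
# inception3c (pass-through): 160 + 96 + the 320 input maps carried by pooling.
assert inception_out_channels(160, 96, 320) == 576      # "14 x 14 x 576"
print("channel arithmetic matches the comments")
```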

View file

@ -0,0 +1,24 @@
# CNTK Examples: Image/Classification/GoogLeNet/BN-Inception
## Overview
|Data: |The ILSVRC2012 dataset (http://www.image-net.org/challenges/LSVRC/2012/) for image classification.
|:---------|:---
|Purpose |This folder contains examples that demonstrate how to use CNTK to define BN-Inception (https://arxiv.org/abs/1502.03167) for image classification.
|Network |Deep convolutional neural networks codenamed "Inception" (GoogLeNet) with Batch Normalization.
|Training |Stochastic gradient descent with momentum.
|Comments |See below.
## Running the example
### Getting the data
We use the ILSVRC2012 datasets to demonstrate how to train a BN-Inception network. BN-Inception was first described by researchers at Google in the Batch Normalization paper (https://arxiv.org/abs/1502.03167), where it demonstrates the power of Batch Normalization through minor changes to the original GoogLeNet. It has been shown to train faster and reach better accuracy than GoogLeNet v1, which is well known for winning first place in the [ILSVRC](http://www.image-net.org/challenges/LSVRC/) 2014 detection challenge.
ILSVRC2012 datasets are not included in the CNTK distribution. You may obtain them through http://image-net.org.
## Details
We currently offer the BN-Inception model (https://arxiv.org/abs/1502.03167). Only the BrainScript version is available at the moment.
### [BrainScript](./BrainScript)

View file

@ -1,13 +1,16 @@
#
# This file contains the basic build block of Inception Network as defined in:
#
# https://arxiv.org/pdf/1512.00567.pdf
#
# and in the TensorFlow implementation
#
#
# Convolution layer with Batch Normalization and Rectified Linear (ReLU) activation.
#
ConvBNReLULayer {numOutputChannels, filterShape, stride, pad = true, bnTimeConst = 4096} = Sequential(
ConvolutionalLayer {numOutputChannels, filterShape, init = "heNormal", stride = stride, pad = pad, bias = false} :
ConvolutionalLayer {numOutputChannels, filterShape, init = "glorotUniform", stride = stride, pad = pad, bias = false} :
BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = bnTimeConst, useCntkEngine = false} :
ReLU
)

View file

@ -1,8 +1,11 @@
#
# Inception V3 model from:
#
# https://arxiv.org/pdf/1512.00567.pdf
#
# and in the TensorFlow implementation
#
InceptionV3(input, labelDim, bnTimeConst) =
{
# 299 x 299 x 3
@ -21,14 +24,15 @@ InceptionV3(input, labelDim, bnTimeConst) =
pool_2 = MaxPoolingLayer{(3:3), stride = (2:2), pad = false}(conv_5)
# 35 x 35 x 192
#
# Inception Blocks
# 35 x 35 x 256
#
mixed_1 = InceptionBlock1{64, (48:64), (64:96:96), 32, bnTimeConst}(pool_2)
# 35 x 35 x 288
# 35 x 35 x 256
mixed_2 = InceptionBlock1{64, (48:64), (64:96:96), 64, bnTimeConst}(mixed_1)
# 35 x 35 x 288
mixed_3 = InceptionBlock1{64, (48:64), (64:96:96), 64, bnTimeConst}(mixed_2)
# 17 x 17 x 768
# 35 x 35 x 288
mixed_4 = InceptionBlock2{384, (64:96:96), bnTimeConst}(mixed_3)
# 17 x 17 x 768
mixed_5 = InceptionBlock3{192, (128:128:192), (128:128:128:128:192), 192, bnTimeConst}(mixed_4)
@ -38,28 +42,47 @@ InceptionV3(input, labelDim, bnTimeConst) =
mixed_7 = InceptionBlock3{192, (160:160:192), (160:160:160:160:192), 192, bnTimeConst}(mixed_6)
# 17 x 17 x 768
mixed_8 = InceptionBlock3{192, (192:192:192), (192:192:192:192:192), 192, bnTimeConst}(mixed_7)
# 8 x 8 x 1280
# 17 x 17 x 768
mixed_9 = InceptionBlock4{(192:320), (192:192:192:192), bnTimeConst}(mixed_8)
# 8 x 8 x 2048
# 17 x 17 x 1280
mixed_10 = InceptionBlock5{320, (384:384:384), (448:384:384:384), 192, bnTimeConst}(mixed_9)
# 8 x 8 x 2048
mixed_11 = InceptionBlock5{320, (384:384:384), (448:384:384:384), 192, bnTimeConst}(mixed_10)
# 8 x 8 x 2048
# Global average
#
# Prediction
#
pool_3 = AveragePoolingLayer{(8:8), pad = false}(mixed_11)
# 1 x 1 x 2048
drop = Dropout(pool_3)
# 1 x 1 x 2048
z = DenseLayer{labelDim}(drop)
z = LinearLayer{labelDim}(drop)
#
# Auxiliary
# 8 x 8 x 1280
aux_pool_1 = AveragePoolingLayer{(5:5), pad = false}(mixed_8)
# 3 x 3 x 1280
#
# 17 x 17 x 768
aux_pool_1 = AveragePoolingLayer{(5:5), stride = (3:3), pad = false}(mixed_8)
# 5 x 5 x 768
aux_conv_1 = ConvBNReLULayer{128, (1:1), (1:1), pad=true, bnTimeConst = bnTimeConst}(aux_pool_1)
# 3 x 3 x 128
aux_conv_2 = ConvBNReLULayer{768, (3:3), (1:1), pad=false, bnTimeConst = bnTimeConst}(aux_conv_1)
aux = DenseLayer{labelDim}(aux_conv_2)
# 5 x 5 x 128
aux_conv_2 = ConvBNReLULayer{768, (5:5), (1:1), pad=false, bnTimeConst = bnTimeConst}(aux_conv_1)
# 1 x 1 x 768
aux = LinearLayer{labelDim}(aux_conv_2)
}
#
# Inception V3 model with normalized input, to use the below function
# remove "ImageNet1K_mean.xml" from each reader.
#
InceptionV3Norm(input, labelDim, bnTimeConst) =
{
# Normalize inputs to -1 and 1.
featMean = 128
featScale = 1/128
Normalize{m,f} = x => f .* (x - m)
inputNorm = Normalize{featMean, featScale}(input)
model = InceptionV3(inputNorm, labelDim, bnTimeConst)
}.model
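`Normalize{m,f}` is a plain affine rescaling; the same mapping of byte-valued pixels into roughly `[-1, 1]` looks like this in NumPy (minimal sketch):

```python
import numpy as np

def normalize(x, mean=128.0, scale=1.0 / 128.0):
    # f .* (x - m): shift by the mean, then rescale, mapping [0, 255] to ~[-1, 1].
    return scale * (x - mean)

pixels = np.array([0, 128, 255], dtype=np.float32)
print(normalize(pixels))  # [-1.  0.  0.9921875]
```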

View file

@ -7,19 +7,19 @@ command = Train:Eval
deviceId = "Auto"
precision = "float"
traceLevel = 1
#traceLevel = 1
#perfTraceLevel = 1
parallelTrain = true
RootDir = "."
ConfigDir = "$RootDir$"
ImageNetDir = "$ConfigDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
modelPath = "$ModelDir$/InceptionV3"
#stderr = "$OutputDir$/InceptionV3.log"
ModelDir = "$OutputDir$/Model"
stderr = "$OutputDir$/InceptionV3.log"
modelPath = "$ModelDir$/InceptionV3.model"
ImageH = 299
ImageW = 299
ImageC = 3
@ -27,7 +27,7 @@ NumLabels = 1000
Train = {
action = "train"
traceLevel = 1
BrainScriptNetworkBuilder = {
include "$ConfigDir$/InceptionBlocks.bs"
include "$ConfigDir$/InceptionV3.bs"
@ -35,16 +35,16 @@ Train = {
imageShape = $ImageH$:$ImageW$:$ImageC$
labelDim = $NumLabels$
bnTimeConst = 4096
auxWeight = Constant(0.4)
auxWeight = Constant(0.3)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
model = InceptionV3(features, labelDim, bnTimeConst)
z = model.z
aux = model.aux
model = InceptionV3Norm(features, labelDim, bnTimeConst)
z = model.z
aux = model.aux
# connect to system
ceAux = CrossEntropyWithSoftmax (labels, aux)
@ -61,52 +61,60 @@ Train = {
}
SGD = {
epochSize = 256000
maxEpochs = 1
minibatchSize = 128 # 16 GPU
epochSize = 0
maxEpochs = 160
minibatchSize = 512 # 16 GPUs, 32 per GPU.
dropoutRate = 0.2
learningRatesPerMB = 1
momentumAsTimeConstant = 4096
#momentumPerMB = 0.9
gradUpdateType = "rmsProp"
normWithAveMultiplier = true
rms_wgt_inc = 1.2
rms_wgt_dec = 0.75
rms_wgt_max = 10.0
rms_wgt_min = 0.1
rms_gamma = 0.9
learningRatesPerMB = 3.2*10: 1.6*10: 0.8*10: 0.4*10: 0.2*10: 0.1*10: 0.05*10: 0.025*10: 0.0125*10: 0.00625*10: 0.003125*10: 0.0015625*10: 0.00078125*10: 0.000390625*10: 0.0001953125
momentumPerMB = 0.9
disableRegInBatchNormalization = true
numMBsToShowResult = 20
parallelTrain = {
parallelizationMethod = "dataParallelSGD"
parallelizationStartEpoch = 1
distributedMBReading = true
dataParallelSGD = {
gradientBits = 32
gradientBits = 32
}
}
firstMBsToShowResult = 10 ; numMBsToShowResult = 500
}
reader = {
verbosity = 0 ; randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/val_map.txt"
file = "$DataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "randomArea" ; areaRatio = 0.08:1.0 ; jitterType = "uniRatio" ; aspectRatio = 0.75:1.0 } :
{ type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$ConfigDir$/ImageNet1K_mean.xml" } :
{ type = "Color" ; brightnessRadius = 0.2 ; contrastRadius = 0.2 ; saturationRadius = 0.4 } :
{ type = "Transpose" }
)}
labels = { labelDim = $NumLabels$ }
}
})
}
cvreader = {
verbosity = 0 ; randomize = false
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$DataDir$/val_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "Center" ; sideRatio = 0.875 } :
{ type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } :
{ type = "Transpose" }
)}
labels = { labelDim = $NumLabels$ }
}
})
}
}
# Eval action
@ -123,8 +131,8 @@ Eval = {
file = "$DataDir$/val_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "Center" ; sideRatio = 0.875 } :
{ type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; interpolations = "linear" } :
{ type = "Mean"; meanFile = "$ConfigDir$/ImageNet1K_mean.xml" } :
{ type = "Transpose" }
)}
labels = { labelDim = $NumLabels$ }
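The updated `learningRatesPerMB` schedule above is just 3.2 halved every 10 epochs down to a final tail value; a sketch that regenerates the exact string, assuming that reading:

```python
# Regenerate learningRatesPerMB: start at 3.2 per MB, halve every 10 epochs.
rate, pieces = 3.2, []
for _ in range(14):
    pieces.append("%s*10" % rate)
    rate /= 2
pieces.append(str(rate))                  # final open-ended rate
print(": ".join(pieces))
# -> 3.2*10: 1.6*10: 0.8*10: ... : 0.0001953125
```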

View file

@ -1,4 +1,4 @@
# CNTK Examples: Image/Classification/GoogLeNet
# CNTK Examples: Image/Classification/GoogLeNet/InceptionV3
## BrainScript

View file

@ -0,0 +1,24 @@
# CNTK Examples: Image/Classification/GoogLeNet/InceptionV3
## Overview
|Data: |The ILSVRC2012 dataset (http://www.image-net.org/challenges/LSVRC/2012/) for image classification.
|:---------|:---
|Purpose |This folder contains examples that demonstrate how to use CNTK to define Inception V3 (https://arxiv.org/abs/1512.00567) for image classification.
|Network |Deep convolutional neural networks codenamed "Inception" (GoogLeNet) version 3.
|Training |RMSProp.
|Comments |See below.
## Running the example
### Getting the data
We use the ILSVRC2012 datasets to demonstrate how to train an Inception V3 network. Inception V3 was initially published by researchers at Google and is fine-tuned for excellent classification accuracy at low computational cost. Its original version, GoogLeNet, won first place in the [ILSVRC](http://www.image-net.org/challenges/LSVRC/) 2014 detection challenge.
ILSVRC2012 datasets are not included in the CNTK distribution. You may obtain them through http://image-net.org.
## Details
We currently offer the Inception V3 model, published in December 2015 (https://arxiv.org/abs/1512.00567). Only the BrainScript version is available at the moment.
### [BrainScript](./BrainScript)

View file

@ -4,9 +4,9 @@
|Data: |The ILSVRC2012 dataset (http://www.image-net.org/challenges/LSVRC/2012/) for image classification.
|:---------|:---
|Purpose |This folder contains examples that demonstrate how to use CNTK to define GoogLeNet (https://arxiv.org/abs/1409.4842) for image classification.
|Purpose |This folder contains examples that demonstrate how to use CNTK to define GoogLeNet (https://arxiv.org/abs/1409.4842) and its derivations for image classification.
|Network |Deep convolutional neural networks codenamed "Inception" (GoogLeNet).
|Training |RMSProp.
|Training |See the details.
|Comments |See below.
## Running the example
@ -19,6 +19,8 @@ ILSVRC2012 datasets are not included in the CNTK distribution. You may obtain it
## Details
We currently offer the Inception V3 model, published in December 2015 (https://arxiv.org/abs/1512.00567). Only the BrainScript version is available at the moment.
We currently offer the BN-Inception (https://arxiv.org/abs/1502.03167) and Inception V3 (https://arxiv.org/abs/1512.00567) models.
### [BrainScript](./BrainScript)
### [BN-Inception](./BN-Inception)
### [Inception V3](./InceptionV3)

View file

@ -111,9 +111,9 @@ TrainNetwork = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
jitterType = "UniRatio"
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
hflip = true
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -131,7 +131,7 @@ TrainNetwork = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {
@ -156,9 +156,9 @@ BNStatistics = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
hflip = true
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
jitterType = "UniRatio"
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -183,7 +183,7 @@ Eval = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {

View file

@ -83,7 +83,7 @@ TrainConvNet = {
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }

View file

@ -111,9 +111,9 @@ TrainNetwork = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
jitterType = "UniRatio"
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
hflip = true
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -131,7 +131,7 @@ TrainNetwork = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {
@ -156,9 +156,9 @@ BNStatistics = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
hflip = true
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
jitterType = "UniRatio"
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -183,7 +183,7 @@ Eval = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {

View file

@ -83,7 +83,7 @@ TrainConvNet = {
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Crop" ; cropType = "RandomSide" ; sideRatio = 0.8 ; jitterType = "UniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }

View file

@ -110,9 +110,9 @@ TrainNetwork = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
jitterType = "UniRatio"
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
hflip = true
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -130,7 +130,7 @@ TrainNetwork = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {
@ -156,9 +156,9 @@ BNStatistics = {
width = 224
height = 224
channels = 3
cropType = "Random"
cropType = "RandomSide"
hflip = true
cropRatio = 0.46666:0.875
sideRatio = 0.46666:0.875
jitterType = "UniRatio"
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
@ -183,7 +183,7 @@ Eval = {
height = 224
channels = 3
cropType = "Center"
cropRatio = 0.875
sideRatio = 0.875
meanFile = "$meanDir$/ImageNet1K_mean.xml"
}
labels = {

View file

@ -40,7 +40,7 @@ def create_reader(map_file, mean_file, train):
transforms = []
if train:
transforms += [
ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter
ImageDeserializer.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio') # train uses jitter
]
transforms += [
ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
@ -53,7 +53,7 @@ def create_reader(map_file, mean_file, train):
# Train and evaluate the network.
def train_and_evaluate(reader_train, reader_test, network_name, max_epochs):
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs):
set_computation_network_trace_level(0)
@ -76,7 +76,6 @@ def train_and_evaluate(reader_train, reader_test, network_name, max_epochs):
pe = classification_error(z, label_var)
# shared training parameters
epoch_size = 50000 # for now we manually specify epoch size
minibatch_size = 128
momentum_time_constant = -minibatch_size/np.log(0.9)
l2_reg_weight = 0.0001
@ -112,7 +111,7 @@ def train_and_evaluate(reader_train, reader_test, network_name, max_epochs):
z.save_model(os.path.join(model_path, network_name + "_{}.dnn".format(epoch)))
# Evaluation parameters
epoch_size = 10000
test_epoch_size = 10000
minibatch_size = 16
# process minibatches and evaluate the model
@ -121,8 +120,8 @@ def train_and_evaluate(reader_train, reader_test, network_name, max_epochs):
sample_count = 0
minibatch_index = 0
while sample_count < epoch_size:
current_minibatch = min(minibatch_size, epoch_size - sample_count)
while sample_count < test_epoch_size:
current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
# Fetch the next test minibatch.
data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
# minibatch data to be trained with
@ -150,4 +149,5 @@ if __name__=='__main__':
reader_train = create_reader(os.path.join(data_path, 'train_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), True)
reader_test = create_reader(os.path.join(data_path, 'test_map.txt'), os.path.join(data_path, 'CIFAR-10_mean.xml'), False)
train_and_evaluate(reader_train, reader_test, network_name, epochs)
epoch_size = 50000
train_and_evaluate(reader_train, reader_test, network_name, epoch_size, epochs)
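Pieced together from the hunks above, the evaluation pass is a sample-weighted average of per-minibatch errors; a sketch of its logic using the script's names (`reader_test`, `trainer`, `input_map`, and `label_var` as defined earlier in the file):

```python
# Sample-weighted average of trainer.test_minibatch() over the test sweep.
metric_numer, metric_denom, sample_count = 0.0, 0, 0
minibatch_size, test_epoch_size = 16, 10000
while sample_count < test_epoch_size:
    current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
    data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
    n = data[label_var].num_samples
    metric_numer += trainer.test_minibatch(data) * n   # mean error on this batch
    metric_denom += n
    sample_count += n
print("Final test error: {:.2f}%".format(100.0 * metric_numer / metric_denom))
```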

View file

@ -8,14 +8,15 @@ from __future__ import print_function
import os
import sys
import argparse
import math
import cntk
import numpy as np
from cntk.utils import *
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs, INFINITE_SAMPLES, FULL_DATA_SWEEP
from cntk import Trainer, cntk_py, distributed
from cntk import Trainer, cntk_py
from cntk.learner import momentum_sgd, learning_rate_schedule, momentum_as_time_constant_schedule, UnitType
from _cntk_py import set_computation_network_trace_level
from cntk.distributed import data_parallel_distributed_learner, Communicator
from resnet_models import *
@ -24,43 +25,19 @@ abs_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(abs_path, "..", "..", "..", "DataSets", "CIFAR-10")
model_path = os.path.join(abs_path, "Models")
# model dimensions
# For this example we are using the same data source as for conv net - CIFAR
sys.path.append(os.path.join(abs_path, "..", "..", "ConvNet", "Python"))
from ConvNet_CIFAR10_DataAug_Distributed import create_image_mb_source
# model dimensions - these match the ones from convnet_cifar10_dataaug
# so we can use the same data source
image_height = 32
image_width = 32
num_channels = 3 # RGB
num_classes = 10
# Define the reader for both training and evaluation action.
def create_reader(map_file, mean_file, train, total_data_size, distributed_after=INFINITE_SAMPLES):
if not os.path.exists(map_file) or not os.path.exists(mean_file):
raise RuntimeError("File '%s' or '%s' does not exist. Please run install_cifar10.py from DataSets/CIFAR-10 to fetch them" %
(map_file, mean_file))
# transformation pipeline for the features has jitter/crop only when training
transforms = []
if train:
transforms += [
ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio') # train uses jitter
]
transforms += [
ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'),
ImageDeserializer.mean(mean_file)
]
# deserializer
return MinibatchSource(
ImageDeserializer(map_file, StreamDefs(
features = StreamDef(field='image', transforms=transforms), # first column in map file is referred to as 'image'
labels = StreamDef(field='label', shape=num_classes))), # and second as 'label'
epoch_size=total_data_size,
multithreaded_deserializer = False, # turn off omp as CIFAR-10 is not heavy for deserializer
distributed_after = distributed_after)
# Train and evaluate the network.
def train_and_evaluate(create_train_reader, test_reader, network_name, max_epochs, create_dist_learner, scale_up=False):
set_computation_network_trace_level(0)
# Create network
def create_resnet_network(network_name):
# Input variables denoting the features and label data
input_var = input_variable((num_channels, image_height, image_width))
label_var = input_variable((num_classes))
@ -68,10 +45,8 @@ def train_and_evaluate(create_train_reader, test_reader, network_name, max_epoch
# create model, and configure learning parameters
if network_name == 'resnet20':
z = create_cifar10_model(input_var, 3, num_classes)
lr_per_mb = [1.0]*80+[0.1]*40+[0.01]
elif network_name == 'resnet110':
z = create_cifar10_model(input_var, 18, num_classes)
lr_per_mb = [0.1]*1+[1.0]*80+[0.1]*40+[0.01]
else:
raise RuntimeError("Unknown model name!")
@ -79,14 +54,24 @@ def train_and_evaluate(create_train_reader, test_reader, network_name, max_epoch
ce = cross_entropy_with_softmax(z, label_var)
pe = classification_error(z, label_var)
# shared training parameters
epoch_size = 50000 # for now we manually specify epoch size
# NOTE: scaling up minibatch_size increases sample throughput. On an 8-GPU machine,
# ResNet110 samples-per-second is ~7x that of a single GPU, compared to ~3x without
# scaling up. However, a bigger minibatch size over the same number of samples means
# fewer updates, which leads to higher training error. This is a speed/accuracy trade-off.
minibatch_size = 128 * (distributed.Communicator.num_workers() if scale_up else 1)
return {
'name' : network_name,
'feature': input_var,
'label': label_var,
'ce' : ce,
'pe' : pe,
'output': z
}
# Create trainer
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits):
if network['name'] == 'resnet20':
lr_per_mb = [1.0]*80+[0.1]*40+[0.01]
elif network['name'] == 'resnet110':
lr_per_mb = [0.1]*1+[1.0]*80+[0.1]*40+[0.01]
else:
raise RuntimeError("Unknown model name!")
momentum_time_constant = -minibatch_size/np.log(0.9)
l2_reg_weight = 0.0001
@ -96,37 +81,29 @@ def train_and_evaluate(create_train_reader, test_reader, network_name, max_epoch
lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
# trainer object
learner = create_dist_learner(momentum_sgd(z.parameters, lr_schedule, mm_schedule,
l2_regularization_weight = l2_reg_weight))
trainer = Trainer(z, ce, pe, learner)
# learner object
local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
l2_regularization_weight = l2_reg_weight)
total_number_of_samples = max_epochs * epoch_size
train_reader=create_train_reader(total_number_of_samples)
learner = data_parallel_distributed_learner(learner=local_learner,
num_quantization_bits=num_quantization_bits,
distributed_after=0)
return Trainer(network['output'], network['ce'], network['pe'], learner)
# define mapping from reader streams to network inputs
# Train and test
def train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size):
# define mapping from input streams to network inputs
input_map = {
input_var: train_reader.streams.features,
label_var: train_reader.streams.labels
network['feature']: train_source.streams.features,
network['label']: train_source.streams.labels
}
log_number_of_parameters(z) ; print()
progress_printer = ProgressPrinter(tag='Training')
training_session = cntk.training_session(train_source, trainer,
cntk.minibatch_size_schedule(minibatch_size), progress_printer, input_map, "ConvNet_CIFAR10_DataAug_", epoch_size)
training_session.train()
# perform model training
current_epoch=0
updated=True
while updated:
data=train_reader.next_minibatch(minibatch_size, input_map=input_map) # fetch minibatch.
updated=trainer.train_minibatch(data) # update model with it
progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
epoch_index = int(trainer.total_number_of_samples_seen/epoch_size)
if current_epoch != epoch_index: # new epoch reached
progress_printer.epoch_summary(with_metric=True)
current_epoch=epoch_index
trainer.save_checkpoint(os.path.join(model_path, network_name + "_{}.dnn".format(current_epoch)))
# Evaluation parameters
# TODO: Stay tuned for an upcoming simpler EvalSession API for test/validation.
epoch_size = 10000
minibatch_size = 16
@ -137,10 +114,10 @@ def train_and_evaluate(create_train_reader, test_reader, network_name, max_epoch
minibatch_index = 0
while True:
data = test_reader.next_minibatch(minibatch_size, input_map=input_map)
data = test_source.next_minibatch(minibatch_size, input_map=input_map)
if not data: break
local_mb_samples=data[label_var].num_samples
local_mb_samples=data[network['label']].num_samples
metric_numer += trainer.test_minibatch(data) * local_mb_samples
metric_denom += local_mb_samples
minibatch_index += 1
@ -151,6 +128,33 @@ def train_and_evaluate(create_train_reader, test_reader, network_name, max_epoch
return metric_numer/metric_denom
# Train and evaluate the network.
def resnet_cifar10(train_data, test_data, mean_data, network_name, num_quantization_bits=32, epoch_size=50000, max_epochs=160, log_to_file=None, num_mbs_per_log=None, gen_heartbeat=False, scale_up=False):
set_computation_network_trace_level(0)
# NOTE: scaling up minibatch_size increases sample throughput. On an 8-GPU machine,
# ResNet110 samples-per-second is ~7x that of a single GPU, compared to ~3x without
# scaling up. However, a bigger minibatch size over the same number of samples means
# fewer updates, which leads to higher training error. This is a speed/accuracy trade-off.
minibatch_size = 128 * (Communicator.num_workers() if scale_up else 1)
progress_printer = ProgressPrinter(
freq=num_mbs_per_log,
tag='Training',
log_to_file=log_to_file,
rank=Communicator.rank(),
gen_heartbeat=gen_heartbeat,
num_epochs=max_epochs)
network = create_resnet_network(network_name)
trainer = create_trainer(network, minibatch_size, epoch_size, num_quantization_bits)
train_source = create_image_mb_source(train_data, mean_data, train=True, total_number_of_samples=max_epochs * epoch_size)
test_source = create_image_mb_source(test_data, mean_data, train=False, total_number_of_samples=cntk.io.FULL_DATA_SWEEP)
return train_and_test(network, trainer, train_source, test_source, progress_printer, minibatch_size, epoch_size)
if __name__=='__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-n', '--network', help='network type, resnet20 or resnet110', required=False, default='resnet20')
@ -168,17 +172,15 @@ if __name__=='__main__':
# Create distributed trainer factory
print("Start training: quantize_bit = {}, epochs = {}, distributed_after = {}".format(num_quantization_bits, epochs, distributed_after_samples))
create_dist_learner = lambda learner: distributed.data_parallel_distributed_learner(learner=learner,
num_quantization_bits=num_quantization_bits,
distributed_after=distributed_after_samples)
train_data=os.path.join(data_path, 'train_map.txt')
test_data=os.path.join(data_path, 'test_map.txt')
mean=os.path.join(data_path, 'CIFAR-10_mean.xml')
mean_data=os.path.join(data_path, 'CIFAR-10_mean.xml')
create_train_reader=lambda data_size: create_reader(train_data, mean, True, data_size, distributed_after_samples)
test_reader=create_reader(test_data, mean, False, FULL_DATA_SWEEP)
train_and_evaluate(create_train_reader, test_reader, network_name, epochs, create_dist_learner, scale_up)
epoch_size = 50000
resnet_cifar10(train_data, test_data, mean_data,
network_name, num_quantization_bits, epoch_size, epochs,
scale_up=scale_up)
# Must call MPI finalize when process exit
distributed.Communicator.finalize()
Communicator.finalize()
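One detail worth spelling out: `momentum_time_constant = -minibatch_size/np.log(0.9)` converts a per-minibatch momentum of 0.9 into CNTK's sample-based time constant, which keeps the effective momentum comparable when `scale_up` multiplies the minibatch size by the worker count. A quick check of the identity:

```python
import numpy as np

minibatch_size = 128
time_constant = -minibatch_size / np.log(0.9)   # as in the script
print(np.exp(-minibatch_size / time_constant))  # 0.9 by construction

# Scaled up to 8 workers: minibatch 1024, same target per-minibatch momentum.
scaled_tc = -(128 * 8) / np.log(0.9)
print(np.exp(-1024 / scaled_tc))                # still 0.9
```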

View file

@ -58,13 +58,13 @@ Train=[
channels=3
# Below are the optional parameters.
# Possible values: Center, Random. Default: Center
cropType="Random"
# Horizontal random flip, will be enabled by default if cropType=Random
cropType="RandomSide"
# Horizontal random flip; enabled by default because cropType=RandomSide
#hflip="true"
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.875
# Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9.
sideRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
# Possible values: None, UniRatio. Default: None
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.
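As a mental model of the renamed options (my paraphrase of the semantics, not the reader's actual code): with `cropType=RandomSide` the square crop side is `sideRatio` times the image's shorter side, and `jitterType=UniRatio` draws that ratio uniformly from the given range. A NumPy sketch of that arithmetic:

```python
import numpy as np

def random_side_crop_size(height, width, side_ratio=(0.46666, 0.875)):
    # Assumed semantics: draw the ratio uniformly (jitterType=UniRatio),
    # then size the square crop relative to the shorter image side.
    ratio = np.random.uniform(*side_ratio)
    return int(round(ratio * min(height, width)))

print(random_side_crop_size(256, 340))  # e.g. 150 for a drawn ratio of ~0.59
```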

View file

@ -67,13 +67,13 @@ Train=[
channels=3
# Below are the optional parameters.
# Possible values: Center, Random. Default: Center
cropType="Random"
# Horizontal random flip, will be enabled by default if cropType=Random
cropType="RandomSide"
# Horizontal random flip; enabled because cropType=RandomSide
#hflip="true"
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.875
# Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9.
sideRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
# Possible values: None, UniRatio. Default: None
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.

View file

@ -67,13 +67,13 @@ Train=[
channels=3
# Below are the optional parameters.
# Possible values: Center, Random. Default: Center
cropType="Random"
# Horizontal random flip, will be enabled by default if cropType=Random
cropType="RandomSide"
# Horizontal random flip; enabled because cropType=RandomSide
#hflip="true"
# Crop scale ratio. Examples: cropRatio=0.9, cropRatio=0.7:0.9. Default: 1.
cropRatio=0.875
# Crop scale side ratio. Examples: sideRatio=0.9, sideRatio=0.7:0.9.
sideRatio=0.875
# Crop scale ratio jitter type.
# Possible values: None, UniRatio, UniLength, UniArea. Default: UniRatio
# Possible values: None, UniRatio. Default: None
jitterType="UniRatio"
# Interpolation to use when scaling image to width x height size.
# Possible values: nearest, linear, cubic, lanczos. Default: linear.

View file

@ -0,0 +1,109 @@
# Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>
#command = trainNetwork:testNetwork:writeResults
command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/07_Deconvolution.model"
#stderr = "$outputDir$/07_Deconvolution_bs_out.txt"
#makemode=false
# TRAINING CONFIG
trainNetwork = {
action = "train"
BrainScriptNetworkBuilder = {
cMap = 1
model = inputFeatures => {
conv1 = ConvolutionalLayer {cMap, (5:5), pad = true, activation=ReLU}(inputFeatures)
pool1 = MaxPoolingLayer {(4:4), stride=(4:4)}(conv1)
unpool1 = MaxUnpoolingLayer {(4:4), stride=(4:4)}(pool1, conv1)
deconv1 = DeconvLayer {1, (5:5), cMap, lowerPad=(2:2:0), upperPad=(2:2:0), bias=false}(unpool1)
}.deconv1
# inputs
imageShape = 28:28:1
features = Input {imageShape}
featScale = 1/256
Scale{f} = x => Constant(f) .* x
# apply model to features
f1 = Scale{featScale} (features)
z = model (f1)
# rmse loss function
f2 = Scale{featScale} (features)
err = z - f2
sqErr = err .* err
mse = ReduceMean(sqErr)
rmse = Sqrt(mse)
# declare special nodes
featureNodes = (features)
criterionNodes = (rmse)
evaluationNodes = (rmse)
outputNodes = (z)
}
SGD = {
epochSize = 60000
minibatchSize = 64
maxEpochs = 3
learningRatesPerSample = 0.00015
momentumAsTimeConstant = 600
firstMBsToShowResult = 5
numMBsToShowResult = 235
}
reader = {
readerType = "CNTKTextFormatReader"
# See DataSets/MNIST/README.md for details on getting the data (Train-28x28_cntk_text.txt).
file = "$DataDir$/Train-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}
# TEST CONFIG
testNetwork = {
action = "test"
minibatchSize = 1024 # reduce this if you run out of memory
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Test-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}
# WRITE CONFIG
writeResults = {
action = "write"
minibatchSize = 1
outputPath = "$outputDir$/decoder_output_bs.txt"
reader = {
randomize = False
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Test-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}

View file

@ -0,0 +1,108 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
from __future__ import print_function
import os
import numpy as np
from cntk import load_model
from cntk.ops import combine
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, FULL_DATA_SWEEP
from PIL import Image
from cntk import graph
# Paths relative to current python file.
abs_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(abs_path, "..", "DataSets", "MNIST")
model_path = os.path.join(abs_path, "Output", "Models")
# Helper to print all node names
def print_all_node_names(model_file, is_BrainScript=True):
loaded_model = load_model(model_file)
if is_BrainScript:
loaded_model = combine([loaded_model.outputs[0]])
node_list = graph.depth_first_search(loaded_model, lambda x: x.is_output)
print("printing node information in the format")
print("node name (tensor shape)")
for node in node_list:
print(node.name, node.shape)
# Helper to save array as grayscale image
def save_as_png(val_array, img_file_name, dim=28):
img_array = val_array.reshape((dim, dim))
img_array = np.clip(img_array, 0, img_array.max())
img_array *= 255.0 / img_array.max()
img_array = np.rint(img_array).astype('uint8')
try:
os.remove(img_file_name)
except OSError:
pass
im = Image.fromarray(img_array)
im2 = im.resize((224,224))
im2.save(img_file_name)
if __name__ == '__main__':
num_objects_to_eval = 5
# define location of output, model and data and check existence
output_path = os.path.join(abs_path, "Output")
model_file = os.path.join(model_path, "07_Deconvolution.model")
data_file = os.path.join(data_path, "Test-28x28_cntk_text.txt")
if not (os.path.exists(model_file) and os.path.exists(data_file)):
print("Cannot find required data or model. "
"Please get the MNIST data set and run 'cntk configFile=07_Deconvolution.cntk' to create the model.")
exit(0)
# create minibatch source
minibatch_source = MinibatchSource(CTFDeserializer(data_file, StreamDefs(
features = StreamDef(field='features', shape=(28*28)),
labels = StreamDef(field='labels', shape=10)
)), randomize=False, epoch_size = FULL_DATA_SWEEP)
# use this to print all node names in the model
# print_all_node_names(model_file)
# load model and pick desired nodes as output
loaded_model = load_model(model_file)
output_nodes = combine(
[loaded_model.find_by_name('f1').owner,
loaded_model.find_by_name('z.p1').owner,
loaded_model.find_by_name('z').owner])
# evaluate the model and save its output
features_si = minibatch_source['features']
with open(os.path.join(output_path, "decoder_output_py.txt"), 'wb') as decoder_text_file:
with open(os.path.join(output_path, "encoder_output_py.txt"), 'wb') as encoder_text_file:
for i in range(0, num_objects_to_eval):
mb = minibatch_source.next_minibatch(1)
raw_dict = output_nodes.eval(mb[features_si])
output_dict = {}
for key in raw_dict.keys(): output_dict[key.name] = raw_dict[key]
encoder_input = output_dict['f1']
encoder_output = output_dict['z.p1']
decoder_output = output_dict['z']
in_values = (encoder_input[0,0].flatten())[np.newaxis]
enc_values = (encoder_output[0,0].flatten())[np.newaxis]
out_values = (decoder_output[0,0].flatten())[np.newaxis]
# write results as text and png
np.savetxt(decoder_text_file, out_values, fmt="%.6f")
np.savetxt(encoder_text_file, enc_values, fmt="%.6f")
save_as_png(in_values, os.path.join(output_path, "imageAutoEncoder_%s__input.png" % i))
save_as_png(out_values, os.path.join(output_path, "imageAutoEncoder_%s_output.png" % i))
# visualizing the encoding is only possible and meaningful with a single conv filter
enc_dim = 7
if(enc_values.size == enc_dim*enc_dim):
save_as_png(enc_values, os.path.join(output_path, "imageAutoEncoder_%s_encoding.png" % i), dim=enc_dim)
print("Done. Wrote output to %s" % output_path)

View file

@ -108,3 +108,15 @@ In the sixth example, we show how to train CNTK with multiple processes (GPUs) for
`mpiexec -n 2 cntk configFile=06_OneConvRegrMultiNode.cntk parallelTrain=True parallelizationMethod=DataParallelSGD`
You can change the parallelizationMethod to any of the other three options. For a more detailed guide on multi-GPU and multi-machine training, please refer to [Multiple GPUs and machines](https://github.com/Microsoft/CNTK/wiki/Multiple-GPUs-and-machines).
### 07_Deconvolution.cntk
Example number seven shows how to use deconvolution and unpooling to build a simple image autoencoder. It uses the MNIST dataset, which has a resolution of 28x28x1, encodes each image into a 7x7x1 representation using convolution and pooling, and decodes it back to the original resolution. The training criterion is root-mean-square error (RMSE). To run this example, use the following command:
`cntk configFile=07_Deconvolution.cntk`
The RMSE values for training and testing are 0.225 and 0.223, respectively. To visualize the encoded and decoded images, run the following command (from a Python CNTK environment):
`python 07_Deconvolution_Visualizer.py`
The visualizations will be stored in the `Output` folder together with text representations of the encoder and decoder outputs.
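For reference, the reported criterion is plain RMSE between the rescaled input and its reconstruction; the same computation in NumPy (minimal sketch with a toy 28x28 image):

```python
import numpy as np

def rmse(reconstruction, target):
    # Matches the BrainScript criterion: Sqrt(ReduceMean((z - f2) .* (z - f2)))
    err = reconstruction - target
    return np.sqrt(np.mean(err * err))

img = np.random.rand(28, 28).astype(np.float32)      # stand-in for a scaled MNIST image
noisy = img + 0.1 * np.random.randn(28, 28).astype(np.float32)
print(rmse(noisy, img))                              # ~0.1 for this toy example
```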

View file

@ -163,7 +163,7 @@ def train_lm(training_file):
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
gradient_clipping_with_truncation=gradient_clipping_with_truncation)
trainer = Trainer(z, ce, errs, learner)

View file

@ -198,7 +198,7 @@ def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size)
# Instantiate the trainer object to drive the model training
learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule)
learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)  # 4th positional arg: presumably unit_gain
trainer = Trainer(z, ce, pe, learner)
log_number_of_parameters(z) ; print()

View file

@ -456,6 +456,7 @@ CNTKLIBRARY_COMMON_SRC =\
$(SOURCEDIR)/CNTKv2LibraryDll/Serialization.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/DistributedCommunicator.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/DistributedLearnerBase.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/TrainingSession.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/DataParallelDistributedLearner.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/proto/CNTK.pb.cc \
@ -513,7 +514,7 @@ CNTKLIBRARY_TESTS_SRC =\
$(CNTKLIBRARY_TESTS_SRC_PATH)/DeviceSelectionTests.cpp \
$(CNTKLIBRARY_TESTS_SRC_PATH)/MinibatchSourceTest.cpp \
$(CNTKLIBRARY_TESTS_SRC_PATH)/UserDefinedFunctionTests.cpp \
Examples/Evaluation/CPPEvalV2Client/EvalMultithreads.cpp \
Examples/Evaluation/CNTKLibraryCPPEvalExamples/EvalMultithreads.cpp \
CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests
CNTKLIBRARY_TESTS_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(CNTKLIBRARY_TESTS_SRC)))
@ -633,11 +634,11 @@ $(EVAL_EXTENDED_CLIENT): $(EVAL_EXTENDED_CLIENT_OBJ) | $(EVAL_LIB) $(READER_LIBS
########################################
# Eval V2 Sample client
########################################
EVALV2_SAMPLE_CLIENT:=$(BINDIR)/cppevalv2client
EVALV2_SAMPLE_CLIENT:=$(BINDIR)/CNTKLibraryCPPEvalExamples
EVALV2_SAMPLE_CLIENT_SRC=\
$(SOURCEDIR)/../Examples/Evaluation/CPPEvalV2Client/CPPEvalV2Client.cpp \
$(SOURCEDIR)/../Examples/Evaluation/CPPEvalV2Client/EvalMultithreads.cpp
$(SOURCEDIR)/../Examples/Evaluation/CNTKLibraryCPPEvalExamples/CNTKLibraryCPPEvalExamples.cpp \
$(SOURCEDIR)/../Examples/Evaluation/CNTKLibraryCPPEvalExamples/EvalMultithreads.cpp
EVALV2_SAMPLE_CLIENT_OBJ:=$(patsubst %.cpp, $(OBJDIR)/%.o, $(EVALV2_SAMPLE_CLIENT_SRC))
@ -1132,6 +1133,7 @@ UNITTEST_NETWORK_SRC = \
$(SOURCEDIR)/../Tests/UnitTests/NetworkTests/OperatorEvaluation.cpp \
$(SOURCEDIR)/../Tests/UnitTests/NetworkTests/stdafx.cpp \
$(SOURCEDIR)/../Tests/UnitTests/NetworkTests/TestHelpers.cpp \
$(SOURCEDIR)/../Tests/UnitTests/NetworkTests/EditDistanceTests.cpp \
$(SOURCEDIR)/CNTK/ModelEditLanguage.cpp \
$(SOURCEDIR)/ActionsLib/TrainActions.cpp \
$(SOURCEDIR)/ActionsLib/EvalActions.cpp \
@ -1185,6 +1187,7 @@ UNITTEST_MATH_SRC = \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixQuantizerTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixSparseDenseInteractionsTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/MatrixLearnerTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/stdafx.cpp \
UNITTEST_MATH_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_MATH_SRC))

View file

@ -3,17 +3,44 @@
Give us feedback through these [channels](https://github.com/Microsoft/CNTK/wiki/Feedback-Channels).
# Latest news
***2017-01-20.* V 2.0 Beta 9 Release**
Highlights of this Release:
* The default Python version is now 3.5 (this affects default parameters in client installations as well as the [Runtime Images at Docker Hub](https://github.com/Microsoft/CNTK/wiki/CNTK-Docker-Containers)).
* New and updated core and Python API features.
* New Tutorials and Examples:
* Deconvolution layer and an image autoencoder example using deconvolution and unpooling ([Example **07_Deconvolution** in *Image - Getting Started*](https://github.com/Microsoft/CNTK/tree/v2.0.beta9.0/Examples/Image/GettingStarted)).
* [Basic autoencoder with MNIST data](https://github.com/Microsoft/CNTK/blob/v2.0.beta9.0/Tutorials/CNTK_105_Basic_Autoencoder_for_Dimensionality_Reduction.ipynb).
* [LSTM Timeseries with Simulated Data (Part A)](https://github.com/Microsoft/CNTK/blob/v2.0.beta9.0/Tutorials/CNTK_106A_LSTM_Timeseries_with_Simulated_Data.ipynb). (More to come in upcoming releases.)
* New [CNTK NuGet Packages](https://github.com/Microsoft/CNTK/wiki/NuGet-Package).
See more in the [Release Notes](https://github.com/Microsoft/CNTK/wiki/CNTK_2_0_beta_9_Release_Notes).
Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases).
***2017-01-19.* V 2.0 Beta 8 Release available at Docker Hub**
CNTK V 2.0 Beta 8 Runtime packages are now available as [Public Images at Docker Hub](https://hub.docker.com/r/microsoft/cntk/).
See more on CNTK as Docker Images in this [Wiki article](https://github.com/Microsoft/CNTK/wiki/CNTK-Docker-Containers).
***2017-01-16.* V 2.0 Beta 8 Release**
Highlights of this Release:
* Support of Python versions 2.7, 3.4, and 3.5. See the [binary and source setup](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-your-machine) instructions to find out how to select the Python version.
* New Python API features.
* New Python example [Feature extraction using a trained model in Python API](https://github.com/Microsoft/CNTK/tree/v2.0.beta8.0/Examples/Image/FeatureExtraction).
* Support of [Visual Studio 2015](https://github.com/Microsoft/CNTK/wiki/Setup-Migrate-VS13-to-VS15) for the Windows version.
* Introduction of [C# API in CNTK Evaluation Library](https://github.com/Microsoft/CNTK/wiki/CNTK-Library-Managed-API) and a new set of [CNTK NuGet Packages](https://github.com/Microsoft/CNTK/wiki/NuGet-Package).
* CNTK Runtime packages are now available as [Public Images at Docker Hub](https://github.com/Microsoft/CNTK/wiki/CNTK-Docker-Containers). (**Beta 7** is currently available; Beta 8 image availability will be announced separately in a few days.)
* Version 3 of [CNTK Custom MKL Library](https://cntk.ai/mkl/) is available.
See more in the [Release Notes](https://github.com/Microsoft/CNTK/wiki/CNTK_2_0_beta_8_Release_Notes).
Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases).
***2017-01-10.* CNTK for Windows supports Visual Studio 2015**
***2017-01-10.*** CNTK for Windows supports Visual Studio 2015
If you pull or merge the master branch, CNTK will now require Visual Studio 2015 to build on Windows. There are two ways to move your development environment to Visual Studio 2015:
[Migrate VS2013 to VS2015](https://github.com/Microsoft/CNTK/wiki/Setup-Migrate-VS13-to-VS15):
This gives you fine-grained control over where components are installed
* [Migrate VS2013 to VS2015](https://github.com/Microsoft/CNTK/wiki/Setup-Migrate-VS13-to-VS15): This gives you fine-grained control over where components are installed
* [Script driven setup](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-with-script-on-Windows): This gives you a mostly automated migration to Visual Studio 2015
[Script driven setup](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-with-script-on-Windows):
This gives you a mostly automated migration to Visual Studio 2015
***2016-12-22.*** V 2.0 Beta 7 Release
***2016-12-22.* V 2.0 Beta 7 Release**
Highlights of this Release:
* Python API behaviour is changed to be more strict.
@ -26,42 +53,7 @@ and [GoogLeNet (Inception V3)](https://github.com/Microsoft/CNTK/tree/v2.0.beta7
See more in the [Release Notes](https://github.com/Microsoft/CNTK/wiki/CNTK_2_0_beta_7_Release_Notes)
Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases)
***2016-12-13.*** V 2.0 Beta 6 Release
Highlights of this Release:
* Both Windows and Linux packages are now created using NVIDIA CUDA 8.0 toolkit.
* Linux version now supports Python 3.5 (Windows support is coming soon).
* Support for training on one-hot and sparse arrays via NumPy.
* New Examples and Tutorials: [Video action recognition](https://github.com/Microsoft/CNTK/tree/v2.0.beta6.0/Examples/Video/GettingStarted), [Finance Timeseries with Pandas/Numpy](https://github.com/Microsoft/CNTK/blob/v2.0.beta6.0/Tutorials/CNTK_104_Finance_Timeseries_Basic_with_Pandas_Numpy.ipynb), [Neural Character Language Models](https://github.com/Microsoft/CNTK/tree/v2.0.beta6.0/Examples/Text/CharacterLM/README.md)
* Stability Improvements and bug fixes.
See more in the [Release Notes](https://github.com/Microsoft/CNTK/wiki/CNTK_2_0_beta_6_Release_Notes)
Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases)
***2016-11-25.*** V 2.0 Beta 5 Release
Highlights of this Release:
* The Windows binary packages are now created using the NVIDIA CUDA 8 toolkit, see the [release notes](https://github.com/Microsoft/CNTK/wiki/CNTK_2_0_beta_5_Release_Notes) for details. The CNTK-Linux binary packages are still built with CUDA 7.5. The Linux support for Cuda8 will follow shortly!
* Performance enhancements for evaluation of bitmap images through the new `EvaluateRgbImage` function in the [managed Eval API](https://github.com/Microsoft/CNTK/wiki/Managed-EvalDLL-API).
* A new version of the [CNTK Nuget package](https://github.com/Microsoft/CNTK/wiki/NuGet-Package) is available.
* Stability improvements and bug fixes, e.g. a decreased memory footprint in the CNTK Text Format deserializer.
* We continue to improve documentation and tutorials on an ongoing basis, in this release we added a [Sequence-to-Sequence tutorial](https://github.com/Microsoft/CNTK/blob/v2.0.beta5.0/Tutorials/CNTK_204_Sequence_To_Sequence.ipynb).
See more in the [Release Notes](https://github.com/Microsoft/CNTK/wiki/CNTK_2_0_beta_5_Release_Notes)
Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases)
***2016-11-21.*** V 2.0 Beta 4 Release
Highlights of this Release:
* New ASGD/Hogwild! training using Microsoft's Parameter Server ([Project Multiverso](https://github.com/Microsoft/multiverso))
* Distributed Scenarios now supported in CNTK Python API
* New [Memory Compression](https://github.com/Microsoft/CNTK/wiki/Top-level-configurations#hypercompressmemory) mode to reduce memory usage on GPU
* CNTK Docker image with 1bit-SGD support
* Stability Improvements and bug fixes
See more in the [Release Notes](https://github.com/Microsoft/CNTK/wiki/CNTK_2_0_beta_4_Release_Notes)
Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases)
See [all news](https://github.com/Microsoft/CNTK/wiki/News).
# What is The Microsoft Cognitive Toolkit

View file

@ -6,7 +6,7 @@
# for full license information.
# ==============================================================================
PY_VERSION=34
PY_VERSION=35
while [ $# -gt 0 ]; do
case "$1" in
@ -49,7 +49,7 @@ CNTK_EXAMPLES_PATH="$PWD/Examples"
CNTK_TUTORIALS_PATH="$PWD/Tutorials"
CNTK_BINARY="$CNTK_BIN_PATH/cntk"
CNTK_PY_ENV_FILE="$SCRIPT_DIR/conda-linux-cntk-py$PY_VERSION-environment.yml"
CNTK_WHEEL_PATH="cntk/python/cntk-2.0.beta7.0-$PYWHEEL_QUALIFIER-linux_x86_64.whl"
CNTK_WHEEL_PATH="cntk/python/cntk-2.0.beta9.0-$PYWHEEL_QUALIFIER-linux_x86_64.whl"
test -d "$CNTK_BIN_PATH" && test -d "$CNTK_LIB_PATH" && test -d "$CNTK_DEP_LIB_PATH" &&
test -d "$CNTK_TUTORIALS_PATH" &&

View file

@ -89,14 +89,14 @@ function InstallYml(
$env= $table["Env"]
$ymlFile = $table["ymlFile"]
$envsDir = join-path $basePath "envs"
$targetDir = join-path $envsDir $env
$envsDir = Join-Path $basePath envs
$targetDir = Join-Path $envsDir $env
if (test-path -path $targetDir -PathType Container) {
$newTable = @{ Function = "InstallExe"; Command = "$basepath\Scripts\conda.exe"; Param = "env update --file $ymlFile --name $targetDir"; WorkDir = "$basePath\Scripts"; runAs=$false }
$newTable = @{ Function = "InstallExe"; Command = "$basepath\Scripts\conda.exe"; Param = "env update --file `"$ymlFile`" --name `"$targetDir`""; WorkDir = "$basePath\Scripts"; runAs=$false }
}
else {
$newTable = @{ Function = "InstallExe"; Command = "$basepath\Scripts\conda.exe"; Param = "env create --file $ymlFile --prefix $targetDir"; WorkDir = "$basePath\Scripts"; runAs=$false }
$newTable = @{ Function = "InstallExe"; Command = "$basepath\Scripts\conda.exe"; Param = "env create --file `"$ymlFile`" --prefix `"$targetDir`""; WorkDir = "$basePath\Scripts"; runAs=$false }
}
InstallExe $newTable
@ -149,6 +149,7 @@ function InstallWheel(
$EnvName = $table["EnvName"]
$message = $table["message"]
$whlDirectory = $table["WheelDirectory"]
$pyVersion = $table["PyVersion"]
Write-Host $message
if (-not $Execute) {
@ -156,8 +157,8 @@ function InstallWheel(
return
}
$whlFile = Get-ChildItem $cntkRootDir\cntk\Python\cntk*.whl
if ($whlFile -eq $null) {
$whlFile = Get-ChildItem $cntkRootDir\cntk\Python\cntk*cp$pyVersion-cp$pyVersion*.whl
if (-not $whlFile) {
throw "No WHL file found at $cntkRootDir\cntk\Python"
}
if ($whlFile.Count -gt 1) {
@@ -260,6 +261,7 @@ function CreateBatch(
$func = $table["Function"]
$filename = $table["Filename"]
$pyVersion = $table["PyVersion"]
if (-not $Execute) {
Write-Host "Create-Batch [$filename]:No-Execute flag. No file created"
@@ -277,7 +279,7 @@ if /I "%CMDCMDLINE%" neq ""%COMSPEC%" " (
exit /b 0
)
set PATH=$cntkRootDir\cntk;%PATH%
"$AnacondaBasePath\Scripts\activate" "$AnacondaBasePath\envs\cntk-py34"
"$AnacondaBasePath\Scripts\activate" "$AnacondaBasePath\envs\cntk-py$pyVersion"
"@
add-content -Path $filename -Encoding Ascii -Value $batchScript


@@ -32,11 +32,11 @@ The script will analyse your machine and will determine which components are req
The required components will be downloaded in [$localCache]
Repeated operation of this script will reuse already downloaded components.
- If required VS2012 Runtime and VS2013 Runtime will be installed
- If required VS2015 Runtime will be installed
- If required MSMPI will be installed
- Anaconda3 will be installed into [$AnacondaBasePath]
- A CNTK-PY34 environment will be created or updated in [$AnacondaBasePath\envs]
- CNTK will be installed or updated in the CNTK-PY34 environment
- A CNTK-PY$PyVersion environment will be created or updated in [$AnacondaBasePath\envs]
- CNTK will be installed or updated in the CNTK-PY$PyVersion environment
"
}
@@ -108,7 +108,7 @@ function DisplayStart()
CheckOSVersion
if (-not $Execute) {
Write-Host $(DisplayWarningNoExecuteMessage)
Write-Warning $(DisplayWarningNoExecuteMessage)
}
Write-Host $(DisplayStartContinueMessage)
@@ -131,7 +131,7 @@ Write-Host "
CNTK v2 Python install complete.
To activate the CNTK Python environment and set the PATH to include CNTK, start a command shell and run
$cntkRootDir\scripts\cntkpy34.bat
$cntkRootDir\scripts\cntkpy$PyVersion.bat
Please checkout tutorials and examples here:
$cntkRootDir\Tutorials


@@ -10,15 +10,10 @@ $operations = @(
@{Name = "Verifying Installation contents"; ShortName = "INSTCONTENT"; Info = "Verifying Installation contents";
Verification = @( @{Function = "VerifyInstallationContent"; Path = "$cntkRootDir" } )
},
@{Name = "Installation VS2012 Runtime"; ShortName = "VS2012"; Info = "Install VS2012 Runtime";
Verification = @( @{Function = "VerifyWin32ProductExists"; Match = "^Microsoft Visual C\+\+ 2012 x64 Additional Runtime" },
@{Function = "VerifyWin32ProductExists"; Match = "^Microsoft Visual C\+\+ 2012 x64 Minimum Runtime" } );
Action = @( @{Function = "InstallExe"; Command = "$cntkRootDir\prerequisites\VS2012\vcredist_x64.exe"; Param = "/install /passive /norestart"; Message="Installing VS2012 Runtime...." } )
},
@{Name = "Installation VS2013 Runtime"; ShortName = "VS2013"; Info = "Install VS2013 Runtime";
Verification = @( @{Function = "VerifyWin32ProductExists"; Match = "^Microsoft Visual C\+\+ 2013 x64 Additional Runtime" },
@{Function = "VerifyWin32ProductExists"; Match = "^Microsoft Visual C\+\+ 2013 x64 Minimum Runtime" } );
Action = @( @{Function = "InstallExe"; Command = "$cntkRootDir\prerequisites\VS2013\vcredist_x64.EXE"; Param = "/install /passive /norestart"; Message="Installing VS2013 Runtime...." } )
@{Name = "Installation VS2015 Runtime"; ShortName = "VS2015"; Info = "Install VS2015 Runtime";
Verification = @( @{Function = "VerifyWin32ProductExists"; Match = "^Microsoft Visual C\+\+ 201(5|7) x64 Additional Runtime" },
@{Function = "VerifyWin32ProductExists"; Match = "^Microsoft Visual C\+\+ 201(5|7) x64 Minimum Runtime" } );
Action = @( @{Function = "InstallExe"; Command = "$cntkRootDir\prerequisites\VS2015\vc_redist.x64.exe"; Param = "/install /passive /norestart"; Message="Installing VS2015 Runtime...." } )
},
@{Name = "MSMPI Installation"; ShortName = "CNTK"; Info = "Install MSMPI";
Verification = @( @{Function = "VerifyWin32ProductVersion"; Match = "^Microsoft MPI \(\d+\."; Version = "7.0.12437.6" } );
@@ -29,16 +24,16 @@ $operations = @(
Download = @( @{Function = "Download"; Source = "https://repo.continuum.io/archive/Anaconda3-4.1.1-Windows-x86_64.exe"; Destination = "$localCache\Anaconda3-4.1.1-Windows-x86_64.exe" } );
Action = @( @{Function = "InstallExe"; Command = "$localCache\Anaconda3-4.1.1-Windows-x86_64.exe"; Param = "/InstallationType=JustMe /AddToPath=0 /RegisterPython=0 /S /D=$AnacondaBasePath"; runAs=$false; Message="Installing Anaconda3-4.1.1. This will take several minutes. Please be patient ...."} );
},
@{Name = "CNTK Python Environment 3.4"; ShortName = "CNTKPY34"; Info = "Setup CNTK PythonEnvironment 3.4";
@{Name = "CNTK Python Environment"; ShortName = "CNTKPY"; Info = "Setup CNTK PythonEnvironment $PyVersion";
Verification = @( @{Function = "VerifyRunAlways" } );
Action = @( @{Function = "InstallYml"; BasePath = $AnacondaBasePath; Env = "cntk-py34"; ymlFile= "$MyDir\conda-windows-cntk-py34-environment.yml" } )
Action = @( @{Function = "InstallYml"; BasePath = $AnacondaBasePath; Env = "cntk-py$PyVersion"; ymlFile= "$MyDir\conda-windows-cntk-py$PyVersion-environment.yml"; PyVersion = $PyVersion } )
},
@{Name = "CNTK WHL Install"; ShortName = "CNTKWHL34"; Info = "Setup/Update CNTK Wheel";
@{Name = "CNTK WHL Install"; ShortName = "CNTKWHL"; Info = "Setup/Update CNTK Wheel $PyVersion";
Verification = @( @{Function = "VerifyRunAlways" } );
Action = @( @{Function = "InstallWheel"; BasePath = "$AnacondaBasePath"; EnvName = "cntk-py34"; WheelDirectory="$AnacondaBasePath\envs\cntk-py34\Lib\site-packages\cntk"; Message="Setup/Update of CNTK Wheel environment. Please be patient...." } )
Action = @( @{Function = "InstallWheel"; BasePath = "$AnacondaBasePath"; EnvName = "cntk-py$PyVersion"; WheelDirectory="$AnacondaBasePath\envs\cntk-py$PyVersion\Lib\site-packages\cntk"; PyVersion = $PyVersion; Message="Setup/Update of CNTK Wheel $PyVersion environment. Please be patient...." } )
},
@{Name = "Create CNTKPY34 batch file"; ShortName = "BATCH34"; Info = "Create CNTKPY34 batch file";
Verification = @( @{Function = "VerifyFile"; Path = "$cntkRootDir\scripts\cntkpy34.bat" } );
Action = @( @{Function = "CreateBatch"; Filename = "$cntkRootDir\scripts\cntkpy34.bat" } )
@{Name = "Create CNTKPY batch file"; ShortName = "BATCH"; Info = "Create CNTKPY batch file";
Verification = @( @{Function = "VerifyFile"; Path = "$cntkRootDir\scripts\cntkpy$PyVersion.bat"; PyVersion = $PyVersion } );
Action = @( @{Function = "CreateBatch"; Filename = "$cntkRootDir\scripts\cntkpy$PyVersion.bat"; PyVersion = $PyVersion } )
}
)


@@ -127,8 +127,7 @@ function VerifyInstallationContent(
$path = $table["Path"]
$noInstallRequired = (join-path $path cntk\cntk.exe | test-path -PathType Leaf)
$noInstallRequired = (join-path $path prerequisites\VS2012\vcredist_x64.exe | test-path -PathType Leaf) -and $noInstallRequired
$noInstallRequired = (join-path $path prerequisites\VS2013\vcredist_x64.exe | test-path -PathType Leaf) -and $noInstallRequired
$noInstallRequired = (join-path $path prerequisites\VS2015\vc_redist.x64.exe | test-path -PathType Leaf) -and $noInstallRequired
$noInstallRequired = (join-path $path prerequisites\MSMpiSetup.exe | test-path -PathType Leaf) -and $noInstallRequired
if ($noInstallRequired) {
@@ -136,7 +135,7 @@
return $noInstallRequired
}
throw "`nFatal Error: Files from CNTK binary download package are missing!`nThe install script must be run out of the unpacked binary CNTK package, not from a CNTK source clone."
throw "`nFatal Error: Files from the CNTK binary download package are missing!`nThe install script must be run out of the unpacked binary CNTK package, not from a CNTK source clone."
}
function VerifyDirectory(


@@ -7,26 +7,38 @@
.SYNOPSIS
Use this cmdlet to install CNTK from a precompiled binary drop (see https://github.com/Microsoft/CNTK/releases)
By default the script will:
- Create or reuse Anaconda3 in the folder `C:\local\Anaconda3-4.1.1-Windows-x86_64`
- Create or update a CNTK Python 3.5 environment in `C:\local\Anaconda3-4.1.1-Windows-x86_64\envs\cntk-py35`
.DESCRIPTION
The script will download and install the CNTK prerequisites and Anaconda environment
The script will download and install the CNTK prerequisites and Anaconda environment.
It will analyse your machine and will determine which components are required.
The required components will be downloaded and cached.
Repeated operation of this script will reuse already downloaded components.
- If required VS2012 Runtime and VS2013 Runtime will be installed
- If required VS2015 Runtime will be installed
- If required MSMPI will be installed
- Anaconda3 will be installed into [<AnacondaBasePath>]
- A CNTK-PY34 environment will be created or updated in [<AnacondaBasePath>\envs]
- CNTK will be installed or updated in the CNTK-PY34 environment
- A CNTK-PY<version> environment will be created or updated in [<AnacondaBasePath>\envs]
- CNTK will be installed or updated in the CNTK-PY<version> environment
.PARAMETER Execute
This is an optional parameter. Without setting this switch, no changes to the machine setup/installation will be performed
You need to supply this optional parameter to have the install script perform any changes to your machine.
Without this parameter NO CHANGES will be done to your machine.
.PARAMETER AnacondaBasePath
This is an optional parameter and can be used to specify an already installed Anaconda3 installation.
This optional parameter allows you to specify the location of an Anaconda installation to be used or created on your
machine. If the directory exists on your machine, the script will continue under the assumption that this is a working
Anaconda 3 (4.1.1) (or compatible) installation, and will create the CNTK Python environment in that location.
By default a version of Anaconda3 will be installed into [C:\local\Anaconda3-4.1.1-Windows-x86_64]
.PARAMETER PyVersion
This is an optional parameter and can be used to specify the Python version used in the CNTK Python environment.
Supported values for this parameter are 27, 34, or 35. The default value is 35 (for a CNTK Python 3.5 environment).
.EXAMPLE
.\install.ps1
@@ -39,15 +51,13 @@
.\install.ps1 -Execute -AnacondaBasePath d:\cntkBeta
This will install Anaconda in the [d:\cntkBeta] directory.
#>
[CmdletBinding()]
Param(
[parameter(Mandatory=$false)] [string] $AnacondaBasePath = "C:\local\Anaconda3-4.1.1-Windows-x86_64",
[parameter(Mandatory=$false)] [switch] $Execute
)
[parameter(Mandatory=$false)] [ValidateSet("27", "34", "35")] [string] $PyVersion = "35",
[parameter(Mandatory=$false)] [switch] $Execute)
$MyDir = Split-Path $MyInvocation.MyCommand.Definition

@@ -1 +1 @@
Subproject commit 7bde79e23210f87289af940c6b4e615a335f830f
Subproject commit 4b2396f36b8129d035a0166cd2d1a1e457404249


@@ -160,6 +160,7 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
#endif
else if (EqualInsensitive(nodeType, OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode), L"CBCEWithSM")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ClassificationErrorNode), L"ErrorPrediction")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(EditDistanceErrorNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(EqualNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(GreaterEqualNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(GreaterNode))) ret = true;


@@ -84,20 +84,44 @@ ConvolutionalLayer {numOutputChannels, # e.g. (1) or BS.Constants.None
#reductionRank = 1, # TODO: support this
stride = 1, pad = false,
lowerPad = 0, upperPad = 0,
#transpose = false, # TODO: support this
maxTempMemSizeInSamples = 0} =
{
reductionRank = 1 # TODO: shall become an optional parameter
outputChannelsShape = _AsArray (numOutputChannels)
outputRank = Length (outputChannelsShape)
filterRank = Length (filterShape)
kernelShape = _ConcatArrays (filterShape, Repeat (reductionRank, Inferred)) # kernel := filter plus reductionDims
W = ParameterTensor{_ConcatArrays (kernelShape, outputChannelsShape), init = init, initValueScale = initValueScale, initFilterRank = filterRank, initOutputRank = -1} # [ W x H x C x K ]
b = ParameterTensor(_ConcatArrays (Repeat (Length (filterShape), 1), outputChannelsShape), initValue = initBias) # [ 1 x 1 x K ]
sharing = true # TODO: support this
transpose = false # TODO: support this
apply (x) = {
c = Convolution (W, x, filterShape, mapDims = numOutputChannels, stride = stride, sharing = sharing, autoPadding = pad, lowerPad = lowerPad, upperPad = upperPad, transpose = transpose, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
c = Convolution (W, x, filterShape, mapDims = numOutputChannels, stride = stride, sharing = sharing, autoPadding = pad, lowerPad = lowerPad, upperPad = upperPad, deconv = false, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
res = activation (if bias then c + b else c)
}.res
}.apply
# DeconvLayer -- create a deconvolution layer with optional non-linearity
DeconvLayer {numOutputChannels,
filterShape, # e.g. (3:3)
numInputChannels,
bias = true,
activation = (x=>x),
init = 'glorotUniform',
initValueScale = 0.001,
initBias = 0,
stride = 1,
sharing = true,
autoPadding = false,
lowerPad = 0,
upperPad = 0,
maxTempMemSizeInSamples = 0} =
{
outputChannelsShape = _AsArray (numOutputChannels)
kernelShape = _ConcatArrays (filterShape, outputChannelsShape)
paramShape = _ConcatArrays (kernelShape, _AsArray (numInputChannels))
W = ParameterTensor{paramShape, init=init, initValueScale=initValueScale, initOnCPUOnly=true}
b = ParameterTensor(_ConcatArrays (Repeat (Length (filterShape), 1), outputChannelsShape), initValue = initBias)
apply (x) = {
c = Convolution(W, x, kernelShape, mapDims=numInputChannels, stride=stride, sharing=sharing, autoPadding=autoPadding, lowerPad=lowerPad, upperPad=upperPad, deconv=true, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
res = activation (if bias then c + b else c)
}.res
}.apply
@@ -115,6 +139,15 @@ MaxPoolingLayer {filterShape, stride = 1, pad = false, lowerPad = 0, upperPad =
AveragePoolingLayer {filterShape, stride = 1, pad = false, lowerPad = 0, upperPad = 0} =
_PoolingLayer {"average", filterShape, stride = stride, pad = pad, lowerPad = lowerPad, upperPad = upperPad}
MaxUnpoolingLayer {filterShape, # e.g. (3:3)
stride = 1,
pad = false,
lowerPad = 0,
upperPad = 0} =
{
apply (unpoolInput, poolInput) = MaxUnpooling (unpoolInput, poolInput, filterShape, stride = stride, autoPadding = pad, lowerPad = lowerPad, upperPad = upperPad)
}.apply
# RecurrentLSTMLayer -- create an LSTM layer
RecurrentLSTMLayer {outputDim,
cellShape = None, # if set then use a projection
@@ -571,7 +604,7 @@ ReconcileDynamicAxis(dataInput, layoutInput, tag='') = new ComputationNode [ ope
ReconcileMBLayout = ReconcileDynamicAxis # back compat
CastAs (type, data) = ReconcileDynamicAxis (data, type) # read as CastAs<type>(data) where the cast may consist of rearranging the data w.r.t. MBLayout or broadcasting across sequence items
# ND convo & pooling/unpooling --why is autoPadding true? Normally one would want to reduce dimensions, no?
Convolution(weightNode, inputValueNode, kernelDims, mapDims = 0, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, transpose=false, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = _AsNodes (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
Convolution(weightNode, inputValueNode, kernelDims, mapDims = 0, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, deconv=false, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = _AsNodes (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] ; transpose = deconv /*plus the function args*/ ]
Pooling(input, poolKind/*'max'|'average'*/, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'Pooling' ; inputs = _AsNodes (input); pool = poolKind ; kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
MaxUnpooling(unpoolInput, poolInput, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxUnpooling' ; inputs = _AsNodes (unpoolInput : poolInput); kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
# 2D pooling


@@ -666,7 +666,7 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
static void PrintBanner(int argc, wchar_t* argv[], const string& timestamp)
{
fprintf(stderr, "CNTK 2.0.beta7.0+ (");
fprintf(stderr, "CNTK 2.0.beta9.0+ (");
#ifdef _GIT_EXIST
fprintf(stderr, "%s %.6s, ", _BUILDBRANCH_, _BUILDSHA1_);
#endif


@@ -121,6 +121,7 @@ namespace CNTK
struct MinibatchInfo
{
bool atEndOfData;
bool atEndOfSweep;
size_t numberOfSamples;
NDArrayViewPtr trainingLossValue;
NDArrayViewPtr evalCriterionValue;
@@ -611,6 +612,11 @@ namespace CNTK
///
CNTK_API NDArrayViewPtr Alias(bool readOnly = false) const;
///
/// Creates a new NDArrayView which is an alias of 'this' view but with a new shape.
///
CNTK_API NDArrayViewPtr AsShape(const NDShape& newShape) const;
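A minimal usage sketch of the new alias (the helper name and shapes are hypothetical; assumes a dense NDArrayViewPtr over 12 contiguous floats obtained elsewhere):
#include "CNTKLibrary.h"
// Reinterpret a flat 12-element view as a 3 x 4 matrix without copying;
// AsShape returns an alias over the same underlying storage.
void ReshapeSketch(const CNTK::NDArrayViewPtr& flatView)
{
    auto matrixView = flatView->AsShape(CNTK::NDShape({ 3, 4 }));
    auto readOnlyMatrix = matrixView->Alias(/*readOnly =*/ true); // read-only alias of the alias
}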
///
/// Copies the contents of the 'source' NDArrayView to 'this' view.
/// The shapes of the 'source' view and 'this' view must be identical.
@@ -2379,6 +2385,7 @@ namespace CNTK
friend class Trainer;
public:
///
/// Computes and stores the values of specified variables in the 'outputs' map, using provided 'inputs' values corresponding
/// to each leaf variable of the Function of VariableKind 'Input'.
@@ -2410,11 +2417,15 @@ namespace CNTK
CNTK_API virtual void Backward(const BackPropStatePtr& state,
const std::unordered_map<Variable, ValuePtr>& rootGradientValues,
std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs);
///
/// Returns the name of the operation that this Function denotes
///
virtual const std::wstring& OpName() const = 0;
virtual const std::wstring& OpName() const
#ifdef SWIG
{ NOT_IMPLEMENTED; }
#else
= 0;
#endif
protected:
///
@@ -2471,6 +2482,11 @@ namespace CNTK
///
CNTK_API static FunctionPtr Deserialize(const Dictionary& dictionary, const ::CNTK::DeviceDescriptor& device = DeviceDescriptor::UseDefaultDevice());
///
/// This method needs to be explicitly overridden in subclasses.
///
size_t CurrentVersion() const override { NOT_IMPLEMENTED; }
public:
///
/// Returns the name of 'this' Function.
@@ -2516,10 +2532,10 @@ namespace CNTK
CNTK_API bool IsBlock() const;
///
/// Returns the composite Function underlying this block Function.
/// Returns the root of the Function graph underlying this block Function.
/// Throws an exception if this is not a block Function
///
CNTK_API FunctionPtr BlockComposite() const;
CNTK_API FunctionPtr BlockRoot() const;
///
/// Returns the mapping from the arguments of the composite underlying this block Function
@@ -2726,7 +2742,10 @@ namespace CNTK
ThrowFormatted<std::invalid_argument>(formatString.c_str(), DiagnosticsName().c_str(), std::forward<_Types>(_Args)...);
}
private:
public:
CNTK_API Function(const std::vector<Variable>& inputs, const std::vector<Variable>& outputs, const std::wstring& name = L"", const std::wstring& uid = Internal::GenerateUid(L"UserDefinedFunction"));
private:
CNTK_API Function(const std::vector<Variable>& inputs, const std::vector<Variable>& outputs, Dictionary&& functionConfig, const FunctionPtr& rootFunction, const std::wstring& name, const std::wstring& uid);
std::vector<Variable> m_inputs;
@@ -3055,6 +3074,16 @@ namespace CNTK
return ClassificationError(prediction, labels, Axis(0), name);
}
///
/// Create an instance of the CNTK built-in LambdaRank loss, an effective proxy for optimizing the NDCG metric
///
CNTK_API FunctionPtr LambdaRank(const Variable& prediction, const Variable& gains, const Variable& groupId, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in operation for evaluating the NDCG at 1 metric
///
CNTK_API FunctionPtr NDCGAt1(const Variable& prediction, const Variable& gains, const Variable& groupId, const std::wstring& name = L"");
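A hedged sketch of wiring the two new ranking criteria together; the input shapes, names, and the use of Combine are illustrative, not prescribed by the API:
#include "CNTKLibrary.h"
using namespace CNTK;
// 'prediction' is assumed to be a model output; gains and group ids are 1-dim inputs.
FunctionPtr RankingCriteriaSketch(const Variable& prediction)
{
    auto gains   = InputVariable({ 1 }, DataType::Float, L"gains");
    auto groupId = InputVariable({ 1 }, DataType::Float, L"groupId");
    auto loss    = LambdaRank(prediction, gains, groupId); // trainable proxy for NDCG
    auto metric  = NDCGAt1(prediction, gains, groupId);    // evaluation-only NDCG@1
    return Combine({ loss, metric });
}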
///
/// Create an instance of the CNTK built-in operation for getting the past value along the lone dynamic axis of the specified operand.
/// Throws an exception if the operand has more than one dynamic axis.
@@ -3281,7 +3310,7 @@ namespace CNTK
///
/// A special value that can be used for the epochSize to indicate that the schedule is sweep-based.
///
static const size_t EntireSweep = 0;
static const size_t FullDataSweep = 0;
///
/// Create a schedule with a constant parameter value.
@@ -3293,7 +3322,7 @@ namespace CNTK
/// schedule[0] is used for the first 'epochSize' samples, schedule[1] -- for the second,
/// and so on. The last value is then used repeatedly until the end of training.
///
CNTK_API TrainingParameterSchedule(const std::vector<T>& schedule, UnitType unit, size_t epochSize = 1);
CNTK_API TrainingParameterSchedule(const std::vector<T>& schedule, UnitType unit, size_t epochSize = FullDataSweep);
///
/// Create a schedule using the list of key-value pairs, where the key specifies
@@ -3304,7 +3333,7 @@ namespace CNTK
/// the first 100 samples, then '0.1' is used for the second 200 samples,
/// after which the value is switched to '0.005'.
///
CNTK_API TrainingParameterSchedule(const std::vector<std::pair<size_t, T>>& schedule, UnitType unit, size_t epochSize = 1);
CNTK_API TrainingParameterSchedule(const std::vector<std::pair<size_t, T>>& schedule, UnitType unit, size_t epochSize = FullDataSweep);
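For instance, a sketch mirroring the comment above (illustrative numbers; assumes the LearningRateSchedule typedef for TrainingParameterSchedule<double>):
using namespace CNTK;
// '0.05' for the first 100 samples, '0.1' for the next 200, then '0.005' until the end:
LearningRateSchedule stepwise(
    std::vector<std::pair<size_t, double>>{ { 1, 0.05 }, { 2, 0.1 }, { 1, 0.005 } },
    LearningRateSchedule::UnitType::Sample, /*epochSize =*/ 100);
// With the new FullDataSweep default, each listed value instead applies per data sweep:
LearningRateSchedule perSweep(std::vector<double>{ 0.05, 0.025 },
                              LearningRateSchedule::UnitType::Sample);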
///
/// Returns a value corresponding to the absolute sample (or sweep)
@@ -3319,7 +3348,7 @@ namespace CNTK
///
UnitType Unit() const { return m_unit; }
bool IsSweepBased() const { return m_epochSize == EntireSweep; }
bool IsSweepBased() const { return m_epochSize == FullDataSweep; }
CNTK_API virtual ~TrainingParameterSchedule();
@@ -3351,20 +3380,23 @@ namespace CNTK
class TrainingParameterPerUnitSchedule : public TrainingParameterSchedule<T>
{
public:
TrainingParameterPerUnitSchedule(double value)
TrainingParameterPerUnitSchedule(T value)
: TrainingParameterSchedule<T>::TrainingParameterSchedule(value, U)
{ }
TrainingParameterPerUnitSchedule(const std::vector<double>& schedule, size_t epochSize = 1)
TrainingParameterPerUnitSchedule(const std::vector<T>& schedule,
size_t epochSize = TrainingParameterSchedule<T>::FullDataSweep)
: TrainingParameterSchedule<T>::TrainingParameterSchedule(schedule, U, epochSize)
{ }
TrainingParameterPerUnitSchedule(const std::vector<std::pair<size_t, double>>& schedule, size_t epochSize = 1)
TrainingParameterPerUnitSchedule(const std::vector<std::pair<size_t, T>>& schedule,
size_t epochSize = TrainingParameterSchedule<T>::FullDataSweep)
: TrainingParameterSchedule<T>::TrainingParameterSchedule(schedule, U, epochSize)
{ }
#ifdef SWIG // for Python interop (adds indexer)
const double __getitem__(size_t count) const
const T __getitem__(size_t count) const
{
return TrainingParameterSchedule<T>::operator[](count);
}
@@ -3391,6 +3423,8 @@ namespace CNTK
typedef TrainingParameterPerSampleSchedule<double> MomentumPerSampleSchedule;
typedef TrainingParameterPerMinibatchSchedule<double> MomentumPerMinibatchSchedule;
typedef TrainingParameterPerSampleSchedule<size_t> MinibatchSizeSchedule;
///
/// This class allows specifying momentum as a time constant in place of momentum per sample in
/// all of the Learner factory methods. The specified values are then automatically converted into per-sample values.
@@ -3405,13 +3439,13 @@ namespace CNTK
ConvertToPerSampleValues();
}
MomentumAsTimeConstantSchedule(const std::vector<double>& schedule, size_t epochSize = 1)
MomentumAsTimeConstantSchedule(const std::vector<double>& schedule, size_t epochSize = FullDataSweep)
: TrainingParameterSchedule<double>::TrainingParameterSchedule(schedule, UnitType::Sample, epochSize)
{
ConvertToPerSampleValues();
}
MomentumAsTimeConstantSchedule(const std::vector<std::pair<size_t, double>>& schedule, size_t epochSize = 1)
MomentumAsTimeConstantSchedule(const std::vector<std::pair<size_t, double>>& schedule, size_t epochSize = FullDataSweep)
: TrainingParameterSchedule<double>::TrainingParameterSchedule(schedule, UnitType::Sample, epochSize)
{
ConvertToPerSampleValues();
@@ -3428,9 +3462,10 @@ namespace CNTK
CNTK_API void ConvertToPerSampleValues();
};
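A sketch of the conversion that ConvertToPerSampleValues performs, assuming the usual exponential-decay reading of a time constant (not copied from the implementation):
#include <cmath>
// A momentum time constant 'tc' is the number of samples after which a
// gradient's contribution has decayed to 1/e, so the per-sample momentum is:
inline double MomentumFromTimeConstant(double timeConstant)
{
    return (timeConstant == 0.0) ? 0.0 : std::exp(-1.0 / timeConstant);
}
// E.g. a time constant of 1100 samples corresponds to exp(-1/1100) ~= 0.99909.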
///
/// A collection of additional options that affect parameter updates and
/// are applicable for all standard learners
///
struct AdditionalLearningOptions
{
double l1RegularizationWeight = 0.0;
@@ -3444,6 +3479,16 @@ namespace CNTK
bool gradientClippingWithTruncation = true;
};
///
/// Returns true if by default momentum is applied in the unit-gain fashion.
///
CNTK_API bool DefaultUnitGainValue();
///
/// Sets the global default value of the unit-gain flag.
///
CNTK_API void SetDefaultUnitGainValue(bool value);
///
/// Abstraction for learning a subset of parameters of a learnable Function using first-order gradient values.
/// E.g., momentum, AdaGrad, RMSProp, etc. are different types of learners with their own algorithms for
@@ -3456,7 +3501,7 @@ namespace CNTK
// Method to update the parameters associated with this learner. By returning false, this method indicates that
// learning has stopped for all of the parameters associated with this learner
//
virtual bool Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount) = 0;
virtual bool Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount, bool sweepEnd = false) = 0;
///
/// Returns the set of parameters associated with this learner.
@@ -3554,6 +3599,7 @@ namespace CNTK
CNTK_API LearnerPtr MomentumSGDLearner(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain = DefaultUnitGainValue(),
AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
///
@@ -3562,6 +3608,7 @@ namespace CNTK
CNTK_API LearnerPtr NesterovLearner(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain = DefaultUnitGainValue(),
AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
static MomentumSchedule DefaultVarianceMomentum = MomentumAsTimeConstantSchedule(2 * 3600 * 100);
@@ -3572,6 +3619,7 @@ namespace CNTK
CNTK_API LearnerPtr AdamLearner(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain = DefaultUnitGainValue(),
const MomentumSchedule& varianceMomentumSchedule = DefaultVarianceMomentum,
bool lowMemory = true,
AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
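A sketch of how the extended factory signatures compose; the schedules and values are illustrative, and 'parameters' would come from model->Parameters() in real code:
#include "CNTKLibrary.h"
using namespace CNTK;
LearnerPtr MakeMomentumLearnerSketch(const std::vector<Parameter>& parameters)
{
    LearningRateSchedule learningRate(0.01, LearningRateSchedule::UnitType::Sample);
    MomentumAsTimeConstantSchedule momentum(1100); // auto-converted to per-sample values
    // With unitGain = true the gradient contribution is scaled by (1 - momentum),
    // keeping the effective step size independent of the momentum value.
    return MomentumSGDLearner(parameters, learningRate, momentum,
                              /*unitGain =*/ DefaultUnitGainValue());
}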
@@ -3611,9 +3659,9 @@ namespace CNTK
return m_communicator;
}
bool Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t minibatchSampleCount) override
bool Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t minibatchSampleCount, bool sweepEnd = false) override
{
MinibatchInfo info{ false, minibatchSampleCount };
MinibatchInfo info{ false, sweepEnd, minibatchSampleCount };
return Update(gradientValues, info);
}
@@ -3632,6 +3680,16 @@ namespace CNTK
m_learner->ResetSmoothedGradients();
}
//
// Returns the total number of samples needed for warmup.
// After reaching this number of samples the learner switches to the distributed mode.
// Warm up is useful for letting the model stabilize with purely local updates before switching to distributed aggregation.
//
virtual size_t ParallelizationAfter()
{
return m_distributeAfterSamples;
}
//
// Method to update the parameters associated with this learner. By returning false, this method indicates that
// learning has stopped for all of the parameters associated with this learner
@@ -3639,11 +3697,12 @@ namespace CNTK
CNTK_API virtual bool Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, MinibatchInfo& minibatch) = 0;
protected:
DistributedLearner(DistributedCommunicatorPtr communicator, LearnerPtr learner)
DistributedLearner(DistributedCommunicatorPtr communicator, LearnerPtr learner, size_t distributeAfterSamples)
: Learner(learner? learner->Parameters() : std::vector<Parameter>(),
LearningRateSchedule(0, LearningRateSchedule::UnitType::Sample)),
m_learner(learner),
m_communicator(communicator)
m_communicator(communicator),
m_distributeAfterSamples(distributeAfterSamples)
{
if (!m_learner)
InvalidArgument("Learner is not allowed to be null.");
@@ -3654,6 +3713,7 @@ namespace CNTK
const LearnerPtr m_learner;
const DistributedCommunicatorPtr m_communicator;
const size_t m_distributeAfterSamples;
// Disallow copy and move construction and assignment
DistributedLearner(const DistributedLearner&) = delete; DistributedLearner& operator=(const DistributedLearner&) = delete; DistributedLearner& operator=(DistributedLearner&&) = delete; DistributedLearner(DistributedLearner&&) = delete;
@@ -3682,32 +3742,44 @@ namespace CNTK
bool resetSGDMomentumAfterAggregation = true,
double blockLearningRate = 1.0);
///
/// Describes an input stream: its name, element type, storage, etc.
///
struct StreamInformation
{
std::wstring m_name; // Unique name of the stream
size_t m_id; // Unique identifier of the stream
StorageFormat m_storageFormat; // Storage format of the stream
DataType m_elementType; // Element type of the stream
NDShape m_sampleLayout; // Layout of the sample for the stream
};
inline bool operator==(const StreamInformation& left, const StreamInformation& right)
{
return ((left.m_id == right.m_id) &&
(left.m_name == right.m_name) &&
(left.m_storageFormat == right.m_storageFormat) &&
(left.m_elementType == right.m_elementType) &&
(left.m_sampleLayout == right.m_sampleLayout));
}
///
/// Trainer is the top-level abstraction responsible for the orchestration of the training of a model
/// using the specified learners and training data either explicitly supplied as Value objects or from
/// a MinibatchSource object.
///
class Trainer
class Trainer : public std::enable_shared_from_this<Trainer>
{
public:
///
/// Construct a Trainer to train the specified 'model' with the specified 'trainingLoss' Variable as the training criterion
/// and using the specified set of 'parameterLearners' for updating the model's parameters using computed gradients.
///
CNTK_API Trainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const std::vector<LearnerPtr>& parameterLearners);
///
/// Construct a Trainer to train the specified 'model' with the specified 'trainingLoss' as the training criterion,
/// the specified 'evaluationFunction' as the criterion for evaluating the trained model's quality, and using the specified set
/// of 'parameterLearners' for updating the model's parameters using computed gradients.
///
// TODO: Add overload for multiple evaluation criterion
CNTK_API Trainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const FunctionPtr& evaluationFunction, const std::vector<LearnerPtr>& parameterLearners);
///
/// Optimize model parameters using the specified 'arguments' minibatch of training samples.
/// Returns false if all parameter learners indicate end of learning (through their Update method's return value).
///
CNTK_API bool TrainMinibatch(const std::unordered_map<Variable, MinibatchData>& arguments, const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice());
///
/// An overload of the TrainMinibatch above that takes a map of variables and their values (as its first argument).
///
CNTK_API bool TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice());
///
@@ -3717,12 +3789,22 @@ namespace CNTK
/// for the 'outputs' for which the ValuePtr mapping was left null by the caller.
/// Returns false if all parameter learners indicate end of learning (through their Update method's return value).
///
CNTK_API bool TrainMinibatch(const std::unordered_map<Variable, MinibatchData>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice());
///
/// An overload of the TrainMinibatch above that takes a map of variables and their values (as its first argument).
///
CNTK_API bool TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice());
///
/// Test the model on the specified batch of samples using the evaluation Function specified during construction of the Trainer
/// Returns the average evaluation criterion value per sample for the tested minibatch of samples
///
CNTK_API double TestMinibatch(const std::unordered_map<Variable, MinibatchData>& arguments, const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice());
///
/// An overload of the TestMinibatch above that takes a map of variables and their values (as its first argument).
///
CNTK_API double TestMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice());
///
@@ -3776,14 +3858,20 @@ namespace CNTK
CNTK_API size_t TotalNumberOfSamplesSeen() const;
private:
template <typename T1, typename ...CtorArgTypes>
friend std::shared_ptr<T1> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
Trainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const std::vector<LearnerPtr>& parameterLearners);
Trainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const FunctionPtr& evaluationFunction, const std::vector<LearnerPtr>& parameterLearners);
void ExecuteForwardBackward(
const std::unordered_map<Variable, ValuePtr>& arguments,
std::unordered_map<Variable, ValuePtr>& outputsToFetch,
const DeviceDescriptor& computeDevice,
std::unordered_map<Variable, ValuePtr>& parameterGradients);
bool TrainLocalMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice);
bool TrainDistributedMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice);
bool TrainLocalMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, bool sweepEnd, const DeviceDescriptor& computeDevice);
bool TrainDistributedMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, bool sweepEnd, const DeviceDescriptor& computeDevice);
void Save(const std::wstring& modelFilePath, const std::vector<DictionaryValue>& learnerState, const Dictionary& externalState);
@@ -3805,25 +3893,17 @@ namespace CNTK
};
///
/// Describes an input stream: its name, element type, storage, etc.
/// Construct a Trainer to train the specified 'model' with the specified 'trainingLoss' Variable as the training criterion
/// and using the specified set of 'parameterLearners' for updating the model's parameters using computed gradients.
///
struct StreamInformation
{
std::wstring m_name; // Unique name of the stream
size_t m_id; // Unique identifier of the stream
StorageFormat m_storageFormat; // Storage format of the stream
DataType m_elementType; // Element type of the stream
NDShape m_sampleLayout; // Layout of the sample for the stream
};
CNTK_API TrainerPtr CreateTrainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const std::vector<LearnerPtr>& parameterLearners);
inline bool operator==(const StreamInformation& left, const StreamInformation& right)
{
return ((left.m_id == right.m_id) &&
(left.m_name == right.m_name) &&
(left.m_storageFormat == right.m_storageFormat) &&
(left.m_elementType == right.m_elementType) &&
(left.m_sampleLayout == right.m_sampleLayout));
}
///
/// Construct a Trainer to train the specified 'model' with the specified 'trainingLoss' as the training criterion,
/// the specified 'evaluationFunction' as the criterion for evaluating the trained model's quality, and using the specified set
/// of 'parameterLearners' for updating the model's parameters using computed gradients.
///
CNTK_API TrainerPtr CreateTrainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const FunctionPtr& evaluationFunction, const std::vector<LearnerPtr>& parameterLearners);
}
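A minimal end-to-end sketch of the factory plus the MinibatchData-based TrainMinibatch overload; all names, stream handles, and the minibatch size are illustrative and assumed to be set up elsewhere (0 for the sequence cap is assumed to mean "unbounded"):
#include "CNTKLibrary.h"
using namespace CNTK;
void TrainLoopSketch(const FunctionPtr& model, const FunctionPtr& loss, const FunctionPtr& metric,
                     const LearnerPtr& learner, const MinibatchSourcePtr& source,
                     const Variable& features, const Variable& labels,
                     const StreamInformation& featureStream, const StreamInformation& labelStream)
{
    auto trainer = CreateTrainer(model, loss, metric, { learner });
    for (;;)
    {
        const auto& mb = source->GetNextMinibatch(/*minibatchSizeInSequences =*/ 0,
                                                  /*minibatchSizeInSamples =*/ 64);
        if (mb.empty())
            break; // no more data
        // MinibatchData converts where a ValuePtr is expected, and now carries
        // the sweep-end flag along with the data.
        trainer->TrainMinibatch({ { features, mb.at(featureStream) },
                                  { labels,   mb.at(labelStream) } },
                                DeviceDescriptor::UseDefaultDevice());
    }
}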
namespace std {
@@ -3838,11 +3918,34 @@
namespace CNTK
{
///
/// A struct that combines the minibatch meta-data with the actual minibatch data.
/// The former includes the number of sequences and samples in the minibatch,
/// as well as the sweep-end flag, which is set to true to indicate that the minibatch
/// concludes a data sweep (i.e., it's the last minibatch at the end of the sweep).
///
struct MinibatchData
{
size_t m_numSequences;
size_t m_numSamples;
ValuePtr m_data;
MinibatchData() : MinibatchData(nullptr)
{}
// a convenience constructor to allow passing ValuePtr arguments in place
// of MinibatchData parameter (e.g., in Trainer::TrainMinibatch)
MinibatchData(ValuePtr value) : MinibatchData(value, 0)
{}
MinibatchData(ValuePtr value, size_t numSamples, bool sweepEnd = false)
: MinibatchData(value, numSamples, numSamples, sweepEnd)
{}
MinibatchData(ValuePtr value, size_t numSequences, size_t numSamples, bool sweepEnd)
: data(value), numberOfSequences(numSequences), numberOfSamples(numSamples), sweepEnd(sweepEnd)
{}
ValuePtr data;
size_t numberOfSequences;
size_t numberOfSamples;
bool sweepEnd;
};
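Given those constructors, a bare ValuePtr can stand in wherever MinibatchData is expected; a small sketch with an illustrative sample count:
// Wrap an existing Value as a 32-sample minibatch that ends its sweep.
CNTK::MinibatchData WrapValueSketch(const CNTK::ValuePtr& value)
{
    return CNTK::MinibatchData(value, /*numSamples =*/ 32, /*sweepEnd =*/ true);
}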
///
@@ -3868,14 +3971,22 @@
/// In case the size is specified in terms of both #sequences and #samples, the smaller of the two is taken.
/// An empty map is returned when the MinibatchSource has no more data to return.
///
virtual const std::unordered_map<StreamInformation, MinibatchData>& GetNextMinibatch(size_t minibatchSizeInSamples,
CNTK_API const std::unordered_map<StreamInformation, MinibatchData>& GetNextMinibatch(
size_t minibatchSizeInSequences,
const DeviceDescriptor& device = DeviceDescriptor::UseDefaultDevice()) = 0;
size_t minibatchSizeInSamples,
const DeviceDescriptor& device = DeviceDescriptor::UseDefaultDevice());
///
/// Returns whether the MinibatchSource is running in a distributed manner
/// Same as above, but allows specifying a partition of the data in a distributed environment.
/// Depending on the number of workers, the data is split into different partitions,
/// and depending on the worker rank, only a particular partition is read.
///
virtual bool IsDistributed() const = 0;
CNTK_API virtual const std::unordered_map<StreamInformation, MinibatchData>& GetNextMinibatch(
size_t minibatchSizeInSequences,
size_t minibatchSizeInSamples,
size_t numberOfWorkers,
size_t workerRank,
const DeviceDescriptor& device = DeviceDescriptor::UseDefaultDevice()) = 0;
///
/// Destruct this MinibatchSource.
@@ -3942,7 +4053,7 @@
///
/// Instantiate the CNTK built-in text format minibatch source
///
inline MinibatchSourcePtr TextFormatMinibatchSource(const std::wstring& dataFilePath, const std::vector<StreamConfiguration>& streamConfigs, size_t epochSize = MinibatchSource::InfinitelyRepeat, bool randomize = true, size_t distributedAfterSampleCount = MinibatchSource::InfiniteSamples)
inline MinibatchSourcePtr TextFormatMinibatchSource(const std::wstring& dataFilePath, const std::vector<StreamConfiguration>& streamConfigs, size_t epochSize = MinibatchSource::InfinitelyRepeat, bool randomize = true)
{
::CNTK::Dictionary minibatchSourceConfiguration;
minibatchSourceConfiguration[L"epochSize"] = epochSize;
@@ -3973,10 +4084,6 @@
deserializerConfiguration[L"input"] = inputStreamsConfig;
minibatchSourceConfiguration[L"deserializers"] = std::vector<::CNTK::DictionaryValue>({ deserializerConfiguration });
//TODO: change all these dictionary names to string constants
minibatchSourceConfiguration[L"distributedAfterSampleCount"] = distributedAfterSampleCount;
return CreateCompositeMinibatchSource(minibatchSourceConfiguration);
}
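A usage sketch of the helper above; the file name and stream dimensions are hypothetical, and note that the distributedAfterSampleCount argument is gone from this signature:
// Dense 784-dim features plus sparse 10-dim labels from a CNTK text format file.
inline CNTK::MinibatchSourcePtr MakeTextSourceSketch()
{
    return CNTK::TextFormatMinibatchSource(
        L"Train-28x28_cntk_text.txt",
        { CNTK::StreamConfiguration(L"features", 784),
          CNTK::StreamConfiguration(L"labels", 10, /*isSparse =*/ true) });
}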
@@ -4106,6 +4213,101 @@
/// Distributed communicator that allows quantized aggregations.
///
CNTK_API QuantizedDistributedCommunicatorPtr QuantizedMPICommunicator(bool zeroThresholdFor1Bit, bool useQuantizationForSelfStripe, size_t numQuantizationBits);
///
/// Base abstract class that represents a training session.
/// Derived classes can redefine different aspects of training, overriding base virtual methods (GetMinibatchSize, OnMinibatchStart, etc.)
///
class TrainingSession
{
public:
CNTK_API TrainingSession(
const MinibatchSourcePtr& trainingSource,
const TrainerPtr& trainer,
const std::unordered_map<Variable, StreamInformation>& modelInputToMinibatchSourceStream,
const TrainingParameterPerUnitSchedule<size_t, TrainingParameterSchedule<size_t>::UnitType::Sample>& minibatchSizeSchedule,
size_t checkpointFrequencyInSamples,
const std::wstring& checkPointFileName);
///
/// Runs the session.
///
CNTK_API void Train(const DeviceDescriptor& computeDevice);
///
/// Restores a session from a checkpoint.
///
CNTK_API void RestoreFromCheckpoint(const std::wstring& checkpointFileName);
CNTK_API virtual ~TrainingSession() {}
public:
///
/// Optionally overridable, called each time before a new minibatch is requested from the minibatch source
/// during training (from the Train method).
///
virtual size_t GetMinibatchSize()
{
return m_minibatchSizeSchedule[Trainer()->TotalNumberOfSamplesSeen()];
}
///
/// Optionally overridable callback that is invoked before each minibatch.
///
CNTK_API virtual void OnMinibatchStart() {};
///
/// Optionally overridable callback that is invoked after each minibatch.
///
CNTK_API virtual void OnMinibatchEnd() {};
///
/// Optionally overridable callback that is invoked before each checkpoint.
///
CNTK_API virtual void OnCheckpointStart() {};
///
/// Optionally overridable callback that is invoked after each checkpoint.
///
CNTK_API virtual void OnCheckpointEnd() {};
protected:
///
/// Accessors.
///
TrainerPtr Trainer() const { return m_trainer; }
MinibatchSourcePtr TrainingMinibatchSource() const { return m_trainingSource; }
private:
/// Disallow copy and move construction and assignment
TrainingSession(const TrainingSession&) = delete; TrainingSession& operator=(const TrainingSession&) = delete; TrainingSession& operator=(TrainingSession&&) = delete; TrainingSession(TrainingSession&&) = delete;
void SaveCheckpoint();
static const std::wstring s_checkpointIndex;
static const std::wstring s_trainingMinibatchSource;
const size_t m_checkpointFrequencyinSamples;
const std::wstring m_checkPointFileName;
size_t m_currentCheckpointIndex;
MinibatchSourcePtr m_trainingSource;
TrainerPtr m_trainer;
std::unordered_map<Variable, StreamInformation> m_modelInputToMinibatchSourceStream;
size_t m_parallelAfterSamples;
size_t m_workerRank;
size_t m_numberOfWorkers;
const MinibatchSizeSchedule m_minibatchSizeSchedule;
};
CNTK_API TrainingSessionPtr CreateBasicTrainingSession(
const MinibatchSourcePtr& trainingSource,
const TrainerPtr& trainer,
const std::unordered_map<Variable, StreamInformation>& modelInputToMinibatchSourceStream,
const TrainingParameterPerUnitSchedule<size_t, TrainingParameterSchedule<size_t>::UnitType::Sample>& minibatchSizeSchedule,
size_t checkpointFrequencyinSamples,
const std::wstring& checkPointFileName);
}
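A sketch of driving the new session API end to end; the minibatch size, checkpoint settings, and the input-to-stream mapping are illustrative and assumed to be prepared elsewhere:
#include "CNTKLibrary.h"
using namespace CNTK;
void RunSessionSketch(const MinibatchSourcePtr& source, const TrainerPtr& trainer,
                      const std::unordered_map<Variable, StreamInformation>& inputToStream)
{
    auto session = CreateBasicTrainingSession(
        source, trainer, inputToStream,
        MinibatchSizeSchedule(64),                  // constant 64-sample minibatches
        /*checkpointFrequencyInSamples =*/ 100000,  // checkpoint every 100k samples
        L"model.checkpoint");
    session->Train(DeviceDescriptor::UseDefaultDevice());
}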


@@ -160,6 +160,9 @@ namespace CNTK
enum class PrimitiveOpType : unsigned int;
enum class DataType : unsigned int;
struct MinibatchInfo;
struct MinibatchData;
class Serializer;
// Similar to make_shared except that it associates a custom deleter with the shared_ptr to ensure
@@ -208,6 +211,12 @@ namespace CNTK
struct VariableFields;
typedef std::shared_ptr<VariableFields> VariableFieldsPtr;
class TrainingSession;
typedef std::shared_ptr<TrainingSession> TrainingSessionPtr;
class Trainer;
typedef std::shared_ptr<Trainer> TrainerPtr;
namespace Internal
{
CNTK_API FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name = L"");


@@ -284,6 +284,10 @@ namespace CNTK
opType = PrimitiveOpType::CrossEntropyWithSoftmax;
else if (node->OperationName() == OperationNameOf(ClassificationErrorNode))
opType = PrimitiveOpType::ClassificationError;
else if (node->OperationName() == OperationNameOf(LambdaRankNode))
opType = PrimitiveOpType::LambdaRank;
else if (node->OperationName() == OperationNameOf(NDCG1EvalNode))
opType = PrimitiveOpType::NDCG;
else if (node->OperationName() == OperationNameOf(ReduceElementsNode))
{
auto reduceElementsNode = node->As<ReduceElementsNode<ElementType>>();


@@ -176,6 +176,7 @@
<PrecompiledHeader>Create</PrecompiledHeader>
</ClCompile>
<ClCompile Include="Trainer.cpp" />
<ClCompile Include="TrainingSession.cpp" />
<ClCompile Include="Utils.cpp" />
<ClCompile Include="Value.cpp" />
<ClCompile Include="Variable.cpp" />


@@ -24,6 +24,7 @@
<ClCompile Include="PrimitiveFunction.cpp" />
<ClCompile Include="DistributedLearnerBase.cpp" />
<ClCompile Include="DataParallelDistributedLearner.cpp" />
<ClCompile Include="TrainingSession.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="stdafx.h" />
@@ -51,6 +52,7 @@
<ClInclude Include="DataParallelDistributedLearner.h" />
<ClInclude Include="BlockFunction.h" />
<ClInclude Include="Variable.h" />
<ClInclude Include="TrainingSession.h" />
</ItemGroup>
<ItemGroup>
<Filter Include="API">
@@ -65,4 +67,4 @@
<Filter>proto</Filter>
</None>
</ItemGroup>
</Project>
</Project>


@@ -528,4 +528,16 @@ namespace CNTK
{
return Microsoft::MSR::CNTK::CPUMatrix<float>::GetMaxNumThreads();
}
static std::atomic<bool> s_defaultUnitGainValue(true);
bool DefaultUnitGainValue()
{
return s_defaultUnitGainValue;
}
void SetDefaultUnitGainValue(bool value)
{
s_defaultUnitGainValue.store(value);
}
}


@@ -88,7 +88,7 @@ namespace CNTK
// For block functions we need to recursively traverse the underlying composite
if (function->IsBlock())
PreorderTraverseFunctions(function->BlockComposite()->RootFunction(), SerializationTraversalFunc);
PreorderTraverseFunctions(function->BlockRoot(), SerializationTraversalFunc);
};
PreorderTraverseFunctions(RootFunction(), SerializationTraversalFunc);
@@ -663,6 +663,12 @@ namespace CNTK
case PrimitiveOpType::ClassificationError:
computationNodePtr = New<ClassificationErrorNode<ElementType>>(network->GetDeviceId(), internalNodeName);
break;
case PrimitiveOpType::LambdaRank:
computationNodePtr = New<LambdaRankNode<ElementType>>(network->GetDeviceId(), internalNodeName);
break;
case PrimitiveOpType::NDCG:
computationNodePtr = New<NDCG1EvalNode<ElementType>>(network->GetDeviceId(), internalNodeName);
break;
case PrimitiveOpType::PastValue:
case PrimitiveOpType::FutureValue:
{
@@ -909,7 +915,7 @@ namespace CNTK
for (auto compositeArgument : compositeArguments)
m_variableToNodeMap[compositeArgument] = m_variableToNodeMap.at(compositeArgument.BlockFunctionVariableMapping());
PreorderTraverseFunctions(function->BlockComposite()->RootFunction(), PatchBlockArgumentsMapping);
PreorderTraverseFunctions(function->BlockRoot(), PatchBlockArgumentsMapping);
}
};
PreorderTraverseFunctions(rootFunction, PatchBlockArgumentsMapping);
@@ -921,15 +927,11 @@ namespace CNTK
return (m_isVariableRootMap[outputVar] && (!ownerBlockFunc || IsVariableRoot(ownerBlockFunc->CompositeOutputsMap().at(outputVar))));
};
// If any of the function outputs is not a root node, we need to explicitly add it to the 'output' group of the ComputationNetwork
for (auto rootOutput : rootFunctionOutputs)
{
if (!IsVariableRoot(rootOutput))
m_computationNetwork->AddToNodeGroup(L"output", m_variableToNodeMap.at(rootOutput));
}
// If any of the requested outputs is not a root node, we need to explicitly add it to the 'output' group of the ComputationNetwork
for (auto output : outputs)
// If any of the function or requested outputs is not a root node, we need to explicitly
// add it to the 'output' group of the ComputationNetwork
std::unordered_set<Variable> networkOutputs(outputs);
networkOutputs.insert(rootFunctionOutputs.begin(), rootFunctionOutputs.end());
for (auto output : networkOutputs)
{
if (!IsVariableRoot(output))
{
@@ -1011,34 +1013,28 @@ namespace CNTK
if (!m_networkMatricesAllocated && allocateNetworkMatrices)
{
ComputationNodeBasePtr backpropRootNode;
if (!m_currentBackpropRoots.empty())
backpropRootNode = m_variableToNodeMap.at(*m_currentBackpropRoots.begin());
// Now recursively traverse the network in a top-down fashion
auto rootFunction = RootFunction();
auto rootFunctionOutputs = rootFunction->Outputs();
std::vector<ComputationNodeBasePtr> forwardRootNodes;
for (auto rootOutput : rootFunctionOutputs)
{
auto currentRootNode = m_variableToNodeMap.at(rootOutput);
forwardRootNodes.push_back(currentRootNode);
if (m_currentBackpropRoots.find(rootOutput) != m_currentBackpropRoots.end())
backpropRootNode = currentRootNode;
}
forwardRootNodes.push_back(m_variableToNodeMap.at(rootOutput));
std::vector<ComputationNodeBasePtr> forwardOutputNodes;
for (auto output : outputs)
{
auto currentOutputNode = m_variableToNodeMap.at(output);
forwardOutputNodes.push_back(currentOutputNode);
// Select the root node for backpropagation
if (m_currentBackpropRoots.find(output) != m_currentBackpropRoots.end())
backpropRootNode = currentOutputNode;
}
forwardOutputNodes.push_back(m_variableToNodeMap.at(output));
m_computationNetwork->AllocateAllMatrices(forwardRootNodes, forwardOutputNodes, backpropRootNode);
m_networkMatricesAllocated = allocateNetworkMatrices;
std::unordered_set<ComputationNodeBasePtr> allNetworkRoots = { backpropRootNode };
allNetworkRoots.insert(forwardRootNodes.begin(), forwardRootNodes.end());
allNetworkRoots.insert(forwardOutputNodes.begin(), forwardOutputNodes.end());
m_allNetworkRootsInGlobalEvalOrder = m_computationNetwork->SortByGlobalEvalOrder(allNetworkRoots);
m_currentOutputs = outputs;
m_currentOutputs.insert(rootFunctionOutputs.begin(), rootFunctionOutputs.end());
m_currentOutputs.insert(m_currentBackpropRoots.begin(), m_currentBackpropRoots.end());
@@ -1348,7 +1344,7 @@ namespace CNTK
PopulateNetworkInputs(arguments);
// Dropout nodes have an implicit input in the form of the random mask that is applied to its explicit input
// This mask is regerated every minibatch and hence dropout nodes with a non-zero dropout rate must me marked outdated
// This mask is regenerated every minibatch and hence dropout nodes with a non-zero dropout rate must be marked outdated
// w.r.t. inputs to force evaluation in each minibatch
list<ComputationNodeBasePtr> dropoutNodes = m_computationNetwork->GetNodesWithType(OperationNameOf(DropoutNode));
for (auto& nodeIter : dropoutNodes)
@@ -1382,7 +1378,16 @@ namespace CNTK
ScopedNetworkOperationMode modeGuard(m_computationNetwork, outputsToRetainBackwardStateFor.empty() ? NetworkOperationMode::inferring : NetworkOperationMode::training);
m_computationNetwork->ForwardProp(outputsToEvaluate);
// We may have to include additional nodes in the ForwardProp to align with how the memory sharing structure is setup
// We need to include all roots that lie earlier in the global eval order than the actual outputs we are interested
// in evaluating.
// TODO: This may incur additional compute costs in some rare scenarios. We need to come up with a better way to handle this.
outputsToEvaluate = m_computationNetwork->SortByGlobalEvalOrder(outputsToEvaluate);
auto lastOutputInEvalOrder = outputsToEvaluate.back();
auto iterEndRootInEvalOrder = std::find(m_allNetworkRootsInGlobalEvalOrder.begin(), m_allNetworkRootsInGlobalEvalOrder.end(), lastOutputInEvalOrder) + 1;
auto augmentedOutputsToEvaluate = std::vector<ComputationNodeBasePtr>(m_allNetworkRootsInGlobalEvalOrder.begin(), iterEndRootInEvalOrder);
m_computationNetwork->ForwardProp(augmentedOutputsToEvaluate);
GetNetworkOutputs(outputs);


@@ -300,6 +300,8 @@ namespace CNTK
bool m_networkMatricesAllocated;
std::vector<Microsoft::MSR::CNTK::ComputationNodeBasePtr> m_allNetworkRootsInGlobalEvalOrder;
std::unordered_map<Parameter, size_t> m_lastRecordedParameterValueTimeStamps;
// Version history:


@@ -84,7 +84,7 @@ namespace CNTK
break;
for (auto& currentStreamKV : computedMeanAndInvStdDevs)
CompositeFunction::PopulateComputationNodeValue<float>({ streamToDummyInputVariableMap[currentStreamKV.first], minibatchData[currentStreamKV.first].m_data }, streamToInputNodeMap[currentStreamKV.first], layoutsPopulated);
CompositeFunction::PopulateComputationNodeValue<float>({ streamToDummyInputVariableMap[currentStreamKV.first], minibatchData[currentStreamKV.first].data }, streamToInputNodeMap[currentStreamKV.first], layoutsPopulated);
ComputationNetwork::BumpEvalTimeStamp(allInputNodes);


@@ -147,6 +147,6 @@ namespace CNTK
if (info.IsEmpty())
return false;
return m_learner->Update(gradientValues, info.numberOfSamples);
return m_learner->Update(gradientValues, info.numberOfSamples, info.atEndOfSweep);
}
}


@@ -10,8 +10,7 @@
namespace CNTK
{
DistributedLearnerBase::DistributedLearnerBase(DistributedCommunicatorPtr communicator, LearnerPtr learner, size_t distributeAfterSamples)
: DistributedLearner(communicator, learner),
m_distributeAfterSamples(distributeAfterSamples)
: DistributedLearner(communicator, learner, distributeAfterSamples)
{
if (!m_learner)
InvalidArgument("Learner is not allowed to be null.");


@@ -25,8 +25,6 @@ namespace CNTK
static void PrepaireZeroGradients(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, MinibatchInfo& info);
static void ConvertToOrdered(const std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, std::vector<std::pair<Parameter, NDArrayViewPtr>>& result);
const size_t m_distributeAfterSamples;
std::vector<std::pair<Parameter, NDArrayViewPtr>> m_gradientBuffer;
std::vector<Parameter> m_parameters;


@@ -29,6 +29,9 @@ namespace CNTK
: Function(inputs, outputs, std::move(functionConfig), nullptr, name, uid)
{}
Function::Function(const std::vector<Variable>& inputs, const std::vector<Variable>& outputs, const std::wstring& name, const std::wstring& uid) :
Function(inputs, outputs, Dictionary(), name, uid) {}
Function::Function(const std::vector<Variable>& inputs, const std::vector<Variable>& outputs, Dictionary&& functionConfig, const FunctionPtr& rootFunction, const std::wstring& name, const std::wstring& uid)
: m_rootFunction(rootFunction), m_name(name != L"" ? name : uid), m_uid(uid), m_attributes(std::move(functionConfig))
{
@@ -121,13 +124,13 @@ namespace CNTK
return (blockFunction != nullptr);
}
FunctionPtr Function::BlockComposite() const
FunctionPtr Function::BlockRoot() const
{
if (!IsBlock())
InvalidArgument("Function::BlockComposite() cannot be called for a Function which is not a block");
InvalidArgument("Function::BlockRoot() cannot be called for a Function which is not a block");
auto blockFunction = dynamic_cast<const BlockFunction*>(this);
return blockFunction->Composite();
return blockFunction->Composite()->RootFunction();
}
std::shared_ptr<std::vector<std::pair<Variable, Variable>>> Function::BlockArgumentsMappingImpl() const
@@ -557,9 +560,10 @@ namespace CNTK
clonedFunction = MakeSharedObject<PrimitiveFunction>(primitiveFunction->OpType(), inputs, std::move(attributesCopy), primitiveFunction->Name());
else
{
auto clonedComposite = primitiveFunction->BlockComposite()->Clone(parameterCloneMethod, replacements);
auto cloneeComposite = dynamic_cast<const BlockFunction*>(primitiveFunction)->Composite();
auto clonedComposite = cloneeComposite->Clone(parameterCloneMethod, replacements);
auto cloneeBlockCompositeArguments = primitiveFunction->BlockComposite()->Arguments();
auto cloneeBlockCompositeArguments = cloneeComposite->Arguments();
auto clonedBlockCompositeArguments = clonedComposite->Arguments();
std::unordered_map<Variable, Variable> cloneeToClonedBlockCompositeArgumentsMap;
for (size_t i = 0; i < cloneeBlockCompositeArguments.size(); ++i)
@ -954,6 +958,18 @@ namespace CNTK
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Logistic, operands, Dictionary(), name), name);
}
FunctionPtr LambdaRank(const Variable& prediction, const Variable& gains, const Variable& groupId, const std::wstring& name)
{
std::vector<Variable> operands = { prediction, gains, groupId };
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::LambdaRank, operands, Dictionary(), name), name);
}
FunctionPtr NDCGAt1(const Variable& prediction, const Variable& gains, const Variable& groupId, const std::wstring& name)
{
std::vector<Variable> operands = { prediction, gains, groupId };
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::NDCG, operands, Dictionary(), name), name);
}
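Both ranking criteria follow the established factory pattern above: wrap a PrimitiveFunction in a CompositeFunction. A minimal caller-side sketch, with illustrative shapes and names (not part of this change):
Variable prediction = InputVariable({ 1 }, DataType::Float, L"prediction");
Variable gains      = InputVariable({ 1 }, DataType::Float, L"gains");
Variable groupId    = InputVariable({ 1 }, DataType::Float, L"group");
// LambdaRank produces a trainable ranking loss; NDCGAt1 evaluates ranking quality at position 1.
FunctionPtr rankingLoss   = LambdaRank(prediction, gains, groupId, L"rankingLoss");
FunctionPtr rankingMetric = NDCGAt1(prediction, gains, groupId, L"ndcgAt1");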
FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name)
{
auto predictionPlaceholder = PlaceholderVariable(L"prediction");

View file

@ -9,7 +9,7 @@
#include "Utils.h"
#include "Serialization.h"
#define UPDATE_FUNCTION \
#define DISPATCH_TO_TYPED_UPDATE_FUNCTION \
switch (smoothedGradientValue->GetDataType()) \
{ \
case DataType::Float: \
@ -22,6 +22,11 @@
NOT_IMPLEMENTED; \
}
#define GET_WRITABLE_MATRICES \
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue); \
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue); \
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameter.Value());
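For reference, at an instantiation site such as LearnerMomentumSGD::Update&lt;float&gt; the macro expands to exactly the three writable matrix handles every typed update needs:
const auto& smoothedGradientMatrix = GetWritableMatrix<float>(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<float>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<float>(parameter.Value());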
using namespace Microsoft::MSR::CNTK;
using namespace std;
@ -184,15 +189,13 @@ namespace CNTK
LogicError("Learner parameters contain duplicates.");
}
for (const auto& parameter : parameters)
if (allocateSmoothGradients)
{
if (!allocateSmoothGradients)
for (const auto& parameter : parameters)
{
continue;
NDArrayViewPtr view = AllocateNDArrayView(parameter, parameter.Shape());
m_smoothedGradientValues.emplace(parameter, view);
}
NDArrayViewPtr view = AllocateNDArrayView(parameter, parameter.Shape());
m_smoothedGradientValues.insert(make_pair(parameter, view));
}
}
@ -222,7 +225,7 @@ namespace CNTK
}
}
/*virtual*/ bool LearnerBase::Update(unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount) /*override*/
/*virtual*/ bool LearnerBase::Update(unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount, bool sweepEnd) /*override*/
{
if (LearningRate(trainingSampleCount) == 0.0)
{
@ -230,7 +233,10 @@ namespace CNTK
}
// make sure trainingSampleCount is a valid value
assert(trainingSampleCount > 0);
if (trainingSampleCount == 0)
{
InvalidArgument("Learner::Update(): cannot perform an update with an empty minibatch.");
}
for (const auto& parameter : Parameters())
{
@ -256,7 +262,7 @@ namespace CNTK
Print(gradientValue, "Gradient Update");
Print(smoothedGradientValue, "Smoothed Gradient Input");
#endif
UPDATE_FUNCTION;
DISPATCH_TO_TYPED_UPDATE_FUNCTION;
#if DUMPOUTPUT
Print(parameter.Value(), "Parameter Update");
@ -270,12 +276,17 @@ namespace CNTK
}
m_sampleCount += trainingSampleCount;
m_minibatchCount++;
// TODO: sweep count also needs to be updated.
if (sweepEnd)
{
m_sweepCount++;
}
return true;
}
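Caller-side, the new contract is: the reader reports whether the minibatch touched a sweep boundary, and the learner folds that into m_sweepCount. A sketch, assuming the gradients map and sample count come from the surrounding training loop:
// 'minibatchData' is the map returned by the minibatch source for this minibatch.
bool sweepEnd = minibatchData.begin()->second.sweepEnd; // sweep-end flag reported by the reader
learner->Update(gradients, numSamplesInMinibatch, sweepEnd);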
template <typename ElementType>
void LearnerBase::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
void LearnerBase::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
const auto& parameterValue = parameter.Value();
PreProcess<ElementType>(parameterValue, gradientValue, trainingSampleCount);
@ -364,27 +375,39 @@ namespace CNTK
}
}
/*virtual*/ void LearnerSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
LearnerSGD::LearnerSGD(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
AdditionalLearningOptions additionalOptions,
bool allocateSmoothGradients)
: LearnerBase(parameters, learningRateSchedule, additionalOptions, allocateSmoothGradients)
{
UPDATE_FUNCTION;
if (!allocateSmoothGradients)
{
// Vanilla SGD does not need the smoothed gradients per se;
// insert dummy NDArrayViews instead.
for (const auto& parameter : parameters)
{
m_smoothedGradientValues.emplace(parameter, AllocateNDArrayView(parameter, {}));
}
}
}
/*virtual*/ void LearnerSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
{
DISPATCH_TO_TYPED_UPDATE_FUNCTION;
}
template <typename ElementType>
void LearnerSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
void LearnerSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
const auto& parameterValue = parameter.Value();
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
UNUSED(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameter.Value());
const auto learningRate = ElementType(LearningRate(trainingSampleCount));
const auto momentum = ElementType(MomentumValueForMB(trainingSampleCount));
// TODO: break up NormalGrad into 3 different functions, each with its own set of parameters
// (one for vanilla SGD, one for momentum SGD, and one for NAG).
// Also, come up with a better name for NormalGrad (Default? Regular? Plain?).
smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix,
learningRate, momentum, UseNesterovMomentum());
parameterMatrix->SGDUpdate(*gradientMatrix, learningRate);
}
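SGDUpdate replaces the old NormalGrad call for the momentum-free case. As a reference sketch (not the actual Matrix kernel), the step it performs is the plain per-element SGD rule:
// parameter <- parameter - learningRate * gradient, applied element-wise
template <typename ElemType>
void SgdUpdateReference(ElemType* parameter, const ElemType* gradient, size_t n, ElemType learningRate)
{
    for (size_t i = 0; i < n; ++i)
        parameter[i] -= learningRate * gradient[i];
}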
double LearnerMomentumSGD::MomentumValueForMB(const MomentumSchedule& schedule, size_t minibatchSize) const
@ -397,6 +420,44 @@ namespace CNTK
return std::pow(currentMomentum, minibatchSize);
}
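When the momentum schedule is specified per sample, raising the per-sample value to the minibatch size gives the equivalent per-minibatch momentum: for example, a per-sample momentum of 0.99 compounds to 0.99^64 ≈ 0.53 for a 64-sample minibatch.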
/*virtual*/ void LearnerMomentumSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
{
DISPATCH_TO_TYPED_UPDATE_FUNCTION;
}
template <typename ElementType>
void LearnerMomentumSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
GET_WRITABLE_MATRICES;
const auto learningRate = ElementType(LearningRate(trainingSampleCount));
const auto momentum = ElementType(MomentumValueForMB(trainingSampleCount));
parameterMatrix->MomentumSGDUpdate(*gradientMatrix, *smoothedGradientMatrix,
learningRate, momentum, UseUnitGainMomentum());
}
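The unitGain flag selects between the two common momentum formulations. A sketch of the semantics the flag toggles (lr = learning rate, m = momentum, v = smoothed gradient, p = parameter):
// classic momentum:    v <- m * v + g;            p <- p - lr * v
// unit-gain momentum:  v <- m * v + (1 - m) * g;  p <- p - lr * v
// With unit gain, v stays a convex combination of past gradients, so the
// effective step size does not blow up as m approaches 1.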
/*virtual*/ void LearnerNesterov::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
{
DISPATCH_TO_TYPED_UPDATE_FUNCTION;
}
template <typename ElementType>
void LearnerNesterov::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
GET_WRITABLE_MATRICES;
const auto learningRate = ElementType(LearningRate(trainingSampleCount));
const auto momentum = ElementType(MomentumValueForMB(trainingSampleCount));
parameterMatrix->NesterovAcceleratedMomentumSGDUpdate(*gradientMatrix, *smoothedGradientMatrix,
learningRate, momentum, UseUnitGainMomentum());
}
LearnerAdaGrad::LearnerAdaGrad(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
bool needAveMultiplier,
@ -416,24 +477,21 @@ namespace CNTK
const auto shape = GetMatrixShape(parameter);
NDArrayViewPtr view = AllocateNDArrayView(parameter, { shape[0], factor * shape[1] });
m_smoothedGradientValues.insert(make_pair(parameter, view));
m_smoothedGradientValues.emplace(parameter, view);
}
}
/*virtual*/ void LearnerAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
/*virtual*/ void LearnerAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
{
UPDATE_FUNCTION;
DISPATCH_TO_TYPED_UPDATE_FUNCTION;
}
template <typename ElementType>
void LearnerAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
void LearnerAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
UNUSED(trainingSampleCount);
const auto& parameterValue = parameter.Value();
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
GET_WRITABLE_MATRICES
const auto learningRate = LearningRate(trainingSampleCount);
@ -446,32 +504,33 @@ namespace CNTK
LearnerFSAdaGrad::LearnerFSAdaGrad(const vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain,
const MomentumSchedule& varianceMomentumSchedule,
AdditionalLearningOptions additionalOptions)
: LearnerMomentumSGD(parameters, learningRateSchedule, momentumSchedule, additionalOptions, /*allocateSmoothGradients*/ false),
: LearnerMomentumSGD(parameters, learningRateSchedule, momentumSchedule,
unitGain, additionalOptions, /*allocateSmoothGradients*/ false),
m_varianceMomentumSchedule(varianceMomentumSchedule)
{
for (const auto& parameter : parameters)
{
const auto shape = GetMatrixShape(parameter);
NDArrayViewPtr view = AllocateNDArrayView(parameter, { shape[0], 2 * shape[1] });
m_smoothedGradientValues.insert(make_pair(parameter, view));
m_smoothedCounts.insert(make_pair(parameter, 0.0));
m_smoothedGradientValues.emplace(parameter, view);
m_smoothedCounts.emplace(parameter, 0.0);
}
}
/*virtual*/ void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
/*virtual*/ void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
{
UPDATE_FUNCTION;
DISPATCH_TO_TYPED_UPDATE_FUNCTION;
}
template <typename ElementType>
void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
const auto& parameterValue = parameter.Value();
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
GET_WRITABLE_MATRICES;
const auto learningRate = LearningRate(trainingSampleCount);
const auto momentum = MomentumValueForMB(trainingSampleCount);
@ -480,7 +539,8 @@ namespace CNTK
double& smoothedCount = m_smoothedCounts.at(parameter);
smoothedGradientMatrix->FSAdagradUpdate(trainingSampleCount, *gradientMatrix, *parameterMatrix, smoothedCount, learningRate, s_targetAdagradAvDenom, momentum, varMomentum);
smoothedGradientMatrix->FSAdagradUpdate(trainingSampleCount, *gradientMatrix, *parameterMatrix, smoothedCount, learningRate,
s_targetAdagradAvDenom, momentum, varMomentum, UseUnitGainMomentum());
}
LearnerRMSProp::LearnerRMSProp(const vector<Parameter>& parameters,
@ -503,24 +563,21 @@ namespace CNTK
const auto shape = GetMatrixShape(parameter);
NDArrayViewPtr view = AllocateNDArrayView(parameter, { shape[0], factor * shape[1] });
m_smoothedGradientValues.insert(make_pair(parameter, view));
m_smoothedGradientValues.emplace(parameter, view);
}
}
/*virtual*/ void LearnerRMSProp::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
/*virtual*/ void LearnerRMSProp::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
{
UPDATE_FUNCTION;
DISPATCH_TO_TYPED_UPDATE_FUNCTION;
}
template <typename ElementType>
void LearnerRMSProp::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
void LearnerRMSProp::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue,
const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
UNUSED(trainingSampleCount);
const auto& parameterValue = parameter.Value();
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
GET_WRITABLE_MATRICES;
const auto learningRate = LearningRate(trainingSampleCount);
@ -548,22 +605,25 @@ namespace CNTK
LearnerPtr MomentumSGDLearner(const vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain,
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
{
return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRateSchedule, momentumSchedule, additionalOptions);
return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRateSchedule, momentumSchedule, unitGain, additionalOptions);
}
LearnerPtr NesterovLearner(const vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain,
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
{
return MakeSharedObject<LearnerNesterov>(parameters, learningRateSchedule, momentumSchedule, additionalOptions);
return MakeSharedObject<LearnerNesterov>(parameters, learningRateSchedule, momentumSchedule, unitGain, additionalOptions);
}
LearnerPtr AdamLearner(const vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain,
const MomentumSchedule& varianceMomentumSchedule, /*= MomentumAsTimeConstantSchedulePerSample(2 * 3600 * 100)*/
bool lowMemory, /*= true*/
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
@ -572,7 +632,7 @@ namespace CNTK
{
LogicError("AdamLearner: only the low-memory variant is supported at the moment.");
}
return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRateSchedule, momentumSchedule, varianceMomentumSchedule, additionalOptions);
return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRateSchedule, momentumSchedule, unitGain, varianceMomentumSchedule, additionalOptions);
}
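A usage sketch for the updated factory; the schedule values are illustrative, and the trailing parameters keep their declared defaults:
auto lr  = LearningRatePerSampleSchedule(0.005);
auto mom = MomentumAsTimeConstantSchedule(700);
// unitGain is now an explicit argument rather than an implicit behavior:
auto adam = AdamLearner(parameters, lr, mom, /*unitGain=*/ true);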
LearnerPtr AdaGradLearner(const vector<Parameter>& parameters,

View file

@ -17,7 +17,7 @@ namespace CNTK
class LearnerBase : public Learner
{
public:
virtual bool Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount) override final;
virtual bool Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount, bool sweepEnd = false) override final;
virtual Dictionary CreateCheckpoint() override final;
@ -108,26 +108,13 @@ namespace CNTK
};
// Vanilla gradient descent optimization algorithm.
class LearnerSGD : public LearnerBase
class LearnerSGD final : public LearnerBase
{
public:
LearnerSGD(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
LearnerSGD(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
AdditionalLearningOptions additionalOptions,
bool allocateSmoothGradients = true)
: LearnerBase(parameters, learningRateSchedule, additionalOptions, allocateSmoothGradients)
{}
// TODO: get rid of this as soon as NormalGrad is refactored.
virtual double MomentumValueForMB(size_t /*minibatchSize*/) const
{
return 0.0;
}
virtual bool UseNesterovMomentum() const
{
return false;
}
bool allocateSmoothGradients = false);
protected:
@ -138,30 +125,45 @@ namespace CNTK
};
// SGD optimization with momentum.
class LearnerMomentumSGD : public LearnerSGD
class LearnerMomentumSGD : public LearnerBase
{
public:
LearnerMomentumSGD(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain,
AdditionalLearningOptions additionalOptions,
bool allocateSmoothGradients = true)
: LearnerSGD(parameters, learningRateSchedule, additionalOptions, allocateSmoothGradients),
m_momentumSchedule(momentumSchedule)
: LearnerBase(parameters, learningRateSchedule, additionalOptions, allocateSmoothGradients),
m_momentumSchedule(momentumSchedule),
m_unitGain(unitGain)
{ }
// returns current per-minibatch momentum value.
virtual double MomentumValueForMB(size_t minibatchSize) const override
virtual double MomentumValueForMB(size_t minibatchSize) const
{
return MomentumValueForMB(m_momentumSchedule, minibatchSize);
}
protected:
virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const override;
template <typename ElementType>
void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
// returns current per-minibatch momentum value from the provided schedule.
double MomentumValueForMB(const MomentumSchedule& schedule, size_t minibatchSize) const;
// Return true if the update should use classic momentum and
// false if the unit-gain momentum should be used instead.
bool UseUnitGainMomentum() const
{
return m_unitGain;
}
private:
MomentumSchedule m_momentumSchedule;
bool m_unitGain;
};
// Nesterov's accelerated gradient descent.
@ -172,14 +174,16 @@ namespace CNTK
LearnerNesterov(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain,
AdditionalLearningOptions additionalOptions)
: LearnerMomentumSGD(parameters, learningRateSchedule, momentumSchedule, additionalOptions, /*allocateSmoothGradients*/ true)
: LearnerMomentumSGD(parameters, learningRateSchedule, momentumSchedule, unitGain, additionalOptions, /*allocateSmoothGradients*/ true)
{}
virtual bool UseNesterovMomentum() const override
{
return true;
}
protected:
virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const override;
template <typename ElementType>
void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
};
class LearnerAdaGrad : public LearnerBase
@ -206,6 +210,7 @@ namespace CNTK
LearnerFSAdaGrad(const std::vector<Parameter>& parameters,
const LearningRateSchedule& learningRateSchedule,
const MomentumSchedule& momentumSchedule,
bool unitGain,
const MomentumSchedule& varianceMomentumSchedule,
AdditionalLearningOptions additionalOptions);

View file

@ -23,6 +23,11 @@ namespace CNTK
return GetNextMinibatch(0, minibatchSizeInSamples, device);
}
const std::unordered_map<StreamInformation, MinibatchData>& MinibatchSource::GetNextMinibatch(size_t minibatchSizeInSequences, size_t minibatchSizeInSamples, const DeviceDescriptor& device /*= DeviceDescriptor::UseDefaultDevice()*/)
{
return GetNextMinibatch(minibatchSizeInSequences, minibatchSizeInSamples, 1, 0, device);
}
const StreamInformation& MinibatchSource::StreamInfo(const std::wstring& streamName)
{
std::unordered_set<const StreamInformation*> matchingStreamInfos;
@ -68,18 +73,15 @@ namespace CNTK
}
/*static*/ const std::wstring CompositeMinibatchSource::PositionAttributeName = L"minibatchSourcePosition";
/*static*/ const std::wstring CompositeMinibatchSource::DistributedAfterSampleCountAttributeName = L"minibatchDistributedAfterSampleCount";
CompositeMinibatchSource::CompositeMinibatchSource(const Dictionary& configuration)
: m_epochEndReached(false),
m_prevMinibatchSize(0),
m_epochSize(MinibatchSource::InfinitelyRepeat),
m_maxNumSamplesToRead(MinibatchSource::InfinitelyRepeat),
m_randomizedWindow(MinibatchSource::DefaultRandomizationWindow),
m_truncationLength(0),
m_numWorkers(1),
m_workerRank(0),
m_distributed(false),
m_distributedAfterSampleCount(MinibatchSource::InfiniteSamples)
m_workerRank(0)
{
// The CNTK reader implementation requires for each deserializer both the module and deserializer type be specified
// This is redundant and the V2 API users will just specify type from which the module is automatically inferred
@ -134,13 +136,7 @@ namespace CNTK
const wchar_t* epochSizeConfigurationKey = L"epochSize";
if (augmentedConfiguration.Contains(epochSizeConfigurationKey))
m_epochSize = augmentedConfiguration[epochSizeConfigurationKey].Value<size_t>();
if (m_epochSize == MinibatchSource::FullDataSweep)
m_epochSize = Microsoft::MSR::CNTK::requestDataSize;
// Set a big value, but not the max, in order to avoid bit overflow.
else if (m_epochSize == MinibatchSource::InfinitelyRepeat)
m_epochSize = std::numeric_limits<size_t>::max() / 2;
m_maxNumSamplesToRead = augmentedConfiguration[epochSizeConfigurationKey].Value<size_t>();
const wchar_t* randomizedWindowConfigurationKey = L"randomizationWindow";
if (augmentedConfiguration.Contains(randomizedWindowConfigurationKey))
@ -158,11 +154,6 @@ namespace CNTK
m_truncationLength = augmentedConfiguration[truncationLengthConfigurationKey].Value<size_t>();
}
// TODO: change all the dictionary names to string constants
const wchar_t* distributedAfterSampleCountConfigurationKey = L"distributedAfterSampleCount";
if (augmentedConfiguration.Contains(distributedAfterSampleCountConfigurationKey))
m_distributedAfterSampleCount = augmentedConfiguration[distributedAfterSampleCountConfigurationKey].Value<size_t>();
typedef Reader*(*CreateCompositeDataReaderProc)(const ConfigParameters* parameters);
CreateCompositeDataReaderProc createReaderProc = (CreateCompositeDataReaderProc)Plugin().Load(L"CompositeDataReader", "CreateCompositeDataReader");
std::shared_ptr<Microsoft::MSR::CNTK::Reader> compositeDataReader(createReaderProc(&config));
@ -194,6 +185,8 @@ namespace CNTK
/*virtual*/ const std::unordered_map<StreamInformation, MinibatchData>&
CompositeMinibatchSource::GetNextMinibatch(size_t minibatchSizeInSequences,
size_t minibatchSizeInSamples,
size_t numberOfWorkers,
size_t workerRank,
const DeviceDescriptor& device /*= DeviceDescriptor::UseDefaultDevice()*/) /*override*/
{
m_minibatchData.clear();
@ -206,35 +199,31 @@ namespace CNTK
if (minibatchSizeInSamples == 0)
InvalidArgument("GetNextMinibatch: Requested minibatch sizes must be > 0");
// For the first number of m_distributedAfterSampleCount samples, minibatch source won't run distributed.
bool wasDistributed = m_distributed;
if (!m_distributed && IsDistributed())
{
m_distributed = true;
if (m_numWorkers == 1)
{
MPIWrapperPtr mpi = MPIWrapper::GetInstance();
if (mpi == nullptr)
{
// create mpi instance if intended to be distributed
mpi = MPIWrapper::GetInstance(true);
}
m_numWorkers = mpi->NumNodesInUse();
m_workerRank = mpi->CurrentNodeRank();
}
}
if (m_prevMinibatchSize == 0)
{
EpochConfiguration epochConfig;
epochConfig.m_numberOfWorkers = m_distributed ? m_numWorkers : 1;
epochConfig.m_workerRank = m_distributed ? m_workerRank : 0;
epochConfig.m_numberOfWorkers = numberOfWorkers;
epochConfig.m_workerRank = workerRank;
epochConfig.m_minibatchSizeInSamples = minibatchSizeInSamples;
epochConfig.m_truncationSize = m_truncationLength;
epochConfig.m_allowMinibatchesToCrossSweepBoundaries = true;
if (m_maxNumSamplesToRead == MinibatchSource::FullDataSweep)
{
epochConfig.m_totalEpochSizeInSamples = Microsoft::MSR::CNTK::requestDataSize;
}
else if (m_maxNumSamplesToRead == MinibatchSource::InfinitelyRepeat)
{
// Set a big value, but not the max, in order to avoid bit overflow.
epochConfig.m_totalEpochSizeInSamples = std::numeric_limits<size_t>::max() / 2;
}
else
{
epochConfig.m_totalEpochSizeInSamples = m_maxNumSamplesToRead;
}
epochConfig.m_totalEpochSizeInSamples = m_epochSize;
epochConfig.m_epochIndex = 0;
m_matrices.clear();
std::unordered_set<InputStreamDescription> inputs;
@ -262,31 +251,38 @@ namespace CNTK
m_shim->StartEpoch(epochConfig, inputs);
m_prevMinibatchSize = minibatchSizeInSamples;
wasDistributed = m_distributed;
m_workerRank = workerRank;
m_numWorkers = numberOfWorkers;
}
if (minibatchSizeInSamples != m_prevMinibatchSize || wasDistributed != m_distributed)
if (minibatchSizeInSamples != m_prevMinibatchSize || m_workerRank != workerRank || m_numWorkers != numberOfWorkers)
{
std::map<std::wstring, int> inputDescriptions;
for (const auto& s : m_streamInfos)
inputDescriptions[s.m_name] = AsCNTKImplDeviceId(device);
ReaderConfiguration newConfig;
newConfig.m_numberOfWorkers = m_distributed ? m_numWorkers : 1;
newConfig.m_workerRank = m_distributed ? m_workerRank : 0;
newConfig.m_numberOfWorkers = numberOfWorkers;
newConfig.m_workerRank = workerRank;
newConfig.m_minibatchSizeInSamples = minibatchSizeInSamples;
newConfig.m_truncationSize = m_truncationLength;
newConfig.m_allowMinibatchesToCrossSweepBoundaries = true;
m_shim->SetConfiguration(newConfig, inputDescriptions);
m_prevMinibatchSize = minibatchSizeInSamples;
m_workerRank = workerRank;
m_numWorkers = numberOfWorkers;
}
auto hasData = m_shim->GetMinibatch(m_matrices);
m_epochEndReached = m_shim->IsEndOfEpoch();
if (m_epochEndReached && !hasData)
return m_minibatchData;
bool hasReachedSweepEnd = m_shim->IsEndOfSweep();
for (const auto& s: m_streamInfos)
{
auto input = m_matrices.GetInput(s.m_name);
@ -310,7 +306,7 @@ namespace CNTK
size_t numSamples = input.pMBLayout->GetActualNumSamples();
size_t numSequences = input.pMBLayout->GetNumSequences();
m_minibatchData[currentStreamInfo] = { numSequences, numSamples, minibatchValuePtr };
m_minibatchData[currentStreamInfo] = { minibatchValuePtr, numSequences, numSamples, hasReachedSweepEnd };
}
else
LogicError("Input data of type other than DataType::Float is currently unsupported by the CNTK built-in composite MinibatchSource!");
@ -324,7 +320,6 @@ namespace CNTK
{
Dictionary checkpointState;
checkpointState[PositionAttributeName] = m_shim->GetCurrentSamplePosition();
checkpointState[DistributedAfterSampleCountAttributeName] = m_distributedAfterSampleCount;
return checkpointState;
}
@ -332,6 +327,5 @@ namespace CNTK
{
auto checkpointedMinibatchSourcePosition = checkpoint[PositionAttributeName].Value<size_t>();
m_shim->SetCurrentSamplePosition(checkpointedMinibatchSourcePosition);
m_distributedAfterSampleCount = checkpoint[DistributedAfterSampleCountAttributeName].Value<size_t>();
}
}
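The worker topology is now supplied per call instead of being inferred from MPI state inside the source. A caller-side sketch, assuming the rank and worker count come from the communicator:
auto minibatch = source->GetNextMinibatch(/*minibatchSizeInSequences*/ 0, minibatchSizeInSamples,
                                          numberOfWorkers, workerRank, device);
// Each stream's MinibatchData now also carries the sweep-end flag:
bool sweepEnd = !minibatch.empty() && minibatch.begin()->second.sweepEnd;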

View file

@ -24,18 +24,16 @@ namespace CNTK
virtual const std::unordered_set<StreamInformation>& StreamInfos() override { return m_streamInfos; }
virtual const std::unordered_map<StreamInformation, MinibatchData>& GetNextMinibatch(size_t minibatchSizeInSamples,
size_t minibatchSizeInSequences,
const DeviceDescriptor& device = DeviceDescriptor::UseDefaultDevice()) override;
const std::unordered_map<StreamInformation, MinibatchData>& GetNextMinibatch(
size_t minibatchSizeInSamples,
size_t minibatchSizeInSequences,
size_t numberOfWorkers,
size_t workerRank,
const DeviceDescriptor& device = DeviceDescriptor::UseDefaultDevice()) override;
virtual Dictionary GetCheckpointState() const override;
virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override;
virtual bool IsDistributed() const override
{
return m_shim->GetCurrentSamplePosition() >= m_distributedAfterSampleCount;
}
private:
static Microsoft::MSR::CNTK::InputStreamDescription GetInputStreamDescription(const StreamInformation& s, const DeviceDescriptor& device)
{
@ -46,15 +44,13 @@ namespace CNTK
return Microsoft::MSR::CNTK::InputStreamDescription(s.m_name, CNTKdeviceId, CNTKMatrixType, CNTKMatrixFormat);
}
private:
private:
std::unordered_set<StreamInformation> m_streamInfos;
bool m_epochEndReached;
bool m_distributed;
size_t m_numWorkers;
size_t m_workerRank;
size_t m_distributedAfterSampleCount;
size_t m_prevMinibatchSize;
size_t m_epochSize;
size_t m_maxNumSamplesToRead;
size_t m_randomizedWindow;
size_t m_truncationLength;
std::unordered_map<StreamInformation, MinibatchData> m_minibatchData;

View file

@ -289,6 +289,33 @@ namespace CNTK
return MakeSharedObject<NDArrayView>(GetDataType(), Device(), GetStorageFormat(), Shape(), IsReadOnly() || readOnly, tensorView);
}
NDArrayViewPtr NDArrayView::AsShape(const NDShape& newShape) const
{
if (newShape.TotalSize() != Shape().TotalSize())
{
InvalidArgument("NDArrayView::AsShape: The size (%d) of 'source' view shape's (%S) must be same as the size (%d) of the newShape (%S)!",
(int)Shape().TotalSize(), AsStringForErrorReporting(Shape()).c_str(),
(int)newShape.TotalSize(), AsStringForErrorReporting(newShape).c_str());
}
auto newTensorShape = AsTensorShape(newShape);
void* tensorView = nullptr;
switch (m_dataType)
{
case DataType::Float:
tensorView = new TensorView<float>(*(GetTensorView<float>()), newTensorShape);
break;
case DataType::Double:
tensorView = new TensorView<double>(*(GetTensorView<double>()), newTensorShape);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(m_dataType));
break;
}
return MakeSharedObject<NDArrayView>(GetDataType(), Device(), GetStorageFormat(), newShape, IsReadOnly(), tensorView);
}
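AsShape aliases the existing buffer under a new shape instead of copying, so it is only valid when the element counts match. A small sketch:
// Given an NDArrayViewPtr 'view' with shape {2, 3}:
auto flat = view->AsShape(NDShape({ 6 })); // same storage, now viewed as shape {6}
// view->AsShape(NDShape({ 4 })) would throw: 6 elements cannot be viewed as 4.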
// TODO: This could actually be strided?
template <typename ElementType>
ElementType* NDArrayView::WritableDataBuffer()

View file

@ -130,7 +130,9 @@ namespace CNTK
(op == PrimitiveOpType::CrossEntropyWithSoftmax) ||
(op == PrimitiveOpType::ClassificationError) ||
(op == PrimitiveOpType::Logistic) ||
(op == PrimitiveOpType::CosDistance))
(op == PrimitiveOpType::CosDistance) ||
(op == PrimitiveOpType::LambdaRank) ||
(op == PrimitiveOpType::NDCG))
{
outputDynamicAxes = std::vector<Axis>({});
}
@ -527,9 +529,13 @@ namespace CNTK
case PrimitiveOpType::SquaredError:
case PrimitiveOpType::CrossEntropyWithSoftmax:
case PrimitiveOpType::ClassificationError:
case PrimitiveOpType::LambdaRank:
case PrimitiveOpType::NDCG:
{
if ((op == PrimitiveOpType::ClassificationError) || (op == PrimitiveOpType::Logistic))
assert(inputs.size() >= 2);
else if ((op == PrimitiveOpType::LambdaRank) || (op == PrimitiveOpType::NDCG))
assert(inputs.size() == 3);
else
assert(inputs.size() == 2);
@ -708,7 +714,8 @@ namespace CNTK
if (m_op == PrimitiveOpType::Block)
{
auto blockCompositeFunc = dynamic_cast<const CompositeFunction*>(BlockComposite().get());
auto blockFunction = dynamic_cast<const BlockFunction*>(this);
auto blockCompositeFunc = dynamic_cast<const CompositeFunction*>(blockFunction->Composite().get());
dict[blockFunctionCompositeKey] = blockCompositeFunc->SerializeBlockComposite();
dict[blockFunctionOpNameKey] = OpName();
@ -742,7 +749,7 @@ namespace CNTK
// The hard requirement that the serialization depends on is that
// new op type values are only added to the end of the list, after Combine.
// This also applies to other enums (DataType, VariableKind, etc.)
if (op > PrimitiveOpType::Unpooling)
if (op > PrimitiveOpType::NDCG)
{
CNTK::LogicError("Unexpected op '%ls':'%u' (%s).",
opKey.c_str(),

View file

@ -86,8 +86,10 @@ namespace CNTK
{PrimitiveOpType::Sin, L"Sin"},
{PrimitiveOpType::Cos, L"Cos"},
{PrimitiveOpType::Pass, L"Pass"},
{PrimitiveOpType::Block, L"Block"},
{PrimitiveOpType::Unpooling, L"Unpooling"},
{ PrimitiveOpType::Block, L"Block" },
{ PrimitiveOpType::Unpooling, L"Unpooling" },
{ PrimitiveOpType::LambdaRank, L"LambdaRank" },
{ PrimitiveOpType::NDCG, L"NDCG" },
};
inline const std::wstring& PrimitiveOpTypeName(PrimitiveOpType opType)
@ -118,6 +120,10 @@ namespace CNTK
if (numFunctionInputs > 2)
indexMap.insert({ 2, 2 });
}
else if (op == PrimitiveOpType::LambdaRank)
indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 }, { 2, 2 } });
else if (op == PrimitiveOpType::NDCG)
indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 }, { 2, 2 } });
else if (op == PrimitiveOpType::CrossEntropyWithSoftmax)
indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 } });
else if (op == PrimitiveOpType::GatherPacked)
@ -711,6 +717,6 @@ namespace CNTK
// Increasing s_serializationVersion every time we add more ops allows us to print
// a more meaningful message when trying to load a new model with a stale binary.
static const size_t s_serializationVersion = 2;
static const size_t s_serializationVersion = 3;
};
}

View file

@ -68,6 +68,8 @@ namespace CNTK
Pass = 56,
Block = 57,
Unpooling = 58,
LambdaRank = 59,
NDCG = 60,
// New op types should only be appended to the end of this list.
// If you append here, also add checks in SerializationTests (CheckEnumValuesNotModified)
// and bump up PrimitiveFunction::s_serializationVersion

View file

@ -119,6 +119,29 @@ namespace CNTK
return (numSamplesInDataArrayView - numMaskedSamples);
}
static std::unordered_map<Variable, ValuePtr> GetInputs(const std::unordered_map<Variable, MinibatchData>& arguments)
{
std::unordered_map<Variable, ValuePtr> inputs(arguments.size());
for (const auto& kv : arguments)
{
inputs[kv.first] = kv.second.data;
}
return inputs;
}
static bool IsAtSweepEnd(const std::unordered_map<Variable, MinibatchData>& arguments)
{
return std::any_of(arguments.begin(), arguments.end(), [](const std::pair<const Variable, MinibatchData>& kv)
{
return kv.second.sweepEnd;
});
}
double Trainer::TestMinibatch(const std::unordered_map<Variable, MinibatchData>& arguments, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{
return TestMinibatch(GetInputs(arguments), computeDevice);
}
double Trainer::TestMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{
if (!m_aggregatedEvaluationFunction)
@ -126,12 +149,26 @@ namespace CNTK
// TODO: Should we refactor this code that is somewhat similar to the prologue of the TrainMinibatch function
std::unordered_map<Variable, ValuePtr> outputs = { { m_aggregatedEvaluationFunction, nullptr }, { m_testSampleCountVar, nullptr } };
m_combinedTrainingFunction->Forward(arguments, outputs, computeDevice);
auto sampleCount = GetSampleCount(m_testSampleCountVar, outputs[m_testSampleCountVar]);
return (GetScalarValue(outputs[m_aggregatedEvaluationFunction]) / sampleCount);
}
bool Trainer::TrainMinibatch(const std::unordered_map<Variable, MinibatchData>& arguments, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{
std::unordered_map<Variable, ValuePtr> outputsToFetch = {};
return TrainMinibatch(arguments, outputsToFetch, computeDevice);
}
bool Trainer::TrainMinibatch(const std::unordered_map<Variable, MinibatchData>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{
if (!m_distributed)
return TrainLocalMinibatch(GetInputs(arguments), outputsToFetch, IsAtSweepEnd(arguments), computeDevice);
return TrainDistributedMinibatch(GetInputs(arguments), outputsToFetch, IsAtSweepEnd(arguments), computeDevice);
}
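The new overloads let a training loop hand the reader's output straight to the trainer; the sweep-end flag is extracted from the MinibatchData internally rather than passed by hand. A sketch with illustrative stream names:
auto minibatchData = source->GetNextMinibatch(minibatchSizeInSamples, device);
std::unordered_map<Variable, MinibatchData> arguments =
{
    { features, minibatchData[featureStreamInfo] },
    { labels,   minibatchData[labelStreamInfo] }
};
trainer->TrainMinibatch(arguments, device);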
bool Trainer::TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{
std::unordered_map<Variable, ValuePtr> outputsToFetch = {};
@ -141,11 +178,11 @@ namespace CNTK
bool Trainer::TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{
if (!m_distributed)
return TrainLocalMinibatch(arguments, outputsToFetch, computeDevice);
return TrainDistributedMinibatch(arguments, outputsToFetch, computeDevice);
return TrainLocalMinibatch(arguments, outputsToFetch, false, computeDevice);
return TrainDistributedMinibatch(arguments, outputsToFetch, false, computeDevice);
}
bool Trainer::TrainLocalMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
bool Trainer::TrainLocalMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, bool sweepEnd, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{
bool emptyMinibatch = arguments.empty() || (arguments.begin()->second == nullptr);
if (emptyMinibatch) // Nothing to train with.
@ -157,10 +194,10 @@ namespace CNTK
std::unordered_map<Parameter, NDArrayViewPtr> gradients;
for (const auto& parameter : m_combinedTrainingFunction->Parameters())
gradients[parameter] = parameterGradients[parameter]->Data();
return m_parameterLearners->Update(gradients, m_prevMinibatchNumSamples);
return m_parameterLearners->Update(gradients, m_prevMinibatchNumSamples, sweepEnd);
}
bool Trainer::TrainDistributedMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
bool Trainer::TrainDistributedMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, bool sweepEnd, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{
std::unordered_map<Parameter, NDArrayViewPtr> gradients;
auto modelParameters = m_combinedTrainingFunction->Parameters();
@ -187,7 +224,7 @@ namespace CNTK
evalCriterion = m_prevMinibatchAggregateEvalCriterionValue->Data();
}
MinibatchInfo info { arguments.empty(), m_prevMinibatchNumSamples, trainingLoss, evalCriterion };
MinibatchInfo info{ arguments.empty(), sweepEnd, m_prevMinibatchNumSamples, trainingLoss, evalCriterion };
bool updated = m_parameterLearners->Update(gradients, info);
m_prevMinibatchNumSamples = info.numberOfSamples;
@ -344,4 +381,14 @@ namespace CNTK
{
return m_parameterLearners->ParameterLearners().front()->TotalNumberOfSamplesSeen();
}
TrainerPtr CreateTrainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const std::vector<LearnerPtr>& parameterLearners)
{
return MakeSharedObject<Trainer>(model, lossFunction, parameterLearners);
}
TrainerPtr CreateTrainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const FunctionPtr& evaluationFunction, const std::vector<LearnerPtr>& parameterLearners)
{
return MakeSharedObject<Trainer>(model, lossFunction, evaluationFunction, parameterLearners);
}
}
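The factories mirror the Trainer constructors and return a shared TrainerPtr, so client code no longer needs to construct Trainer directly:
auto trainer = CreateTrainer(model, loss, metric, { learner });
// or, without an evaluation function:
auto trainer2 = CreateTrainer(model, loss, { learner });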

View file

@ -0,0 +1,148 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "stdafx.h"
#include "CNTKLibrary.h"
#include "fileutil.h"
namespace CNTK
{
const std::wstring TrainingSession::s_checkpointIndex = L"CheckpointIndex";
const std::wstring TrainingSession::s_trainingMinibatchSource = L"TrainingMinibatchSource";
TrainingSessionPtr CreateBasicTrainingSession(
const MinibatchSourcePtr& trainingSource,
const TrainerPtr& trainer,
const std::unordered_map<Variable, StreamInformation>& modelInputToMinibatchSourceStream,
const MinibatchSizeSchedule& minibatchSizeSchedule,
size_t checkpointFrequencyinSamples,
const std::wstring& checkPointFileName)
{
return MakeSharedObject<TrainingSession>(trainingSource,
trainer,
modelInputToMinibatchSourceStream,
minibatchSizeSchedule,
checkpointFrequencyinSamples,
checkPointFileName);
}
TrainingSession::TrainingSession(
const MinibatchSourcePtr& trainingSource,
const TrainerPtr& trainer,
const std::unordered_map<Variable, StreamInformation>& modelInputToMinibatchSourceStream,
const MinibatchSizeSchedule& schedule,
size_t checkpointFrequencyInSamples,
const std::wstring& checkPointFileName) :
m_trainingSource(trainingSource),
m_trainer(trainer),
m_modelInputToMinibatchSourceStream(modelInputToMinibatchSourceStream),
m_checkpointFrequencyinSamples(checkpointFrequencyInSamples),
m_checkPointFileName(checkPointFileName),
m_currentCheckpointIndex(0),
m_parallelAfterSamples(0),
m_workerRank(0),
m_numberOfWorkers(1),
m_minibatchSizeSchedule(schedule)
{
if (!trainingSource)
InvalidArgument("Minibatch source is not allowed to be null.");
if (!trainer)
InvalidArgument("Trainer is not allowed to be null.");
if (modelInputToMinibatchSourceStream.empty())
InvalidArgument("Input mapping is not allowed to be empty.");
if (m_checkPointFileName.empty() && checkpointFrequencyInSamples != 0)
InvalidArgument("Checkpoint file name is not allowed to be empty.");
// Let's calculate the warm up period the distributed learners may need.
// We will take the maximum warm up period required.
auto learners = trainer->ParameterLearners();
m_parallelAfterSamples = 0;
for (const auto& l: learners)
{
auto distributed = std::dynamic_pointer_cast<DistributedLearner>(l);
if (distributed)
{
m_parallelAfterSamples = std::max(m_parallelAfterSamples, distributed->ParallelizationAfter());
m_workerRank = distributed->GetCommunicator()->CurrentWorker().m_globalRank;
m_numberOfWorkers = distributed->GetCommunicator()->Workers().size();
}
}
}
void TrainingSession::Train(const DeviceDescriptor& computeDevice)
{
std::unordered_map<Variable, ValuePtr> minibatch;
bool shouldTrain = true;
size_t workerRank = 0, numberOfWorkers = 1;
size_t samplesInEpoch = 0;
while (shouldTrain)
{
// Check whether the warm-up period is over and reading should become distributed.
if (m_trainer->TotalNumberOfSamplesSeen() >= m_parallelAfterSamples)
{
numberOfWorkers = m_numberOfWorkers;
workerRank = m_workerRank;
}
size_t mbSize = GetMinibatchSize();
auto minibatchData = m_trainingSource->GetNextMinibatch(0 /*numberOfSequences*/, mbSize, numberOfWorkers, workerRank, computeDevice);
minibatch.clear();
if (!minibatchData.empty())
{
for (auto v : m_modelInputToMinibatchSourceStream)
minibatch.insert({ v.first, minibatchData[v.second].data });
}
OnMinibatchStart();
shouldTrain = m_trainer->TrainMinibatch(minibatch, computeDevice);
OnMinibatchEnd();
// Local number of samples.
samplesInEpoch += m_trainer->PreviousMinibatchSampleCount();
// Check whether to create a checkpoint
if (m_checkpointFrequencyinSamples > 0)
{
size_t checkpointIndex = m_trainer->TotalNumberOfSamplesSeen() / m_checkpointFrequencyinSamples;
if (checkpointIndex > m_currentCheckpointIndex)
{
samplesInEpoch = 0;
m_currentCheckpointIndex = checkpointIndex;
SaveCheckpoint();
}
}
}
if (m_checkpointFrequencyinSamples > 0)
SaveCheckpoint();
}
void TrainingSession::RestoreFromCheckpoint(const std::wstring& checkpointFileName)
{
Dictionary externalState = m_trainer->RestoreFromCheckpoint(checkpointFileName);
m_currentCheckpointIndex = externalState[s_checkpointIndex].Value<size_t>();
m_trainingSource->RestoreFromCheckpoint(externalState[s_trainingMinibatchSource].Value<Dictionary>());
}
void TrainingSession::SaveCheckpoint()
{
OnCheckpointStart();
Dictionary externalState;
externalState[s_checkpointIndex] = m_currentCheckpointIndex;
externalState[s_trainingMinibatchSource] = m_trainingSource->GetCheckpointState();
std::wstring tempFileName = m_checkPointFileName + L".tmp";
m_trainer->SaveCheckpoint(tempFileName, externalState);
// Perform the actual renaming only on the main worker.
if (m_workerRank == 0)
{
_wunlink(m_checkPointFileName.c_str());
renameOrDie(tempFileName, m_checkPointFileName);
}
OnCheckpointEnd();
}
}
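Putting the new class to work: a minimal driver sketch, assuming a configured minibatch source, trainer, input-to-stream mapping, and minibatch-size schedule (names and values are illustrative):
auto session = CreateBasicTrainingSession(
    source, trainer, inputToStreamMap,
    minibatchSizeSchedule,
    /*checkpointFrequencyInSamples*/ 100000,
    L"model.checkpoint");
session->Train(DeviceDescriptor::UseDefaultDevice());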

View file

@ -241,7 +241,7 @@ namespace CNTK
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(T value, UnitType unit)
: m_schedule({ make_pair(0, value) }), m_unit(unit), m_epochSize(EntireSweep)
: m_schedule({ make_pair(0, value) }), m_unit(unit), m_epochSize(FullDataSweep)
{
}
@ -268,13 +268,9 @@ namespace CNTK
template <typename T>
void TrainingParameterSchedule<T>::ConstructSchedule(const std::vector<std::pair<size_t, T>>& schedule)
{
if (m_epochSize == EntireSweep)
{
//Sweep based schedules are currently not functional (learners don't have sweep info).
NOT_IMPLEMENTED;
}
const auto epochSize = (m_epochSize == EntireSweep) ? 1 : m_epochSize;
// In case of the FullDataSweep, the scheduling unit is just 1 sweep,
// otherwise, it's the epoch size in samples.
const auto unitSize = (m_epochSize == FullDataSweep) ? 1 : m_epochSize;
if (schedule.size() == 0)
RuntimeError("TrainingParameterSchedule::ConstructSchedule : schedule is empty.");
@ -288,7 +284,7 @@ namespace CNTK
RuntimeError("TrainingParameterSchedule::ConstructSchedule : unit count in the 'schedule' argument cannot be 0.");
unitCount += (pair.first != 0) ? pair.first : 1;
m_schedule[epochSize * unitCount] = pair.second;
m_schedule[unitSize * unitCount] = pair.second;
}
}
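Worked example: with schedule {{2, 0.05}, {3, 0.01}} and an epoch size of 100 samples, the loop produces m_schedule[200] = 0.05 and m_schedule[500] = 0.01; with FullDataSweep the same schedule yields keys 2 and 5, i.e. the units become whole sweeps.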
@ -830,6 +826,7 @@ namespace CNTK
template void DictionaryValue::FreePtrAsType<NDArrayView>();
template class TrainingParameterSchedule<double>;
template class TrainingParameterSchedule<size_t>;
Learners::Learners(const std::vector<LearnerPtr>& learners) :
m_learners(learners),
@ -879,14 +876,14 @@ namespace CNTK
}
}
bool Learners::Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t sampleInMinibatch)
bool Learners::Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t sampleInMinibatch, bool sweepEnd)
{
bool anyUpdatesPerformed = false;
for (auto learner : m_learners)
{
std::unordered_map<Parameter, NDArrayViewPtr> learnerGradients;
GetLearnerGradients(learner, gradientValues, learnerGradients);
anyUpdatesPerformed |= learner->Update(learnerGradients, sampleInMinibatch);
anyUpdatesPerformed |= learner->Update(learnerGradients, sampleInMinibatch, sweepEnd);
}
return anyUpdatesPerformed;
}

View file

@ -501,7 +501,7 @@ namespace CNTK
public:
explicit Learners(const std::vector<LearnerPtr>& learners);
bool Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount);
bool Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount, bool sweepEnd);
bool Update(std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, MinibatchInfo& minibatchInfo);
std::vector<DictionaryValue> CreateCheckpoint();

View file

@ -194,16 +194,19 @@ namespace CNTK
NDMaskPtr deviceValueMask = CreateMask(sequenceLengths, sequenceStartFlags, DeviceDescriptor::CPUDevice());
NDArrayViewPtr valueData;
NDShape valueDataShape = sampleShape.AppendShape({ maxSequenceLength, numSequences });
if (numSequences == 1)
{
if (createNewCopy)
valueData = sequences[0]->DeepClone();
else
valueData = sequences[0];
// We can use the original buffer directly but need to reshape to the valueDataShape
valueData = valueData->AsShape(valueDataShape);
}
else
{
NDShape valueDataShape = sampleShape.AppendShape({ maxSequenceLength, numSequences });
if (isDataSparse)
{
if (storageFormat != StorageFormat::SparseCSC)

View file

@ -61,8 +61,6 @@ struct ProcessorData
nvmlMemory_t memory;
nvmlUtilization_t utilization;
cudaDeviceProp deviceProp;
size_t cudaFreeMem;
size_t cudaTotalMem;
bool cntkFound;
int deviceId; // the deviceId (cuda side) for this processor
};
@ -270,29 +268,16 @@ void BestGpu::GetCudaProperties()
if (m_cudaData)
return;
int currentDevice, rc;
rc = cudaGetDevice(&currentDevice);
int dev = 0;
for (ProcessorData* pd : m_procData)
{
cudaSetDevice(dev);
pd->deviceId = dev;
cudaGetDeviceProperties(&pd->deviceProp, dev);
size_t free;
size_t total;
cudaMemGetInfo(&free, &total);
pd->cores = _ConvertSMVer2Cores(pd->deviceProp.major, pd->deviceProp.minor) * pd->deviceProp.multiProcessorCount;
pd->cudaFreeMem = free;
pd->cudaTotalMem = total;
dev++;
cudaDeviceReset();
}
m_cudaData = m_procData.size() > 0;
if (rc == CUDA_SUCCESS)
{
cudaSetDevice(currentDevice);
}
}
void BestGpu::Init()
@ -486,10 +471,7 @@ std::vector<int> BestGpu::GetDevices(int number, BestGpuFlags p_bestFlags)
score = (1.0 - pd->utilization.gpu / 75.0f) * utilGpuW;
score += (1.0 - pd->utilization.memory / 60.0f) * utilMemW;
score += pd->cores / 1000.0f * speedW;
double mem = pd->memory.total > 0 ? pd->memory.free / (double) pd->memory.total : 1000000; // memory.total has been observed to be 0 when remoted in
// if it's not a tcc driver, then it's WDDM driver and values will be off because windows allocates all the memory from the nvml point of view
if (!pd->deviceProp.tccDriver || pd->memory.total == 0)
mem = pd->cudaFreeMem / (double) pd->cudaTotalMem;
double mem = pd->memory.total > 0 ? pd->memory.free / (double) pd->memory.total : 1; // memory.total has been observed to be 0 when remoted in
score += mem * freeMemW;
score += (pd->cntkFound ? 0 : 1) * mlAppRunningW;
for (int i = 0; i < best.size(); i++)

View file

@ -294,6 +294,13 @@ public:
return GetNumTimeSteps() * GetNumParallelSequences();
}
// Get the number of frames of the input sequence that belong to the MB, i.e. disregarding sequence elements that are outside of the MB boundaries
// Input sequence is expected to belong to this MBLayout
size_t GetNumSequenceFramesInCurrentMB(const SequenceInfo& sequenceInfo) const
{
return min(sequenceInfo.tEnd, GetNumTimeSteps()) - max(sequenceInfo.tBegin, (ptrdiff_t)0);
}
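For example, a sequence with tBegin = -3 and tEnd = 10 inside a minibatch of 8 time steps contributes min(10, 8) - max(-3, 0) = 8 frames; only the overlap with the minibatch window is counted.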
// return all sequences stored in this minibatch
const vector<SequenceInfo>& GetAllSequences() const
{
@ -515,6 +522,18 @@ public:
return col;
}
// get the matrix-column indices for a given sequence
// sequence is expected to belong to this MB
vector<size_t> GetColumnIndices(const SequenceInfo& seq) const
{
size_t numFrames = GetNumSequenceFramesInCurrentMB(seq);
vector<size_t> res;
res.reserve(numFrames);
for (size_t i = 0; i < numFrames; ++i)
res.push_back(GetColumnIndex(seq, i));
return res;
}
private:
// we are trying to access content--this verifies that the structure is consistent
// All frames must now be declared.
@ -836,7 +855,7 @@ inline bool MBLayout::IsBeyondMinibatch(const FrameRange& fr) const
if (fr.IsAllFrames())
LogicError("MBLayout::IsBeyondStartOrEnd() cannot be applied to FrameRange that specifies more than a single time step.");
const auto beginTime = (ptrdiff_t)fr.timeIdxInSeq + fr.m_timeOffset; // we test off the frame without offset
const auto beginTime = (ptrdiff_t)fr.timeIdxInSeq + fr.m_timeOffset; // we test off the frame with offset
const auto endTime = beginTime + (ptrdiff_t)fr.m_timeRange;
return beginTime < 0 || endTime > (ptrdiff_t)GetNumTimeSteps();
}

View file

@ -446,6 +446,7 @@ bool ComputationNetwork::IsTypicalCriterionNode(ComputationNodeBasePtr nodePtr)
nodePtr->OperationName() == OperationNameOf(CrossEntropyNode) ||
nodePtr->OperationName() == OperationNameOf(ClassBasedCrossEntropyWithSoftmaxNode) ||
nodePtr->OperationName() == OperationNameOf(ClassificationErrorNode) ||
nodePtr->OperationName() == OperationNameOf(EditDistanceErrorNode) ||
#ifdef COMING_SOON
nodePtr->OperationName() == OperationNameOf(CRFNode) ||
#endif

View file

@ -54,6 +54,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(DropoutNode)) return New<DropoutNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(DummyCriterionNode)) return New<DummyCriterionNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(DynamicAxisNode)) return New<DynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(EditDistanceErrorNode)) return New<EditDistanceErrorNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ElementTimesNode)) return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(EnvironmentInputNode)) return New<EnvironmentInputNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(EpochAccumulatorNode)) return New<EpochAccumulatorNode<ElemType>>(forward<_Types>(_Args)...);
@ -428,6 +429,12 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Class
return net.AddNodeToNetAndAttachInputs(New<ClassificationErrorNode<ElemType>>(net.GetDeviceId(), nodeName), { a, b });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::EditDistanceError(const ComputationNodePtr a, const ComputationNodePtr b, float subPen, float delPen, float insPen, bool squashInputs, vector<int> samplesToIgnore, const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<EditDistanceErrorNode<ElemType>>(net.GetDeviceId(), nodeName, subPen, delPen, insPen, squashInputs, samplesToIgnore), { a, b });
}
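A builder-side sketch against the signature above; the penalty values are illustrative, and 'builder' stands for a ComputationNetworkBuilder<ElemType> instance:
// Substitution/deletion/insertion penalties of 1 reproduce plain edit distance:
auto errNode = builder.EditDistanceError(outputNode, labelNode,
    /*subPen*/ 1.0f, /*delPen*/ 1.0f, /*insPen*/ 1.0f,
    /*squashInputs*/ false, /*samplesToIgnore*/ {}, L"editDistanceError");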
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::PerDimMeanVarNormalization(const ComputationNodePtr feature, const ComputationNodePtr mean,
const ComputationNodePtr InvStdDev, const std::wstring nodeName)

View file

@ -129,6 +129,7 @@ public:
ComputationNodePtr Diagonal(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr Dropout(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr DummyCriterion(const ComputationNodePtr objectives, const ComputationNodePtr derivatives, const ComputationNodePtr prediction, const std::wstring nodeName = L"");
ComputationNodePtr EditDistanceError(const ComputationNodePtr a, const ComputationNodePtr b, float subPen, float delPen, float insPen, bool squashInputs, vector<int> samplesToIgnore, const std::wstring nodeName = L"");
ComputationNodePtr ElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr DynamicAxis(const ComputationNodePtr a, const std::wstring& nodeName = L"");
ComputationNodePtr ClassificationError(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");

View file

@ -1005,15 +1005,27 @@ void ComputationNetwork::AllocateAllMatrices(const std::vector<ComputationNodeBa
bool performingBackPropagation = (trainRootNode != nullptr) || (Globals::ShouldEnableHyperCompressMemory());
// Construct the composite forward prop eval order by enumerating the
// nodes corresponding to each of our roots in global eval order
forwardPropRoots = SortByGlobalEvalOrder(forwardPropRoots);
// Create a composite Eval order with the specified nodes as roots
// For each node determine parents and whether the output of the
// node is needed during back propagation
std::unordered_map<ComputationNodeBasePtr, bool> outputValueNeededDuringBackProp;
std::unordered_map<ComputationNodeBasePtr, std::unordered_set<ComputationNodeBasePtr>> parentsMap;
std::vector<ComputationNodeBasePtr> compositeForwardPropEvalOrder;
std::unordered_set<ComputationNodeBasePtr> uniqueForwardPropEvalNodes;
for (auto& rootNode : forwardPropRoots)
{
for (const auto& node : GetEvalOrder(rootNode))
{
if (uniqueForwardPropEvalNodes.find(node) == uniqueForwardPropEvalNodes.end())
{
uniqueForwardPropEvalNodes.insert(node);
compositeForwardPropEvalOrder.push_back(node);
}
for (int i = 0; i < node->GetNumInputs(); i++)
{
ComputationNodeBasePtr input = node->GetInputs()[i];
@ -1050,13 +1062,6 @@ void ComputationNetwork::AllocateAllMatrices(const std::vector<ComputationNodeBa
}
}
// Construct the composite forward prop eval order by enumerating the
// nodes corresponding to each of our roots and then arranging them in the
// relative order that they appear in the global evaluation order
std::list<ComputationNodeBasePtr> nodesForForwardPropRoots = ComputationNodeBase::EnumerateNodes(forwardPropRoots);
std::vector<ComputationNodeBasePtr> compositeForwardPropEvalOrder = SortByGlobalEvalOrder(nodesForForwardPropRoots);
set<ComputationNodeBasePtr> completedEvaluate;
for (auto& nodeIter : compositeForwardPropEvalOrder)
{

Some files were not shown because too many files have changed in this diff.