Merge branch 'master' into qiwye/asgd-dev

# Conflicts:
#	CNTK.sln
#	Makefile
This commit is contained in:
unknown 2016-11-10 16:50:46 +08:00
Parent 10a6535ef9 ac1a9469ef
Commit 249989b95f
91 changed files with 12485 additions and 974 deletions

View file

@ -158,7 +158,7 @@
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(ConfigurationType)' == 'StaticLibrary' And $(ReleaseBuild) And !$(NoOptBuild)">
<ItemDefinitionGroup Condition="$(ReleaseBuild) And !$(NoOptBuild)">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
@ -180,8 +180,16 @@
<IntrinsicFunctions>false</IntrinsicFunctions>
</ClCompile>
<Link>
<EnableCOMDATFolding>false</EnableCOMDATFolding>
<OptimizeReferences>false</OptimizeReferences>
<Profile>false</Profile>
</Link>
</ItemDefinitionGroup>
<PropertyGroup Condition="$(NoOptBuild)" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>false</WholeProgramOptimization>
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
</Project>

View file

@ -1292,6 +1292,11 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Multiverso", "Source\Multiv
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MultiversoTests", "Source\Multiverso\Test\unittests\MultiversoTests.vcxproj", "{EC7157E9-A51F-4702-A5FD-8DAF88C7029F}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalExtendedClientTest", "Tests\EndToEndTests\EvalClientTests\CPPEvalExtendedClientTest\CPPEvalExtendedClientTest.vcxproj", "{5D29C76D-648A-456F-920D-48230F2FB3C8}"
ProjectSection(ProjectDependencies) = postProject
{482999D1-B7E2-466E-9F8D-2119F93EAFD9} = {482999D1-B7E2-466E-9F8D-2119F93EAFD9}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug_CpuOnly|Any CPU = Debug_CpuOnly|Any CPU
@ -2297,6 +2302,31 @@ Global
{EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release|Mixed Platforms.Build.0 = Release|x64
{EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release|x64.ActiveCfg = Release|x64
{EC7157E9-A51F-4702-A5FD-8DAF88C7029F}.Release|x64.Build.0 = Release|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Debug_CpuOnly|Any CPU.ActiveCfg = Debug_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Debug_CpuOnly|Mixed Platforms.ActiveCfg = Debug_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Debug_CpuOnly|Mixed Platforms.Build.0 = Debug_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Debug_CpuOnly|x64.ActiveCfg = Debug_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Debug_CpuOnly|x64.Build.0 = Debug_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Debug|Any CPU.ActiveCfg = Debug|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Debug|Mixed Platforms.Build.0 = Debug|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Debug|x64.ActiveCfg = Debug|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Debug|x64.Build.0 = Debug|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release_CpuOnly|Any CPU.ActiveCfg = Release_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release_CpuOnly|Mixed Platforms.ActiveCfg = Release_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release_CpuOnly|Mixed Platforms.Build.0 = Release_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release_CpuOnly|x64.ActiveCfg = Release_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release_CpuOnly|x64.Build.0 = Release_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release_NoOpt|Any CPU.ActiveCfg = Release_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release_NoOpt|Mixed Platforms.ActiveCfg = Release_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release_NoOpt|Mixed Platforms.Build.0 = Release_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release_NoOpt|x64.ActiveCfg = Release_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release_NoOpt|x64.Build.0 = Release_CpuOnly|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release|Any CPU.ActiveCfg = Release|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release|Mixed Platforms.ActiveCfg = Release|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release|Mixed Platforms.Build.0 = Release|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release|x64.ActiveCfg = Release|x64
{5D29C76D-648A-456F-920D-48230F2FB3C8}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@ -2476,5 +2506,6 @@ Global
{F4CCAAB2-0DB2-4281-929A-2E68E30F0F6E} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
{16F14058-B116-49D9-8BA0-209F3AFFE849} = {DD043083-71A4-409A-AA91-F9C548DCF7EC}
{EC7157E9-A51F-4702-A5FD-8DAF88C7029F} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
{5D29C76D-648A-456F-920D-48230F2FB3C8} = {05E45AF7-C069-4057-BC16-0A532D068CE4}
EndGlobalSection
EndGlobal

View file

@ -0,0 +1,326 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// CPPEvalExtendedClient.cpp : Sample application using the extended evaluation interface from C++
//
#include <sys/stat.h>
#include <inttypes.h>
#include <algorithm>
#include <fstream>
#include <unordered_map>
#include "Eval.h"
#ifdef _WIN32
#include "Windows.h"
#endif
using namespace std;
using namespace Microsoft::MSR::CNTK;
// Used for retrieving the model appropriate for the element type (float / double)
template<typename ElemType>
using GetEvalProc = void(*)(IEvaluateModelExtended<ElemType>**);
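// Builds a word -> index vocabulary from a file with one token per line (0-based indices).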
std::unordered_map<std::string, size_t> buildVocab(std::string filePath)
{
std::ifstream ifs(filePath);
size_t idx = 0;
std::unordered_map<std::string, size_t> vocab;
std::string line;
while (std::getline(ifs, line))
{
vocab.insert(std::pair<std::string, size_t>(line, idx));
idx += 1;
}
ifs.close();
return vocab;
}
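// Builds the inverse index -> word vocabulary; note it starts at index 1, unlike buildVocab.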
std::unordered_map<size_t, std::string> buildInvVocab(std::string filePath)
{
std::ifstream ifs(filePath);
size_t idx = 1;
std::unordered_map<size_t, std::string> vocab;
std::string line;
while (std::getline(ifs, line))
{
vocab.insert(std::pair<size_t, std::string>(idx, line));
idx += 1;
}
ifs.close();
return vocab;
}
size_t word2idx(std::string word, std::unordered_map<std::string, size_t>& word2idxVocab)
{
std::unordered_map<std::string, size_t>::iterator iter = word2idxVocab.find(word);
if (iter == word2idxVocab.end())
{
throw std::runtime_error("word not found in source vocab");
}
return iter->second;
}
std::string idx2word(size_t idx, std::unordered_map<size_t, std::string>& idx2wordVocab)
{
std::unordered_map<size_t, std::string>::iterator iter = idx2wordVocab.find(idx);
if (iter == idx2wordVocab.end())
{
throw std::runtime_error("word index is not found in target vocab");
}
return iter->second;
}
void addOneHotWord(Values<float>& inputBuffers, size_t idx, VariableSchema& inputLayouts, size_t inputNode)
{
size_t inputDim = inputLayouts[inputNode].m_numElements;
for (size_t i = 0; i < inputDim; i++)
{
if (i == idx)
{
inputBuffers[inputNode].m_buffer.push_back(1);
}
else
{
inputBuffers[inputNode].m_buffer.push_back(0);
}
}
}
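// Converts a tagged input sentence into per-step one-hot features: input node 0 receives
// the current word, node 1 the next word, and node 2 the previous word.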
std::vector<std::string> feedInputVectors(std::string sentence, std::unordered_map<std::string, size_t>& word2idxVocab, Values<float>& inputBuffers, VariableSchema& inputLayouts)
{
std::vector<std::string> words;
// Split input sentence by space.
char delimiters = ' ';
size_t begin = 0;
size_t end = sentence.find_first_of(delimiters);
while (end != sentence.npos)
{
words.push_back(sentence.substr(begin, end - begin));
begin = end + 1;
end = sentence.find(delimiters, begin);
}
words.push_back(sentence.substr(begin));
// Convert words to ids.
std::vector<size_t> wordIds;
for (size_t i = 0; i < words.size(); i++)
{
size_t id = word2idx(words[i], word2idxVocab);
wordIds.push_back(id);
}
// Process the input words to construct network input vectors.
// As the sentence begins and ends with a special tag, we ignore the first and last word.
for (size_t i = 1; i < words.size() - 1; i++)
{
// Current word.
size_t cwIdx = wordIds[i];
addOneHotWord(inputBuffers, cwIdx, inputLayouts, 0);
// Next word.
size_t nwIdx = wordIds[i + 1];
addOneHotWord(inputBuffers, nwIdx, inputLayouts, 1);
// Previous word.
size_t pwIdx = wordIds[i - 1];
addOneHotWord(inputBuffers, pwIdx, inputLayouts, 2);
}
return words;
}
IEvaluateModelExtended<float>* SetupNetworkAndGetLayouts(std::string modelDefinition, VariableSchema& inputLayouts, VariableSchema& outputLayouts)
{
// Native model evaluation instance
IEvaluateModelExtended<float> *eval;
GetEvalExtendedF(&eval);
try
{
eval->CreateNetwork(modelDefinition);
}
catch (std::exception& ex)
{
fprintf(stderr, "%s\n", ex.what());
throw;
}
fflush(stderr);
// Get the model's layers dimensions
outputLayouts = eval->GetOutputSchema();
for (auto vl : outputLayouts)
{
fprintf(stderr, "Output dimension: %" PRIu64 "\n", vl.m_numElements);
fprintf(stderr, "Output name: %ls\n", vl.m_name.c_str());
}
eval->StartForwardEvaluation({ outputLayouts[0].m_name });
inputLayouts = eval->GetInputSchema();
outputLayouts = eval->GetOutputSchema();
return eval;
}
/// <summary>
/// Program demonstrating how to run model evaluations using the native extended evaluation interface,
/// and how to feed sequence vectors to an LSTM (RNN) network.
/// </summary>
/// <description>
/// This program is a native C++ client using the native extended evaluation interface
/// located in the <see cref="eval.h"/> file.
/// The CNTK evaluation library (EvalDLL.dll on Windows, LibEval.so on Linux) must be found through the system's path,
/// and Eval.h must be included.
/// In order to run this program, the model must already exist in the example. To create the model,
/// first run the example in <CNTK>/Examples/Text/ATIS. Once the model file ATIS.slot.lstm is created,
/// you can run this client.
/// This program demonstrates the usage of the Evaluate method, requiring the input and output layers as parameters.
/// </description>
int main(int argc, char* argv[])
{
// Get the binary path (current working directory)
argc = 0;
std::string app = argv[0];
std::string path;
size_t pos;
int ret;
#ifdef _WIN32
pos = app.rfind("\\");
path = (pos == std::string::npos) ? "." : app.substr(0, pos);
// This relative path assumes launching from CNTK's binary folder, e.g. x64\Release
const std::string modelBaseDir = path + "/../../Examples/Text/ATIS/";
#else // on Linux
pos = app.rfind("/");
path = (pos == std::string::npos) ? "." : app.substr(0, pos);
// This relative path assumes launching from CNTK's binary folder, e.g. build/cpu/release/bin/
const std::string modelBaseDir = path + "/../../../../Examples/Text/ATIS/";
#endif
const std::string modelWorkingDirectory = modelBaseDir + "work/";
const std::string modelFilePath = modelWorkingDirectory + "ATIS.slot.lstm";
try
{
struct stat statBuf;
if (stat(modelFilePath.c_str(), &statBuf) != 0)
{
fprintf(stderr, "Error: The model %s does not exist. Please follow instructions in README.md in <CNTK>/Examples/Text/ATIS to create the model.\n", modelFilePath.c_str());
return(1);
}
std::string networkConfiguration;
networkConfiguration += "modelPath=\"" + modelFilePath + "\"";
VariableSchema inputLayouts;
VariableSchema outputLayouts;
IEvaluateModelExtended<float> *eval;
eval = SetupNetworkAndGetLayouts(networkConfiguration, inputLayouts, outputLayouts);
vector<size_t> inputBufferSize;
for (size_t i = 0; i < inputLayouts.size(); i++)
{
fprintf(stdout, "Input node name: %ls\n", inputLayouts[i].m_name.c_str());
fprintf(stdout, "Input feature dimension: %" PRIu64 "\n", inputLayouts[i].m_numElements);
inputBufferSize.push_back(inputLayouts[i].m_numElements);
}
vector<size_t> outputBufferSize;
for (size_t i = 0; i < outputLayouts.size(); i++)
{
outputBufferSize.push_back(outputLayouts[i].m_numElements);
}
// Build source word vocab to id
const std::string sourceVocab = modelBaseDir + "/Data/ATIS.vocab";
if (stat(sourceVocab.c_str(), &statBuf) != 0)
{
fprintf(stderr, "Error: The file '%s' does not exist.\n", sourceVocab.c_str());
return(1);
}
std::unordered_map<std::string, size_t> word2idxVocab = buildVocab(sourceVocab);
// Build id to target word vocab
const std::string targetVocab = modelBaseDir + "/Data/ATIS.label";
if (stat(targetVocab.c_str(), &statBuf) != 0)
{
fprintf(stderr, "Error: The file '%s' does not exist.\n", targetVocab.c_str());
return(1);
}
std::unordered_map<size_t, std::string> idx2wordVocab = buildInvVocab(targetVocab);
// Use the following sentence as an input example.
// A single space is used as the word separator.
std::string inputSequences = "BOS i would like to find a flight from charlotte to las vegas that makes a stop in st. louis EOS";
Values<float> inputBuffers = inputLayouts.CreateBuffers<float>(inputBufferSize);
Values<float> outputBuffers = outputLayouts.CreateBuffers<float>(outputBufferSize);
// Feed input sequence vectors to network
std::vector<std::string> words = feedInputVectors(inputSequences, word2idxVocab, inputBuffers, inputLayouts);
// Forward propagation
eval->ForwardPass(inputBuffers, outputBuffers);
// Get output from output layer
auto buf = outputBuffers[0].m_buffer;
size_t bufSize = outputBuffers[0].m_buffer.size();
std::vector<std::string> outputs;
size_t outputDim = outputLayouts[0].m_numElements;
size_t outputStep = bufSize / outputDim;
auto iter = buf.begin();
for (size_t i = 0; i < outputStep; i++)
{
auto max_iter = std::max_element(iter, iter + outputDim);
auto index = max_iter - iter;
outputs.push_back(idx2word(index, idx2wordVocab));
iter += outputDim;
}
words.erase(words.begin());
words.pop_back();
fprintf(stdout, "Slot tag for sentence \"%s\" is as follows:\n", inputSequences.c_str());
for (size_t i = 0; i < outputs.size(); i++)
{
fprintf(stdout, "%10s -- %s\n", words[i].c_str(), outputs[i].c_str());
}
eval->Destroy();
// This pattern is used by End2EndTests to check whether the program runs to completion.
fprintf(stdout, "Evaluation complete.\n");
ret = 0;
}
catch (const std::exception& err)
{
fprintf(stderr, "Evaluation failed. EXCEPTION occurred: %s\n", err.what());
ret = 1;
}
catch (...)
{
fprintf(stderr, "Evaluation failed. Unknown ERROR occurred.\n");
ret = 1;
}
fflush(stdout);
fflush(stderr);
return ret;
}
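For quick reference, the extended-eval call sequence this client exercises condenses to the following sketch (a minimal, hedged outline using the same Eval.h declarations shown above; error handling and the vocabulary plumbing are omitted):

#include <string>
#include <vector>
#include "Eval.h"
using namespace Microsoft::MSR::CNTK;

// Minimal sketch of the extended evaluation flow demonstrated in main() above.
void EvalOnce(const std::string& networkConfiguration)
{
    IEvaluateModelExtended<float>* eval;
    GetEvalExtendedF(&eval);                   // obtain a float-precision evaluator
    eval->CreateNetwork(networkConfiguration); // e.g. modelPath="..."

    VariableSchema outputs = eval->GetOutputSchema();
    eval->StartForwardEvaluation({ outputs[0].m_name });
    VariableSchema inputs = eval->GetInputSchema();
    outputs = eval->GetOutputSchema();

    // One buffer per schema entry, sized by m_numElements.
    std::vector<size_t> inSizes, outSizes;
    for (const auto& v : inputs)  inSizes.push_back(v.m_numElements);
    for (const auto& v : outputs) outSizes.push_back(v.m_numElements);
    Values<float> inBufs  = inputs.CreateBuffers<float>(inSizes);
    Values<float> outBufs = outputs.CreateBuffers<float>(outSizes);

    // ... fill inBufs[n].m_buffer with one-hot frames, as feedInputVectors() does ...
    eval->ForwardPass(inBufs, outBufs);        // outBufs[0].m_buffer now holds the scores
    eval->Destroy();
}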

View file

@ -0,0 +1,66 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{93ECB70B-FDDD-44B4-BD6A-D63E094C704B}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>CPPEvalExtendedClient</RootNamespace>
<ProjectName>CPPEvalExtendedClient</ProjectName>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
<OutDir>$(SolutionDir)..\..\$(Platform)\$(ProjectName).$(Configuration)\</OutDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)..\..\Include</AdditionalIncludeDirectories>
<TreatWarningAsError>true</TreatWarningAsError>
<SDLCheck>true</SDLCheck>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<FloatingPointModel>Fast</FloatingPointModel>
<OpenMPSupport>true</OpenMPSupport>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>$(SolutionDir)\..\..\cntk</AdditionalLibraryDirectories>
<AdditionalDependencies>EvalDll.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="CPPEvalExtendedClient.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View file

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="CPPEvalExtendedClient.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View file

@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "CSEvalClient", "CSEvalClien
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalV2Client", "CPPEvalV2Client\CPPEvalV2Client.vcxproj", "{D771A06D-CC25-4582-B5CD-D2A4782BB005}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalExtendedClient", "CPPEvalExtendedClient\CPPEvalExtendedClient.vcxproj", "{93ECB70B-FDDD-44B4-BD6A-D63E094C704B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
@ -25,6 +27,9 @@ Global
{D771A06D-CC25-4582-B5CD-D2A4782BB005}.Debug|x64.ActiveCfg = Release|x64
{D771A06D-CC25-4582-B5CD-D2A4782BB005}.Release|x64.ActiveCfg = Release|x64
{D771A06D-CC25-4582-B5CD-D2A4782BB005}.Release|x64.Build.0 = Release|x64
{93ECB70B-FDDD-44B4-BD6A-D63E094C704B}.Debug|x64.ActiveCfg = Release|x64
{93ECB70B-FDDD-44B4-BD6A-D63E094C704B}.Release|x64.ActiveCfg = Release|x64
{93ECB70B-FDDD-44B4-BD6A-D63E094C704B}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View file

@ -1,8 +1,13 @@
# EvalClients
This folder contains examples using the CNTK evaluation library. Please note that only the 64-bit target is supported by the CNTK evaluation library.
-CPPEvalClient: demonstrate the use of the C++ CNTK eval lib. Only the release configuration is supported.
-CSEvalClient: demonstrate the use of the C# CNTK eval lib.
-EvalClients.sln: the VS2013 solution file to build examples. It creates two binaries in the directory $(SolutionDir)..\..\x64\:
* CPPEvalClient.$(Configuration)\CPPEvalClient.exe: the C++ example executable. To run the example, please first include the directory containing CNTK dependent dlls, usually $(SolutionDir)..\..\cntk, in the PATH environment variable.
* CSEvalClient.$(Configuration)\CSEvalClient.exe: the C# example executable.
- CPPEvalClient.$(Configuration)\CPPEvalClient.exe: the C++ example executable. To run the example, please first include the directory containing CNTK dependent dlls, usually $(SolutionDir)..\..\cntk, in the PATH environment variable.
- CSEvalClient.$(Configuration)\CSEvalClient.exe: the C# example executable.

View file

@ -13,6 +13,7 @@ modelPath = "$outputDir$/Models/ResNet_101"
stderr = "$outputDir$/ResNet_101_BS_out"
parallelTrain = true
hyperCompressMemory = true
TrainNetwork = {
action = "train"

View file

@ -13,6 +13,7 @@ modelPath = "$outputDir$/Models/ResNet_152"
stderr = "$outputDir$/ResNet_152_BS_out"
parallelTrain = true
hyperCompressMemory = true
TrainNetwork = {
action = "train"

View file

@ -2,7 +2,7 @@
# An LSTM model is built to tag each word in sentences with its semantic label.
WorkDir = work
DataDir = data
DataDir = Data
makeMode = false
modelPath = $WorkDir$/ATIS.slot.lstm
@ -96,9 +96,11 @@ Train = [
parallelizationMethod = "DataParallelSGD"
parallelizationStartEpoch = 2
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
# Uncomment the following lines if you want parallelTrain to use 1-bit-SGD.
# For that you also need CNTK binaries built with 1-bit-SGD enabled.
# dataParallelSGD = [
# gradientBits = 1
# ]
]
]

View file

@ -147,6 +147,14 @@ ifdef CUDA_PATH
LIBS_LIST += cudnn
COMMON_FLAGS +=-DUSE_CUDNN
endif
# Set up NCCL if needed
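# (Illustrative usage: set the variable on the make command line, e.g.
#  make NCCL_PATH=/usr/local/nccl, so that -lnccl and -DUSE_NCCL are picked up.)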
ifdef NCCL_PATH
INCLUDEPATH += $(NCCL_PATH)/include
LIBPATH += $(NCCL_PATH)/lib
LIBS_LIST += nccl
COMMON_FLAGS += -DUSE_NCCL
endif
else
DEVICE = cpu
@ -313,6 +321,7 @@ MATH_SRC =\
$(SOURCEDIR)/Math/DataTransferer.cpp \
$(SOURCEDIR)/Math/RNGHandle.cpp \
$(SOURCEDIR)/Math/TensorView.cpp \
$(SOURCEDIR)/Math/NcclComm.cpp \
ifdef SUPPORT_AVX2
MATH_SRC +=\
@ -406,7 +415,7 @@ CNTKLIBRARY_COMMON_SRC =\
$(SOURCEDIR)/CNTKv2LibraryDll/Utils.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/Value.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/Variable.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/Learner.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/Learner.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/Serialization.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/DistributedCommunicator.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/DataParallelDistributedTrainer.cpp \
@ -415,7 +424,6 @@ CNTKLIBRARY_COMMON_SRC =\
CNTKLIBRARY_SRC =\
$(SOURCEDIR)/CNTKv2LibraryDll/ComputeInputStatistics.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/MinibatchSource.cpp \
$(SOURCEDIR)/CNTKv2LibraryDll/Globals.cpp \
CNTKLIBRARY_SRC+=$(CNTKLIBRARY_COMMON_SRC)
CNTKLIBRARY_SRC+=$(CNTK_COMMON_SRC)
@ -510,7 +518,7 @@ SGDLIB_SRC=\
$(SOURCEDIR)/SGDLib/Profiler.cpp \
$(SOURCEDIR)/SGDLib/SGD.cpp \
$(SOURCEDIR)/SGDLib/PostComputingActions.cpp \
SGDLIB_SRC+=$(CNTKLIBRARY_COMMON_SRC)
EVAL_SRC=\
@ -538,31 +546,46 @@ EVAL_LIB:=$(LIBDIR)/lib$(EVAL).so
ALL_LIBS+=$(EVAL_LIB)
SRC+=$(EVAL_SRC)
$(EVAL_LIB): $(EVAL_OBJ) | $(CNTKMATH_LIB)
$(EVAL_LIB): $(EVAL_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo Building $(EVAL_LIB) for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) $(lMULTIVERSO) $(PROTOBUF_PATH)/lib/libprotobuf.a
########################################
# Eval Sample client
# Eval Sample clients
########################################
EVAL_SAMPLE_CLIENT:=$(BINDIR)/cppevalclient
EVAL_CLIENT:=$(BINDIR)/cppevalclient
EVAL_SAMPLE_CLIENT_SRC=\
EVAL_CLIENT_SRC=\
$(SOURCEDIR)/../Examples/Evaluation/CPPEvalClient/CPPEvalClient.cpp
EVAL_SAMPLE_CLIENT_OBJ:=$(patsubst %.cpp, $(OBJDIR)/%.o, $(EVAL_SAMPLE_CLIENT_SRC))
EVAL_CLIENT_OBJ:=$(patsubst %.cpp, $(OBJDIR)/%.o, $(EVAL_CLIENT_SRC))
ALL+=$(EVAL_SAMPLE_CLIENT)
SRC+=$(EVAL_SAMPLE_CLIENT_SRC)
ALL+=$(EVAL_CLIENT)
SRC+=$(EVAL_CLIENT_SRC)
$(EVAL_SAMPLE_CLIENT): $(EVAL_SAMPLE_CLIENT_OBJ) | $(EVAL_LIB)
$(EVAL_CLIENT): $(EVAL_CLIENT_OBJ) | $(EVAL_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $(EVAL_SAMPLE_CLIENT) for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(EVAL) -l$(CNTKMATH) $(lMULTIVERSO)
@echo building $(EVAL_CLIENT) for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(EVAL) -l$(CNTKMATH) $(lMULTIVERSO)
EVAL_EXTENDED_CLIENT:=$(BINDIR)/cppevalextendedclient
EVAL_EXTENDED_CLIENT_SRC=\
$(SOURCEDIR)/../Examples/Evaluation/CPPEvalExtendedClient/CPPEvalExtendedClient.cpp
EVAL_EXTENDED_CLIENT_OBJ:=$(patsubst %.cpp, $(OBJDIR)/%.o, $(EVAL_EXTENDED_CLIENT_SRC))
ALL+=$(EVAL_EXTENDED_CLIENT)
SRC+=$(EVAL_EXTENDED_CLIENT_SRC)
$(EVAL_EXTENDED_CLIENT): $(EVAL_EXTENDED_CLIENT_OBJ) | $(EVAL_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $(EVAL_EXTENDED_CLIENT) for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(EVAL) -l$(CNTKMATH)
########################################
# Eval V2 Sample client

View file

@ -10,7 +10,7 @@
# TODO cut down on logging
set -x -e -o pipefail
REPO_TAG=v2.0.beta2.0
REPO_TAG=v2.0.beta3.0
while [ $# -gt 0 ]; do
case "$1" in
@ -41,7 +41,7 @@ CNTK_DEP_LIB_PATH="$PWD/cntk/dependencies/lib"
CNTK_EXAMPLES_PATH="$PWD/Examples"
CNTK_BINARY="$CNTK_BIN_PATH/cntk"
CNTK_PY34_ENV_FILE="$SCRIPT_DIR/conda-linux-cntk-py34-environment.yml"
CNTK_WHEEL_PATH="cntk/python/cntk-2.0.beta2.0-cp34-cp34m-linux_x86_64.whl"
CNTK_WHEEL_PATH="cntk/python/cntk-2.0.beta3.0-cp34-cp34m-linux_x86_64.whl"
test -d "$CNTK_BIN_PATH" && test -d "$CNTK_LIB_PATH" && test -d "$CNTK_DEP_LIB_PATH" &&
test -d "$CNTK_EXAMPLES_PATH" && test -x "$CNTK_BINARY" &&
test -f "$CNTK_PY34_ENV_FILE" && test -f "$CNTK_WHEEL_PATH" || {

View file

@ -26,11 +26,7 @@ function ActionItem(
$expr = $func +' $item'
Write-Verbose "Calling Operation: [$func]"
$result = Invoke-Expression $expr
if (-not $result) {
return
}
return
Invoke-Expression $expr
}
@ -47,10 +43,14 @@ function InstallExe(
$processWait = $table["ProcessWait"]
$message = $table["message"]
$runAs = $table["runAs"]
$maxErrorLevel = $table["maxErrorLevel"]
if ($runAs -eq $null) {
$runAs = $true
}
if ($maxErrorLevel -eq $null) {
$maxErrorLevel = 0
}
if ($platform -ne $null) {
$runningOn = ((Get-WmiObject -class Win32_OperatingSystem).Caption).ToUpper()
$platform = ($platform.ToString()).ToUpper()
@ -65,10 +65,10 @@ function InstallExe(
}
if ($dir -eq $null) {
$ecode = DoProcess -command $cmd -param "$param" -requiresRunAs $runAs
DoProcess -command $cmd -param $param -requiresRunAs $runAs -maxErrorLevel $maxErrorLevel
}
else {
$ecode = DoProcess -command $cmd -param "$param" -requiresRunAs $runAs -workingDir "$dir"
DoProcess -command $cmd -param $param -requiresRunAs $runAs -workingDir $dir -maxErrorLevel $maxErrorLevel
}
if ( ($processWait -ne $null) -and ($Execute) -and ($false) ) {
@ -77,11 +77,44 @@ function InstallExe(
$pwait = Get-Process $processWait -ErrorAction SilentlyContinue
} while (-not ($pwait -eq $null))
}
}
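# Runs an application resolved via ResolveApplicationName (optionally searching the PATH);
# an exit code above maxErrorLevel (default 0) makes the step fail.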
function ExecuteApplication(
[Parameter(Mandatory = $true)][hashtable] $table)
{
FunctionIntro $table
if ($ecode -eq 0) { return $true }
return $false
$func = $table["Function"]
$appName = $table["AppName"]
$param= $table["Param"]
$appDir = $table["AppDir"]
$usePath = $table["UseEnvPath"]
$dir = $table["WorkDir"]
$maxErrorLevel = $table["maxErrorLevel"]
if ($appDir -eq $null) {
$appDir = ""
}
if ($usePath -eq $null) {
$usePath = $false
}
if ($maxErrorLevel -eq $null) {
$maxErrorLevel = 0
}
if ($Execute) {
$application = ResolveApplicationName $appName $appDir $usePath
if ($application.Length -eq 0) {
throw "ExecuteApplication: Couldn't resolve program [$appName] with location directory [$appDir] and usePath [$usePath]"
}
if ($dir -eq $null) {
DoProcess -command $application -param $param -maxErrorLevel $maxErrorLevel
}
else {
DoProcess -command $application -param $param -workingDir $dir -maxErrorLevel $maxErrorLevel
}
}
}
function InstallWheel(
@ -110,12 +143,12 @@ function InstallWheel(
$whl = $whlFile.FullName
$condaExe = Join-Path $BasePath 'Scripts\conda.exe'
$newPaths = Invoke-DosCommand $condaExe (Write-Output ..activate cmd.exe $EnvName)
$newPaths = Invoke-DosCommand $condaExe (Write-Output ..activate cmd.exe $EnvName) -maxErrorLevel 0
$oldPath = $env:PATH
$env:PATH = $newPaths + ';' + $env:PATH
Invoke-DosCommand pip (Write-Output install $whl)
Invoke-DosCommand pip (Write-Output install $whl) -maxErrorLevel 0
$env:PATH = $oldPath
return
}
@ -133,8 +166,6 @@ function MakeDirectory(
New-Item $path -type directory
}
}
return $true
}
function AddToPath(
@ -160,7 +191,6 @@ function AddToPath(
if ($pv.Contains("$ap")) {
Write-Verbose "AddToPath - path information already up-to-date"
return $true
}
Write-Host Adding [$dir] to environment [$env]
@ -173,7 +203,6 @@ function AddToPath(
if ($Execute) {
SetEnvVar -name $env -content "$pathvalue"
}
return $true
}
function ExtractAllFromZip(
@ -186,10 +215,10 @@ function ExtractAllFromZip(
$destinationFolder = $table["destinationFolder"]
if (-not (test-path -path $destinationFolder)) {
return $false
throw "$destinationFolder doesn't exist"
}
if (-not (test-path $zipFileName -PathType Leaf)) {
return $false
throw "$zipFileName doesn't exist"
}
if ($Execute) {
@ -199,7 +228,6 @@ function ExtractAllFromZip(
$destination.CopyHere($zipFile.Items())
}
return $true
}
function CreateBatch(
@ -237,7 +265,8 @@ function DoProcess(
[string] $command,
[string] $param,
[string] $workingDir = "",
[boolean] $requiresRunAs = $false)
[boolean] $requiresRunAs = $false,
[int] $maxErrorLevel)
{
$info = "start-process [$command] with [$param]"
@ -245,7 +274,7 @@ function DoProcess(
if (-not $Execute) {
Write-Host "** Running in DEMOMODE - setting Exit Code **: 0"
return 0
return
}
if ($workingDir.Length -eq 0) {
@ -266,15 +295,13 @@ function DoProcess(
}
}
$eCode = ($process.ExitCode)
if ($eCode -ne 0) {
Write-Host "$message ** Exit Code **:($eCode)"
} else {
Write-Verbose "$message ** Exit Code **:($eCode)"
if ($ecode -gt $maxErrorLevel) {
throw "Running 'start-process $commandString $param' failed with exit code [$ecode]"
}
return $eCode
return
}
@ -287,17 +314,15 @@ function SetEnvVar(
Write-Verbose "SetEnvVar [$name] with [$content]"
if ($Execute) {
# [environment]::SetEnvironmentVariable($name, $content, $location)
$commandString = "& { [environment]::SetEnvironmentVariable('"+$name+"', '"+$content+"', '"+$location+"') }"
RunPowershellCommand -command "$commandString" -elevated $true
RunPowershellCommand -command "$commandString" -elevated $true -maxErrorLevel 0
}
}
function RunPowershellCommand(
[string] $commandString,
[boolean] $elevated
[boolean] $elevated,
[int] $maxErrorLevel
)
{
$commandBytes = [System.Text.Encoding]::Unicode.GetBytes($commandString)
@ -310,8 +335,12 @@ function RunPowershellCommand(
else {
$process = Start-Process -PassThru -FilePath powershell.exe -ArgumentList $commandLine -wait
}
$eCode = ($process.ExitCode)
return ($ecode -eq 0)
if ($ecode -gt $maxErrorLevel) {
throw "Running 'powershell.exe $commandString' failed with exit code [$ecode]"
}
return
}
function Invoke-DosCommand {
@ -321,7 +350,7 @@ function Invoke-DosCommand {
[string] $Command,
[string[]] $Argument,
[string] [ValidateScript({ Test-Path -PathType Container $_ })] $WorkingDirectory,
[switch] $IgnoreNonZeroExitCode,
[int] $maxErrorLevel,
[switch] $SuppressOutput
)
Write-Verbose "Running '$Command $Argument'"
@ -336,7 +365,43 @@ function Invoke-DosCommand {
if ($WorkingDirectory) {
Pop-Location
}
if (($LASTEXITCODE -ne 0) -and -not $IgnoreNonZeroExitCode) {
if ($LASTEXITCODE -gt $maxErrorLevel) {
throw "Running '$Command $Argument' failed with exit code $LASTEXITCODE"
}
}
function ResolveApplicationName(
[string] $name,
[string] $directory,
[bool] $usePath)
{
$application = ""
if ($directory.Length -gt 0) {
$application = CallGetCommand (join-path $directory $name)
}
if ($application.Length -eq 0) {
if ($usePath) {
# We get here if we are supposed to check the PATH environment for a match because
# $directory was empty or the application couldn't be found in $directory.
$application = CallGetCommand $name
}
}
# $application will be an empty string if we couldn't resolve the name; otherwise we can execute $application
return $application
}
function CallGetCommand(
[string] $application)
{
try {
get-command $application -CommandType Application -ErrorAction Stop | Out-Null
return $application
}
catch {
# the application can't be found, so return empty string
return ""
}
}

View file

@ -82,6 +82,23 @@ function CheckPowershellVersion
return $false
}
function CheckOSVersion
{
$runningOn = (Get-WmiObject -class Win32_OperatingSystem).Caption
$isMatching = ($runningOn -match "^Microsoft Windows (8\.1|10|Server 2012 R2)")
if ($isMatching) {
return
}
Write-Host "
You are running this install script on [$runningOn].
The Microsoft Cognitive Toolkit is designed and tested on Windows 8.1, Windows 10,
and Windows Server 2012 R2.
"
return
}
function DisplayStart()
{
Write-Host $(DisplayStartMessage)
@ -90,6 +107,8 @@ function DisplayStart()
return $false
}
CheckOSVersion
if (-not $Execute) {
Write-Host $(DisplayWarningNoExecuteMessage)
}

View file

@ -4,6 +4,9 @@
#
$operations = @(
@{Name = "Scan System for installed programs"; ShortName = "SCANPROG"; Info = "Scan System for installed programs";
Verification = @( @{Function = "VerifyScanPrograms" } )
},
@{Name = "Verifying Installation contents"; ShortName = "INSTCONTENT"; Info = "Verifying Installation contents";
Verification = @( @{Function = "VerifyInstallationContent"; Path = "$cntkRootDir" } )
},
@ -45,8 +48,9 @@ $operations = @(
@{Function = "AddToPath"; Dir = "C:\Program Files\Git\cmd"; AtStart = $true; } )
},
@{Name = "Clone CNTK from Github"; ShortName = "CNTKCLONE"; Info = "Clone CNTK from Github repository";
Verification = @( @{Function = "VerifyDirectory"; Path = "$RepoLocation" } );
Verification = @( @{Function = "VerifyDirectory"; Path = $RepoLocation } );
Action = @( @{Function = "MakeDirectory"; Path = $repoDirectory },
@{Function = "InstallExe"; Command = "C:\Program Files\Git\bin\git.exe"; Param = "clone --branch $RepoTag --recursive https://github.com/Microsoft/CNTK/ $repoName"; WorkDir = "$repoDirectory"; Message="Cloning CNTK (branch $RepoTag) repository...." } )
@{Function = "ExecuteApplication"; AppName = "git.exe"; Param = "clone --branch $RepoTag --recursive https://github.com/Microsoft/CNTK/ $repoName"; AppDir = "C:\Program Files\Git\bin"; UseEnvPath = $true; WorkDir = $repoDirectory } )
}
)

View file

@ -58,6 +58,19 @@ function VerifyItem(
return $noInstallRequired
}
function VerifyScanPrograms(
[Parameter(Mandatory = $true)][hashtable] $table)
{
FunctionIntro $table
$func = $table["Function"]
$noInstallRequired = $true
# No actual work is performed here; this just initializes the script-local data structure
# holding the list of installed programs.
LoadWin32Product
return $noInstallRequired
}
function VerifyWin32ProductExists(
[Parameter(Mandatory = $true)][hashtable] $table)
{

View file

@ -60,7 +60,7 @@
Param(
[parameter(Mandatory=$false)] [string] $AnacondaBasePath = "C:\local\Anaconda3-4.1.1-Windows-x86_64",
[parameter(Mandatory=$false)] [switch] $Execute,
[parameter(Mandatory=$false)] [string] $RepoTag="v2.0.beta2.0",
[parameter(Mandatory=$false)] [string] $RepoTag="v2.0.beta3.0",
[parameter(Mandatory=$false)] [string] $RepoLocation="c:\repos\CNTK"
)

View file

@ -61,11 +61,6 @@
#define let const auto
#endif
// TODO: Temporary mechanism to enable memory sharing for
// node output value matrices. This will go away when the
// sharing is ready to be enabled by default
bool g_shareNodeValueMatrices = false;
using namespace std;
using namespace Microsoft::MSR;
using namespace Microsoft::MSR::CNTK;
@ -243,6 +238,9 @@ void DoCommands(const ConfigParameters& config, const shared_ptr<MPIWrapper>& mp
ProgressTracing::SetStepOffset(fullEpochsOffset); // this is the epoch number that SGD will log relative to
}
if (Globals::ShouldEnableHyperCompressMemory())
Matrix<ElemType>::UseCachedResizeOrNot(true);
// determine the action to perform, and do it
for (int j = 0; j < action.size(); j++)
{
@ -563,7 +561,10 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
mpi = MPIWrapper::GetInstance(true /*create*/);
}
g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false);
if (config(L"shareNodeValueMatrices", false))
Globals::EnableShareNodeValueMatrices();
if (config(L"hyperCompressMemory", false))
Globals::EnableHyperCompressMemory();
TracingGPUMemoryAllocator::SetTraceLevel(config(L"traceGPUMemoryAllocations", 0));
@ -644,7 +645,7 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
static void PrintBanner(int argc, wchar_t* argv[], const string& timestamp)
{
fprintf(stderr, "CNTK 2.0.beta2.0+ (");
fprintf(stderr, "CNTK 2.0.beta3.0+ (");
#ifdef _GIT_EXIST
fprintf(stderr, "%s %.6s, ", _BUILDBRANCH_, _BUILDSHA1_);
#endif
@ -705,7 +706,10 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
mpi = MPIWrapper::GetInstance(true /*create*/);
}
g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false);
if (config(L"shareNodeValueMatrices", false))
Globals::EnableShareNodeValueMatrices();
if (config(L"hyperCompressMemory", false))
Globals::EnableHyperCompressMemory();
TracingGPUMemoryAllocator::SetTraceLevel(config(L"traceGPUMemoryAllocations", 0));
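For reference, both switches are read as plain top-level booleans from the CNTK config (the ResNet configs above already set one of them), so enabling both is just two lines in a config file:

shareNodeValueMatrices = true
hyperCompressMemory = true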

View file

@ -2399,6 +2399,11 @@ namespace CNTK
///
CNTK_API static FunctionPtr LoadModel(DataType dataType, const std::wstring& modelFile, const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice());
///
/// Prints the entire graph underlying this function to stderr
///
CNTK_API void PrintGraph() const;
private:
template <typename VariableType, typename FilterFunction>
@ -2694,6 +2699,16 @@ namespace CNTK
return TransposeTimes(leftOperand, rightOperand, /*outputRank =*/ 1, name);
}
///
/// Create an instance of the CNTK built-in operation to compute binary cross-entropy for specified input operands.
///
CNTK_API FunctionPtr BinaryCrossEntropy(const Variable& prediction, const Variable& targets, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in operation to compute weighted binary cross-entropy for specified input operands.
///
CNTK_API FunctionPtr WeightedBinaryCrossEntropy(const Variable& prediction, const Variable& targets, const Variable& weights, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in operation to compute squared-error for specified input operands.
///
@ -2899,6 +2914,13 @@ namespace CNTK
CNTK_API FunctionPtr IsFirst(const Variable& operand, const std::wstring& name = L"");
CNTK_API FunctionPtr IsLast(const Variable& operand, const std::wstring& name = L"");
CNTK_API FunctionPtr Slice(const Variable& operand, int beginIndex, int endIndex, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in sum reduction operation on the specified tensor input operand along the operand's lone dynamic sequence axis
///
CNTK_API FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name = L"");
CNTK_API FunctionPtr First(const Variable& operand, const std::wstring& name = L"");
CNTK_API FunctionPtr Last(const Variable& operand, const std::wstring& name = L"");

View file

@ -206,9 +206,11 @@ namespace CNTK
CNTK_API FunctionPtr GatherPacked(const Variable& operand, const Variable& packedIndex, const std::wstring& name = L"");
CNTK_API FunctionPtr ScatterPacked(const Variable& operand, const Variable& packedIndex, const Variable& condition, const std::wstring& name = L"");
CNTK_API FunctionPtr ZeroesWithDynamicAxesLike(const Variable& operand);
CNTK_API FunctionPtr Where(const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name = L"");
CNTK_API FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name = L"");
CNTK_API FunctionPtr Scatter(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name = L"");
CNTK_API FunctionPtr Where(const Variable& condition, const std::pair<size_t, int>& newDerivedSequenceAxisScalingAndAdditiveFactor, const std::wstring& name = L"");
CNTK_API FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::wstring& name = L"");
CNTK_API FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::pair<size_t, int>& newDerivedSequenceAxisScalingAndAdditiveFactor, const std::wstring& name = L"");
CNTK_API FunctionPtr Scatter(const Variable& operand, const Variable& condition, const std::wstring& name = L"");
CNTK_API FunctionPtr Scatter(const Variable& operand, const Variable& condition, const std::pair<size_t, int>& newDerivedSequenceAxisScalingAndAdditiveFactor, const std::wstring& name = L"");
CNTK_API FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name = L"");
CNTK_API FunctionPtr ReduceElements(const Variable& operand, const std::wstring& reductionOpName, const Axis& axis, const std::wstring& name = L"");
@ -236,7 +238,8 @@ namespace CNTK
CNTK_API void SetFixedRandomSeed(unsigned long fixedRandomSeed);
CNTK_API void SetForwardValuesSharing(bool enableSharing);
CNTK_API void EnableForwardValuesSharing();
CNTK_API void EnableHyperMemoryCompress();
CNTK_API bool AreEquivalent(const ::CNTK::FunctionPtr& f1, const ::CNTK::FunctionPtr& f2);
CNTK_API bool AreEquivalent(const ::CNTK::Variable& v1, const ::CNTK::Variable& v2, bool allowParameterAndConstantsEquivalence = false);

View file

@ -232,6 +232,8 @@ namespace CNTK
primitiveFunctionConfigParameters[PrimitiveFunction::AttributeNameOffset] = (size_t)node->As<FutureValueNode<ElementType>>()->TimeStep();
opType = PrimitiveOpType::FutureValue;
}
else if (node->OperationName() == OperationNameOf(LogisticNode))
opType = PrimitiveOpType::Logistic;
else if (node->OperationName() == OperationNameOf(SquareErrorNode))
opType = PrimitiveOpType::SquaredError;
else if (node->OperationName() == OperationNameOf(CrossEntropyWithSoftmaxNode))

View file

@ -39,10 +39,6 @@
<PropertyGroup Condition="$(DebugBuild)" Label="Configuration">
<UseDebugLibraries>true</UseDebugLibraries>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)" Label="Configuration">
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
@ -51,11 +47,10 @@
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="$(DebugBuild)">
<LinkIncremental>true</LinkIncremental>
<TargetName>CNTKLibrary-$(LibraryVersion)</TargetName>
<LinkIncremental>true</LinkIncremental>
<TargetName>CNTKLibrary-$(LibraryVersion)</TargetName>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)">
<LinkIncremental>false</LinkIncremental>
<TargetName>CNTKLibrary-$(LibraryVersion)</TargetName>
</PropertyGroup>
<PropertyGroup>
@ -100,9 +95,6 @@
<ClCompile>
<WarningLevel>Level4</WarningLevel>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>CNTKV2LIBRARYDLL;WIN32;NDEBUG;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<OpenMPSupport>false</OpenMPSupport>
@ -114,10 +106,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>ComputationNetworkLib.lib; Math.lib; Common.lib; ReaderLib.lib; kernel32.lib; user32.lib; shell32.lib; SequenceTrainingLib.lib;$(ProtobufLib);%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<DelayLoadDLLs>Math.dll; nvml.dll; $(CudaRuntimeDll)</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
@ -169,7 +158,6 @@
</PrecompiledHeader>
</ClCompile>
<ClCompile Include="Function.cpp" />
<ClCompile Include="Globals.cpp" />
<ClCompile Include="Learner.cpp" />
<ClCompile Include="MinibatchSource.cpp" />
<ClCompile Include="NDArrayView.cpp" />

View file

@ -21,7 +21,6 @@
</ClCompile>
<ClCompile Include="DistributedCommunicator.cpp" />
<ClCompile Include="DataParallelDistributedTrainer.cpp" />
<ClCompile Include="Globals.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="stdafx.h" />

View file

@ -60,9 +60,14 @@ namespace CNTK
return s_disableAutomaticUnpackingOfPackedValues.load();
}
void SetForwardValuesSharing(bool enableSharing)
void EnableForwardValuesSharing()
{
g_shareNodeValueMatrices = enableSharing;
Microsoft::MSR::CNTK::Globals::EnableShareNodeValueMatrices();
}
void EnableHyperMemoryCompress()
{
Microsoft::MSR::CNTK::Globals::EnableHyperCompressMemory();
}
bool AreEquivalent(const Variable& var1, const Variable& var2, bool allowParameterAndConstantsEquivalence)

View file

@ -544,6 +544,12 @@ namespace CNTK
return CompositeFunction::Deserialize(modelDictionary, device);
}
void Function::PrintGraph() const
{
CompositeFunction::Traverse(RootFunction(), [](const FunctionPtr& function) {
});
}
// Names for the reduction operations as used by the CNTK ReduceElementsNode
/*static*/ const std::wstring PrimitiveFunction::InternalSumReductionOpName = L"Sum";
/*static*/ const std::wstring PrimitiveFunction::InternalLogSumReductionOpName = L"LogSum";
@ -580,6 +586,8 @@ namespace CNTK
/*static*/ const std::wstring PrimitiveFunction::AttributeNameEpsilon = L"epsilon";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameUseCuDNNEngine = L"useCuDNNEngine";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameNewDynamicAxes = L"newDynamicAxes";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameNewSequenceAxisLengthScalingFactor = L"newSequenceAxisLengthScalingFactor";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameNewSequenceAxisLengthAdditiveFactor = L"newSequenceAxisLengthAdditiveFactor";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameBeginIndex = L"beginIndex";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameEndIndex = L"endIndex";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameReductionOpName = L"reductionOpName";
@ -626,12 +634,47 @@ namespace CNTK
if (outputDataType == DataType::Unknown)
outputDataType = firstKnownInputDataType;
// We currently require that the inputs' dynamic axes if any match
// We currently require that the inputs' dynamic axes, if any, match
std::vector<Axis> outputDynamicAxes;
if ((op == PrimitiveOpType::SumAll) || (op == PrimitiveOpType::SquaredError) || (op == PrimitiveOpType::CrossEntropyWithSoftmax) || (op == PrimitiveOpType::ClassificationError))
if ((op == PrimitiveOpType::SumAll) ||
(op == PrimitiveOpType::SquaredError) ||
(op == PrimitiveOpType::CrossEntropyWithSoftmax) ||
(op == PrimitiveOpType::ClassificationError) ||
(op == PrimitiveOpType::Logistic))
{
outputDynamicAxes = std::vector<Axis>({});
}
else if (op == PrimitiveOpType::Where)
outputDynamicAxes = AsVector<Axis>(functionConfig[PrimitiveFunction::AttributeNameNewDynamicAxes].Value<std::vector<DictionaryValue>>());
{
if (functionConfig.Contains(PrimitiveFunction::AttributeNameNewDynamicAxes))
outputDynamicAxes = AsVector<Axis>(functionConfig[PrimitiveFunction::AttributeNameNewDynamicAxes].Value<std::vector<DictionaryValue>>());
else
{
if (inputs[0].DynamicAxes() == Axis::UnknownDynamicAxes())
outputDynamicAxes = Axis::UnknownDynamicAxes();
else
{
if (functionConfig.Contains(PrimitiveFunction::AttributeNameNewSequenceAxisLengthScalingFactor) &&
functionConfig.Contains(PrimitiveFunction::AttributeNameNewSequenceAxisLengthAdditiveFactor))
{
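// (Assumption, inferred from the Slice code in this file: the derived sequence axis
// length is newLen = scalingFactor * oldLen + additiveFactor, so a fixed-length slice
// passes scaling = 0 and additive = sliceLength.)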
size_t newSequenceAxisLengthScalingFactor = functionConfig[PrimitiveFunction::AttributeNameNewSequenceAxisLengthScalingFactor].Value<size_t>();
int newSequenceAxisLengthAdditiveFactor = functionConfig[PrimitiveFunction::AttributeNameNewSequenceAxisLengthAdditiveFactor].Value<int>();
auto derivedDynamicAxes = GetDerivedDynamicAxes(inputs[0].DynamicAxes()[0], newSequenceAxisLengthScalingFactor, newSequenceAxisLengthAdditiveFactor);
std::copy(derivedDynamicAxes.begin(), derivedDynamicAxes.end(), std::back_inserter(outputDynamicAxes));
}
else
{
outputDynamicAxes.push_back(Axis::NewUniqueDynamicAxis(L"whereNodeDynamicAxis"));
}
for (size_t i = 1; i < inputs[0].DynamicAxes().size(); ++i)
outputDynamicAxes.push_back(inputs[0].DynamicAxes()[i]);
functionConfig[PrimitiveFunction::AttributeNameNewDynamicAxes] = AsDictionaryValueVector(outputDynamicAxes);
}
}
}
else if (op == PrimitiveOpType::ScatterPacked)
outputDynamicAxes = inputs[2].DynamicAxes();
else if ((op == PrimitiveOpType::PackedIndex) || (op == PrimitiveOpType::GatherPacked))
@ -852,9 +895,9 @@ namespace CNTK
case PrimitiveOpType::Convolution:
{
assert(inputs.size() == 2);
auto& strides = functionConfig[PrimitiveFunction::AttributeNameStrides].Value<NDShape>();
auto& lowerPad = functionConfig[PrimitiveFunction::AttributeNameLowerPad].Value<NDShape>();
auto& upperPad = functionConfig[PrimitiveFunction::AttributeNameUpperPad].Value<NDShape>();
auto& strides = functionConfig[PrimitiveFunction::AttributeNameStrides].Value<NDShape>();
auto& lowerPad = functionConfig[PrimitiveFunction::AttributeNameLowerPad].Value<NDShape>();
auto& upperPad = functionConfig[PrimitiveFunction::AttributeNameUpperPad].Value<NDShape>();
auto sharing = AsVector<bool>(functionConfig[PrimitiveFunction::AttributeNameSharing].Value<std::vector<DictionaryValue>>());
auto autoPadding = AsVector<bool>(functionConfig[PrimitiveFunction::AttributeNameAutoPadding].Value<std::vector<DictionaryValue>>());
bool transpose = functionConfig[PrimitiveFunction::AttributeNameTranspose].Value<bool>();
@ -863,23 +906,24 @@ namespace CNTK
NDShape outputMapCount, kernelShape;
std::tie(outputMapCount, kernelShape) = GetConvolutionOutputMapCountAndKernelShape(inputs[0].Shape(), inputs[1].Shape());
auto originalKernelShape = kernelShape;
outputShape = ConvolutionOpOutputShape(op, inputs[1].Shape(), kernelShape, outputMapCount, strides, sharing, autoPadding, lowerPad, upperPad, transpose, inferDimensions);
if (originalKernelShape != kernelShape)
{
for (size_t i = 0; i < kernelShape.Rank(); ++i)
inputs[0].m_dataFields->m_shape[i] = kernelShape[i];
}
auto originalKernelShape = kernelShape;
outputShape = ConvolutionOpOutputShape(op, inputs[1].Shape(), kernelShape, outputMapCount, strides, sharing, autoPadding, lowerPad, upperPad, transpose, inferDimensions);
if (originalKernelShape != kernelShape)
{
for (size_t i = 0; i < kernelShape.Rank(); ++i)
inputs[0].m_dataFields->m_shape[i] = kernelShape[i];
}
functionConfig[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing);
functionConfig[PrimitiveFunction::AttributeNameAutoPadding] = AsDictionaryValueVector(autoPadding);
functionConfig[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing);
functionConfig[PrimitiveFunction::AttributeNameAutoPadding] = AsDictionaryValueVector(autoPadding);
break;
}
case PrimitiveOpType::Logistic:
case PrimitiveOpType::SquaredError:
case PrimitiveOpType::CrossEntropyWithSoftmax:
case PrimitiveOpType::ClassificationError:
{
if (op == PrimitiveOpType::ClassificationError)
if ((op == PrimitiveOpType::ClassificationError) || (op == PrimitiveOpType::Logistic))
assert(inputs.size() >= 2);
else
assert(inputs.size() == 2);
@ -892,9 +936,9 @@ namespace CNTK
if (predictionShape != labelsShape)
RuntimeError("Prediction output operand's shape %S is incompatible with label operand's shape %S for the %S operation", AsStringForErrorReporting(predictionShape).c_str(), AsStringForErrorReporting(labelsShape).c_str(), PrimitiveOpTypeName(op).c_str());
std::vector<int> reductionAxes;
for (int i = 0; i < (int)inputs[0].Shape().Rank(); ++i)
reductionAxes.push_back(i);
std::vector<int> reductionAxes;
for (int i = 0; i < (int)inputs[0].Shape().Rank(); ++i)
reductionAxes.push_back(i);
outputShape = ReductionOpOutputShape(op, predictionShape, reductionAxes, /*preserveReductionAxes =*/ false);
break;
@ -1098,7 +1142,7 @@ namespace CNTK
std::vector<FunctionPtr> topoSortedPrimitiveFunctions;
std::vector<Variable> inputs;
std::unordered_set<std::wstring> inputUids;
Traverse([&visitedFunctions, &inputs, &topoSortedPrimitiveFunctions, &inputUids](const FunctionPtr& function) {
Traverse(RootFunction(), [&visitedFunctions, &inputs, &topoSortedPrimitiveFunctions, &inputUids](const FunctionPtr& function) {
std::vector<Variable> functionInputs = function->Inputs();
for (const auto& input : functionInputs)
{
@ -1576,6 +1620,9 @@ namespace CNTK
computationNodePtr = New<ConvolutionNode<ElementType>>(network->GetDeviceId(), internalNodeName, AsTensorShape(kernelShape), AsTensorShape(outputMapCount), AsTensorShape(strides), sharing, autoPadding, AsTensorShape(lowerPad), AsTensorShape(upperPad), transpose, ImageLayoutKind::CHW, maxTempMemSizeInSamples);
break;
}
case PrimitiveOpType::Logistic:
computationNodePtr = New<LogisticNode<ElementType>>(network->GetDeviceId(), internalNodeName);
break;
case PrimitiveOpType::SquaredError:
computationNodePtr = New<SquareErrorNode<ElementType>>(network->GetDeviceId(), internalNodeName);
break;
@ -2585,7 +2632,7 @@ namespace CNTK
FunctionPtr Round(const Variable& operand, const std::wstring& name)
{
return Floor(Plus(operand, Constant::Scalar(operand.GetDataType(), 0.5)), name);
return Floor(Plus(operand, Constant::Scalar(0.5f)), name);
}
FunctionPtr Floor(const Variable& operand, const std::wstring& name)
@ -2633,11 +2680,9 @@ namespace CNTK
return TransposeAxes(operand, Axis(0), Axis(1), name);
}
FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name)
{
if (axis == Axis::DefaultBatchAxis())
LogicError("Slice is currently unsupported along the batch axis");
if (axis.IsStaticAxis())
{
if ((endIndex - beginIndex) <= 0)
@ -2646,46 +2691,10 @@ namespace CNTK
return Internal::Slice(operand, axis, beginIndex, endIndex, name);
}
if ((beginIndex == 0) && (endIndex == 0))
return operand;
if (axis == Axis::DefaultBatchAxis())
LogicError("Slice is currently unsupported along the batch axis");
auto operandAxes = operand.DynamicAxes();
auto findAxis = std::find(operandAxes.begin(), operandAxes.end(), axis);
if (findAxis == operandAxes.end())
InvalidArgument("The specified dynamic axis named %S does not match any of the dynamic axes of the operand", axis.Name().c_str());
auto beginFlagsLambda = [beginIndex, operand]() {
return (beginIndex > 0) ? Minus(Constant::Scalar(operand.GetDataType(), 1.0), Internal::IsWithin(operand, beginIndex)) : Internal::IsWithin(operand, beginIndex);
};
auto endFlagsLambda = [endIndex, operand]() {
return (endIndex > 0) ? Internal::IsWithin(operand, endIndex) : Minus(Constant::Scalar(operand.GetDataType(), 1.0), Internal::IsWithin(operand, endIndex));
};
FunctionPtr flags;
if (beginIndex == 0)
flags = endFlagsLambda();
else if (endIndex == 0)
flags = beginFlagsLambda();
else
flags = ElementTimes(beginFlagsLambda(), endFlagsLambda());
// Since we are slicing along a dynamic axis, the output variable's dynamic axes will be different than the operand
std::vector<Axis> newDynamicAxes;
for (auto operandAxis : operandAxes)
{
if (operandAxis == axis)
{
int sliceLength = (endIndex - beginIndex);
size_t multiplicativeFactor = (sliceLength > 0) ? 0 : 1;
auto derivedDynamicAxes = GetDerivedDynamicAxes(operandAxis, multiplicativeFactor, sliceLength);
std::copy(derivedDynamicAxes.begin(), derivedDynamicAxes.end(), std::back_inserter(newDynamicAxes));
}
else
newDynamicAxes.push_back(operandAxis);
}
return Internal::Gather(operand, flags, newDynamicAxes, name);
LogicError("CNTK::Slice: Invalid axis argument provided. To slice a sequence along its ordered dynamic axis use Sequence::Slice.");
}
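// After this change CNTK::Slice accepts only static axes; slicing along the ordered
// dynamic (sequence) axis moves to Sequence::Slice (declared alongside Sequence::IsFirst /
// Sequence::IsLast in the header diff above). A hedged usage sketch, assuming a Variable x
// with a static axis and the usual dynamic axes:
//   FunctionPtr s1 = Slice(x, Axis(0), 0, 3);  // static axis: elements [0, 3)
//   FunctionPtr s2 = Sequence::Slice(x, 1, 0); // sequence axis: drop the first step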
FunctionPtr RandomSample(const Variable& operand, size_t numSamples, bool allowDuplicates, const std::wstring& name)
@ -2721,6 +2730,7 @@ namespace CNTK
return UnaryOp(PrimitiveOpType::Reshape, operand, std::move(additionalProperties), name);
}
FunctionPtr BinaryOp(PrimitiveOpType op, const Variable& leftOperand, const Variable& rightOperand, Dictionary&& opConfig, const std::wstring& name)
{
std::vector<Variable> operands = { leftOperand, rightOperand };
@ -2792,6 +2802,18 @@ namespace CNTK
return BinaryOp(PrimitiveOpType::TransposeTimes, leftOperand, rightOperand, std::move(additionalProperties), name);
}
FunctionPtr BinaryCrossEntropy(const Variable& prediction, const Variable& targets, const std::wstring& name)
{
std::vector<Variable> operands = { prediction, targets };
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Logistic, operands, Dictionary(), name), name);
}
FunctionPtr WeightedBinaryCrossEntropy(const Variable& prediction, const Variable& targets, const Variable& weights, const std::wstring& name)
{
std::vector<Variable> operands = { prediction, targets, weights };
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Logistic, operands, Dictionary(), name), name);
}
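// Both entry points lower to the same Logistic primitive; the optional third operand carries
// the per-sample weights. Illustrative use (variable names hypothetical):
//   auto loss         = BinaryCrossEntropy(sigmoidOutput, labels, L"bce");
//   auto weightedLoss = WeightedBinaryCrossEntropy(sigmoidOutput, labels, sampleWeights, L"wbce");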
FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name)
{
auto difference = Minus(prediction, targets);
@ -2815,14 +2837,14 @@ namespace CNTK
if (topN == 1)
{
if (axis == Axis(0))
return Minus(Constant::Scalar(prediction.GetDataType(), 1.0), TransposeTimes(labels, Hardmax(prediction)), name);
return Minus(Constant::Scalar(1.0f), TransposeTimes(labels, Hardmax(prediction)), name);
else
{
auto axMax = ReduceMax(prediction, axis);
auto pred = Equal(prediction, axMax);
auto wrongPred = NotEqual(labels, pred);
auto axErr = ReduceSum(wrongPred, axis);
auto capErr = GreaterEqual(axErr, Constant::Scalar(prediction.GetDataType(), 1.0));
auto capErr = GreaterEqual(axErr, Constant::Scalar(1.0f));
return ReduceMean(capErr, Axis::AllStaticAxes(), name);
}
}
@ -2831,7 +2853,7 @@ namespace CNTK
if (axis != Axis(0))
LogicError("ClassificationError along a specific axis does not support topN!");
std::vector<Variable> operands = { prediction, labels, Constant::Scalar(prediction.GetDataType(), (double)topN) };
std::vector<Variable> operands = { prediction, labels, Constant::Scalar((float)topN) };
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::ClassificationError, operands, Dictionary(), name), name);
}
}
@ -3011,75 +3033,113 @@ namespace CNTK
{
// TODO: This is a temporary and expensive hack until we have a real alias implementation
// that does not waste memory and compute cycles
return Plus(operand, Constant::Scalar(operand.GetDataType(), 0), name);
return Plus(operand, Constant::Scalar(0.0f), name);
}
namespace Sequence
{
void VerifyIsSequence(const Variable& operand)
{
// The operand must have at least one dynamic axis and its first dynamic axis must be ordered
if (operand.DynamicAxes().empty() || !operand.DynamicAxes()[0].IsOrdered())
// The operand must have at least one dynamic axis
if (operand.DynamicAxes().empty())
InvalidArgument("A sequence function can only be applied on operands with at least one dynamic axis and whose first dynamic axis is ordered");
}
FunctionPtr IsFirst(const Variable& operand, const std::wstring& name)
{
VerifyIsSequence(operand);
return Internal::IsWithin(operand, 1, name);
}
FunctionPtr IsLast(const Variable& operand, const std::wstring& name)
{
VerifyIsSequence(operand);
return Internal::IsWithin(operand, -1, name);
}
FunctionPtr Slice(const Variable& operand, int beginIndex, int endIndex, const std::wstring& name)
{
VerifyIsSequence(operand);
if ((beginIndex == 0) && (endIndex == 0))
return operand;
auto beginFlagsLambda = [beginIndex, operand]() {
return (beginIndex > 0) ? Minus(Constant::Scalar(1.0f), Internal::IsWithin(operand, beginIndex)) : Internal::IsWithin(operand, beginIndex);
};
auto endFlagsLambda = [endIndex, operand]() {
return (endIndex > 0) ? Internal::IsWithin(operand, endIndex) : Minus(Constant::Scalar(1.0f), Internal::IsWithin(operand, endIndex));
};
FunctionPtr flags;
if (beginIndex == 0)
flags = endFlagsLambda();
else if (endIndex == 0)
flags = beginFlagsLambda();
else
flags = ElementTimes(beginFlagsLambda(), endFlagsLambda());
int sliceLength = (endIndex - beginIndex);
size_t multiplicativeFactor = (sliceLength > 0) ? 0 : 1;
return Internal::Gather(operand, flags, { multiplicativeFactor, sliceLength }, name);
}
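// Worked example (illustration, not part of the commit): Sequence::Slice(x, 1, -1) keeps the
// interior elements [1, T-1) of each length-T sequence:
//   beginFlags = 1 - IsWithin(x, 1)   -> 0 at the first element, 1 afterwards
//   endFlags   = 1 - IsWithin(x, -1)  -> 1 everywhere except the last element
// flags = beginFlags .* endFlags selects exactly the interior elements, which Gather packs into
// a new sequence (here sliceLength = -2, so the factor pair is {1, -2}; see the interpretation
// of that pair further below).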
FunctionPtr First(const Variable& operand, const std::wstring& name)
{
VerifyIsSequence(operand);
return Slice(operand, operand.DynamicAxes()[0], 0, 1, name);
return Sequence::Slice(operand, 0, 1, name);
}
FunctionPtr Last(const Variable& operand, const std::wstring& name)
{
VerifyIsSequence(operand);
return Slice(operand, operand.DynamicAxes()[0], -1, 0, name);
}
std::vector<Axis> WhereOpDynamicAxes(const Variable& operand)
{
VerifyIsSequence(operand);
std::vector<Axis> newDynamicAxes = { Axis::NewUniqueDynamicAxis(L"whereNodeDynamicAxis") };
for (size_t i = 1; i < operand.DynamicAxes().size(); ++i)
newDynamicAxes.push_back(operand.DynamicAxes()[i]);
return newDynamicAxes;
return Sequence::Slice(operand, -1, 0, name);
}
FunctionPtr Where(const Variable& condition, const std::wstring& name)
{
return Internal::Where(condition, WhereOpDynamicAxes(condition), name);
return UnaryOp(PrimitiveOpType::Where, condition, Dictionary(), name);
}
FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::wstring& name)
{
return Internal::Gather(operand, condition, WhereOpDynamicAxes(condition), name);
return Internal::Gather(operand, condition, name);
}
FunctionPtr Scatter(const Variable& operand, const Variable& condition, const std::wstring& name)
{
return Internal::Scatter(operand, condition, WhereOpDynamicAxes(condition), name);
return Internal::Scatter(operand, condition, name);
}
FunctionPtr BroadcastAs(const Variable& operand, const Variable& broadcastAs, const std::wstring& name)
{
auto dataPadded = Internal::Scatter(operand, Sequence::IsFirst(broadcastAs), operand.DynamicAxes());
auto dataPadded = Internal::Scatter(operand, Sequence::IsFirst(broadcastAs), std::make_pair<size_t, int>(0, 1));
auto placeHolderOutput = PlaceholderVariable(operand.Shape(), broadcastAs.DynamicAxes());
auto output = ElementSelect(Sequence::IsFirst(broadcastAs), dataPadded, PastValue(placeHolderOutput), name);
return output->ReplacePlaceholders({ { placeHolderOutput, output } });
}
FunctionPtr ReduceElements(const Variable& operand, const std::wstring& reductionOpName, const std::wstring& name)
{
using namespace std::placeholders;
std::function<FunctionPtr(const Variable& leftOperand, const Variable& rightOperand)> reductionFunctor;
if (reductionOpName == PrimitiveFunction::InternalSumReductionOpName)
reductionFunctor = std::bind(Plus, _1, _2, L"");
else
LogicError("%S reduction along dynamic axis is currently unsupported", reductionOpName.c_str());
// Reduction over a dynamic axis is currently implemented using a recurrence
auto cumulativeSumFunctionPlaceholder = PlaceholderVariable(operand.Shape());
auto prevAccumulatedValuesFunction = PastValue(cumulativeSumFunctionPlaceholder);
auto cumulativeSumFunction = reductionFunctor(prevAccumulatedValuesFunction, operand);
cumulativeSumFunction->ReplacePlaceholders({ { cumulativeSumFunctionPlaceholder, cumulativeSumFunction } });
return Sequence::Slice(cumulativeSumFunction, -1, 0, name);
}
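// Shape of the recurrence (illustration): for a sequence x_1..x_T,
//   s_t = s_(t-1) + x_t, with PastValue supplying s_(t-1) starting from 0,
// so s_T is the sum over the whole sequence, and Sequence::Slice(s, -1, 0) selects s_T.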
FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name)
{
return ReduceElements(operand, PrimitiveFunction::InternalSumReductionOpName, name);
}
}
namespace Internal
@ -3092,9 +3152,9 @@ namespace CNTK
InvalidArgument("CNTK::Sequence::IsWithin: The offset must be positive");
if (offset > 0)
return PastValue(Internal::ZeroesWithDynamicAxesLike(operand), Constant::Scalar(operand.GetDataType(), 1.0), offset, name);
return PastValue(Internal::ZeroesWithDynamicAxesLike(operand), Constant::Scalar(1.0f), offset, name);
else
return FutureValue(Internal::ZeroesWithDynamicAxesLike(operand), Constant::Scalar(operand.GetDataType(), 1.0), -offset, name);
return FutureValue(Internal::ZeroesWithDynamicAxesLike(operand), Constant::Scalar(1.0f), -offset, name);
}
FunctionPtr PackedIndex(const Variable& operand, const Variable& index, const std::wstring& name)
@ -3131,21 +3191,32 @@ namespace CNTK
}
}
FunctionPtr Where(const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name)
FunctionPtr Where(const Variable& condition, const std::pair<size_t, int>& newDerivedSequenceAxisScalingAndAdditiveFactor, const std::wstring& name)
{
auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameNewDynamicAxes] = AsDictionaryValueVector(newDynamicAxes);
additionalProperties[PrimitiveFunction::AttributeNameNewSequenceAxisLengthScalingFactor] = newDerivedSequenceAxisScalingAndAdditiveFactor.first;
additionalProperties[PrimitiveFunction::AttributeNameNewSequenceAxisLengthAdditiveFactor] = newDerivedSequenceAxisScalingAndAdditiveFactor.second;
return UnaryOp(PrimitiveOpType::Where, condition, std::move(additionalProperties), name);
}
FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name)
FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::wstring& name)
{
return Internal::GatherPacked(operand, Internal::PackedIndex(/*layout of*/ operand, Where(condition, newDynamicAxes)), name);
return Internal::GatherPacked(operand, Internal::PackedIndex(/*layout of*/ operand, Sequence::Where(condition)), name);
}
FunctionPtr Scatter(const Variable& operand, const Variable& condition, const std::vector<Axis>& whereNodeDynamicAxes, const std::wstring& name)
FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::pair<size_t, int>& newDerivedSequenceAxisScalingAndAdditiveFactor, const std::wstring& name)
{
return Internal::ScatterPacked(operand, Internal::PackedIndex(/*layout of*/ condition, Where(condition, whereNodeDynamicAxes)), /*layout of*/ condition, name);
return Internal::GatherPacked(operand, Internal::PackedIndex(/*layout of*/ operand, Where(condition, newDerivedSequenceAxisScalingAndAdditiveFactor)), name);
}
FunctionPtr Scatter(const Variable& operand, const Variable& condition, const std::wstring& name)
{
return Internal::ScatterPacked(operand, Internal::PackedIndex(/*layout of*/ condition, Sequence::Where(condition)), /*layout of*/ condition, name);
}
FunctionPtr Scatter(const Variable& operand, const Variable& condition, const std::pair<size_t, int>& newDerivedSequenceAxisScalingAndAdditiveFactor, const std::wstring& name)
{
return Internal::ScatterPacked(operand, Internal::PackedIndex(/*layout of*/ condition, Where(condition, newDerivedSequenceAxisScalingAndAdditiveFactor)), /*layout of*/ condition, name);
}
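// Interpretation of the (scaling, additive) factor pair, inferred from the call sites above:
// the derived sequence axis has length  scaling * inputLength + additive.  Sequence::Slice
// passes {0, sliceLength} for fixed-length slices and {1, sliceLength} for slices relative to
// the sequence end; BroadcastAs passes {0, 1} (every output sequence has length 1).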
FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name)
@ -3160,8 +3231,6 @@ namespace CNTK
FunctionPtr ReduceElements(const Variable& operand, const std::wstring& reductionOpName, const Axis& axis, const std::wstring& name)
{
using namespace std::placeholders;
if (axis.IsStaticAxis() || (axis == Axis::AllStaticAxes()))
{
auto additionalProperties = Dictionary();
@ -3173,20 +3242,7 @@ namespace CNTK
if (axis == Axis::DefaultBatchAxis())
LogicError("Reduction is currently unsupported along the batch axis");
if (reductionOpName != PrimitiveFunction::InternalSumReductionOpName)
LogicError("%S reduction along dynamic axis is currently unsupported", reductionOpName.c_str());
std::function<FunctionPtr(const Variable& leftOperand, const Variable& rightOperand)> reductionFunctor;
if (reductionOpName == PrimitiveFunction::InternalSumReductionOpName)
reductionFunctor = std::bind(Plus, _1, _2, L"");
// We are reducing over a dynamic axis which is currently implemented using recurrence
auto cumulativeSumFunctionPlaceholder = PlaceholderVariable(operand.Shape());
auto prevAccumulatedValuesFunction = PastValue(cumulativeSumFunctionPlaceholder);
auto cumulativeSumFunction = reductionFunctor(prevAccumulatedValuesFunction, operand);
cumulativeSumFunction->ReplacePlaceholders({ { cumulativeSumFunctionPlaceholder, cumulativeSumFunction } });
return CNTK::Slice(cumulativeSumFunction, axis, -1, 0, name);
LogicError("CNTK::ReduceElements: Invalid axis argument provided. To reduce a sequence along its ordered dynamic axis use Sequence::ReduceElements.");
}
}
}

View file

@ -65,7 +65,7 @@ namespace CNTK
{PrimitiveOpType::Times, L"Times"},
{PrimitiveOpType::TransposeTimes, L"TransposeTimes"},
{PrimitiveOpType::Convolution, L"Convolution"},
{PrimitiveOpType::SquaredError, L"SquaredError"},
{ PrimitiveOpType::SquaredError, L"SquaredError" },
{PrimitiveOpType::CrossEntropyWithSoftmax, L"CrossEntropyWithSoftmax"},
{PrimitiveOpType::ClassificationError, L"ClassificationError"},
{PrimitiveOpType::PastValue, L"PastValue"},
@ -79,6 +79,7 @@ namespace CNTK
{PrimitiveOpType::RandomSample, L"RandomSample"},
{PrimitiveOpType::RandomSampleInclusionFrequency, L"RandomSampleInclusionFrequency"},
{PrimitiveOpType::ROIPooling, L"ROIPooling"},
{PrimitiveOpType::Logistic, L"Logistic"},
};
inline const std::wstring& PrimitiveOpTypeName(PrimitiveOpType opType)
@ -103,7 +104,15 @@ namespace CNTK
if (numFunctionInputs > 2)
indexMap.insert({2, 2});
}
else if ((op == PrimitiveOpType::CrossEntropyWithSoftmax) || (op == PrimitiveOpType::GatherPacked))
else if (op == PrimitiveOpType::Logistic)
{
indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 } });
if (numFunctionInputs > 2)
indexMap.insert({ 2, 2 });
}
else if (op == PrimitiveOpType::CrossEntropyWithSoftmax)
indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 } });
else if (op == PrimitiveOpType::GatherPacked)
indexMap = std::unordered_map<size_t, size_t>({ { 0, 1 }, { 1, 0 } });
else if (op == PrimitiveOpType::ScatterPacked)
indexMap = std::unordered_map<size_t, size_t>({ { 0, 2 }, { 1, 1 }, { 2, 0 } });
@ -187,6 +196,8 @@ namespace CNTK
static const std::wstring AttributeNameEpsilon;
static const std::wstring AttributeNameUseCuDNNEngine;
static const std::wstring AttributeNameNewDynamicAxes;
static const std::wstring AttributeNameNewSequenceAxisLengthScalingFactor;
static const std::wstring AttributeNameNewSequenceAxisLengthAdditiveFactor;
static const std::wstring AttributeNameBeginIndex;
static const std::wstring AttributeNameEndIndex;
static const std::wstring AttributeNameReductionOpName;
@ -699,22 +710,11 @@ namespace CNTK
return CompositeFunctionOpName;
}
private:
virtual void ReplacePlaceholdersInPlace(const std::unordered_map<Variable, Variable>& placeholderReplacements,
std::unordered_set<const Function*>& visitedFunctions,
std::unordered_set<Variable>& replacedPlaceholders) override;
CompositeFunction(const FunctionPtr& rootFunction, std::unordered_set<FunctionPtr>&& allPrimitiveFunctions, const std::wstring& name, const std::wstring& uid = Internal::GenerateUid(L"CompositeFunction"))
: Function({}, rootFunction->Outputs(), Dictionary(), rootFunction, name, uid),
m_allPrimitiveFunctions(std::move(allPrimitiveFunctions)), m_networkMatricesAllocated(false)
{}
template <typename FunctionType>
void Traverse(const FunctionType& functor) const
static void Traverse(const FunctionPtr& rootFunction, const FunctionType& functor)
{
const auto& root = RootFunction();
std::unordered_set<FunctionPtr> visitedFunctions;
Traverse(root, visitedFunctions, functor);
Traverse(rootFunction, visitedFunctions, functor);
}
// Recursively traverses the Function graph underlying the 'rootFunction' invoking the provided functor for all visited nodes in the graph.
@ -735,6 +735,16 @@ namespace CNTK
}
}
private:
virtual void ReplacePlaceholdersInPlace(const std::unordered_map<Variable, Variable>& placeholderReplacements,
std::unordered_set<const Function*>& visitedFunctions,
std::unordered_set<Variable>& replacedPlaceholders) override;
CompositeFunction(const FunctionPtr& rootFunction, std::unordered_set<FunctionPtr>&& allPrimitiveFunctions, const std::wstring& name, const std::wstring& uid = Internal::GenerateUid(L"CompositeFunction"))
: Function({}, rootFunction->Outputs(), Dictionary(), rootFunction, name, uid),
m_allPrimitiveFunctions(std::move(allPrimitiveFunctions)), m_networkMatricesAllocated(false)
{}
std::vector<Variable> DetermineInputs() const
{
const auto& root = RootFunction();

View file

@ -1,10 +0,0 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "stdafx.h"
// TODO: Currently there are some known issues with memory sharing for forward pass output matrices that
// need to be addressed before we can switch to using memory sharing by default here.
bool g_shareNodeValueMatrices = false;

View file

@ -57,6 +57,7 @@ namespace CNTK
RandomSample = 45,
RandomSampleInclusionFrequency = 46,
ROIPooling = 47,
Logistic = 48,
// New op types should only be appended to the end of this list.
};
}

View file

@ -62,6 +62,7 @@
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\Math\NcclComm.cpp" />
<ClCompile Include="Config.cpp" />
<ClCompile Include="DataReader.cpp" />
<ClCompile Include="DataWriter.cpp" />
@ -76,4 +77,4 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
</Project>

View file

@ -13,4 +13,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::atomic<bool> Globals::m_forceDeterministicAlgorithms(false);
std::atomic<bool> Globals::m_forceConstantRandomSeed(false);
}}}
std::atomic<bool> Globals::m_enableShareNodeValueMatrices(false);
std::atomic<bool> Globals::m_enableHyperCompressMemory(false);
}}}

View file

@ -22,8 +22,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: Currently the flag is set to false. Should be switched to true after more rigorous testing.
static bool UseV2Aggregator() { return false; }
static void EnableShareNodeValueMatrices()
{
m_enableShareNodeValueMatrices = true;
}
static bool ShouldEnableShareNodeValueMatrices()
{
return m_enableShareNodeValueMatrices;
}
static void EnableHyperCompressMemory()
{
m_enableHyperCompressMemory = true;
}
static bool ShouldEnableHyperCompressMemory()
{
return m_enableHyperCompressMemory;
}
private:
static std::atomic<bool> m_forceDeterministicAlgorithms;
// The global flag to enable sharing of node value matrices in forward and backward prop
static std::atomic<bool> m_enableShareNodeValueMatrices;
// The global flag to enable hyper memory compression
static std::atomic<bool> m_enableHyperCompressMemory;
static std::atomic<bool> m_forceConstantRandomSeed;
};
}}}

View file

@ -1,3 +1,9 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
// Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#ms-mpi or
@ -71,6 +77,7 @@ class MPIWrapper : public std::enable_shared_from_this<MPIWrapper>
std::wstring m_myName;
int m_numMPINodes;
size_t m_numNodesInUse;
bool m_multiHost;
// MPI communicator that reflects the current subset selection
MPI_Comm m_currentComm;
@ -145,6 +152,7 @@ public:
MPI_Comm_rank(MPI_COMM_WORLD, &m_myRank);
MPI_Comm_size(MPI_COMM_WORLD, &m_numMPINodes);
m_numNodesInUse = m_numMPINodes;
m_multiHost = true;
// Verify that the environment variable used by GetTotalNumberOfMPINodes()
// matches what the MPI API says. There're actually two possible cases:
@ -305,6 +313,35 @@ private:
fflush(stderr);
}
Ping("requestnodes (after change)");
// If all ranks run on a single host, we can enable optimized communication
// paths (e.g. NCCL). To determine if a single machine is being used, we
// check that MPI_Get_processor_name matches for all ranks.
const int nameMax = MPI_MAX_PROCESSOR_NAME + 1;
char myName[nameMax] = {0};
int myNameLen = 0;
MPI_Get_processor_name(myName, &myNameLen) || MpiFail("requestnodes: MPI_Get_processor_name");
myName[myNameLen] = '\0';
std::vector<char> nameBuffer(m_numNodesInUse * nameMax);
char* allNames = nameBuffer.data();
MPI_Allgather(myName, nameMax, MPI_CHAR, allNames, nameMax, MPI_CHAR, m_currentComm)
|| MpiFail("requestnodes: MPI_Allgather");
m_multiHost = false;
for(size_t i=1; i<m_numNodesInUse; i++)
{
if (strcmp(allNames, allNames+i*nameMax) != 0)
{
m_multiHost = true;
break;
}
}
fprintf(stderr, "requestnodes [%s]: using %d out of %d MPI nodes on %s (%d requested); we (%d) are %s\n",
msg, (int) m_numNodesInUse, (int) m_numMPINodes, m_multiHost ? "multiple hosts" : "a single host",
(int) requestednodes, (int) CurrentNodeRank(), IsIdle() ? "out (idle)" : "in (participating)");
fflush(stderr);
}
public:
@ -360,6 +397,11 @@ public:
return 0;
}
bool IsMultiHost()
{
return m_multiHost;
}
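// Illustrative consumer (see NcclComm later in this commit): the NcclComm constructor returns
// early when mpi->IsMultiHost() is true, so NCCL-based aggregation is only attempted when all
// ranks share a single host:
//   if (mpi->IsMultiHost())
//       return; // fall back to MPI-based aggregation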
// -----------------------------------------------------------------------
// data-exchange functions (wrappers around MPI functions)
// -----------------------------------------------------------------------

View file

@ -1,6 +1,11 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "Include/Basics.h"
#include "Include/MPIWrapper.h"
using namespace Microsoft::MSR::CNTK;
int MPIWrapper::s_myRank = -1;
std::shared_ptr<MPIWrapper> Microsoft::MSR::CNTK::MPIWrapper::s_mpi = nullptr;
std::shared_ptr<MPIWrapper> Microsoft::MSR::CNTK::MPIWrapper::s_mpi = nullptr;

View file

@ -1002,7 +1002,7 @@ void ComputationNetwork::AllocateAllMatrices(const std::vector<ComputationNodeBa
// Due to special topology, if a node is solely induced by parameters, its function value should not be shared
MarkValueNonSharableNodes();
bool performingBackPropagation = (trainRootNode != nullptr);
bool performingBackPropagation = (trainRootNode != nullptr) || (Globals::ShouldEnableHyperCompressMemory());
// Create a composite Eval order with the specified nodes as roots
// For each node determine parents and whether the output of the

View file

@ -61,7 +61,7 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>/d2Zi+ /bigobj %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(CpuOnlyBuild)">
@ -136,4 +136,4 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
</Project>

View file

@ -12,6 +12,7 @@
#include "TensorShape.h"
#include "MatrixPool.h"
#include "ComputationEnvironment.h"
#include "Globals.h"
#include <unordered_set>
#include <map>
@ -46,8 +47,6 @@
#define CNTK_MODEL_VERSION_15 15 // add new nodes: LambdaRankNode and NDCG1Eval
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_15
extern bool g_shareNodeValueMatrices;
// helper mode for debugging
// If TRACK_GAP_NANS is defined then initialize layout gaps to NaN and do NaN checks. Also do detailed logging of node computations.
// #define TRACK_GAP_NANS
@ -768,7 +767,11 @@ public:
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const { return true; }
void SetOutputNeededDuringBackprop(bool f) { m_outputNeededDuringBackprop = f; }
bool IsOutputNeededDuringBackprop() const { return !g_shareNodeValueMatrices || m_outputNeededDuringBackprop; }
bool IsOutputNeededDuringBackprop() const
{
return (!Globals::ShouldEnableShareNodeValueMatrices() && !Globals::ShouldEnableHyperCompressMemory())
|| m_outputNeededDuringBackprop;
}
// -----------------------------------------------------------------------
// helpers for network traversal
@ -1631,6 +1634,20 @@ public:
#endif
// tracing
Trace();
// When HyperCompressMemory is active, any memory that is no longer needed can be resized to zero immediately. Since the
// memory is not physically released, the freed blocks are gathered into a memory pool; the next request is served with the best-fitting block.
if (Globals::ShouldEnableHyperCompressMemory())
{
for (auto& input : GetInputs())
{
if (!input->IsOutputNeededDuringBackprop())
{
auto inputNodePtr = DownCast(input);
inputNodePtr->Value().Resize(0, 0);
}
}
}
}
#if 0 // (keep it around in case we need to add stuff in the future)
@ -1640,9 +1657,9 @@ public:
}
#endif
#ifdef _DEBUG
virtual void /*IComputationNode::*/ EndBackprop() override
{
#ifdef _DEBUG
Base::EndBackprop();
#ifdef TRACK_GAP_NANS
for (size_t i = 0; i < m_inputs.size(); i++)
@ -1656,8 +1673,18 @@ public:
}
}
#endif
}
#endif
// Release the gradients of value-sharable nodes, and any memory generated in forward that is no longer used.
if (IsValueSharable() && Globals::ShouldEnableHyperCompressMemory())
{
if (GradientPtr())
Gradient().Resize(0, 0);
// the value was kept alive only for backprop; it can be dropped now that this node's backprop is done
if (IsOutputNeededDuringBackprop())
Value().Resize(0, 0);
}
}
// this is the entry point from Network; while it will call virtual BackpropTo() into the actual node implementation
// TODO: move to -Base (or -Network?)

View file

@ -395,7 +395,7 @@ public:
// If input data is sparse, then gradient is block sparse.
if (InputRef(1).Value().GetMatrixType() == SPARSE && InputRef(0).Gradient().GetMatrixType() == DENSE && Gradient().GetMatrixType() == DENSE)
{
// We need a sparse matrix for the gradient. However, we should allocate a new one instead of switching the type in place
// We need a sparse matrix for the gradient. We allocate a new one instead of switching the type in place
// since switching in place may affect other nodes who share this matrix due to memory sharing
auto& currentInput0GradientMatrixRef = InputRef(0).Gradient();
auto newInput0SparseGradientMatrix = std::make_shared<Matrix<ElemType>>(currentInput0GradientMatrixRef.GetNumRows(),
@ -556,7 +556,7 @@ public:
{
Input(0)->CreateGradientMatrixIfNull();
// We need a sparse matrix for the gradient. However, we should allocate a new one instead of switching the type in place
// We need a sparse matrix for the gradient. We allocate a new one instead of switching the type in place
// since switching in place may affect other nodes who share this matrix due to memory sharing
auto& currentInput0GradientMatrixRef = InputRef(0).Gradient();
if (currentInput0GradientMatrixRef.GetMatrixType() != SPARSE)

View file

@ -126,7 +126,7 @@ void RandomSampleNode<ElemType>::ForwardPropNonLooping()
if (ValueAsMatrix().GetMatrixType() != SPARSE)
{
// BUGBUG: matrix type should be configured during validation
// We should allocate a new one instead of switching the type in place since switching in place may
// Note: We allocate a new one instead of switching the type in place since switching in place may
// affect other nodes who share this matrix due to memory sharing
auto newSparseValueMatrix = std::make_shared<Matrix<ElemType>>(ValueAsMatrix().GetNumRows(), ValueAsMatrix().GetNumCols(), CPUDEVICE, SPARSE, matrixFormatSparseCSC);
#ifdef _MSC_VER
@ -140,10 +140,7 @@ void RandomSampleNode<ElemType>::ForwardPropNonLooping()
// TODO: Should we prepare the CSC data directly on the CPU and move it in one go?
// Currently the reader will place the data onto the GPU. It will then be pulled on-demand to the CPU once (and cached there).
valueMatrix.TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ true/*means: BOTH state not ok */, /*emptyTransfer =*/ true, /*updatePreferredDevice =*/ false);
// BUGBUG: This is a no-op; was the intent to change the preferred device to CPU?
valueMatrix.SetDevice(CPUDEVICE);
valueMatrix.TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ true/*means: BOTH state not ok */, /*emptyTransfer =*/ true, /*updatePreferredDevice =*/ true);
valueMatrix.Reset();
// Get vector with indices of randomly sampled classes

View file

@ -2506,7 +2506,7 @@ public:
if (expAvgFactor != 0 || blendFactor != 1)
m_samplesSeen += GetMBLayout()->GetActualNumSamples();
Base::EndBackprop();
Base::EndForwardProp();
}
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }

View file

@ -30,11 +30,6 @@
#include "latticearchive.h"
#include <limits>
// TODO: Temporary mechanism to enable memory sharing for
// node output value matrices. This will go away when the
// sharing is ready to be enabled by default
bool g_shareNodeValueMatrices = false;
namespace Microsoft { namespace MSR { namespace CNTK {
@ -44,7 +39,10 @@ void CNTKEvalBase<ElemType>::Init(const std::string& config)
m_config.Parse(config);
size_t nThreads = m_config("numCPUThreads", "1");
CPUMatrix<ElemType>::SetNumThreads(nThreads);
g_shareNodeValueMatrices = m_config(L"shareNodeValueMatrices", false);
if (m_config(L"shareNodeValueMatrices", false))
Globals::EnableShareNodeValueMatrices();
if (m_config(L"hyperCompressMemory", false))
Globals::EnableHyperCompressMemory();
}
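// Illustrative eval config fragment enabling the flags parsed above (values hypothetical):
//   numCPUThreads = 4
//   shareNodeValueMatrices = true
//   hyperCompressMemory = true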

View file

@ -14,9 +14,12 @@
#endif
#include "Basics.h"
#include "basetypes.h"
#include <string>
#include <stdint.h>
#include <memory>
#include <unordered_map>
#include <map>
#pragma warning( disable: 4251 )
typedef unsigned char byte;
@ -38,6 +41,8 @@ typedef unsigned char byte;
#define GPUSPARSE_INDEX_TYPE int // cuSparse only supports int array indexes
#define CPUSPARSE_INDEX_TYPE int // to be consistent with cuSparse but limited the possible size of the matrix.
#define MEM_MAX_LIMIT_TIMES 2 // The maximum ratio by which a cached memory block may exceed the requested size
namespace Microsoft { namespace MSR { namespace CNTK {
MATH_API void SetMathLibTraceLevel(int traceLevel);
@ -61,11 +66,13 @@ public:
template <typename AllocatedElemType>
static void Free(int deviceId, AllocatedElemType* bufferPtr, bool ignoreCUDARetCode = false);
// This is a public method so that the memory manager can check the total free memory and decide whether to physically
// release all the cached memory.
static std::pair<size_t, size_t> GetFreeAndTotalMemoryInMBs(int deviceId);
private:
template <typename AllocatedElemType>
static AllocatedElemType* AllocateNoTrace(int deviceId, size_t numElements);
static std::pair<size_t, size_t> GetFreeAndTotalMemoryInMBs(int deviceId);
};
// -----------------------------------------------------------------------
@ -205,6 +212,158 @@ enum MatrixFlags
matrixFlagSetValueOnDevice = 1 << bitPosSetValueOnDevice, // SetValue() call has a buffer that is already on the device
};
// -----------------------------------------------------------------------
// BufferManagement -- controls the allocation and release of memory
//
// 1. The goal of buffer management
// The best way to save memory is to release each buffer as soon as it is no longer needed within the
// mini-batch, but doing so adds memory-operation overhead and slows execution down. One way to solve
// this is to build static links between all nodes in a pre-computation pass and reuse the memory at
// runtime, known in CNTK as sharing node value matrices. The other option is a buffer pool that takes
// over all allocation and release requests: the physical memory operations are replaced by logical ones
// that cost nearly nothing. Since the second option, implemented as BufferManagement below, controls
// all memory operations (including incidental ones such as the convolution workspace), allocates purely
// by size, and makes new allocation algorithms easy to implement, it is usually more flexible and
// powerful than the first method.
// 2. How does it work?
// BufferManagement hooks into the Resize function: there, RequestBuffer and LogicalReleaseBuffer
// replace the original allocation and release calls. BufferManagement is a per-device singleton,
// obtained via GetManagerInstance. Resize also takes a growOnly flag, which normally reallocates only
// when the size increases, to save allocation cost; since allocations from the buffer pool cost
// nearly nothing, growOnly is disabled in BufferManagement mode.
// -----------------------------------------------------------------------
class BufferManagement
{
private:
BufferManagement() = default;
// Disable all the copy & move functions to keep the instance safely
DISABLE_COPY_AND_MOVE(BufferManagement);
public:
static BufferManagement& GetManagerInstance(DEVICEID_TYPE deviceId)
{
static std::mutex instanceLock;
// take the lock unconditionally: find() could otherwise race with a concurrent insert
std::lock_guard<std::mutex> lock(instanceLock);
auto instance = m_instances.find(deviceId);
if (instance == m_instances.end())
{
instance = m_instances.insert(std::make_pair(deviceId, std::unique_ptr<BufferManagement>(
new BufferManagement()))).first;
instance->second->m_deviceId = deviceId;
instance->second->m_totalManageSize = 0;
instance->second->m_totalAllocSize = 0;
}
return *(instance->second);
}
// To satisfy a request, search the buffer container first; if nothing suitable is found, allocate a new buffer.
// When a pooled buffer is returned, 'size' is updated to that buffer's actual size.
template<class ElemType>
ElemType* RequestBuffer(size_t& size)
{
ElemType* bufferPtr = nullptr;
auto& bufferContainer = BufferContainer<ElemType>();
// simple size-based lookup; a more efficient or sophisticated algorithm could be plugged in here
auto bufferHint = bufferContainer.lower_bound(size);
if (bufferHint != bufferContainer.end() && bufferHint->first < size * MEM_MAX_LIMIT_TIMES)
{
bufferPtr = bufferHint->second;
size = bufferHint->first;
m_totalManageSize -= size;
bufferContainer.erase(bufferHint);
return bufferPtr;
}
if (m_deviceId >= 0) {
#ifndef CPUONLY
auto deviceSize = TracingGPUMemoryAllocator::GetFreeAndTotalMemoryInMBs(m_deviceId);
float freeMemoryRatio = (float)deviceSize.first / deviceSize.second;
if (freeMemoryRatio < 0.05f || (deviceSize.first << 20) / sizeof(ElemType) < size)
{
PhysicalReleaseAllBuffer<ElemType>();
}
bufferPtr = TracingGPUMemoryAllocator::Allocate<ElemType>(m_deviceId, size);
m_totalAllocSize += size;
#endif
}
else
{
// first, try a no-throw allocation;
// if it fails, empty the buffer pool and retry with a throwing allocation;
// if that fails too, let the system throw bad_alloc
bufferPtr = new (std::nothrow) ElemType[size];
if (!bufferPtr)
{
PhysicalReleaseAllBuffer<ElemType>();
bufferPtr = new ElemType[size];
}
m_totalAllocSize += size;
}
return bufferPtr;
}
// record the released buffer in the buffer container, keyed by its size
template<class ElemType>
void LogicalReleaseBuffer(ElemType* buffer, size_t size)
{
auto& bufferContainer = BufferContainer<ElemType>();
bufferContainer.insert(std::make_pair(size, buffer));
m_totalManageSize += size;
}
// physically release the buffer
template<class ElemType>
void PhysicalReleaseBuffer(ElemType* buffer)
{
if (m_deviceId >= 0)
{
#ifndef CPUONLY
TracingGPUMemoryAllocator::Free<ElemType>(m_deviceId, buffer, false);
#endif
}
else {
delete[] buffer;
}
}
// physically release all cached buffers
template<class ElemType>
void PhysicalReleaseAllBuffer()
{
auto& bufferContainer = BufferContainer<ElemType>();
for (auto& iter : bufferContainer)
{
PhysicalReleaseBuffer<ElemType>(iter.second);
}
bufferContainer.clear();
m_totalManageSize = 0;
}
private:
static std::unordered_map<DEVICEID_TYPE, std::unique_ptr<BufferManagement>> m_instances;
template <class ElemType>
std::multimap<size_t, ElemType*>& BufferContainer();
DEVICEID_TYPE m_deviceId;
size_t m_totalManageSize;
size_t m_totalAllocSize;
// per-element-type maps storing the cached buffer pointers, keyed by size
std::multimap<size_t, float*> m_bufferFloatContainer;
std::multimap<size_t, double*> m_bufferDoubleContainer;
std::multimap<size_t, char*> m_bufferCharContainer;
std::multimap<size_t, short*> m_bufferShortContainer;
std::multimap<size_t, int*> m_bufferIntContainer;
};
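// Minimal usage sketch (illustration only; GPUMatrix::Resize below is the real call site):
//   size_t n = 1 << 20;                                        // requested element count
//   auto& mgr = BufferManagement::GetManagerInstance(deviceId);
//   float* buf = mgr.RequestBuffer<float>(n);                  // n may grow to the pooled buffer's size
//   /* ... use buf as scratch ... */
//   mgr.LogicalReleaseBuffer(buf, n);                          // back to the pool, no physical free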
// -----------------------------------------------------------------------
// BaseMatrixStorage -- base class for all matrix types (CPU, GPU) x (dense, sparse)
// -----------------------------------------------------------------------

View file

@ -247,6 +247,11 @@ protected:
if (CUDNN_STATUS_SUCCESS == err2)
err = CUDNN_STATUS_SUCCESS;
}
// NOTE: only effective when the MatrixPool is enabled; there is no need to keep the workspace around.
workspace.Resize(0, 0);
CUDNN_CALL(err);
}
@ -278,6 +283,7 @@ protected:
// Compute gradients with respect to the output tensor (data).
CUDNN_CALL(cudnnConvolutionBackwardData(*m_cudnn, &C::One, *m_kernelT, ptr(kernel), m_outT, ptr(srcGrad), *m_conv, m_backDataAlgo.Algo.algo,
ptr(workspace), m_backDataAlgo.Algo.memory, &C::One, m_inT, ptr(grad)));
workspace.Resize(0, 0);
}
void BackwardKernelCore(const Mat& srcGrad, const Mat& in, Mat& kernelGrad, bool /*allowReuse*/, Mat& workspace) override
@ -308,6 +314,7 @@ protected:
// Compute gradients with respect to the output tensor (data).
CUDNN_CALL(cudnnConvolutionBackwardFilter(*m_cudnn, &C::One, m_inT, ptr(in), m_outT, ptr(srcGrad), *m_conv, m_backFiltAlgo.Algo.algo,
ptr(workspace), m_backFiltAlgo.Algo.memory, &C::One, *m_kernelT, ptr(kernelGrad)));
workspace.Resize(0, 0);
}
void EnsurePoolingInitialized() override

View file

@ -1505,32 +1505,43 @@ void GPUMatrix<ElemType>::Reshape(const size_t numRows, const size_t numCols)
}
template <class ElemType>
void GPUMatrix<ElemType>::RequireSize(const size_t numRows, const size_t numCols, bool growOnly)
void GPUMatrix<ElemType>::RequireSize(const size_t numRows, const size_t numCols, bool growOnly, bool cachedResize)
{
if (GetNumRows() != numRows || GetNumCols() != numCols)
Resize(numRows, numCols, growOnly);
Resize(numRows, numCols, growOnly, cachedResize);
}
template <class ElemType>
void GPUMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, bool growOnly)
void GPUMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, bool growOnly, bool cachedResize)
{
if (GetNumRows() == numRows && GetNumCols() == numCols)
return;
VerifyResizable(__func__);
bool isForceResize = (!growOnly) || cachedResize;
size_t numElements = numRows * numCols;
if (numElements > GetSizeAllocated() || // grow allocation
(!growOnly && numElements != GetSizeAllocated())) // shrink allocation if not growOnly
if (numElements > GetSizeAllocated() || // grow allocation
(isForceResize && numElements != GetSizeAllocated())) // shrink allocation if not growOnly
{
// reallocate buffer if numElements > 0
ElemType* pArray = nullptr;
if (numElements > 0)
pArray = TracingGPUMemoryAllocator::Allocate<ElemType>(GetComputeDeviceId(), numRows, numCols);
{
if (cachedResize)
pArray = BufferManagement::GetManagerInstance(GetComputeDeviceId()).RequestBuffer<ElemType>(numElements);
else
pArray = TracingGPUMemoryAllocator::Allocate<ElemType>(GetComputeDeviceId(), numRows, numCols);
}
// If the buffer exists, free it
if (Buffer())
TracingGPUMemoryAllocator::Free<ElemType>(GetComputeDeviceId(), Buffer());
{
if(cachedResize)
BufferManagement::GetManagerInstance(GetComputeDeviceId()).LogicalReleaseBuffer<ElemType>(Buffer(), GetSizeAllocated());
else
TracingGPUMemoryAllocator::Free<ElemType>(GetComputeDeviceId(), Buffer());
}
SetBuffer(pArray, numElements * sizeof(ElemType));
SetSizeAllocated(numElements);
@ -4559,8 +4570,8 @@ template GPUMatrix<char>::GPUMatrix(const GPUMatrix<char>&);
template GPUMatrix<char>::GPUMatrix(GPUMatrix<char>&&);
template char* GPUMatrix<char>::CopyToArray() const;
template void GPUMatrix<char>::ChangeDeviceTo(int);
template void GPUMatrix<char>::Resize(size_t, size_t, bool);
template void GPUMatrix<char>::RequireSize(size_t, size_t, bool);
template void GPUMatrix<char>::Resize(size_t, size_t, bool, bool);
template void GPUMatrix<char>::RequireSize(size_t, size_t, bool, bool);
template GPUMatrix<char>::~GPUMatrix();
template GPUMatrix<char> GPUMatrix<char>::ColumnSlice(size_t startColumn, size_t numCols) const;
@ -4584,8 +4595,8 @@ template GPUMatrix<short>::GPUMatrix(const GPUMatrix<short>&);
template GPUMatrix<short>::GPUMatrix(GPUMatrix<short>&&);
template short* GPUMatrix<short>::CopyToArray() const;
template void GPUMatrix<short>::ChangeDeviceTo(int);
template void GPUMatrix<short>::Resize(size_t, size_t, bool);
template void GPUMatrix<short>::RequireSize(size_t, size_t, bool);
template void GPUMatrix<short>::Resize(size_t, size_t, bool, bool);
template void GPUMatrix<short>::RequireSize(size_t, size_t, bool, bool);
template GPUMatrix<short>::~GPUMatrix();
template GPUMatrix<short> GPUMatrix<short>::ColumnSlice(size_t startColumn, size_t numCols) const;

View file

@ -232,12 +232,12 @@ public:
// RequireSize is now the new preferred method of ensuring the correct size inside of the Matrix class. Since Resize will fail if the storage object has
// multiple views, RequireSize will first check to see if Resize is required. If it is not, then it short-circuits and is a noop. Otherwise, RequireSize
// will call Resize, which may fail if the matrix has multiple views.
void RequireSize(const size_t numRows, const size_t numCols, bool growOnly = true); // by default we only reallocate if need to grow
void RequireSize(const GPUMatrix<ElemType>& like, bool growOnly = true) { RequireSize(like.GetNumRows(), like.GetNumCols(), growOnly); }
void RequireSize(const size_t numRows, const size_t numCols, bool growOnly = true, bool cachedResize = false); // by default we only reallocate if need to grow
void RequireSize(const GPUMatrix<ElemType>& like, bool growOnly = true, bool cachedResize = false) { RequireSize(like.GetNumRows(), like.GetNumCols(), growOnly, cachedResize); }
// Resize first checks to ensure that the caller has the authority to call Resize (i.e., it checks to ensure the underlying data is owned by only this matrix), and then
// actually resizes the underlying matrix, doing any allocation as required.
void Resize(const size_t numRows, const size_t numCols, bool growOnly = true); // by default we only reallocate if need to grow
void Resize(const size_t numRows, const size_t numCols, bool growOnly = true, bool cachedResize = false); // by default we only reallocate if need to grow
ElemType& operator()(const size_t /*row*/, const size_t /*col*/) { LogicError("GPUMatrix doesn't support operator(,) on the CPU."); }
const ElemType& operator()(const size_t /*row*/, const size_t /*col*/) const { LogicError("GPUMatrix doesn't support operator(,) on the CPU."); }

View file

@ -156,6 +156,23 @@ int GetMathLibTraceLevel()
MatrixBase::~MatrixBase() { }
#pragma region BufferManagement
std::unordered_map<DEVICEID_TYPE, std::unique_ptr<BufferManagement>> BufferManagement::m_instances;
template <>
std::multimap<size_t, float*>& BufferManagement::BufferContainer<float>() { return m_bufferFloatContainer; }
template <>
std::multimap<size_t, double*>& BufferManagement::BufferContainer<double>() { return m_bufferDoubleContainer; }
template <>
std::multimap<size_t, char*>& BufferManagement::BufferContainer<char>() { return m_bufferCharContainer; }
template <>
std::multimap<size_t, short*>& BufferManagement::BufferContainer<short>() { return m_bufferShortContainer; }
template <>
std::multimap<size_t, int*>& BufferManagement::BufferContainer<int>() { return m_bufferIntContainer; }
#pragma endregion
#pragma region Constructors, destructors and other static matrix builders
@ -165,6 +182,10 @@ MatrixBase::~MatrixBase() { }
// { GPU code },
// ...
// By default, the cached matrix buffer is disabled
template <class ElemType>
bool Matrix<ElemType>::m_useCachedResize = false;
// Initialize members
template <class ElemType>
void Matrix<ElemType>::Init(DEVICEID_TYPE deviceId)
@ -278,6 +299,9 @@ void Matrix<ElemType>::SetDataLocation(CurrentDataLocation location, MatrixType
LogicError("SetDataLocation: New m_baseMatrix must not be NULL.");
}
template <class ElemType>
void Matrix<ElemType>::UseCachedResizeOrNot(bool useCachedResize) { m_useCachedResize = useCachedResize; }
//this is a private constructor only used internally to initialize a blank matrix
template <class ElemType>
Matrix<ElemType>::Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, const MatrixFormat matrixFormat, DEVICEID_TYPE deviceID)
@ -1593,7 +1617,7 @@ void Matrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const
// TODO: should this function test whether the size is changing, and skip if it isn't? We have at least one explicit test for this code calling this (recurrent node)
DISPATCH_MATRIX_ON_FLAG_USEBOTH_4BOTH(this,
{ m_CPUMatrix->Resize(numRows, numCols, growOnly); },
{ m_GPUMatrix->Resize(numRows, numCols, growOnly); },
{ m_GPUMatrix->Resize(numRows, numCols, growOnly, m_useCachedResize); },
{ m_CPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, numNZElemToReserve, growOnly, false); },
{ m_GPUSparseMatrix->RequireSizeAndAllocate(numRows, numCols, numNZElemToReserve, growOnly, false); });
#ifdef _DEBUG

View file

@ -76,6 +76,9 @@ private:
mutable size_t m_numTimesDeviceChanged;
mutable size_t m_numTimesMatrixTypeChanged;
mutable int m_devicesTransferedTo[2]; // TODO: what is this for? Seems only diagnostics
// whether Resize() should use the cached memory pool
static bool m_useCachedResize;
// Moves matrix from device id_from to device with id_to. This method doesn't change preferred device Id
void _transferFromDeviceToDevice(int id_from, int id_to, bool isBeingMoved = true, bool emptyTransfer = false) const;
@ -130,6 +133,8 @@ public:
SetDataLocation(GetDeviceId() < 0 ? CurrentDataLocation::CPU : CurrentDataLocation::GPU, GetMatrixType());
}
static void UseCachedResizeOrNot(bool useCachedResize);
private:
Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, const MatrixFormat matrixFormat, DEVICEID_TYPE deviceID); // only used internally to initialize a blank matrix
Matrix(const MatrixFlags matrixFlags, const MatrixType matrixType, DEVICEID_TYPE deviceID); // only used internally to initialize a blank matrix

Source/Math/NcclComm.cpp (new file, 121 lines)
View file

@ -0,0 +1,121 @@
//
// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "NcclComm.h"
#ifdef USE_NCCL
#include "GPUMatrix.h"
#include <nccl.h>
#include <cuda_runtime.h>
namespace Microsoft { namespace MSR { namespace CNTK {
// allows writing cudaFunction() || "error" (CUDA runtime)
static void operator||(cudaError_t rc, const char *msg)
{
if (rc != cudaSuccess)
RuntimeError("%s: %s (cuda error %d)", msg, cudaGetErrorString(rc), (int) rc);
}
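// Example: the idiom turns an unchecked CUDA call into a checked one, e.g.
//   cudaSetDevice(deviceId) || "NcclComm: cudaSetDevice failed";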
NcclComm::NcclComm(int deviceId, const MPIWrapperPtr& mpi)
: m_ncclComm(nullptr), m_stream(nullptr)
{
if (mpi->IsMultiHost())
return;
size_t numRanks = mpi->NumNodesInUse();
MPI_Comm mpiComm = mpi->Communicator();
std::vector<int> allDevs(numRanks);
MPI_Allgather(&deviceId, 1, MPI_INT, allDevs.data(), 1, MPI_INT, mpiComm)
|| MpiFail("NcclComm: MPI_Allgather");
for (size_t r = 0; r<numRanks; r++)
{
if (allDevs[r] == CPUDEVICE)
{
fprintf(stderr, "NcclComm: disabled, at least one rank using CPU device\n");
return;
}
for (size_t s = 0; s<r; s++)
if (allDevs[r] == allDevs[s])
{
fprintf(stderr, "NcclComm: disabled, same device used by more than one rank\n");
return;
}
}
ncclUniqueId ncclId;
ncclResult_t res;
res = ncclGetUniqueId(&ncclId);
if (res != ncclSuccess)
RuntimeError("NcclComm failed to obtain ncclUniqueId: %s", ncclGetErrorString(res));
MPI_Bcast(&ncclId, NCCL_UNIQUE_ID_BYTES, MPI_CHAR, 0, mpiComm)
|| MpiFail("NcclComm: MPI_Bcase");
PrepareDevice(deviceId);
res = ncclCommInitRank(&m_ncclComm, numRanks, ncclId, mpi->CurrentNodeRank());
if (res != ncclSuccess)
RuntimeError("NcclComm failed to initialize ncclComm_t: %s", ncclGetErrorString(res));
cudaStreamCreateWithFlags(&m_stream, cudaStreamNonBlocking)
|| "cudaStreamCreateWithFlags failed";
fprintf(stderr, "NcclComm: initialized\n");
}
NcclComm::~NcclComm()
{
if (m_stream != nullptr)
cudaStreamDestroy(m_stream);
if (m_ncclComm != nullptr)
ncclCommDestroy(m_ncclComm);
}
bool NcclComm::IsSupported()
{
return m_ncclComm != nullptr;
}
void NcclComm::AllReduceImpl(void* buffer, size_t count, DataType dtype)
{
ncclResult_t res;
if (dtype == DataType::FLOAT)
{
res = ncclAllReduce(buffer, buffer, count, ncclFloat, ncclSum, m_ncclComm, m_stream);
}
else
{
assert(dtype == DataType::DOUBLE);
res = ncclAllReduce(buffer, buffer, count, ncclDouble, ncclSum, m_ncclComm, m_stream);
}
if (res != ncclSuccess)
RuntimeError("NcclComm ncclAllReduce failed: %s", ncclGetErrorString(res));
}
void NcclComm::Sync()
{
cudaStreamSynchronize(m_stream) || "NcclComm: cudaStreamSynchronize failed";
}
}}} // end namespaces
#else // !USE_NCCL
namespace Microsoft { namespace MSR { namespace CNTK {
NcclComm::NcclComm(int /*deviceId*/, const MPIWrapperPtr& /*mpi*/) { }
NcclComm::~NcclComm() { }
bool NcclComm::IsSupported()
{
return false;
}
void NcclComm::Sync() { }
}}} // end namespaces
#endif

Source/Math/NcclComm.h (new file, 56 lines)
View file

@ -0,0 +1,56 @@
//
// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// Encapsulates NCCL's dependencies
#pragma once
#include "Matrix.h"
#include "MPIWrapper.h"
#include <vector>
#include <type_traits>
// Forward declare CUDA stuff
typedef struct CUstream_st* cudaStream_t;
typedef struct ncclComm* ncclComm_t;
namespace Microsoft { namespace MSR { namespace CNTK {
class NcclComm
{
#ifdef USE_NCCL
private:
enum class DataType : int {FLOAT, DOUBLE};
void AllReduceImpl(void* buffer, size_t count, DataType dtype);
cudaStream_t m_stream;
ncclComm_t m_ncclComm;
#endif
public:
NcclComm(int deviceId, const MPIWrapperPtr& mpiComm);
~NcclComm();
bool IsSupported();
void Sync(); // waits for outstanding reductions to complete
template <typename ElemType>
void AllReduce(const std::vector<Matrix<ElemType>*>& grads)
{
#ifdef USE_NCCL
DataType dtype = DataType::FLOAT;
if (std::is_same<ElemType, double>::value)
dtype = DataType::DOUBLE;
else if (!std::is_same<ElemType, float>::value)
RuntimeError("NcclComm Unsupported reduction type");
for (size_t i=0; i<grads.size(); ++i)
{
AllReduceImpl(grads[i]->Data(), grads[i]->GetNumElements(), dtype);
}
#else
RuntimeError("NcclComm: CNTK was built without NCCL support.");
#endif
}
};
}}}
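// Minimal usage sketch (hypothetical driver, not part of the commit; it mirrors the
// SimpleDistGradAggregator changes further below):
//   NcclComm nccl(deviceId, mpi);          // becomes a no-op fallback for multi-host or CPU ranks
//   if (nccl.IsSupported())
//   {
//       nccl.AllReduce(gradientMatrices);  // std::vector<Matrix<float>*>; in-place sum across ranks
//       nccl.Sync();                       // wait for the asynchronous reductions to finish
//   }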

View file

@ -1067,12 +1067,12 @@ void GPUMatrix<ElemType>::Reshape(const size_t numRows, const size_t numCols)
}
template <class ElemType>
void GPUMatrix<ElemType>::RequireSize(const size_t numRows, const size_t numCols, bool growOnly)
void GPUMatrix<ElemType>::RequireSize(const size_t numRows, const size_t numCols, bool growOnly, bool cachedResize)
{
}
template <class ElemType>
void GPUMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, bool growOnly)
void GPUMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, bool growOnly, bool cachedResize)
{
}

View file

@ -368,7 +368,6 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
}
else
{
IncrementNumberOfErrorsOrDie();
if (ShouldWarn())
{
fprintf(stderr,
@ -378,6 +377,7 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
GetSequenceKey(sequenceDsc).c_str(),
GetFileInfo().c_str());
}
IncrementNumberOfErrorsOrDie();
}
if (!bytesToRead && numRowsRead < expectedRowCount)
@ -585,7 +585,6 @@ bool TextParser<ElemType>::TryReadSample(SequenceBuffer& sequence, size_t& bytes
size_t id;
if (!TryGetInputId(id, bytesToRead))
{
IncrementNumberOfErrorsOrDie();
return false;
}
@ -672,12 +671,16 @@ bool TextParser<ElemType>::TryGetInputId(size_t& id, size_t& bytesToRead)
if (ShouldWarn())
{
fprintf(stderr,
"WARNING: Invalid input ('%s') %ls. "
"WARNING: Unknown input ('%s') %ls. "
"Input name '%s' was not specified in the reader config section.\n",
name.c_str(), GetFileInfo().c_str(), name.c_str());
}
// return false here to skip this input, but do not call IncrementNumberOfErrorsOrDie()
return false;
}
else if (ShouldWarn())
if (ShouldWarn())
{
fprintf(stderr,
"WARNING: Input name prefix ('%c') is followed by"
@ -685,7 +688,7 @@ bool TextParser<ElemType>::TryGetInputId(size_t& id, size_t& bytesToRead)
NAME_PREFIX, c, GetFileInfo().c_str());
}
return false;
break;
}
else if (scratchIndex < (m_scratch.get() + m_maxAliasLength))
{
@ -702,19 +705,20 @@ bool TextParser<ElemType>::TryGetInputId(size_t& id, size_t& bytesToRead)
"WARNING: Did not find a valid input name %ls.\n",
GetFileInfo().c_str());
}
return false;
break;
}
++m_pos;
--bytesToRead;
}
if (ShouldWarn())
if (bytesToRead == 0 && ShouldWarn())
{
fprintf(stderr,
"WARNING: Exhausted all input expected for the current sequence"
" while reading an input name %ls.\n", GetFileInfo().c_str());
}
IncrementNumberOfErrorsOrDie();
return false;
}
@ -781,13 +785,13 @@ bool TextParser<ElemType>::TryReadDenseSample(vector<ElemType>& values, size_t s
++counter;
}
IncrementNumberOfErrorsOrDie();
if (ShouldWarn())
{
fprintf(stderr,
"WARNING: Exhausted all input expected for the current sequence"
" while reading a dense sample %ls.\n", GetFileInfo().c_str());
}
IncrementNumberOfErrorsOrDie();
return false;
}
@ -1135,8 +1139,13 @@ bool TextParser<ElemType>::TryReadRealNumber(ElemType& value, size_t& bytesToRea
}
break;
default:
LogicError("Reached an invalid state while reading a floating point value %ls.\n",
GetFileInfo().c_str());
if (ShouldWarn())
{
fprintf(stderr,
"WARNING: Reached an invalid state while reading a floating point value %ls.\n",
GetFileInfo().c_str());
}
return false;
}
++m_pos;

View file

@ -2,8 +2,6 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// PostStat.cpp -- CNTK post statistics related actions
//
#include "PostComputingActions.h"
@ -118,7 +116,7 @@ void PostComputingActions<ElemType>::BatchNormalizationStatistics(IDataReader *
// push the statistics results of mean and variance of bn nodes into mpi updating vector
std::vector<Matrix<ElemType>*> learnParamsValues(2, nullptr);
SimpleDistGradAggregator<ElemType> distGradAgg(m_mpi, false /*useAsyncAggregation*/, 0 /*syncStatsTrace*/);
SimpleDistGradAggregator<ElemType> distGradAgg(m_mpi, false /*useAsyncAggregation*/, m_net->GetDeviceId(), 0 /*syncStatsTrace*/);
auto runMeanParameterPtr = node->Input(3);
auto runStdParameterPtr = node->Input(4);

View file

@ -1,4 +1,10 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// SGD.cpp -- implements SGD with all bells and whistles, parallelization, randomization, etc.
//
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
@ -327,7 +333,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
if (GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD)
{
currentNumGradientBits = m_numGradientBits[startEpoch]; // remember so that we can detect a change
InitDistGradAgg(evaluationNodes.size(), currentNumGradientBits, m_traceLevel);
InitDistGradAgg(evaluationNodes.size(), currentNumGradientBits, net->GetDeviceId(), m_traceLevel);
}
else if (GetParallelizationMethod() == ParallelizationMethod::modelAveragingSGD ||
GetParallelizationMethod() == ParallelizationMethod::blockMomentumSGD)
@ -434,7 +440,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
currentNumGradientBits != m_numGradientBits[i])
{
currentNumGradientBits = m_numGradientBits[i];
InitDistGradAgg(evaluationNodes.size(), currentNumGradientBits, m_traceLevel);
InitDistGradAgg(evaluationNodes.size(), currentNumGradientBits, net->GetDeviceId(), m_traceLevel);
}
Timer timer;
@ -2076,31 +2082,35 @@ void SGD<ElemType>::AttemptUtteranceDerivativeFeatures(ComputationNetworkPtr net
}
template <class ElemType>
void SGD<ElemType>::InitDistGradAgg(int numEvalNodes, int numGradientBits, int traceLevel)
void SGD<ElemType>::InitDistGradAgg(int numEvalNodes, int numGradientBits, int deviceId, int traceLevel)
{
assert(GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD);
if (traceLevel > 0)
fprintf(stderr, "Initializing dataParallelSGD for %d-bit quantization.\n", numGradientBits);
#ifdef CNTK_PARALLEL_TRAINING_SUPPORT
if (Globals::UseV2Aggregator())
{
auto communicator = ::CNTK::QuantizedMPICommunicator(m_zeroThresholdFor1Bit, true, numGradientBits);
m_distGradAgg = std::make_shared<V2AllReduceDistGradAggregator<ElemType>>(communicator, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace);
}
else
m_distGradAgg = std::make_shared<AllReduceDistGradAggregator<ElemType>>(m_mpi, numGradientBits, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace);
#else
if (numGradientBits != (8 * sizeof(ElemType)))
{
if (traceLevel > 0)
fprintf(stderr, "Initializing dataParallelSGD for %d-bit quantization.\n", numGradientBits);
#ifdef CNTK_PARALLEL_TRAINING_SUPPORT
if (Globals::UseV2Aggregator())
{
auto communicator = ::CNTK::QuantizedMPICommunicator(m_zeroThresholdFor1Bit, true, numGradientBits);
m_distGradAgg = std::make_shared<V2AllReduceDistGradAggregator<ElemType>>(communicator, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace);
}
else
m_distGradAgg = std::make_shared<AllReduceDistGradAggregator<ElemType>>(m_mpi, numGradientBits, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace);
#else
RuntimeError("Gradient quantization is unsupported in CNTK binaries built without quantized gradient aggregation support!");
}
if (Globals::UseV2Aggregator()) // Currently used to check V2 against baselines.
m_distGradAgg = std::make_shared<V2SimpleDistGradAggregator<ElemType>>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace, ::CNTK::MPICommunicator());
else
m_distGradAgg = std::make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace);
#endif // !CNTK_PARALLEL_TRAINING_SUPPORT
}
else
{
if (traceLevel > 0)
fprintf(stderr, "Initializing dataParallelSGD with FP%d aggregation.\n", numGradientBits);
if (Globals::UseV2Aggregator()) // Currently used to check V2 against baselines.
m_distGradAgg = std::make_shared<V2SimpleDistGradAggregator<ElemType>>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace, ::CNTK::MPICommunicator());
else
m_distGradAgg = std::make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, m_bufferedAsyncGradientAggregation, deviceId, m_syncStatsTrace);
}
m_gradHeader.reset(DistGradHeader::Create(numEvalNodes), [](DistGradHeader* ptr) { DistGradHeader::Destroy(ptr); });
}
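For orientation, since the hunk above interleaves removed and added lines: after this change InitDistGradAgg chooses the aggregator in two branches keyed on the requested gradient precision, and the new deviceId parameter only feeds the full-precision SimpleDistGradAggregator. A condensed sketch of the resulting control flow, using only the names visible in the hunk (not compilable in isolation):

// Sketch of the post-change selection logic in InitDistGradAgg, condensed from the hunk above.
if (numGradientBits != (8 * sizeof(ElemType))) // quantized gradients requested
{
#ifdef CNTK_PARALLEL_TRAINING_SUPPORT
    if (Globals::UseV2Aggregator())
        m_distGradAgg = std::make_shared<V2AllReduceDistGradAggregator<ElemType>>(
            ::CNTK::QuantizedMPICommunicator(m_zeroThresholdFor1Bit, true, numGradientBits),
            m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace);
    else
        m_distGradAgg = std::make_shared<AllReduceDistGradAggregator<ElemType>>(
            m_mpi, numGradientBits, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/,
            m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace);
#else
    RuntimeError("Gradient quantization is unsupported in CNTK binaries built without quantized gradient aggregation support!");
#endif
}
else // full-precision path; this is where the new deviceId parameter lands
{
    if (Globals::UseV2Aggregator())
        m_distGradAgg = std::make_shared<V2SimpleDistGradAggregator<ElemType>>(
            m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace, ::CNTK::MPICommunicator());
    else
        m_distGradAgg = std::make_shared<SimpleDistGradAggregator<ElemType>>(
            m_mpi, m_bufferedAsyncGradientAggregation, deviceId, m_syncStatsTrace);
}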

View file

@ -491,7 +491,7 @@ protected:
const std::string& prefixMsg = "",
const size_t maxNumberOfSamples = SIZE_MAX);
void InitDistGradAgg(int numEvalNodes, int numGradientBits, int traceLevel);
void InitDistGradAgg(int numEvalNodes, int numGradientBits, int deviceId, int traceLevel);
void InitModelAggregationHandler(int traceLevel, DEVICEID_TYPE devID);
public:
// UpdateWeights() - actual weight update, implementing various update rules

View file

@ -1,7 +1,14 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "IDistGradAggregator.h"
#include "CUDAPageLockedMemAllocator.h"
#include "NcclComm.h"
#include <future>
#include "GPUDataTransferer.h"
#include "TimerUtility.h"
@ -15,8 +22,8 @@ class SimpleDistGradAggregator : public IDistGradAggregator<ElemType>
UsingIDistGradAggregatorMembers;
public:
SimpleDistGradAggregator(const MPIWrapperPtr& mpi, bool useAsyncAggregation, int syncStatsTrace)
: IDistGradAggregator<ElemType>(mpi), m_useAsyncAggregation(useAsyncAggregation), m_initialized(false), m_bufferedGradHeader(nullptr), m_syncStatsTrace(syncStatsTrace), m_iterationCount(0)
SimpleDistGradAggregator(const MPIWrapperPtr& mpi, bool useAsyncAggregation, int deviceId, int syncStatsTrace)
: IDistGradAggregator<ElemType>(mpi), m_useAsyncAggregation(useAsyncAggregation), m_initialized(false), m_bufferedGradHeader(nullptr), m_syncStatsTrace(syncStatsTrace), m_iterationCount(0), m_nccl(deviceId, mpi)
{}
~SimpleDistGradAggregator()
@ -136,7 +143,8 @@ private:
{
m_initialized = true;
int deviceId = gradients[0]->GetDeviceId();
if (deviceId != CPUDEVICE)
if (!m_nccl.IsSupported() && deviceId != CPUDEVICE)
m_allocator.reset(new CUDAPageLockedMemAllocator(deviceId));
for (size_t i = 0; i < gradients.size(); i++)
@ -145,7 +153,7 @@ private:
if (gradients[i]->GetMatrixType() != DENSE)
RuntimeError("Gradient aggregation for sparse gradient matrices is currently unsupported!");
if (deviceId != CPUDEVICE)
if (!m_nccl.IsSupported() && deviceId != CPUDEVICE)
{
m_gpuDataTransferers.push_back(std::make_unique<GPUDataTransferer>(deviceId, m_useAsyncAggregation));
m_intermediateCPUBuffers.push_back(AllocateIntermediateBuffer(deviceId, gradients[i]->GetNumElements()));
@ -216,7 +224,7 @@ private:
}
// Initiate transfer of the gradient matrices to the CPU if needed
if (deviceId >= 0)
if (!m_nccl.IsSupported() && deviceId >= 0)
{
for (size_t i = 0; i < numGradMatrices; ++i)
m_gpuDataTransferers[i]->CopyGPUToCPUAsync(gradients[i]->Data(), gradients[i]->GetNumElements(), m_intermediateCPUBuffers[i].get());
@ -239,20 +247,27 @@ private:
if (!m_mpi->IsMainNode())
MPI_Isend(headerCPU, headerCPU->Size(), MPI_CHAR, m_mpi->MainNodeRank(), numGradMatrices, m_mpi->Communicator(), &sendHeaderRequest) || MpiFail("MPI_Isend");
// Perform MPI async allreduce on the gradient data
// Perform async allreduce on the gradient data
std::vector<MPI_Request> allReduceRequests(numGradMatrices);
for (size_t i = 0; i < numGradMatrices; ++i)
if (!m_nccl.IsSupported())
{
ElemType* reductionBuffer = gradients[i]->Data();
if (deviceId >= 0)
for (size_t i = 0; i < numGradMatrices; ++i)
{
m_gpuDataTransferers[i]->WaitForCopyGPUToCPUAsync();
reductionBuffer = m_intermediateCPUBuffers[i].get();
}
ElemType* reductionBuffer = gradients[i]->Data();
if (deviceId >= 0)
{
m_gpuDataTransferers[i]->WaitForCopyGPUToCPUAsync();
reductionBuffer = m_intermediateCPUBuffers[i].get();
}
// On Windows this async MPI_Iallreduce call requires MS MPI v7 or higher to be installed
MPI_Iallreduce(MPI_IN_PLACE, reductionBuffer, gradients[i]->GetNumElements(), MPIWrapper::GetDataType(reductionBuffer), MPI_SUM, m_mpi->Communicator(), &allReduceRequests[i]) || MpiFail("MPI_Iallreduce");
// On Windows this async MPI_Iallreduce call requires MS MPI v7 or higher to be installed
MPI_Iallreduce(MPI_IN_PLACE, reductionBuffer, gradients[i]->GetNumElements(),
MPIWrapper::GetDataType(reductionBuffer), MPI_SUM,
m_mpi->Communicator(), &allReduceRequests[i]) || MpiFail("MPI_Iallreduce");
}
}
else
m_nccl.AllReduce(gradients);
// On the main node wait for the headers to arrive and aggregate
if (m_mpi->IsMainNode())
@ -293,11 +308,14 @@ private:
}
// Wait for the allreduce operations to finish and initiate transfer back to the GPU if needed
for (size_t i = 0; i < numGradMatrices; ++i)
if (!m_nccl.IsSupported())
{
MPI_Wait(&allReduceRequests[i], MPI_STATUSES_IGNORE) || MpiFail("MPI_Wait");
if (deviceId >= 0)
m_gpuDataTransferers[i]->CopyCPUToGPUAsync(m_intermediateCPUBuffers[i].get(), gradients[i]->GetNumElements(), gradients[i]->Data());
for (size_t i = 0; i < numGradMatrices; ++i)
{
MPI_Wait(&allReduceRequests[i], MPI_STATUSES_IGNORE) || MpiFail("MPI_Wait");
if (deviceId >= 0)
m_gpuDataTransferers[i]->CopyCPUToGPUAsync(m_intermediateCPUBuffers[i].get(), gradients[i]->GetNumElements(), gradients[i]->Data());
}
}
// Wait to receive aggregate header
@ -305,7 +323,9 @@ private:
MPI_Wait(&recvAggHeaderRequest, MPI_STATUSES_IGNORE) || MpiFail("MPI_Wait");
// Wait for all the transfers to finish
if (deviceId >= 0)
if (m_nccl.IsSupported())
m_nccl.Sync();
else if (deviceId >= 0)
{
for (size_t i = 0; i < numGradMatrices; ++i)
m_gpuDataTransferers[i]->WaitForCopyCPUToGPUAsync();
@ -349,5 +369,7 @@ private:
size_t m_iterationCount;
bool m_initialized;
NcclComm m_nccl;
};
} } }
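Taken together, the scattered hunks above gate every host-staging step on m_nccl.IsSupported(). A condensed sketch of the resulting aggregation flow, assuming the member names shown above (buffer setup and the MPI header exchange omitted; not compilable in isolation):

// With NCCL available, gradients never leave the GPU; otherwise the pre-existing
// copy-to-CPU + MPI_Iallreduce + copy-back path is used.
if (m_nccl.IsSupported())
{
    m_nccl.AllReduce(gradients); // in-place allreduce directly on the device buffers
    // ... the MPI header exchange proceeds unchanged ...
    m_nccl.Sync();               // wait for the NCCL reductions to finish
}
else
{
    for (size_t i = 0; i < numGradMatrices; ++i)
    {
        ElemType* reductionBuffer = gradients[i]->Data();
        if (deviceId >= 0) // GPU gradients are staged through page-locked CPU buffers
        {
            m_gpuDataTransferers[i]->WaitForCopyGPUToCPUAsync();
            reductionBuffer = m_intermediateCPUBuffers[i].get();
        }
        MPI_Iallreduce(MPI_IN_PLACE, reductionBuffer, gradients[i]->GetNumElements(),
                       MPIWrapper::GetDataType(reductionBuffer), MPI_SUM,
                       m_mpi->Communicator(), &allReduceRequests[i]) || MpiFail("MPI_Iallreduce");
    }
    // ... MPI_Wait on each request, then CopyCPUToGPUAsync back to the device ...
}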

View file

@ -2,6 +2,7 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "V2SimpleDistGradAggregator.h"
@ -167,7 +168,7 @@ public:
if (Globals::UseV2Aggregator())
m_distGradAgg = make_shared<V2SimpleDistGradAggregator<ElemType>>(m_mpi, false /*useAsyncAggregation*/, 0 /*syncStatsTrace*/, ::CNTK::MPICommunicator());
else
m_distGradAgg = make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, false /*useAsyncAggregation*/, 0 /*syncStatsTrace*/);
m_distGradAgg = make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, false /*useAsyncAggregation*/, m_net->GetDeviceId(), 0 /*syncStatsTrace*/);
}
m_gradHeader->numEvalNode = evalNodes.size();

View file

@ -0,0 +1,113 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug_CpuOnly|x64">
<Configuration>Debug_CpuOnly</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release_CpuOnly|x64">
<Configuration>Release_CpuOnly</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{5D29C76D-648A-456F-920D-48230F2FB3C8}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>CPPEvalExtendedClientTest</RootNamespace>
<ProjectName>CPPEvalExtendedClientTest</ProjectName>
</PropertyGroup>
<Import Project="$(SolutionDir)\CNTK.Cpp.props" />
<PropertyGroup Condition="$(DebugBuild)" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<UseIntelMKL>No</UseIntelMKL>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
<UseIntelMKL>No</UseIntelMKL>
<UseIntelIPP>false</UseIntelIPP>
</PropertyGroup>
<!-- Importing the C++ defaults must occur after declaring the desired toolset above;
otherwise, the build may fall back to a previous toolset. -->
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
<ImportGroup Label="PropertySheets">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup>
<!-- TODO intentional for all? -->
<LinkIncremental>false</LinkIncremental>
<TargetName>CPPEvalExtendedClientTest</TargetName>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;UNICODE;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<FloatingPointModel>Fast</FloatingPointModel>
<OpenMPSupport>true</OpenMPSupport>
<TreatWarningAsError>true</TreatWarningAsError>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>EvalDLL.lib;%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>%(DelayLoadDLLs)</DelayLoadDLLs>
<Profile>true</Profile>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<Optimization>Disabled</Optimization>
<MinimalRebuild>false</MinimalRebuild>
</ClCompile>
<Link />
<ProjectReference>
<LinkLibraryDependencies>false</LinkLibraryDependencies>
</ProjectReference>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<EnableParallelCodeGeneration>true</EnableParallelCodeGeneration>
<FloatingPointExceptions>false</FloatingPointExceptions>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
</ClCompile>
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
<ProjectReference>
<LinkLibraryDependencies>true</LinkLibraryDependencies>
</ProjectReference>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="..\..\..\..\Examples\Evaluation\CPPEvalExtendedClient\CPPEvalExtendedClient.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>

View file

@ -0,0 +1,22 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="CPPEvalExtendedClient.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

View file

@ -0,0 +1,2 @@
This folder contains the VC++ project file for building CPPEvalExtendedClientTest.exe.
The C++ source code used by the project is in Examples\Evaluation\CPPEvalExtendedClient.

View file

@ -0,0 +1,114 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz
Hardware threads: 32
Total Memory: 33468508 kB
-------------------------------------------------------------------
+ [[ -z E:\CNTKTestData ]]
+ [[ ! -d E:\CNTKTestData ]]
+ '[' Windows_NT == Windows_NT ']'
++ cygpath -au 'E:\CNTKTestData'
+ TestDataDir=/cygdrive/e/CNTKTestData
+ ATISDir=/cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS
+ DataDir=/cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS/Data
+ OutputDir=/cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS/Data
+ ConfigDir=/cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS
+ DeleteModelsAfterTest=0
+ '[' -f /cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS/ATIS.cntk ']'
+ cntkrun ATIS.cntk 'stderr=- command=Train Train=[SGD=[maxEpochs=1]]'
+ configFileName=ATIS.cntk
+ additionalCNTKArgs='stderr=- command=Train Train=[SGD=[maxEpochs=1]]'
+ '[' Windows_NT == Windows_NT ']'
++ cygpath -aw /cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS
+ ConfigDir='C:\repos\cntk\Examples\Text\ATIS'
++ cygpath -aw /tmp/cntk-test-20161108174139.565799/EvalClientTests_CPPEvalExtendedClientTest@release_cpu
+ RunDir='C:\cygwin64\tmp\cntk-test-20161108174139.565799\EvalClientTests_CPPEvalExtendedClientTest@release_cpu'
++ cygpath -aw /cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS/Data
+ DataDir='C:\repos\cntk\Examples\Text\ATIS\Data'
++ cygpath -aw /cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS/Data
+ OutputDir='C:\repos\cntk\Examples\Text\ATIS\Data'
+ CNTKArgs='configFile=C:\repos\cntk\Examples\Text\ATIS/ATIS.cntk currentDirectory=C:\repos\cntk\Examples\Text\ATIS\Data RunDir=C:\cygwin64\tmp\cntk-test-20161108174139.565799\EvalClientTests_CPPEvalExtendedClientTest@release_cpu DataDir=C:\repos\cntk\Examples\Text\ATIS\Data ConfigDir=C:\repos\cntk\Examples\Text\ATIS OutputDir=C:\repos\cntk\Examples\Text\ATIS\Data DeviceId=-1 timestamping=true stderr=- command=Train Train=[SGD=[maxEpochs=1]]'
+ '[' '' '!=' '' ']'
+ modelsDir=/tmp/cntk-test-20161108174139.565799/EvalClientTests_CPPEvalExtendedClientTest@release_cpu/Models
+ [[ 1 == 1 ]]
+ '[' -d /tmp/cntk-test-20161108174139.565799/EvalClientTests_CPPEvalExtendedClientTest@release_cpu/Models ']'
+ mkdir -p /tmp/cntk-test-20161108174139.565799/EvalClientTests_CPPEvalExtendedClientTest@release_cpu/Models
+ [[ 0 == 0 ]]
+ run /cygdrive/c/repos/cntk/x64/release_CpuOnly/cntk.exe 'configFile=C:\repos\cntk\Examples\Text\ATIS/ATIS.cntk' 'currentDirectory=C:\repos\cntk\Examples\Text\ATIS\Data' 'RunDir=C:\cygwin64\tmp\cntk-test-20161108174139.565799\EvalClientTests_CPPEvalExtendedClientTest@release_cpu' 'DataDir=C:\repos\cntk\Examples\Text\ATIS\Data' 'ConfigDir=C:\repos\cntk\Examples\Text\ATIS' 'OutputDir=C:\repos\cntk\Examples\Text\ATIS\Data' DeviceId=-1 timestamping=true stderr=- command=Train 'Train=[SGD=[maxEpochs=1]]'
+ cmd=/cygdrive/c/repos/cntk/x64/release_CpuOnly/cntk.exe
+ shift
+ '[' '' == 1 ']'
+ echo === Running /cygdrive/c/repos/cntk/x64/release_CpuOnly/cntk.exe 'configFile=C:\repos\cntk\Examples\Text\ATIS/ATIS.cntk' 'currentDirectory=C:\repos\cntk\Examples\Text\ATIS\Data' 'RunDir=C:\cygwin64\tmp\cntk-test-20161108174139.565799\EvalClientTests_CPPEvalExtendedClientTest@release_cpu' 'DataDir=C:\repos\cntk\Examples\Text\ATIS\Data' 'ConfigDir=C:\repos\cntk\Examples\Text\ATIS' 'OutputDir=C:\repos\cntk\Examples\Text\ATIS\Data' DeviceId=-1 timestamping=true stderr=- command=Train 'Train=[SGD=[maxEpochs=1]]'
=== Running /cygdrive/c/repos/cntk/x64/release_CpuOnly/cntk.exe configFile=C:\repos\cntk\Examples\Text\ATIS/ATIS.cntk currentDirectory=C:\repos\cntk\Examples\Text\ATIS\Data RunDir=C:\cygwin64\tmp\cntk-test-20161108174139.565799\EvalClientTests_CPPEvalExtendedClientTest@release_cpu DataDir=C:\repos\cntk\Examples\Text\ATIS\Data ConfigDir=C:\repos\cntk\Examples\Text\ATIS OutputDir=C:\repos\cntk\Examples\Text\ATIS\Data DeviceId=-1 timestamping=true stderr=- command=Train Train=[SGD=[maxEpochs=1]]
+ /cygdrive/c/repos/cntk/x64/release_CpuOnly/cntk.exe 'configFile=C:\repos\cntk\Examples\Text\ATIS/ATIS.cntk' 'currentDirectory=C:\repos\cntk\Examples\Text\ATIS\Data' 'RunDir=C:\cygwin64\tmp\cntk-test-20161108174139.565799\EvalClientTests_CPPEvalExtendedClientTest@release_cpu' 'DataDir=C:\repos\cntk\Examples\Text\ATIS\Data' 'ConfigDir=C:\repos\cntk\Examples\Text\ATIS' 'OutputDir=C:\repos\cntk\Examples\Text\ATIS\Data' DeviceId=-1 timestamping=true stderr=- command=Train 'Train=[SGD=[maxEpochs=1]]'
CNTK 2.0.beta2.0+ (zhouwang/pr899 0b1214, Nov 8 2016 17:27:36) on ZHOUWANGDEV4 at 2016/11/08 16:41:40
C:\repos\cntk\x64\release_CpuOnly\cntk.exe configFile=C:\repos\cntk\Examples\Text\ATIS/ATIS.cntk currentDirectory=C:\repos\cntk\Examples\Text\ATIS\Data RunDir=C:\cygwin64\tmp\cntk-test-20161108174139.565799\EvalClientTests_CPPEvalExtendedClientTest@release_cpu DataDir=C:\repos\cntk\Examples\Text\ATIS\Data ConfigDir=C:\repos\cntk\Examples\Text\ATIS OutputDir=C:\repos\cntk\Examples\Text\ATIS\Data DeviceId=-1 timestamping=true stderr=- command=Train Train=[SGD=[maxEpochs=1]]
Changed current directory to C:\repos\cntk\Examples\Text\ATIS\Data
11/08/2016 16:41:40: Redirecting stderr to file -_Train.logrank0
CNTK 2.0.beta2.0+ (zhouwang/pr899 0b1214, Nov 8 2016 17:27:36) on ZHOUWANGDEV4 at 2016/11/08 16:41:40
C:\repos\cntk\x64\release_CpuOnly\cntk.exe configFile=C:\repos\cntk\Examples\Text\ATIS/ATIS.cntk currentDirectory=C:\repos\cntk\Examples\Text\ATIS\Data RunDir=C:\cygwin64\tmp\cntk-test-20161108174139.565799\EvalClientTests_CPPEvalExtendedClientTest@release_cpu DataDir=C:\repos\cntk\Examples\Text\ATIS\Data ConfigDir=C:\repos\cntk\Examples\Text\ATIS OutputDir=C:\repos\cntk\Examples\Text\ATIS\Data DeviceId=-1 timestamping=true stderr=- command=Train Train=[SGD=[maxEpochs=1]]
11/08/2016 16:41:40: ##############################################################################
11/08/2016 16:41:40: # #
11/08/2016 16:41:40: # Train command (train action) #
11/08/2016 16:41:40: # #
11/08/2016 16:41:40: ##############################################################################
Node 'lstmStack.layers[0].lstmState._.ot._.PlusArgs[0].PlusArgs[0].PlusArgs[1].TimesArgs[0]' (LearnableParameter operation) operation: Tensor shape was inferred as [300 x 150].
Node 'lstmStack.layers[0].lstmState._.ft._.PlusArgs[0].PlusArgs[0].PlusArgs[1].TimesArgs[0]' (LearnableParameter operation) operation: Tensor shape was inferred as [300 x 150].
Node 'lstmStack.layers[0].lstmState._.it._.PlusArgs[0].PlusArgs[0].PlusArgs[1].TimesArgs[0]' (LearnableParameter operation) operation: Tensor shape was inferred as [300 x 150].
Node 'lstmStack.layers[0].lstmState._.bit.ElementTimesArgs[1].z.PlusArgs[0].PlusArgs[1].TimesArgs[0]' (LearnableParameter operation) operation: Tensor shape was inferred as [300 x 150].
11/08/2016 16:41:40:
Model has 61 nodes. Using CPU.
11/08/2016 16:41:40: Training criterion: cr = CrossEntropyWithSoftmax
11/08/2016 16:41:40: Evaluation criterion: errs = ClassificationError
11/08/2016 16:41:40: Training 1005127 parameters in 18 parameter tensors.
11/08/2016 16:42:02: Finished Epoch[ 1 of 1]: [Training] cr = 0.40189165 * 36006; errs = 8.254% * 36006; totalSamplesSeen = 36006; learningRatePerSample = 0.0099999998; epochTime=22.2249s
11/08/2016 16:42:02: __COMPLETED__
+ return 0
+ local ExitCode=0
+ [[ 0 == 1 ]]
+ return 0
+ '[' -d 'C:\repos\cntk\Examples\Text\ATIS\Data/work' ']'
+ '[' -d /cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS/work ']'
+ mv 'C:\repos\cntk\Examples\Text\ATIS\Data/work' /cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS/
+ '[' Windows_NT == Windows_NT ']'
+ /cygdrive/c/repos/cntk/x64/release_CpuOnly/CPPEvalExtendedClientTest.exe
Input node name: featuresCW
Input feature dimension: 944
Input node name: featuresNW
Input feature dimension: 944
Input node name: featuresPW
Input feature dimension: 944
Slot tag for sentence "BOS i would like to find a flight from charlotte to las vegas that makes a stop in st. louis EOS" is as followings:
i -- I-transport_type
would -- I-transport_type
like -- I-transport_type
to -- I-transport_type
find -- I-transport_type
a -- I-transport_type
flight -- I-transport_type
from -- I-transport_type
charlotte -- B-fromloc.airport_name
to -- I-transport_type
las -- B-toloc.airport_name
vegas -- I-toloc.airport_name
that -- I-transport_type
makes -- I-transport_type
a -- I-transport_type
stop -- I-transport_type
in -- I-transport_type
st. -- B-stoploc.airport_name
louis -- I-state_name
Evaluation complete.
Output dimension: 127
Output name: outputs
+ ExitCode=0
+ '[' -d /cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS/work ']'
+ rm -rf /cygdrive/c/repos/cntk/Tests/EndToEndTests/../../Examples/Text/ATIS/work
+ exit 0

View file

@ -0,0 +1,48 @@
#!/bin/bash
. $TEST_ROOT_DIR/run-test-common
set -x
# This test verifies that CPPEvalExtendedClient works with the same setup a user would have.
# To that end, the test creates the pre-trained model in the Examples directories, where CPPEvalExtendedClient expects it.
# These files are removed by Jenkins during workspace cleanup.
# The eval test uses pre-trained models that are not part of the CNTK repository itself;
# the dataset is taken from an external location specified via an environment variable.
if [[ -z "$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY" || ! -d "$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY" ]]; then
echo This test uses external data that is not part of the CNTK repository. Environment variable CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY must be set to point to the external test data location.
exit 1
fi
if [ "$OS" == "Windows_NT" ]; then
TestDataDir=`cygpath -au $CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY`
else
TestDataDir=$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY
fi
ATISDir=$TEST_ROOT_DIR/../../Examples/Text/ATIS
DataDir=$ATISDir/Data
OutputDir=$ATISDir/Data
ConfigDir=$ATISDir
# Train model for evaluation
DeleteModelsAfterTest=0
[ -f $ConfigDir/ATIS.cntk ] || exit 1
cntkrun ATIS.cntk "stderr=- command=Train Train=[SGD=[maxEpochs=1]]" || exit $?
# The created model is saved under $DataDir/work, according to ATIS.cntk. Move it to $ATISDir/work.
[ -d $DataDir/work ] || exit $?
[ -d $ATISDir/work ] && rm -rf $ATISDir/work
mv $DataDir/work $ATISDir/ || exit $?
if [ "$OS" == "Windows_NT" ]; then
$TEST_BIN_DIR/CPPEvalExtendedClientTest.exe
else
$TEST_BIN_DIR/cppevalextendedclient
fi
ExitCode=$?
[ -d $ATISDir/work ] && rm -rf $ATISDir/work
exit $ExitCode

View file

@ -0,0 +1,92 @@
dataDir: .
tags:
- bvt-i (build_sku != '1bitsgd') and ((build_sku == 'cpu') or (device == 'gpu')) and (flavor == 'release')
# This test also runs in debug mode, as the debug version of EvalDll is also included in the NuGet package.
- nightly-i (build_sku != '1bitsgd') and ((build_sku == 'cpu') or (device == 'gpu'))
testCases:
Test run must be completed:
patterns:
- Evaluation complete
# Due to time limitations, the test can only train the model for 1 epoch, so the
# model is not accurate enough to produce correct results under some build flavors.
# Result checking is disabled for now.
#Test results Line 1:
#patterns:
# - i -- I-transport_type
#Test results Line 2:
# patterns:
# - would -- I-transport_type
#Test results Line 3:
# patterns:
# - like -- I-transport_type
#Test results Line 4:
# patterns:
# - to -- I-transport_type
#Test results Line 5:
# patterns:
# - find -- I-transport_type
#Test results Line 6:
# patterns:
# - a -- I-transport_type
#Test results Line 7:
# patterns:
# - flight -- I-transport_type
#Test results Line 8:
# patterns:
# - from -- I-transport_type
#Test results Line 9:
# patterns:
# - charlotte -- B-fromloc.airport_name
#Test results Line 10:
# patterns:
# - to -- I-transport_type
#Test results Line 11:
# patterns:
# - las -- B-toloc.airport_name
#Test results Line 12:
# patterns:
# - vegas -- I-toloc.airport_name
#Test results Line 13:
# patterns:
# - that -- I-transport_type
#Test results Line 14:
# patterns:
# - makes -- I-transport_type
#Test results Line 15:
# patterns:
# - a -- I-transport_type
#Test results Line 16:
# patterns:
# - stop -- I-transport_type
#Test results Line 17:
# patterns:
# - in -- I-transport_type
#Test results Line 18:
# patterns:
# - st. -- B-stoploc.airport_name
#Test results Line 19:
# patterns:
# - louis -- I-state_name

View file

@ -1229,6 +1229,9 @@ Test module "ReaderTests" has passed with:
Test case "ReaderTestSuite/CNTKTextFormatReader_Simple_dense" has passed with:
1 assertion out of 1 passed
Test case "ReaderTestSuite/CNTKTextFormatReader_Simple_dense_single_stream" has passed with:
1 assertion out of 1 passed
Test case "ReaderTestSuite/CNTKTextFormatReader_MNIST_dense" has passed with:
1 assertion out of 1 passed

View file

@ -3,7 +3,7 @@ set -x -e -o pipefail
USAGE="Usage: $0 <drops-to-test>"
REPO_TAG=v2.0.beta2.0
REPO_TAG=v2.0.beta3.0
while [ $# -gt 0 ]; do
case "$1" in
@ -52,9 +52,11 @@ for drop in $*; do
if [[ "$DROP_FILE" == *CPU* ]] || [[ "$DROP_FILE" == *cpu* ]]; then
TEST_DEVICE=cpu
DOCKER_TO_RUN=docker
DOCKERFILE_SUFFIX=CPU
else
TEST_DEVICE=gpu
DOCKER_TO_RUN=nvidia-docker
DOCKERFILE_SUFFIX=GPU
fi
rm -f "$DROP_RESERVED"
@ -63,7 +65,7 @@ for drop in $*; do
IMAGE=cntk:installtest
for base in Ubuntu16 Ubuntu14; do
docker build -t $IMAGE -f Dockerfile-$base-GPU --build-arg REPO_TAG=$REPO_TAG .
docker build -t $IMAGE -f Dockerfile-$base-$DOCKERFILE_SUFFIX --build-arg REPO_TAG=$REPO_TAG .
$DOCKER_TO_RUN run --rm $IMAGE su - testuser -c "./run-test.sh $TEST_DEVICE"
docker rmi $IMAGE
done

View file

@ -8,9 +8,4 @@
#define BOOST_TEST_MODULE BrainScriptTests
#include "stdafx.h"
// TODO: Temporary mechanism to enable memory sharing for
// node output value matrices. This will go away when the
// sharing is ready to be enabled by default
bool g_shareNodeValueMatrices = false;
#include "stdafx.h"

View file

@ -9,9 +9,4 @@
#include "MPIWrapper.h"
// TODO: Get rid of these globals
Microsoft::MSR::CNTK::MPIWrapper* g_mpi = nullptr;
// TODO: Temporary mechanism to enable memory sharing for
// node output value matrices. This will go away when the
// sharing is ready to be enabled by default
bool g_shareNodeValueMatrices = false;
Microsoft::MSR::CNTK::MPIWrapper* g_mpi = nullptr;

View file

@ -101,6 +101,23 @@ BOOST_AUTO_TEST_CASE(CNTKTextFormatReader_Simple_dense)
1);
};
BOOST_AUTO_TEST_CASE(CNTKTextFormatReader_Simple_dense_single_stream)
{
HelperRunReaderTest<float>(
testDataPath() + "/Config/CNTKTextFormatReader/dense.cntk",
testDataPath() + "/Control/CNTKTextFormatReader/Simple_dense_single_stream.txt",
testDataPath() + "/Control/CNTKTextFormatReader/Simple_dense_single_stream_Output.txt",
"Simple_single_stream",
"reader",
1000, // epoch size
250, // mb size
10, // num epochs
1,
0,
0,
1);
};
BOOST_AUTO_TEST_CASE(CNTKTextFormatReader_MNIST_dense)
{

View file

@ -219,6 +219,26 @@ Simple = [
]
]
Simple_single_stream = [
precision = "float"
reader = [
traceLevel = 0 # this will disable warnings triggered by unknown input names.
readerType = "CNTKTextFormatReader"
file = "Simple_dense.txt"
randomize = false
input = [
features = [
alias = "F"
dim = 2
format = "dense"
]
]
]
]
50x20_jagged_sequences = [
precision = "double"

The file diff is not shown due to its large size. (Load diff)

View file

@ -16,7 +16,7 @@ WARNING: Maximum per-input number of samples for sequence (id = 2) at offset 435
INFO: Finished loading sequence (id = 2) at offset 435 in the input file (invalid_inputs.txt), successfully read 14 out of expected 14 rows.
WARNING: Input name prefix ('|') is followed by an invalid character (' ') at offset 454 in the input file (invalid_inputs.txt).
WARNING: Input name prefix ('|') is followed by an invalid character (' ') at offset 483 in the input file (invalid_inputs.txt).
WARNING: Invalid input ('C') at offset 544 in the input file (invalid_inputs.txt). Input name 'C' was not specified in the reader config section.
WARNING: Unknown input ('C') at offset 544 in the input file (invalid_inputs.txt). Input name 'C' was not specified in the reader config section.
WARNING: Empty input row at offset 549 in the input file (invalid_inputs.txt).
WARNING: Could not read a row (# 9) while loading sequence (id = 3) at offset 549 in the input file (invalid_inputs.txt).
WARNING: Exhausted all input expected for the current sequence (id = 3) at offset 549 in the input file (invalid_inputs.txt), but only read 8 out of 9 expected rows.

View file

@ -200,7 +200,6 @@ inline CNTK::FunctionPtr Stabilize(const CNTK::Variable& x, const CNTK::DeviceDe
template <typename ElementType>
std::pair<CNTK::FunctionPtr, CNTK::FunctionPtr> LSTMPCellWithSelfStabilization(CNTK::Variable input, CNTK::Variable prevOutput, CNTK::Variable prevCellState, const CNTK::DeviceDescriptor& device)
{
size_t inputDim = input.Shape()[0];
size_t outputDim = prevOutput.Shape()[0];
size_t cellDim = prevCellState.Shape()[0];
@ -209,8 +208,8 @@ std::pair<CNTK::FunctionPtr, CNTK::FunctionPtr> LSTMPCellWithSelfStabilization(C
};
unsigned long seed = 1;
auto createProjectionParam = [device, &seed](size_t outputDim, size_t inputDim) {
return CNTK::Parameter({ outputDim, inputDim }, CNTK::AsDataType<ElementType>(), CNTK::GlorotUniformInitializer(1, 0, 1, seed++), device);
auto createProjectionParam = [device, &seed](size_t outputDim) {
return CNTK::Parameter({ outputDim, CNTK::NDShape::InferredDimension }, CNTK::AsDataType<ElementType>(), CNTK::GlorotUniformInitializer(1, 0, 1, seed++), device);
};
auto createDiagWeightParam = [device, &seed](size_t dim) {
@ -220,26 +219,26 @@ std::pair<CNTK::FunctionPtr, CNTK::FunctionPtr> LSTMPCellWithSelfStabilization(C
auto stabilizedPrevOutput = Stabilize<ElementType>(prevOutput, device);
auto stabilizedPrevCellState = Stabilize<ElementType>(prevCellState, device);
auto projectInput = [input, cellDim, inputDim, createBiasParam, createProjectionParam]() {
return createBiasParam(cellDim) + CNTK::Times(createProjectionParam(cellDim, inputDim), input);
auto projectInput = [input, cellDim, createBiasParam, createProjectionParam]() {
return createBiasParam(cellDim) + CNTK::Times(createProjectionParam(cellDim), input);
};
// Input gate
auto it = CNTK::Sigmoid(projectInput() + CNTK::Times(createProjectionParam(cellDim, outputDim), stabilizedPrevOutput) + CNTK::ElementTimes(createDiagWeightParam(cellDim), stabilizedPrevCellState));
auto bit = CNTK::ElementTimes(it, CNTK::Tanh(projectInput() + CNTK::Times(createProjectionParam(cellDim, outputDim), stabilizedPrevOutput)));
auto it = CNTK::Sigmoid(projectInput() + CNTK::Times(createProjectionParam(cellDim), stabilizedPrevOutput) + CNTK::ElementTimes(createDiagWeightParam(cellDim), stabilizedPrevCellState));
auto bit = CNTK::ElementTimes(it, CNTK::Tanh(projectInput() + CNTK::Times(createProjectionParam(cellDim), stabilizedPrevOutput)));
// Forget-me-not gate
auto ft = CNTK::Sigmoid(projectInput() + CNTK::Times(createProjectionParam(cellDim, outputDim), stabilizedPrevOutput) + CNTK::ElementTimes(createDiagWeightParam(cellDim), stabilizedPrevCellState));
auto ft = CNTK::Sigmoid(projectInput() + CNTK::Times(createProjectionParam(cellDim), stabilizedPrevOutput) + CNTK::ElementTimes(createDiagWeightParam(cellDim), stabilizedPrevCellState));
auto bft = CNTK::ElementTimes(ft, prevCellState);
auto ct = bft + bit;
// Output gate
auto ot = CNTK::Sigmoid(projectInput() + CNTK::Times(createProjectionParam(cellDim, outputDim), stabilizedPrevOutput) + CNTK::ElementTimes(createDiagWeightParam(cellDim), Stabilize<ElementType>(ct, device)));
auto ot = CNTK::Sigmoid(projectInput() + CNTK::Times(createProjectionParam(cellDim), stabilizedPrevOutput) + CNTK::ElementTimes(createDiagWeightParam(cellDim), Stabilize<ElementType>(ct, device)));
auto ht = CNTK::ElementTimes(ot, CNTK::Tanh(ct));
auto c = ct;
auto h = (outputDim != cellDim) ? CNTK::Times(createProjectionParam(outputDim, cellDim), Stabilize<ElementType>(ht, device)) : ht;
auto h = (outputDim != cellDim) ? CNTK::Times(createProjectionParam(outputDim), Stabilize<ElementType>(ht, device)) : ht;
return{ h, c };
}

View file

@ -99,18 +99,14 @@ void TestReduceSum(size_t sampleRank, const DeviceDescriptor& device)
// Test ReduceSum along a dynamic axis
{
auto testReduceSum = [&sequences, &sequenceLengths, inputShape, sequencesValue, device](const Axis& axis)
auto testReduceSum = [&sequences, &sequenceLengths, inputShape, sequencesValue, device]()
{
if (!axis.IsDynamicAxis())
RuntimeError("Called the dynamic axis ReduceSum test with a static axis");
size_t maxActualSequenceLength = sequencesValue->Shape()[inputShape.Rank()];
size_t numSequences = sequencesValue->Shape()[inputShape.Rank() + 1];
auto inputVar = InputVariable({ inputShape }, DataType::Float, L"input");
FunctionPtr reduceSumFunc = ReduceSum(inputVar, axis);
FunctionPtr reduceSumFunc = Sequence::ReduceSum(inputVar);
NDShape maskShape = { ((axis == Axis::DefaultBatchAxis()) ? maxActualSequenceLength : 1), ((axis == Axis::DefaultBatchAxis()) ? 1 : numSequences) };
NDShape maskShape = { 1, numSequences };
NDShape outputShape = reduceSumFunc->Output().Shape();
auto outputDataShape = outputShape.AppendShape(maskShape);
@ -130,10 +126,7 @@ void TestReduceSum(size_t sampleRank, const DeviceDescriptor& device)
for (size_t k = 0; k < inputShape.TotalSize(); ++k)
{
float value = sequences[i][(j * inputShape.TotalSize()) + k];
if (axis == Axis::DefaultBatchAxis())
expectedTotals[(j * inputShape.TotalSize()) + k] += value;
else
expectedTotals[(i * inputShape.TotalSize()) + k] += value;
expectedTotals[(i * inputShape.TotalSize()) + k] += value;
}
}
}
@ -141,7 +134,7 @@ void TestReduceSum(size_t sampleRank, const DeviceDescriptor& device)
FloatingPointVectorCompare(outputData, expectedTotals, "testReduceSum: Forward prop results do not match expected results");
};
testReduceSum(Axis::DefaultDynamicAxis());
testReduceSum();
}
}
@ -217,11 +210,8 @@ void TestSlice(size_t sampleRank, const DeviceDescriptor& device)
// Test slice along a dynamic axis
{
auto testDynamicAxisSlice = [&sequences, &sequenceLengths, inputShape, sequencesValue, device](const Axis& axis, int beginOffset, int endOffset)
auto testDynamicAxisSlice = [&sequences, &sequenceLengths, inputShape, sequencesValue, device](int beginOffset, int endOffset)
{
if (!axis.IsDynamicAxis())
RuntimeError("Called the dynamic axis slice test with a static axis");
size_t maxActualSequenceLength = sequencesValue->Shape()[inputShape.Rank()];
size_t numSequences = sequencesValue->Shape()[inputShape.Rank() + 1];
@ -229,11 +219,11 @@ void TestSlice(size_t sampleRank, const DeviceDescriptor& device)
size_t maxSliceLength = (endAndBeginOffsetDiff > 0) ? endAndBeginOffsetDiff : maxActualSequenceLength + endAndBeginOffsetDiff;
auto inputVar = InputVariable(inputShape, DataType::Float, L"input");
auto sliceFunc = Slice(inputVar, axis, beginOffset, endOffset);
auto sliceFunc = Sequence::Slice(inputVar, beginOffset, endOffset);
sliceFunc = sliceFunc + sliceFunc;
size_t outputSequenceAxisLength = (axis == Axis::DefaultDynamicAxis()) ? maxSliceLength : maxActualSequenceLength;
size_t outputBatchAxisLength = (axis == Axis::DefaultBatchAxis()) ? maxSliceLength : numSequences;
size_t outputSequenceAxisLength = maxSliceLength;
size_t outputBatchAxisLength = numSequences;
NDShape outputShape = sliceFunc->Output().Shape().AppendShape({ outputSequenceAxisLength, outputBatchAxisLength });
std::vector<float> outputData(outputShape.TotalSize(), 0);
NDMaskPtr mask;
@ -247,15 +237,15 @@ void TestSlice(size_t sampleRank, const DeviceDescriptor& device)
std::unordered_map<Variable, ValuePtr> outputs = { { sliceFunc->Output(), outputValue } };
sliceFunc->Forward({ { inputVar, sequencesValue } }, outputs, device);
size_t startSequenceIdx = (axis == Axis::DefaultBatchAxis()) ? ((beginOffset >= 0) ? beginOffset : (numSequences + beginOffset)) : 0;
size_t endSequenceIdx = (axis == Axis::DefaultBatchAxis()) ? ((endOffset > 0) ? endOffset : (numSequences + endOffset)) : numSequences;
size_t startSequenceIdx = 0;
size_t endSequenceIdx = numSequences;
std::vector<float> expectedOutputValues(inputShape.TotalSize() * outputSequenceAxisLength * outputBatchAxisLength);
for (size_t i = startSequenceIdx; i < endSequenceIdx; ++i)
{
size_t currentSequenceLength = sequenceLengths[i];
size_t startFrameIdx = (axis == Axis::DefaultDynamicAxis()) ? ((beginOffset >= 0) ? beginOffset : (currentSequenceLength + beginOffset)) : 0;
size_t endFrameIdx = (axis == Axis::DefaultDynamicAxis()) ? ((endOffset > 0) ? endOffset : (currentSequenceLength + endOffset)) : currentSequenceLength;
size_t startFrameIdx = ((beginOffset >= 0) ? beginOffset : (currentSequenceLength + beginOffset));
size_t endFrameIdx = ((endOffset > 0) ? endOffset : (currentSequenceLength + endOffset));
size_t j = startFrameIdx;
for (; j < endFrameIdx; ++j)
{
@ -272,12 +262,12 @@ void TestSlice(size_t sampleRank, const DeviceDescriptor& device)
FloatingPointVectorCompare(outputData, expectedOutputValues, "testDynamicAxisSlice: Forward prop results do not match expected results");
};
testDynamicAxisSlice(Axis::DefaultDynamicAxis(), 0, 1);
testDynamicAxisSlice(Axis::DefaultDynamicAxis(), 0, 2);
testDynamicAxisSlice(Axis::DefaultDynamicAxis(), -1, 0);
testDynamicAxisSlice(Axis::DefaultDynamicAxis(), -2, 0);
testDynamicAxisSlice(Axis::DefaultDynamicAxis(), 0, -1);
testDynamicAxisSlice(Axis::DefaultDynamicAxis(), 1, 0);
testDynamicAxisSlice(0, 1);
testDynamicAxisSlice(0, 2);
testDynamicAxisSlice(-1, 0);
testDynamicAxisSlice(-2, 0);
testDynamicAxisSlice(0, -1);
testDynamicAxisSlice(1, 0);
}
}
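The mechanical rewrite above tracks an API move: slicing and reduction along the default dynamic (sequence) axis now go through the CNTK::Sequence namespace rather than the Axis-parameterized free functions. A minimal before/after sketch, assuming a sequence-typed inputVar as in these tests:

// Before: generic ops taking a dynamic Axis argument.
auto reducedOld = ReduceSum(inputVar, Axis::DefaultDynamicAxis());
auto slicedOld  = Slice(inputVar, Axis::DefaultDynamicAxis(), /*beginOffset=*/1, /*endOffset=*/0);

// After: dedicated sequence-axis variants with no Axis argument.
auto reducedNew = Sequence::ReduceSum(inputVar);
auto slicedNew  = Sequence::Slice(inputVar, /*beginOffset=*/1, /*endOffset=*/0);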

View file

@ -6,7 +6,7 @@ using namespace CNTK;
using namespace std::placeholders;
void TrainSequenceToSequenceTranslator(const DeviceDescriptor& device, bool useSparseInputs, bool testSaveAndReLoad, bool testCheckpointing, bool addBeamSearchReorderingHook, bool testCloning)
void TrainSequenceToSequenceTranslator(const DeviceDescriptor& device, bool useSparseInputs, bool testSaveAndReLoad, bool testCheckpointing, bool addBeamSearchReorderingHook, bool testCloning, bool usePlaceholders)
{
using namespace std::placeholders;
@ -30,7 +30,7 @@ void TrainSequenceToSequenceTranslator(const DeviceDescriptor& device, bool useS
FunctionPtr inputSequence = Alias(rawInput, L"inputSequence");
// Drop the sentence start token from the label, for decoder training
auto labelSequence = Slice(rawLabels, labelDynamicAxes[0], 1, 0, L"labelSequenceWithStartTrimmed");
auto labelSequence = Sequence::Slice(rawLabels, 1, 0, L"labelSequenceWithStartTrimmed");
auto labelSentenceStart = Sequence::First(rawLabels, L"labelSequenceStart");
auto isFirstLabel = Sequence::IsFirst(labelSequence, L"isFirstLabel");
@ -38,8 +38,8 @@ void TrainSequenceToSequenceTranslator(const DeviceDescriptor& device, bool useS
bool forceEmbedding = useSparseInputs;
/* Embeddings */
auto inputEmbeddingWeights = Parameter({ inputEmbeddingDim, inputVocabDim }, DataType::Float, GlorotUniformInitializer(), device, L"inputEmbeddingWeights");
auto labelEmbeddingWeights = Parameter({ labelEmbeddingDim, labelVocabDim }, DataType::Float, GlorotUniformInitializer(), device, L"labelEmbeddingWeights");
auto inputEmbeddingWeights = Parameter({ inputEmbeddingDim, NDShape::InferredDimension }, DataType::Float, GlorotUniformInitializer(), device, L"inputEmbeddingWeights");
auto labelEmbeddingWeights = Parameter({ labelEmbeddingDim, NDShape::InferredDimension }, DataType::Float, GlorotUniformInitializer(), device, L"labelEmbeddingWeights");
auto inputEmbedding = Alias((!forceEmbedding && (inputVocabDim <= inputEmbeddingDim)) ? inputSequence : Times(inputEmbeddingWeights, inputSequence), L"inputEmbedding");
auto labelEmbedding = Alias((!forceEmbedding && (labelVocabDim <= labelEmbeddingDim)) ? labelSequence : Times(labelEmbeddingWeights, labelSequence), L"labelEmbedding");
@ -63,8 +63,20 @@ void TrainSequenceToSequenceTranslator(const DeviceDescriptor& device, bool useS
labelSentenceStartEmbeddedScattered = Reshape(labelSentenceStartEmbeddedScattered, labelSentenceStartEmbeddedScattered->Output().Shape().AppendShape({ 1 }), L"labelSentenceStartEmbeddedScattered");
}
auto thoughtVectorBroadcastH = Sequence::BroadcastAs(thoughtVectorH, labelEmbedding, L"thoughtVectorBroadcastH");
auto thoughtVectorBroadcastC = Sequence::BroadcastAs(thoughtVectorC, labelEmbedding, L"thoughtVectorBroadcastC");
auto actualThoughtVectorBroadcastH = Sequence::BroadcastAs(thoughtVectorH, labelEmbedding, L"thoughtVectorBroadcastH");
auto actualThoughtVectorBroadcastC = Sequence::BroadcastAs(thoughtVectorC, labelEmbedding, L"thoughtVectorBroadcastC");
Variable thoughtVectorBroadcastH, thoughtVectorBroadcastC;
if (usePlaceholders)
{
thoughtVectorBroadcastH = PlaceholderVariable();
thoughtVectorBroadcastC = PlaceholderVariable();
}
else
{
thoughtVectorBroadcastH = actualThoughtVectorBroadcastH;
thoughtVectorBroadcastC = actualThoughtVectorBroadcastC;
}
/* Decoder */
auto beamSearchReorderHook = Constant({ 1, 1 }, 1.0f, device);
@ -116,6 +128,10 @@ void TrainSequenceToSequenceTranslator(const DeviceDescriptor& device, bool useS
auto biasWeights = Parameter({ labelVocabDim }, 0.0f, device);
auto z = Plus(Times(outputLayerProjWeights, Stabilize<float>(decoderOutput, device)), biasWeights, L"classifierOutput");
if (usePlaceholders)
z->ReplacePlaceholders({ { thoughtVectorBroadcastH, actualThoughtVectorBroadcastH }, { thoughtVectorBroadcastC, actualThoughtVectorBroadcastC } });
auto ce = CrossEntropyWithSoftmax(z, labelSequence, L"lossFunction");
auto errs = ClassificationError(z, labelSequence, L"classificationError");
@ -218,8 +234,8 @@ void TrainSequenceToSequenceTranslator()
fprintf(stderr, "\nTrainSequenceToSequenceTranslator..\n");
// TODO: Also test with sparse input variables in the graph
TrainSequenceToSequenceTranslator(DeviceDescriptor::CPUDevice(), false, true, false, true, true);
TrainSequenceToSequenceTranslator(DeviceDescriptor::CPUDevice(), false, true, false, false, true, true);
if (IsGPUAvailable())
TrainSequenceToSequenceTranslator(DeviceDescriptor::GPUDevice(0), false, false, true, false, false);
TrainSequenceToSequenceTranslator(DeviceDescriptor::GPUDevice(0), false, false, true, true, false, false);
}
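The new usePlaceholders path exercises deferred graph binding: the decoder is wired against PlaceholderVariables, and the real thought vectors are substituted only after the classifier output z has been built. Stripped to the pattern (BuildDecoderOutput is a hypothetical stand-in for the decoder construction in the hunk above):

// Build part of the graph against placeholders...
Variable thoughtH = PlaceholderVariable();
Variable thoughtC = PlaceholderVariable();
auto z = BuildDecoderOutput(thoughtH, thoughtC); // hypothetical helper, not in the diff

// ...then bind the placeholders to the actual computed variables.
z->ReplacePlaceholders({ { thoughtH, actualThoughtVectorBroadcastH },
                         { thoughtC, actualThoughtVectorBroadcastC } });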

View file

@ -19,6 +19,8 @@
%rename(gpu_device) CNTK::DeviceDescriptor::GPUDevice;
%rename(cpu_device) CNTK::DeviceDescriptor::CPUDevice;
%rename(times_transpose) CNTK::TransposeTimes;
%rename(sequence_slice) CNTK::Sequence::Slice;
%rename(sequence_reduce_sum) CNTK::Sequence::ReduceSum;
%rename(momentum_as_time_constant_schedule) CNTK::MomentumAsTimeConstantSchedule;
@ -42,7 +44,6 @@
%template() std::vector<CNTK::Axis>;
%template() std::vector<CNTK::DeviceDescriptor>;
%template() std::vector<CNTK::StreamConfiguration>;
//%template() std::vector<CNTK::DictionaryValue>;
%template() std::vector<std::shared_ptr<CNTK::Function>>;
%template() std::vector<std::shared_ptr<CNTK::Learner>>;
%template() std::pair<size_t, double>;
@ -74,7 +75,7 @@
//
%feature("shadow") CNTK::Variable::DynamicAxes %{
def dynamic_axes(self):
return ($action(self))[::-1]
return tuple(reversed($action(self)))
%}
%fragment("NDShapeToTuple", "header")
@ -86,7 +87,7 @@ def dynamic_axes(self):
for (size_t i=0; i<rank; i++)
{
size_t dim = (&shape)->operator[](i);
PyTuple_SetItem(result, i, PyInt_FromLong(dim));
PyTuple_SetItem(result, rank-i-1, PyInt_FromLong(dim));
}
return result;
}
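Note on the reversed indexing above (and the reversed tuple-to-NDShape loop further down): both converters now flip dimension order, so the Python layer consistently sees shapes and dynamic axes in the opposite order from the C++ core. As a hypothetical round trip under this convention, the Python tuple (2, 3) arrives in C++ as CNTK::NDShape({3, 2}) and converts back to (2, 3).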
@ -160,6 +161,57 @@ def dynamic_axes(self):
}
}
//
// Converting Python list {DictionaryValue} to std::vector
//
%typecheck(1000) std::vector<CNTK::DictionaryValue>& {
// '1000' is the typecheck precedence code. It means: check after basic
// types, but before arrays. See: http://www.swig.org/Doc1.3/Typemaps.html#Typemaps_overloading
$1 = PyList_Check($input) ? 1 : 0;
}
%typemap(in) std::vector<CNTK::DictionaryValue>& {
if (PyList_Check($input)) {
std::vector<CNTK::DictionaryValue>* vec = new std::vector<CNTK::DictionaryValue>();
PyObject *item;
PyObject *iterator = PyObject_GetIter($input);
if (iterator == NULL) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert list element to CNTK::DictionaryValue");
}
while ((item = PyIter_Next(iterator))) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(item, &raw_var, SWIGTYPE_p_CNTK__DictionaryValue, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert list element to CNTK::DictionaryValue");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting a list element to CNTK::DictionaryValue");
}
CNTK::DictionaryValue* var = reinterpret_cast<CNTK::DictionaryValue*>(raw_var);
vec->push_back(*var);
Py_DECREF(item);
}
Py_DECREF(iterator);
if (PyErr_Occurred()) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert list element to CNTK::DictionaryValue");
}
$1 = vec;
} else {
SWIG_exception(SWIG_ValueError, "list expected");
}
}
%fragment("DictionaryValueToPy", "header", fragment="NDShapeToTuple", fragment="NDArrayViewToNumPy")
{
PyObject *DictionaryValueToPy(const CNTK::DictionaryValue& dictVal)
@ -340,10 +392,10 @@ fail:
%typemap(in) CNTK::NDShape const & {
if (PyTuple_Check($input)) {
std::vector<size_t> dimensions;
size_t rank = PyTuple_Size($input);
std::vector<size_t> dimensions(rank);
for (size_t i=0; i<rank; i++)
dimensions.push_back(PyLong_AsLong(PyTuple_GET_ITEM($input, i)));
dimensions[i] = PyLong_AsLong(PyTuple_GET_ITEM($input, rank-i-1));
$1 = new CNTK::NDShape(dimensions);
} else {
@ -405,97 +457,60 @@ fail:
//
// Converting Python dictionary {Variable: ValuePtr} to std::unordered_map
//
%typecheck(1000) const std::unordered_map<CNTK::Variable, const CNTK::ValuePtr>&, std::unordered_map<CNTK::Variable, CNTK::ValuePtr>& {
%define %unordered_map_conversion(KEY_TYPE, VALUE_TYPE, SWIG_KEY_TYPE, SWIG_VALUE_TYPE)
// '1000' is the typecheck precedence code. It means: check after basic
// types, but before arrays. See: http://www.swig.org/Doc1.3/Typemaps.html#Typemaps_overloading
$1 = PyDict_Check($input) ? 1 : 0;
}
%typecheck(1000) std::unordered_map<KEY_TYPE, VALUE_TYPE> const&,
const std::unordered_map<KEY_TYPE, VALUE_TYPE>&,
std::unordered_map<KEY_TYPE, VALUE_TYPE>&
{ $1 = PyDict_Check($input) ? 1 : 0; }
%typemap(in) const std::unordered_map<CNTK::Variable, const CNTK::ValuePtr>& (
std::unordered_map<CNTK::Variable, const CNTK::ValuePtr> args_map
) {
if (PyDict_Check($input)) {
%typemap(in) std::unordered_map<KEY_TYPE, VALUE_TYPE>& (
std::unordered_map<KEY_TYPE, VALUE_TYPE> args_map
) {
if (PyDict_Check($input)) {
PyObject *key, *value;
Py_ssize_t pos = 0;
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next($input, &pos, &key, &value)) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(key, &raw_var, SWIG_KEY_TYPE, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert key of dictionary");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting key of dictionary");
}
KEY_TYPE* var = reinterpret_cast<KEY_TYPE*>(raw_var);
void *raw_value = 0;
int res2 = SWIG_ConvertPtr(value, &raw_value, SWIG_VALUE_TYPE, 0);
if (!SWIG_IsOK(res2)) {
SWIG_exception_fail(SWIG_ArgError(res2), "cannot convert value of dictionary");
}
VALUE_TYPE* value;
if (raw_value) {
value = reinterpret_cast<VALUE_TYPE*>(raw_value);
args_map.insert(std::make_pair(*var, *value));
} else {
// We got an empty VALUE_TYPE, which carries a nullptr.
// This is only used for ValuePtr
args_map.insert(std::make_pair(*var, VALUE_TYPE()));
}
while (PyDict_Next($input, &pos, &key, &value)) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(key, &raw_var, SWIGTYPE_p_CNTK__Variable, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert key of dictionary to CNTK::Variable");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting key of dictionary to CNTK::Variable");
}
CNTK::Variable* var = reinterpret_cast<CNTK::Variable*>(raw_var);
$1 = &args_map;
} else {
SWIG_exception(SWIG_TypeError, "dictionary expected");
}
}
%enddef
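The %enddef above closes a macro that parametrizes the dict-to-unordered_map typemaps which were previously hand-copied per key/value pair (one such copy, for {Parameter: NDArrayViewPtr}, is deleted further down). A hypothetical instantiation for the {Variable: ValuePtr} maps, not shown in this excerpt, would read: %unordered_map_conversion(CNTK::Variable, CNTK::ValuePtr, SWIGTYPE_p_CNTK__Variable, SWIGTYPE_p_std__shared_ptrT_CNTK__Value_t).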
void *raw_value = 0;
int res2 = SWIG_ConvertPtr(value, &raw_value, SWIGTYPE_p_std__shared_ptrT_CNTK__Value_t, 0);
if (!SWIG_IsOK(res2)) {
SWIG_exception_fail(SWIG_ArgError(res2), "cannot convert value of dictionary to CNTK::ValuePtr");
}
CNTK::ValuePtr* value;
if (raw_value) {
value = reinterpret_cast<CNTK::ValuePtr*>(raw_value);
args_map.insert(std::make_pair(*var, *value));
} else {
// We got an empty ValuePtr, which carries a nullptr.
args_map.insert(std::make_pair(*var, CNTK::ValuePtr()));
}
}
$1 = &args_map;
} else {
SWIG_exception(SWIG_TypeError, "dictionary expected");
}
}
// supporting the non-const version
%typemap(in) std::unordered_map<CNTK::Variable, CNTK::ValuePtr>& (
std::unordered_map<CNTK::Variable, CNTK::ValuePtr> args_map
) {
if (PyDict_Check($input)) {
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next($input, &pos, &key, &value)) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(key, &raw_var, SWIGTYPE_p_CNTK__Variable, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert key of dictionary to CNTK::Variable");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting key of dictionary to CNTK::Variable");
}
CNTK::Variable* var = reinterpret_cast<CNTK::Variable*>(raw_var);
void *raw_value = 0;
int res2 = SWIG_ConvertPtr(value, &raw_value, SWIGTYPE_p_std__shared_ptrT_CNTK__Value_t, 0);
if (!SWIG_IsOK(res2)) {
SWIG_exception_fail(SWIG_ArgError(res2), "cannot convert value of dictionary to CNTK::ValuePtr");
}
CNTK::ValuePtr* value;
if (raw_value) {
value = reinterpret_cast<CNTK::ValuePtr*>(raw_value);
args_map.insert(std::make_pair(*var, *value));
} else {
// We got an empty ValuePtr, which carries a nullptr.
args_map.insert(std::make_pair(*var, CNTK::ValuePtr()));
}
}
$1 = &args_map;
} else {
SWIG_exception(SWIG_TypeError, "dictionary expected");
}
}
// For the output dict (the non-const unordered_map) we need to get the
// modified values and put them back into the dictionary. This is used, when
@ -727,368 +742,6 @@ fail:
}
}
//
// Converting Python dictionary {Parameter: NDArrayViewPtr} to std::unordered_map
//
%typecheck(1000) const std::unordered_map<CNTK::Parameter, CNTK::NDArrayViewPtr>& {
// '1000' is the typecheck precedence code. It means: check after basic
// types, but before arrays. See: http://www.swig.org/Doc1.3/Typemaps.html#Typemaps_overloading
$1 = PyDict_Check($input) ? 1 : 0;
}
%typemap(in) const std::unordered_map<CNTK::Parameter, CNTK::NDArrayViewPtr>& (
std::unordered_map<CNTK::Parameter, CNTK::NDArrayViewPtr> args_map
) {
if (PyDict_Check($input)) {
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next($input, &pos, &key, &value)) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(key, &raw_var, SWIGTYPE_p_CNTK__Parameter, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert key of dictionary to CNTK::Parameter");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting key of dictionary to CNTK::Parameter");
}
CNTK::Parameter* var = reinterpret_cast<CNTK::Parameter*>(raw_var);
void *raw_value = 0;
int res2 = SWIG_ConvertPtr(value, &raw_value, SWIGTYPE_p_std__shared_ptrT_CNTK__NDArrayView_t, 0);
if (!SWIG_IsOK(res2)) {
SWIG_exception_fail(SWIG_ArgError(res2), "cannot convert value of dictionary to CNTK::NDArrayViewPtr");
}
CNTK::NDArrayViewPtr* value;
if (raw_value) {
value = reinterpret_cast<CNTK::NDArrayViewPtr*>(raw_value);
} else {
// We got an empty NDArrayViewPtr, which carries a nullptr.
value = new CNTK::NDArrayViewPtr();
}
args_map.insert(std::make_pair(*var, *value));
}
$1 = &args_map;
} else {
SWIG_exception(SWIG_TypeError, "dictionary expected");
}
}
//
// Converting Python list {DictionaryValue} to std::vector
//
%typecheck(1000) std::vector<CNTK::DictionaryValue>& {
// '1000' is the typecheck precedence code. It means: check after basic
// types, but before arrays. See: http://www.swig.org/Doc1.3/Typemaps.html#Typemaps_overloading
$1 = PyList_Check($input) ? 1 : 0;
}
%typemap(in) std::vector<CNTK::DictionaryValue>& {
if (PyList_Check($input)) {
std::vector<CNTK::DictionaryValue>* vec = new std::vector<CNTK::DictionaryValue>();
PyObject *item;
PyObject *iterator = PyObject_GetIter($input);
if (iterator == NULL) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert list element to CNTK::DictionaryValue");
}
while ((item = PyIter_Next(iterator))) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(item, &raw_var, SWIGTYPE_p_CNTK__DictionaryValue, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert list element to CNTK::DictionaryValue");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting a list element to CNTK::DictionaryValue");
}
CNTK::DictionaryValue* var = reinterpret_cast<CNTK::DictionaryValue*>(raw_var);
vec->push_back(*var);
Py_DECREF(item);
}
Py_DECREF(iterator);
if (PyErr_Occurred()) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert list element to CNTK::DictionaryValue");
}
$1 = vec;
} else {
SWIG_exception(SWIG_ValueError, "list expected");
}
}
// end of map conversion
// TODO: Parametrize the following four typemaps and unify set/list usage.
//
// Converting Python set {Variable} to std::unordered_set
//
%typecheck(1000) std::unordered_set<CNTK::Variable>& {
// '1000' is the typecheck precedence code. It means: check after basic
// types, but before arrays. See: http://www.swig.org/Doc1.3/Typemaps.html#Typemaps_overloading
$1 = PySet_Check($input) ? 1 : 0;
}
%typemap(in) std::unordered_set<CNTK::Variable>& (
std::unordered_set<CNTK::Variable> args_set
) {
if (PySet_Check($input)) {
PyObject *item;
PyObject *iterator = PyObject_GetIter($input);
if (iterator == NULL) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert list element to CNTK::Variable");
}
while ((item = PyIter_Next(iterator))) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(item, &raw_var, SWIGTYPE_p_CNTK__Variable, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert set element to CNTK::Variable");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting a list element to CNTK::Variable");
}
CNTK::Variable* var = reinterpret_cast<CNTK::Variable*>(raw_var);
args_set.insert(*var);
Py_DECREF(item);
}
Py_DECREF(iterator);
if (PyErr_Occurred()) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert set element to CNTK::Variable");
}
$1 = &args_set;
} else {
SWIG_exception(SWIG_ValueError, "set expected");
}
}
//
// Converting Python set {StreamInformation} to std::unordered_set
//
%typecheck(1000) std::unordered_set<CNTK::StreamInformation>& {
// '1000' is the typecheck precedence code. It means: check after basic
// types, but before arrays. See: http://www.swig.org/Doc1.3/Typemaps.html#Typemaps_overloading
$1 = PySet_Check($input) ? 1 : 0;
}
%typemap(in) std::unordered_set<CNTK::StreamInformation>& (
std::unordered_set<CNTK::StreamInformation> args_set
) {
if (PySet_Check($input)) {
PyObject *item;
PyObject *iterator = PyObject_GetIter($input);
if (iterator == NULL) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert list element to CNTK::StreamInformation");
}
while ((item = PyIter_Next(iterator))) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(item, &raw_var, SWIGTYPE_p_CNTK__StreamInformation, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert set element to CNTK::StreamInformation");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting a set element to CNTK::StreamInformation");
}
CNTK::StreamInformation* var = reinterpret_cast<CNTK::StreamInformation*>(raw_var);
args_set.insert(*var);
Py_DECREF(item);
}
Py_DECREF(iterator);
if (PyErr_Occurred()) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert set element to CNTK::StreamInformation");
}
$1 = &args_set;
} else {
SWIG_exception(SWIG_ValueError, "set expected");
}
}
//
// Converting Python list {Parameter} to std::unordered_set
//
%typecheck(1000) std::unordered_set<CNTK::Parameter>& {
// '1000' is the typecheck precedence code. It means: check after basic
// types, but before arrays. See: http://www.swig.org/Doc1.3/Typemaps.html#Typemaps_overloading
$1 = PyList_Check($input) ? 1 : 0;
}
%typemap(in) std::unordered_set<CNTK::Parameter>& (
std::unordered_set<CNTK::Parameter> args_set
) {
if (PyList_Check($input)) {
PyObject *item;
PyObject *iterator = PyObject_GetIter($input);
if (iterator == NULL) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert list element to CNTK::Parameter");
}
while ((item = PyIter_Next(iterator))) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(item, &raw_var, SWIGTYPE_p_CNTK__Parameter, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert set element to CNTK::Parameter");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting a list element to CNTK::Parameter");
}
CNTK::Parameter* var = reinterpret_cast<CNTK::Parameter*>(raw_var);
args_set.insert(*var);
Py_DECREF(item);
}
Py_DECREF(iterator);
if (PyErr_Occurred()) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert set element to CNTK::Parameter");
}
$1 = &args_set;
} else {
SWIG_exception(SWIG_ValueError, "list expected");
}
}
//
// Converting Python list {LearnerPtr} to std::unordered_set
//
%typecheck(1000) std::unordered_set<CNTK::LearnerPtr>& {
// '1000' is the typecheck precedence code. It means: check after basic
// types, but before arrays. See: http://www.swig.org/Doc1.3/Typemaps.html#Typemaps_overloading
$1 = PyList_Check($input) ? 1 : 0;
}
%typemap(in) std::unordered_set<CNTK::LearnerPtr>& (
std::unordered_set<CNTK::LearnerPtr> args_set
) {
if (PyList_Check($input)) {
PyObject *item;
PyObject *iterator = PyObject_GetIter($input);
if (iterator == NULL) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert list element to CNTK::LearnerPtr");
}
while ((item = PyIter_Next(iterator))) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(item, &raw_var, SWIGTYPE_p_std__shared_ptrT_CNTK__Learner_t, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert list element to CNTK::LearnerPtr");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting a list element to CNTK::LearnerPtr");
}
CNTK::LearnerPtr* var = reinterpret_cast<CNTK::LearnerPtr*>(raw_var);
args_set.insert(*var);
Py_DECREF(item);
}
Py_DECREF(iterator);
if (PyErr_Occurred()) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert list element to CNTK::LearnerPtr");
}
$1 = &args_set;
} else {
SWIG_exception(SWIG_ValueError, "list expected");
}
}
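// For illustration: the list typemap above is what lets Python callers pass a
// plain list of learners where the C++ API expects a
// std::unordered_set<CNTK::LearnerPtr>&. A sketch (z, ce and errs built as in
// the seq2seq example further down in this diff):
//
//     trainer = Trainer(z, ce, errs, [momentum_sgd(...)])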
%typecheck(1000) const std::unordered_map<CNTK::Variable, CNTK::Variable>& {
// '1000' is the typecheck precedence code. It means: check after basic
// types, but before arrays. See: http://www.swig.org/Doc1.3/Typemaps.html#Typemaps_overloading
$1 = PyDict_Check($input) ? 1 : 0;
}
%typemap(in) std::unordered_map<CNTK::Variable, CNTK::Variable>& (
std::unordered_map<CNTK::Variable, CNTK::Variable> args_map
) {
if (PyDict_Check($input)) {
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next($input, &pos, &key, &value)) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(key, &raw_var, SWIGTYPE_p_CNTK__Variable, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert key of dictionary to CNTK::Variable");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference when converting key of dictionary to CNTK::Variable");
}
CNTK::Variable* var = reinterpret_cast<CNTK::Variable*>(raw_var);
void *raw_value = 0;
int res2 = SWIG_ConvertPtr(value, &raw_value, SWIGTYPE_p_CNTK__Variable, 0);
if (!SWIG_IsOK(res2)) {
SWIG_exception_fail(SWIG_ArgError(res2), "cannot convert value of dictionary to CNTK::Variable");
}
// Rename to avoid shadowing the PyObject* 'value' from PyDict_Next above.
CNTK::Variable default_value;
CNTK::Variable* var_value;
if (raw_value) {
var_value = reinterpret_cast<CNTK::Variable*>(raw_value);
} else {
// We got an empty Variable, which carries a nullptr; use a default-constructed
// one on the stack instead of leaking a heap allocation.
var_value = &default_value;
}
args_map.insert(std::make_pair(*var, *var_value));
}
$1 = &args_map;
} else {
SWIG_exception(SWIG_TypeError, "dictionary expected");
}
}
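// One place this dict-of-Variables conversion surfaces in Python is variable
// substitution when cloning a Function. A sketch (variable names illustrative,
// method as exposed via cntk.ops.functions.CloneMethod):
//
//     z_shared = z.clone(CloneMethod.share, {old_input: new_input})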
//
// Converting std::unordered_set to Python list.
@ -1104,9 +757,9 @@ fail:
{
SWIG_exception(SWIG_RuntimeError, "error passing set to Python");
}
// *&$1 -> $1 is the returned result being converted (unordered_set<...>*),
// wrapped by SwigValueWrapper. So we need to unwrap it using '&',
// then access its value using '*'.
for (auto var : *&$1)
{
@ -1119,15 +772,58 @@ fail:
$result = container;
}
%enddef
%unordered_set_conversion(Variable, SWIGTYPE_p_CNTK__Variable)
%unordered_set_conversion(Constant, SWIGTYPE_p_CNTK__Constant)
%unordered_set_conversion(Parameter, SWIGTYPE_p_CNTK__Parameter)
%unordered_set_conversion(DistributedWorkerDescriptor, SWIGTYPE_p_CNTK__DistributedWorkerDescriptor)
%define %unordered_set_ref_conversion(DATA_TYPE, _SWIG_TYPE)
%typemap(out) std::unordered_set<CNTK::DATA_TYPE>& {
%typecheck(1000) std::unordered_set<DATA_TYPE>&, std::unordered_set<DATA_TYPE>const & {
// '1000' is the typecheck precedence code. It means: check after basic
// types, but before arrays. See: http://www.swig.org/Doc1.3/Typemaps.html#Typemaps_overloading
$1 = PySet_Check($input) || PyList_Check($input) ? 1 : 0;
}
%typemap(in) std::unordered_set<DATA_TYPE>& (
std::unordered_set<DATA_TYPE> args_set
) {
if (PySet_Check($input) || PyList_Check($input)) {
PyObject *item;
PyObject *iterator = PyObject_GetIter($input);
if (iterator == NULL) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert element");
}
while ((item = PyIter_Next(iterator))) {
void *raw_var = 0 ;
int res1 = SWIG_ConvertPtr(item, &raw_var, _SWIG_TYPE, 0);
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "cannot convert set element");
}
if (!raw_var) {
SWIG_exception_fail(SWIG_ValueError, "invalid null reference");
}
DATA_TYPE* var = reinterpret_cast<DATA_TYPE*>(raw_var);
args_set.insert(*var);
Py_DECREF(item);
}
Py_DECREF(iterator);
if (PyErr_Occurred()) {
SWIG_exception_fail(SWIG_ValueError, "cannot convert set element");
}
$1 = &args_set;
} else {
SWIG_exception(SWIG_ValueError, "set expected");
}
}
%typemap(out) std::unordered_set<DATA_TYPE>& {
PyObject* container = PyList_New(0);
if (container == NULL)
{
@ -1136,7 +832,7 @@ fail:
for (auto var : *$1)
{
PyObject *item = SWIG_NewPointerObj(new CNTK::DATA_TYPE(var), _SWIG_TYPE, SWIG_POINTER_OWN );
PyObject *item = SWIG_NewPointerObj(new DATA_TYPE(var), _SWIG_TYPE, SWIG_POINTER_OWN );
// No error handling here, because the error will be passed directly to Python
PyList_Append(container, item);
Py_DECREF(item);
@ -1146,16 +842,23 @@ fail:
}
%enddef
%unordered_set_ref_conversion(StreamInformation, SWIGTYPE_p_CNTK__StreamInformation)
%unordered_set_ref_conversion(LearnerPtr, SWIGTYPE_p_std__shared_ptrT_CNTK__Learner_t)
%unordered_set_ref_conversion(Parameter, SWIGTYPE_p_CNTK__Parameter)
%unordered_set_ref_conversion(DistributedWorkerDescriptor, SWIGTYPE_p_CNTK__DistributedWorkerDescriptor)
%unordered_set_conversion(CNTK::Variable, SWIGTYPE_p_CNTK__Variable)
%unordered_set_conversion(CNTK::Constant, SWIGTYPE_p_CNTK__Constant)
%unordered_set_conversion(CNTK::Parameter, SWIGTYPE_p_CNTK__Parameter)
%unordered_set_conversion(CNTK::StreamInformation, SWIGTYPE_p_CNTK__StreamInformation)
%unordered_set_conversion(CNTK::DistributedWorkerDescriptor, SWIGTYPE_p_CNTK__DistributedWorkerDescriptor)
%unordered_set_ref_conversion(CNTK::Variable, SWIGTYPE_p_CNTK__Variable)
%unordered_set_ref_conversion(CNTK::Parameter, SWIGTYPE_p_CNTK__Parameter)
%unordered_set_ref_conversion(CNTK::StreamInformation, SWIGTYPE_p_CNTK__StreamInformation)
%unordered_set_ref_conversion(CNTK::LearnerPtr, SWIGTYPE_p_std__shared_ptrT_CNTK__Learner_t)
%unordered_set_ref_conversion(CNTK::DistributedWorkerDescriptor, SWIGTYPE_p_CNTK__DistributedWorkerDescriptor)
// Unordered map conversion
%define %unordered_map_ref_conversion(DATA_TYPE1, _SWIG_TYPE1, DATA_TYPE2, _SWIG_TYPE2)
%typemap(out) std::unordered_map<CNTK::DATA_TYPE1, CNTK::DATA_TYPE2>& {
%typemap(out) std::unordered_map<DATA_TYPE1, DATA_TYPE2>& {
PyObject* container = PyDict_New();
if (container == NULL)
{
@ -1167,8 +870,8 @@ fail:
// then access its value using '*'.
for (auto it : *$1)
{
PyObject *returned_var = SWIG_NewPointerObj(SWIG_as_voidptr(new CNTK::DATA_TYPE1(it.first)), _SWIG_TYPE1, SWIG_POINTER_OWN);
PyObject *returned_val = SWIG_NewPointerObj(SWIG_as_voidptr(new CNTK::DATA_TYPE2(it.second)), _SWIG_TYPE2, SWIG_POINTER_OWN);
PyObject *returned_var = SWIG_NewPointerObj(SWIG_as_voidptr(new DATA_TYPE1(it.first)), _SWIG_TYPE1, SWIG_POINTER_OWN);
PyObject *returned_val = SWIG_NewPointerObj(SWIG_as_voidptr(new DATA_TYPE2(it.second)), _SWIG_TYPE2, SWIG_POINTER_OWN);
PyDict_SetItem(container, returned_var, returned_val);
@ -1180,8 +883,15 @@ fail:
}
%enddef
%unordered_map_ref_conversion(StreamInformation, SWIGTYPE_p_CNTK__StreamInformation, MinibatchData, SWIGTYPE_p_CNTK__MinibatchData);
%unordered_map_ref_conversion(Parameter, SWIGTYPE_p_CNTK__Parameter, NDArrayViewPtr, SWIGTYPE_p_std__shared_ptrT_CNTK__NDArrayView);
%unordered_map_conversion(CNTK::Variable, const CNTK::ValuePtr, SWIGTYPE_p_CNTK__Variable, SWIGTYPE_p_std__shared_ptrT_CNTK__Value_t)
%unordered_map_conversion(CNTK::Variable, CNTK::ValuePtr, SWIGTYPE_p_CNTK__Variable, SWIGTYPE_p_std__shared_ptrT_CNTK__Value_t)
%unordered_map_conversion(CNTK::Variable, CNTK::Variable, SWIGTYPE_p_CNTK__Variable, SWIGTYPE_p_CNTK__Variable)
%unordered_map_conversion(CNTK::Parameter, const CNTK::NDArrayViewPtr, SWIGTYPE_p_CNTK__Parameter, SWIGTYPE_p_std__shared_ptrT_CNTK__NDArrayView_t)
%unordered_map_conversion(CNTK::Parameter, CNTK::NDArrayViewPtr, SWIGTYPE_p_CNTK__Parameter, SWIGTYPE_p_std__shared_ptrT_CNTK__NDArrayView_t)
%unordered_map_ref_conversion(CNTK::StreamInformation, SWIGTYPE_p_CNTK__StreamInformation, CNTK::MinibatchData, SWIGTYPE_p_CNTK__MinibatchData);
%unordered_map_ref_conversion(CNTK::Parameter, SWIGTYPE_p_CNTK__Parameter, CNTK::NDArrayViewPtr, SWIGTYPE_p_std__shared_ptrT_CNTK__NDArrayView);
%unordered_map_ref_conversion(CNTK::Variable, SWIGTYPE_p_CNTK__Variable, CNTK::Variable, SWIGTYPE_p_CNTK__Variable);
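// The out-typemaps above are why a C++ std::unordered_map return value
// arrives in Python as an ordinary dict; e.g. reading a minibatch yields a
// {StreamInformation: MinibatchData} dict. A sketch (attribute names
// illustrative):
//
//     mb = minibatch_source.next_minibatch(64)
//     for stream_info, data in mb.items():
//         print(stream_info.m_name, data.num_samples)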
%shared_ptr(CNTK::Function)
%shared_ptr(CNTK::NDArrayView)
@ -1206,7 +916,7 @@ fail:
%extend CNTK::NDMask {
PyObject* to_numpy() {
std::vector<size_t> cntk_dims = (*self).Shape().Dimensions();
static_assert(dims.size()==2, "mask requires exactly two dimensions");
static_assert(cntk_dims.size()==2, "mask requires exactly two dimensions");
std::vector<size_t> dimensions = {cntk_dims[1], cntk_dims[0]};
size_t num_elements = dimensions[0] * dimensions[1];
@ -1258,17 +968,17 @@ fail:
PyArrayObject* array = (PyArrayObject*)pyobj;
int rank = PyArray_NDIM(array);
npy_intp* np_shape = PyArray_SHAPE(array);
std::vector<size_t> shape;
std::vector<size_t> shape(rank);
npy_intp num_elements = 1;
// CNTK uses column major, thus we reverse the shape
for (int i=rank-1; i>=0; i--)
for (int i=0; i<rank; i++)
{
shape.push_back(np_shape[i]);
shape[rank-i-1] = np_shape[i];
num_elements *= np_shape[i];
}
int typecode = PyArray_TYPE(array);
@ -1342,7 +1052,7 @@ public:
// Setting up hash calculation so that __hash__ on Swig objects
// are redirected to the std::hash computation of the C++ API
//
%define %py_hash_for(DATA_TYPE, EQ)
%define %py_hash_for(DATA_TYPE)
%extend CNTK::DATA_TYPE {
const size_t __hash__() {
return std::hash<CNTK::DATA_TYPE>()(*$self);
@ -1357,14 +1067,16 @@ DATA_TYPE.__eq__ = lambda a,b: EQ(a,b)
%enddef
%py_eq_for(Variable, Variable_eq)
%py_eq_for(Constant, Variable_eq)
%py_eq_for(Parameter, Variable_eq)
%py_eq_for(NDShape, NDShape_eq)
%py_hash_for(Variable)
%py_hash_for(Variable, Variable_eq)
%py_hash_for(Constant, Variable_eq)
%py_hash_for(Parameter, Variable_eq)
%py_hash_for(NDShape, NDShape_eq)
%py_eq_for(Constant, Variable_eq)
%py_hash_for(Constant)
%py_eq_for(Parameter, Variable_eq)
%py_hash_for(Parameter)
%py_eq_for(NDShape, NDShape_eq)
%py_hash_for(NDShape)
%py_eq_for(DeviceDescriptor, DeviceDescriptor_eq)
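// With __eq__ and __hash__ wired through to the C++ comparisons, the wrapped
// objects behave as value types in Python; in particular a Variable can key
// the dicts used to feed data. A sketch:
//
//     x = input_variable(2)
//     arguments = {x: np.asarray([[1.0, 2.0]], dtype=np.float32)}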
@ -1395,4 +1107,3 @@ for klass in [Variable, Value, NDArrayView, NDMask]:
enable_reversing_tensor_shapes_in_error_messages()
%}

View file

@ -10,7 +10,7 @@ import numpy as np
abs_path = os.path.dirname(os.path.abspath(__file__))
def test_text_format():
def _test_text_format():
from cntk.io import text_format_minibatch_source, StreamConfiguration, MinibatchSource
# 0 |x 560 |y 1 0 0 0 0

View file

@ -59,9 +59,52 @@ def alias(x, name=''):
return alias(x, name)
##########################################################################
# evaluation ops
# loss and evaluation ops
##########################################################################
@typemap
def binary_cross_entropy(output, target, name=''):
r'''
This operation computes the binary cross entropy between the ``output`` and ``target``.
Example:
TBA
Args:
output: the computed posterior probability from the network
target: ground-truth label, 0 or 1
name (`str`, optional): the name of the Function instance in the network
Returns:
:class:`cntk.ops.functions.Function`
'''
from cntk.cntk_py import binary_cross_entropy
dtype = get_data_type(output, target)
output = sanitize_input(output, dtype)
target = sanitize_input(target, dtype)
return binary_cross_entropy(output, target, name)
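# A minimal usage sketch for the TBA example above (shapes illustrative; the
# computed quantity is the standard per-element
# -(target*log(output) + (1-target)*log(1-output))):
#
#     from cntk.ops import input_variable, binary_cross_entropy
#     o = input_variable(1)   # posterior probability in (0, 1)
#     t = input_variable(1)   # ground-truth label, 0 or 1
#     loss = binary_cross_entropy(o, t)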
@typemap
def weighted_binary_cross_entropy(output, target, weight, name=''):
r'''
This operation computes the weighted binary cross entropy between the ``output`` and ``target``.
Example:
TBA
Args:
output: the computed posterior probability from the network
target: ground-truth label, 0 or 1
weight: weight of each example
name (`str`, optional): the name of the Function instance in the network
Returns:
:class:`cntk.ops.functions.Function`
'''
from cntk.cntk_py import weighted_binary_cross_entropy
dtype = get_data_type(output, target, weight)
output = sanitize_input(output, dtype)
target = sanitize_input(target, dtype)
weight = sanitize_input(weight, dtype)
return weighted_binary_cross_entropy(output, target, weight, name)
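# Likewise for the weighted variant, a sketch (``weight`` scales each
# example's contribution to the loss):
#
#     w = input_variable(1)
#     loss = weighted_binary_cross_entropy(o, t, w)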
@typemap
def cross_entropy_with_softmax(output_vector, target_vector, axis=-1, name=''):
@ -185,17 +228,21 @@ def convolution(convolution_map, operand, strides=(1,), sharing=[True],
auto_padding=[True], lower_pad=(0,), upper_pad=(0,), transpose=False,
max_temp_mem_size_in_samples=0, name=''):
'''
Computes the convolution of a weight matrix with an image or tensor. This operation is used in image-processing applications
and language processing. It supports any dimensions, stride, sharing or padding.
Computes the convolution of ``convolution_map`` (typically a tensor of learnable parameters) with
``operand`` (commonly an image or output of a previous convolution/pooling operation).
This operation is used in image and language processing applications. It supports arbitrary
dimensions, strides, sharing, and padding.
This function operates on input tensors of the form [M1 x M2 x ... x Mn x inChannels]. This can be understood as a rank-n
object, where each entry consists of a inChannels-dimensional vector. For example, an RGB image would have dimensions
[W x H x 3], i.e. a [W x H]-sized structure, where each entry (pixel) consists of a 3-tuple (note, however, that the
memory-storage format is the concatenation of 3 planes of size [W x H]).
This function operates on input tensors with dimensions :math:`[C \\times M_1 \\times M_2 \\times \\ldots \\times M_n]`. This can be understood as a rank-n
object, where each entry consists of a :math:`C`-dimensional vector. For example, an RGB image would have dimensions
:math:`[3 \\times W \\times H]`, i.e. a :math:`[W \\times H]`-sized structure, where each entry (pixel) consists of a 3-tuple.
`convolution` convolves the input with n+1-dimensional filters, where the first n dimensions are the spatial extent of the
filter, and the last one must be equal to inChannels. There are outChannels filters. I.e. for each output position, a vector of
dimension outChannels is computed. Hence, the total number of filter parameters is (M1*M2*...*Mn) * inChannels * outChannels.
`convolution` convolves the input ``operand`` with a :math:`n+2` rank tensor of (typically learnable) filters called
``convolution_map`` of shape :math:`[O \\times I \\times m_1 \\times m_2 \\times \\ldots \\times m_n ]` (typically :math:`m_i \\ll M_i`).
The first dimension, :math:`O`, is the number of convolution filters (i.e. the number of
channels in the output). The second dimension, :math:`I`, must match the number of channels in the input.
The last n dimensions are the spatial extent of the filter. I.e. for each output position, a vector of
dimension :math:`O` is computed. Hence, the total number of filter parameters is :math:`O \\times I \\times m_1 \\times m_2 \\times \\ldots \\times m_n`
Example:
@ -210,12 +257,12 @@ def convolution(convolution_map, operand, strides=(1,), sharing=[True],
[ 36., 38., 40., 42.]]]]], dtype=float32)
Args:
convolution_map: convolution filter weights, stored as a tensor of dimensions [outChannels x M1 x M2 x ... x Mn],
where [M1 x M2 x ... x Mn] must be the kernel dimensions.
operand: convolution input. A tensor with dimensions [M1 x M2 x ... x Mn x inChannels].
strides (optional): stride dimensions. A stride > 1 means that only pixel positions that are multiples of the stride value are computed.
For example, a stride of 2 will lead to a halving of the dimensions. The last stride dimension that lines up with the number
of input channels must be equal to the number of input channels.
convolution_map: convolution filter weights, stored as a tensor of dimensions :math:`[O \\times I \\times m_1 \\times m_2 \\times \\ldots \\times m_n]`,
where :math:`[m_1 \\times m_2 \\times \\ldots \\times m_n]` must be the kernel dimensions (spatial extent of the filter).
operand: convolution input. A tensor with dimensions :math:`[I \\times M_1 \\times M_2 \\times \\ldots \\times M_n]`.
strides (`tuple`, optional): stride dimensions. If strides[i] > 1 then only pixel positions that are multiples of strides[i] are computed.
For example, a stride of 2 will lead to a halving of that dimension. The first stride dimension that lines up with the number
of input channels can be set to any non-zero value.
sharing (bool): sharing flags for each input dimension
auto_padding (bool): flags for each input dimension whether it should be padded automatically (that is,
symmetrically) or not padded at all. Padding means that the convolution kernel is applied to all pixel positions, where all
@ -235,9 +282,8 @@ def convolution(convolution_map, operand, strides=(1,), sharing=[True],
'''
from cntk.cntk_py import convolution
operand = sanitize_input(operand)
return convolution(convolution_map, operand, tuple(reversed(strides)), sharing, auto_padding,
tuple(reversed(lower_pad)), tuple(
reversed(upper_pad)), transpose,
return convolution(convolution_map, operand, tuple(strides), sharing, auto_padding,
tuple(lower_pad), tuple(upper_pad), transpose,
max_temp_mem_size_in_samples, name)
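# To make the filter-parameter count from the docstring concrete (a sketch
# with illustrative shapes): 64 filters over an RGB input with 5x5 kernels
# give a [64 x 3 x 5 x 5] convolution_map, i.e.
#
#     O, I, m1, m2 = 64, 3, 5, 5
#     num_filter_params = O * I * m1 * m2   # = 4800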
@ -333,7 +379,7 @@ def batch_normalization(operand, scale, bias, running_mean, running_inv_std, spa
spatial(`bool`): flag that indicates whether to compute mean/var for each feature in a minibatch
independently or, in case of convolutional layers, per feature map
normalization_time_constant(`float`, default 5000): time constant for computing running average of
mean and variance as a low-pass filtered version of the batch statistics.
blend_time_constant(`float`, default 0): constant for smoothing batch estimates with the running
statistics
epsilon: conditioner constant added to the variance when computing the inverse standard deviation
@ -1702,32 +1748,32 @@ def random_sample(weights, num_samples, allow_duplicates, name=''):
@typemap
def random_sample_inclusion_frequency(
weights,
num_samples,
allow_duplicates,
name=''):
'''
For weighted sampling with the specified sample size (`num_samples`)
this node computes the expected number of occurrences of each class
in the sampled set. In case of sampling without replacement
the result is only an estimate which might be quite rough in the
case of small sample sizes.
Intended uses are e.g. sampled softmax, noise contrastive
estimation etc.
This operation will typically be used together
with :func:`random_sample`.
Args:
weights: input vector of sampling weights which should be
non-negative numbers.
num_samples (`int`): number of expected samples
allow_duplicates (`bool`): whether sampling is done
with replacement (`True`) or without (`False`).
Examples:
>>> import numpy as np
>>> from cntk import *
>>> # weight vector with 100 '1000'-values followed
>>> # by 100 '1' values
>>> w1 = np.full((100),1000, dtype = np.float)
>>> w2 = np.full((100),1, dtype = np.float)
@ -1752,9 +1798,9 @@ def random_sample_inclusion_frequency(
weights = sanitize_input(weights)
return random_sample_inclusion_frequency(
weights,
num_samples,
allow_duplicates,
name)

View file

@ -63,6 +63,28 @@ def is_last(seq, name=''):
seq = sanitize_input(seq, get_data_type(seq))
return is_last(seq, name)
@typemap
def slice(seq, begin_index, end_index, name=''):
'''
Slice the input sequence.
Examples:
TBA
Args:
seq: sequence input tensor
begin_index (`int`): the index along sequence axis where the slicing starts
end_index (`int`): the index along sequence axis where the slicing ends
name (`str`, optional): the name of the Function instance in the network
See also:
Indexing in NumPy: http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html
Returns:
:class:`cntk.ops.functions.Function`
'''
from cntk.cntk_py import sequence_slice
seq = sanitize_input(seq, get_data_type(seq))
return sequence_slice(seq, begin_index, end_index, name)
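# A sketch of the TBA example above, mirroring the seq2seq usage later in
# this diff (an end_index of 0 selects through the end of the sequence):
#
#     labels_no_start = sequence.slice(raw_labels, 1, 0)  # <s> A B C </s> --> A B C </s>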
@typemap
def first(seq, name=''):
@ -281,3 +303,21 @@ def broadcast_as(operand, broadcast_as_operand, name=''):
broadcast_as_operand = sanitize_input(
broadcast_as_operand, get_data_type(broadcast_as_operand))
return broadcast_as(operand, broadcast_as_operand, name)
@typemap
def reduce_sum(seq, name=''):
'''
Computes the sum of the input sequence's elements across the sequence axis.
Examples:
TBA
Args:
seq: sequence input tensor
name (`str`, optional): the name of the Function instance in the network
Returns:
:class:`cntk.ops.functions.Function`
'''
from cntk.cntk_py import sequence_reduce_sum
seq = sanitize_input(seq, get_data_type(seq))
return sequence_reduce_sum(seq, name)
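# A sketch of the TBA example above (``seq`` is any input with a sequence
# axis):
#
#     total = sequence.reduce_sum(seq)  # sum of each sequence's elements over the sequence axis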

View file

@ -19,8 +19,8 @@ TENSOR_PAIRS = [
([30.], [10.]),
([[10.]], [[30.]]),
([[1.5, 2.1]], [[10., 20.]]),
#([[100., 200.], [300., 400.], [10., 20.]],
# [[10., 20.], [30., 40.], [1., 2.]]),
([[100., 200.], [300., 400.], [10., 20.]],
[[10., 20.], [30., 40.], [1., 2.]]),
# Adding two 3x2 inputs of sequence length 1
([[30., 40.], [1., 2.], [0.1, 0.2]], [[10, 20], [3, 4], [-0.5, -0.4]]),
@ -175,6 +175,8 @@ NEGATE_TENSORS = [
([[100., 200.], [300., 400.], [10., 20.]]),
([[30, 40], [1, 2], [0.1, 0.2]])
]
@pytest.mark.parametrize("operand", NEGATE_TENSORS)
def test_op_negate(operand, device_id, precision):
t = -1 * AA(operand, dtype=PRECISION_TO_TYPE[precision])
@ -193,34 +195,41 @@ def test_op_negate(operand, device_id, precision):
_test_unary_op(precision, device_id, '-', operand,
expected_forward, expected_backward)
TIMES_PAIRS = [
# transpose_times currently only supports right operands of rank 1 or 2
TRANSPOSE_TIMES_PAIRS = [
([[30.]], [[10.]]),
([[1.5, 2.1]], [[10.], [20.]]),
([[100., 200.]], [[10.], [20.]]),
([[100., 200.]], [[-10.], [20.]]),
([[100., 200.], [300., 400.]], [[10.], [20.]]),
([[100., 200.], [300., 400.]], [[10., 20.], [20., 30.]])
([[100., 200.], [-300., 400.]], [[10., 20.], [20., 30.]]),
(np.reshape(np.arange(24), (4, 3, 2)),
np.array([[1, 3], [2, 4]])),
]
# TODO: Handle sparse matrices
# TODO: Handle sparse matrices (left_matrix_type, right_matrix_type)
# adding a rank 3 operand for times operation
TIMES_PAIRS = TRANSPOSE_TIMES_PAIRS + \
[(np.reshape(np.arange(8), (2, 2, 2)), np.reshape(np.arange(8), (2, 2, 2)))]
@pytest.mark.parametrize("left_operand, right_operand", TIMES_PAIRS)
def test_op_times(left_operand, right_operand, device_id, precision,
left_matrix_type, right_matrix_type):
def test_op_times(left_operand, right_operand, device_id, precision):
dt_precision = PRECISION_TO_TYPE[precision]
a = AA(left_operand, dtype=dt_precision)
b = AA(right_operand, dtype=dt_precision)
expected_forward = [[np.dot(a, b)]]
assert len(a.shape) == len(b.shape) == 2
expected_forward = [[np.tensordot(a, b, axes=len(b.shape) - 1)]]
left_backward = np.zeros_like(a)
left_backward[:, :] = b.sum(axis=1)
left_backward[...] = b.sum(axis=-1)
right_backward = np.zeros_like(b)
right_backward[:, :] = np.transpose([a.sum(axis=0)])
transpose_axes = list(np.roll(np.arange(len(b.shape)), -1))
sum_axes = tuple(np.arange(0, len(a.shape) - len(b.shape) + 1))
right_backward[...] = np.transpose(
AA([a.sum(axis=sum_axes)]), axes=transpose_axes)
expected_backward = {
'left_arg': [[left_backward]],
@ -231,3 +240,32 @@ def test_op_times(left_operand, right_operand, device_id, precision,
_test_binary_op(precision, device_id, times,
left_operand, right_operand, expected_forward, expected_backward)
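# The expected gradients above can be sanity-checked with plain NumPy: with an
# all-ones upstream gradient g, the gradient of sum(g * (a @ b)) w.r.t. a is
# g.dot(b.T), whose rows all equal b.sum(axis=-1), and w.r.t. b it is
# a.T.dot(g), whose columns all equal a.sum(axis=0). A self-contained sketch
# (not wired into the test suite):
def _check_times_backward_sketch():
    a = np.arange(6, dtype=np.float64).reshape(2, 3)
    b = np.arange(12, dtype=np.float64).reshape(3, 4)
    g = np.ones((2, 4))
    assert np.allclose(np.dot(g, b.T),
                       np.broadcast_to(b.sum(axis=-1), a.shape))
    assert np.allclose(np.dot(a.T, g),
                       np.broadcast_to(a.sum(axis=0)[:, None], b.shape))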
@pytest.mark.parametrize("left_operand, right_operand", TRANSPOSE_TIMES_PAIRS)
def test_op_transpose_times(left_operand, right_operand, device_id, precision):
dt_precision = PRECISION_TO_TYPE[precision]
# transpose right_operand to make the product possible
right_operand = np.transpose(right_operand).tolist()
a = AA(left_operand, dtype=dt_precision)
b = AA(right_operand, dtype=dt_precision)
expected_forward = [[np.dot(a, np.transpose(b))]]
left_backward = np.zeros_like(a)
left_backward[...] = b.sum(axis=tuple(range(len(b.shape) - 1)))
right_backward = np.zeros_like(b)
right_backward[...] = a.sum(axis=tuple(range(len(a.shape) - 1)))
expected_backward = {
'left_arg': [[left_backward]],
'right_arg': [[right_backward]]
}
from cntk import times_transpose
_test_binary_op(precision, device_id, times_transpose,
left_operand, right_operand, expected_forward, expected_backward)

View file

@ -166,8 +166,9 @@ def test_op_slice_sequence(input_data, slice_params, expected_result, device_id,
dynamic_axes=[Axis.default_batch_axis(), t],
name='a')
result = C.slice(a, axis=t, begin_index=slice_params[
0], end_index=slice_params[1])
result = C.sequence.slice(a,
begin_index=slice_params[0],
end_index=slice_params[1])
def grad_slice(x, beg_index, end_index):
res = np.zeros_like(x)
@ -176,8 +177,8 @@ def test_op_slice_sequence(input_data, slice_params, expected_result, device_id,
expected_gradient = grad_slice(np.asarray(input_data), *slice_params)
expected_forward = AA(
[expected_result], dtype=PRECISION_TO_TYPE[precision])
expected_forward = AA([expected_result],
dtype=PRECISION_TO_TYPE[precision])
expected_backward = {
a: [grad_slice(np.asarray(input_data), *slice_params)]
}

View file

@ -183,12 +183,9 @@ def get_temp_filename(directory=None):
def sanitize_shape(shape):
"""
If shape is scalar, it creates a tuple out of it and reverse it as cntk uses
column major.
If shape is scalar, it creates a tuple out of it.
"""
if np.isscalar(shape):
shape = (shape,)
return tuple(reversed(shape))
return _as_tuple(shape)
def sanitize_input(arg, fallback_dtype=np.float32, reshape=None):
@ -383,14 +380,15 @@ def sanitize_batch(var, batch, seq_starts=None, data_type=None, device=None):
'array and not "%s"' % type(batch))
from cntk.cntk_py import NDMask
mask = NDMask((max(seq_lens), num_seq), device)
mask = NDMask((num_seq, max(seq_lens)), device)
for idx, seq_len in enumerate(seq_lens):
if seq_starts is None:
mask.mark_sequence_begin((0, idx))
elif seq_starts[idx]:
if seq_starts is None or seq_starts[idx]:
mask.mark_sequence_begin((0, idx))
# The second parameter is specifying the rectangle of the mask that
# is invalid. As C++ is taking an NDShape, and we reverse the shape
# in the SWIG layer, we provide it here as row-major.
mask.invalidate_section((seq_len, idx),
(cntk_py.InferredDimension, 1))
(1, cntk_py.InferredDimension))
# Then we pad the batch to rectangular shape
if isinstance(batch, list):
@ -814,6 +812,17 @@ class _ClassFromDict(dict):
def Record(**kwargs):
return _ClassFromDict(kwargs)
# type-cast a shape given as a scalar into a tuple
def _as_tuple(x):
return x if (isinstance(x,tuple)) else (x,)
'''
Convert an argument to a tuple.
Args:
x: if scalar, it is wrapped as ``(x,)``; if iterable, it is
converted to a tuple.
Returns:
Tuple of ``x``.
'''
if np.isscalar(x):
x = (x,)
return tuple(x)
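# Behaviour sketch:
#
#     _as_tuple(3)       -> (3,)
#     _as_tuple((1, 2))  -> (1, 2)
#     _as_tuple([1, 2])  -> (1, 2)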

View file

@ -63,9 +63,9 @@ author = 'Microsoft'
# built documents.
#
# The short X.Y version.
version = '2.0.beta2.0'
version = '2.0.beta3.0'
# The full version, including alpha/beta/rc tags.
release = '2.0.beta2.0'
release = '2.0.beta3.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.

View file

@ -5,27 +5,27 @@ The best way to learn about the APIs currently is to look at the
following examples in the [CNTK clone root]/bindings/python/examples
directory:
- `MNIST <https://github.com/Microsoft/CNTK/blob/v2.0.beta2.0/bindings/python/examples/MNIST/SimpleMNIST.py>`__:
- `MNIST <https://github.com/Microsoft/CNTK/blob/v2.0.beta3.0/bindings/python/examples/MNIST/SimpleMNIST.py>`__:
A fully connected feed-forward model for classification of MNIST
images. (follow the instructions in
Examples/Image/DataSets/MNIST/README.md)
- `CifarResNet <https://github.com/Microsoft/CNTK/blob/v2.0.beta2.0/bindings/python/examples/CifarResNet/CifarResNet.py>`__:
- `CifarResNet <https://github.com/Microsoft/CNTK/blob/v2.0.beta3.0/bindings/python/examples/CifarResNet/CifarResNet.py>`__:
An image classification ResNet model for training on the CIFAR image
dataset. (follow the instructions in
Examples/Image/DataSets/CIFAR-10/README.md to get the CIFAR dataset
and convert it to the CNTK supported format)
- `SequenceClassification <https://github.com/Microsoft/CNTK/blob/v2.0.beta2.0/bindings/python/examples/SequenceClassification/SequenceClassification.py>`__:
- `SequenceClassification <https://github.com/Microsoft/CNTK/blob/v2.0.beta3.0/bindings/python/examples/SequenceClassification/SequenceClassification.py>`__:
An LSTM sequence classification model for text data.
- `Sequence2Sequence <https://github.com/Microsoft/CNTK/blob/v2.0.beta2.0/bindings/python/examples/Sequence2Sequence/Sequence2Sequence.py>`__:
- `Sequence2Sequence <https://github.com/Microsoft/CNTK/blob/v2.0.beta3.0/bindings/python/examples/Sequence2Sequence/Sequence2Sequence.py>`__:
A sequence to sequence grapheme to phoneme translation model that
trains on the CMUDict corpus.
- `NumpyInterop <https://github.com/Microsoft/CNTK/blob/v2.0.beta2.0/bindings/python/examples/NumpyInterop/FeedForwardNet.py>`__
- `NumpyInterop <https://github.com/Microsoft/CNTK/blob/v2.0.beta3.0/bindings/python/examples/NumpyInterop/FeedForwardNet.py>`__
- NumPy interoperability example showing how to train a simple feed-forward
network with training data fed using NumPy arrays.
- `LanguageUnderstanding <https://github.com/Microsoft/CNTK/blob/v2.0.beta2.0/bindings/python/examples/LanguageUnderstanding/LanguageUnderstanding.py>`__
- `LanguageUnderstanding <https://github.com/Microsoft/CNTK/blob/v2.0.beta3.0/bindings/python/examples/LanguageUnderstanding/LanguageUnderstanding.py>`__
- Language Understanding.

View file

@ -2,7 +2,7 @@
.. some aliases
.. _CNTK: http://cntk.ai/
Python API for CNTK (2.0.beta2.0)
Python API for CNTK (2.0.beta3.0)
=================================
CNTK_, the Microsoft Cognitive Toolkit, is a system for describing, training,
@ -12,7 +12,7 @@ neural networks (CNNs), recurrent neural networks (RNNs), long short term
memory (LSTM), logistic regression, and maximum entropy model. CNTK is an
implementation of computational networks that supports both CPU and GPU.
This page describes the Python API for CNTK_ version 2.0.beta2.0. This is an ongoing effort
This page describes the Python API for CNTK_ version 2.0.beta3.0. This is an ongoing effort
to expose such an API to the CNTK system, thus enabling the use of higher-level
tools such as IDEs to facilitate the definition of computational networks, to execute
them on sample data in real time.

View file

@ -1,23 +1,28 @@
Tutorials
===============
#. `Logistic Regression`_ with CNTK and NumPy
#. `Feed Forward Network`_ with CNTK and NumPy
#. Image 101 Feed Forward Classifier with MNIST data
#. CNTK 101: `Logistic Regression`_ with CNTK and NumPy
#. CNTK 102: `Feed Forward Network`_ with CNTK and NumPy
#. CNTK 103: Feed Forward image classifier with MNIST data
* Part A: `MNIST Data preparation`_
* Part A: `MNIST data preparation`_
* Part B: `Feed Forward Classifier`_
#. Image 201 ResNet Classifier with CIFAR-10 data
#. CNTK 201: Image classifiers with CIFAR-10 data
* Part A: `CIFAR-10 Data preparation`_
* Part B: `ResNet Classifier`_
.. _`Logistic Regression`: https://github.com/Microsoft/CNTK/tree/v2.0.beta2.0/bindings/python/tutorials/CNTK_101_LogisticRegression.ipynb
.. _`Feed Forward Network`: https://github.com/Microsoft/CNTK/tree/v2.0.beta2.0/bindings/python/tutorials/CNTK_102_FeedForward.ipynb
.. _`MNIST Data preparation`: https://github.com/Microsoft/CNTK/tree/v2.0.beta2.0/bindings/python/tutorials/CNTK_103A_MNIST_DataLoader.ipynb
.. _`Feed Forward Classifier`: https://github.com/Microsoft/CNTK/tree/v2.0.beta2.0/bindings/python/tutorials/CNTK_103B_MNIST_FeedForwardNetwork.ipynb
.. _`CIFAR-10 Data preparation`: https://github.com/Microsoft/CNTK/tree/v2.0.beta2.0/bindings/python/tutorials/CNTK_201A_CIFAR-10_DataLoader.ipynb
.. _`ResNet Classifier`: https://github.com/Microsoft/CNTK/tree/v2.0.beta2.0/bindings/python/tutorials/CNTK_201B_CIFAR-10_ImageHandsOn.ipynb
* Part B: `VGG and ResNet classifiers`_
#. CNTK 202: `Language understanding`_ with ATIS3 text data
#. CNTK 203: `Reinforcement learning basics`_ with OpenAI Gym data
.. _`Logistic Regression`: https://github.com/Microsoft/CNTK/tree/v2.0.beta3.0/bindings/python/tutorials/CNTK_101_LogisticRegression.ipynb
.. _`Feed Forward Network`: https://github.com/Microsoft/CNTK/tree/v2.0.beta3.0/bindings/python/tutorials/CNTK_102_FeedForward.ipynb
.. _`MNIST data preparation`: https://github.com/Microsoft/CNTK/tree/v2.0.beta3.0/bindings/python/tutorials/CNTK_103A_MNIST_DataLoader.ipynb
.. _`Feed Forward Classifier`: https://github.com/Microsoft/CNTK/tree/v2.0.beta3.0/bindings/python/tutorials/CNTK_103B_MNIST_FeedForwardNetwork.ipynb
.. _`CIFAR-10 Data preparation`: https://github.com/Microsoft/CNTK/tree/v2.0.beta3.0/bindings/python/tutorials/CNTK_201A_CIFAR-10_DataLoader.ipynb
.. _`VGG and ResNet classifiers`: https://github.com/Microsoft/CNTK/tree/v2.0.beta3.0/bindings/python/tutorials/CNTK_201B_CIFAR-10_ImageHandsOn.ipynb
.. _`Language understanding`: https://github.com/Microsoft/CNTK/blob/v2.0.beta3.0/bindings/python/tutorials/CNTK_202_Language_Understanding.ipynb
.. _`Reinforcement learning basics`: https://github.com/Microsoft/CNTK/blob/master/bindings/python/tutorials/CNTK_203_Reinforcement_Learning_Basics.ipynb

View file

@ -11,7 +11,7 @@ from cntk import Trainer, Axis, save_model, load_model #, text_format_minibatch_
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
from cntk.device import cpu, set_default_device
from cntk.learner import momentum_sgd, momentum_as_time_constant_schedule
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, sequence, slice, past_value, future_value, element_select, alias, hardmax
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, sequence, past_value, future_value, element_select, alias, hardmax
from cntk.ops.functions import CloneMethod
abs_path = os.path.dirname(os.path.abspath(__file__))
@ -94,7 +94,7 @@ def sequence_to_sequence_translator(debug_output=False, run_test=False):
input_sequence = raw_input
# Drop the sentence start token from the label, for decoder training
label_sequence = slice(raw_labels, label_seq_axis, 1, 0) # <s> A B C </s> --> A B C </s>
label_sequence = sequence.slice(raw_labels, 1, 0) # <s> A B C </s> --> A B C </s>
label_sentence_start = sequence.first(raw_labels) # <s>
is_first_label = sequence.is_first(label_sequence) # <s> 0 0 0 ...
@ -239,7 +239,7 @@ def sequence_to_sequence_translator(debug_output=False, run_test=False):
z = load_model("seq2seq.dnn")
label_seq_axis = Axis('labelAxis')
label_sequence = slice(find_arg_by_name('raw_labels',z), label_seq_axis, 1, 0)
label_sequence = sequence.slice(find_arg_by_name('raw_labels',z), 1, 0)
ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)
trainer = Trainer(z, ce, errs, [momentum_sgd(

View file

@ -10,11 +10,11 @@ from cntk import Trainer, Axis #, text_format_minibatch_source, StreamConfigurat
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT, FULL_DATA_SWEEP
from cntk.device import cpu, set_default_device
from cntk.learner import sgd
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error
from cntk.ops import input_variable, cross_entropy_with_softmax, classification_error, sequence
abs_path = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(abs_path, "..", ".."))
from examples.common.nn import LSTMP_component_with_self_stabilization, embedding, linear_layer, select_last, print_training_progress
from examples.common.nn import LSTMP_component_with_self_stabilization, embedding, linear_layer, print_training_progress
# Creates the reader
def create_reader(path, is_training, input_dim, label_dim):
@ -28,7 +28,7 @@ def LSTM_sequence_classifer_net(input, num_output_classes, embedding_dim, LSTM_d
embedding_function = embedding(input, embedding_dim)
LSTM_function = LSTMP_component_with_self_stabilization(
embedding_function.output, LSTM_dim, cell_dim)[0]
thought_vector = select_last(LSTM_function)
thought_vector = sequence.last(LSTM_function)
return linear_layer(thought_vector, num_output_classes)

View file

@ -114,7 +114,6 @@ if IS_WINDOWS:
"/EHsc",
"/DEBUG",
"/Zi",
"/EHsc",
]
runtime_library_dirs = []
else:
@ -166,7 +165,7 @@ else:
kwargs = dict(package_data = package_data)
setup(name="cntk",
version="2.0.beta2.0",
version="2.0.beta3.0",
url="http://cntk.ai",
ext_modules=[cntk_module],
packages=packages,

View file

@ -10,7 +10,7 @@
"\n",
"This tutorial is targeted to individuals who are new to CNTK and to machine learning. In this tutorial, you will train a simple yet powerful machine learning model that is widely used in industry for a variety of applications. The model trained below scales to massive data sets in the most expeditious manner by harnessing computational scalability leveraging the computational resources you may have (one or more CPU cores, one or more GPUs, a cluster of CPUs or a cluster of GPUs), transparently via the CNTK library.\n",
"\n",
"The following notebook users Python APIs. If you are looking for this example in Brainscript, please look [here](https://github.com/Microsoft/CNTK/tree/v2.0.beta2.0/Examples/Tutorials/LogisticRegressionAndMultiClass). \n",
"The following notebook users Python APIs. If you are looking for this example in Brainscript, please look [here](https://github.com/Microsoft/CNTK/tree/v2.0.beta3.0/Examples/Tutorials/LogisticRegressionAndMultiClass). \n",
"\n",
"## Introduction\n",
"\n",

View file

@ -767,7 +767,7 @@
"\n",
"If you want to try running the tutorial from python command prompt. Please run the [FeedForwardNet.py][] example.\n",
"\n",
"[FeedForwardNet.py]: https://github.com/Microsoft/CNTK/blob/v2.0.beta2.0/bindings/python/examples/NumpyInterop/FeedForwardNet.py"
"[FeedForwardNet.py]: https://github.com/Microsoft/CNTK/blob/v2.0.beta3.0/bindings/python/examples/NumpyInterop/FeedForwardNet.py"
]
},
{

View file

@ -12,7 +12,7 @@
"\n",
"CNTK 103 tutorial is divided into two parts:\n",
"- Part A: Familiarize with the [MNIST][] database that will be used later in the tutorial\n",
"- [Part B](https://github.com/Microsoft/CNTK/blob/v2.0.beta2.0/bindings/python/tutorials/CNTK_103A_MNIST_DataLoader.ipynb): We will use the feedforward classifier used in CNTK 102 to classify digits in MNIST data set.\n",
"- [Part B](https://github.com/Microsoft/CNTK/blob/v2.0.beta3.0/bindings/python/tutorials/CNTK_103A_MNIST_DataLoader.ipynb): We will use the feedforward classifier used in CNTK 102 to classify digits in MNIST data set.\n",
"\n",
"[MNIST]: http://yann.lecun.com/exdb/mnist/\n",
"\n"

View file

@ -12,7 +12,7 @@
"\n",
"We assume that you have successfully completed CNTK 103 Part A.\n",
"\n",
"In this tutorial we will train a fully connected network on MNIST data. This notebook provides the recipe using Python APIs. If you are looking for this example in Brainscript, please look [here](https://github.com/Microsoft/CNTK/tree/v2.0.beta2.0/Examples/Image/GettingStarted)\n",
"In this tutorial we will train a fully connected network on MNIST data. This notebook provides the recipe using Python APIs. If you are looking for this example in Brainscript, please look [here](https://github.com/Microsoft/CNTK/tree/v2.0.beta3.0/Examples/Image/GettingStarted)\n",
"\n",
"## Introduction\n",
"\n",
@ -765,7 +765,7 @@
"source": [
"#### Code link\n",
"\n",
"If you want to try running the tutorial from python command prompt. Please run the [SimpleMNIST.py](https://github.com/Microsoft/CNTK/tree/v2.0.beta2.0/bindings/python/examples/MNIST) example."
"If you want to try running the tutorial from python command prompt. Please run the [SimpleMNIST.py](https://github.com/Microsoft/CNTK/tree/v2.0.beta3.0/bindings/python/examples/MNIST) example."
]
},
{

configure (vendored)
View file

@ -16,6 +16,11 @@ enable_cuda=
enable_python=
# NCCL communication library
have_nccl=no
nccl_path=
nccl_check=include/nccl.h
# CNTK Custom MKL Version
cntk_custom_mkl_version=2
@ -99,6 +104,7 @@ default_boost="boost-1.60.0"
# NOTE: Will get compilation errors with cuda-6.0
default_cudas="cuda-7.5 cuda-7.0 cuda-6.5"
default_nccls="nccl"
default_kaldis="kaldi-trunk kaldi-c024e8aa"
default_gdk_includes="include/nvidia/gdk"
default_gdk_nvml_libs="src/gdk/nvml/lib"
@ -165,6 +171,11 @@ function find_protobuf ()
find_dir "$default_protobuf" "$protobuf_check"
}
function find_nccl ()
{
find_dir "$default_nccls" "$nccl_check"
}
function find_cuda ()
{
find_dir "$default_cudas" "$cuda_check"
@ -322,6 +333,7 @@ function show_help ()
echo " --with-gdk-include[=directory] $(show_default $(find_gdk_include))"
echo " --with-gdk-nvml-lib[=directory] $(show_default $(find_gdk_nvml_lib))"
echo " --with-cudnn[=directory] $(show_default $(find_cudnn))"
echo " --with-nccl[=directory] $(show_default $(find_nccl))"
echo " --with-mkl[=directory] $(show_default $(find_mkl))"
echo " --with-mkl-sequential[=directory] $(show_default $(find_mkl))"
echo " --with-openblas[=directory] (experimental) $(show_default $(find_openblas))"
@ -603,6 +615,28 @@ do
fi
fi
;;
--with-nccl*)
have_nccl=yes
if test x$optarg = x
then
nccl_path=$(find_nccl)
if test x$nccl_path = x
then
echo "Cannot find NCCL directory."
echo "Please specify a value for --with-nccl"
echo "NCCL can be downloaded from https://github.com/NVIDIA/nccl"
exit 1
fi
else
if test $(check_dir $optarg $nccl_check) = yes
then
nccl_path=$optarg
else
echo "Invalid NCCL directory $optarg"
exit 1
fi
fi
;;
--with-mkl*)
have_mkl=yes
mathlib=mkl
@ -898,6 +932,14 @@ then
done
fi
if test $enable_cuda = yes && test x$nccl_path = x
then
nccl_path=$(find_nccl)
if test x$nccl_path != x; then
echo Found NCCL at $nccl_path
fi
fi
if test x$opencv_path = x
then
opencv_path=$(find_opencv)
@ -978,6 +1020,7 @@ if test $enable_cuda = yes ; then
echo GDK_NVML_LIB_PATH=$gdk_nvml_lib_path >> $config
echo CUB_PATH=$cub_path >> $config
echo CUDNN_PATH=$cudnn_path >> $config
[-z "$nccl_path"] || echo NCCL_PATH=$nccl_path >> $config
fi
if test $enable_python = yes ; then
echo PYTHON_SUPPORT=true >> $config