Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev

This commit is contained in:
feiga 2016-10-10 13:04:06 +08:00
Родитель 5e37b37093 614a51e7fb
Коммит ebefc5ade5
392 изменённых файлов: 22041 добавлений и 12503 удалений

9
.gitignore поставляемый
Просмотреть файл

@ -199,3 +199,12 @@ Tests/EndToEndTests/UnitTests/MathTests/MS.txt
Dependencies/CNTKCustomMKL/Publish Dependencies/CNTKCustomMKL/Publish
Dependencies/CNTKCustomMKL/CNTKCustomMKL-Linux-*.tgz Dependencies/CNTKCustomMKL/CNTKCustomMKL-Linux-*.tgz
Dependencies/CNTKCustomMKL/CNTKCustomMKL-Windows-*.zip Dependencies/CNTKCustomMKL/CNTKCustomMKL-Windows-*.zip
# Python bindings
bindings/python/_cntk_py.pyd
bindings/python/cntk.egg-info/
bindings/python/cntk/cntk_py.py
bindings/python/cntk/libs/
bindings/python/cntk/cntk_py_wrap.cpp
bindings/python/cntk/cntk_py_wrap.h
bindings/python/dist/

Просмотреть файл

@ -174,7 +174,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DoublePrecision", "DoublePr
EndProject EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Kaldi2Reader", "Kaldi2Reader", "{C70E1572-20FF-496C-A0A9-10AA6755A07C}" Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Kaldi2Reader", "Kaldi2Reader", "{C70E1572-20FF-496C-A0A9-10AA6755A07C}"
ProjectSection(SolutionItems) = preProject ProjectSection(SolutionItems) = preProject
Source\Readers\Kaldi2Reader\basetypes.h = Source\Readers\Kaldi2Reader\basetypes.h
Source\Readers\Kaldi2Reader\biggrowablevectors.h = Source\Readers\Kaldi2Reader\biggrowablevectors.h Source\Readers\Kaldi2Reader\biggrowablevectors.h = Source\Readers\Kaldi2Reader\biggrowablevectors.h
Source\Readers\Kaldi2Reader\chunkevalsource.h = Source\Readers\Kaldi2Reader\chunkevalsource.h Source\Readers\Kaldi2Reader\chunkevalsource.h = Source\Readers\Kaldi2Reader\chunkevalsource.h
Source\Readers\Kaldi2Reader\DataReader.cpp = Source\Readers\Kaldi2Reader\DataReader.cpp Source\Readers\Kaldi2Reader\DataReader.cpp = Source\Readers\Kaldi2Reader\DataReader.cpp

Просмотреть файл

@ -46,22 +46,22 @@ int main(int argc, char* argv[])
path = (pos == std::string::npos) ? "." : app.substr(0, pos); path = (pos == std::string::npos) ? "." : app.substr(0, pos);
// This relative path assumes launching from CNTK's binary folder, e.g. x64\Release // This relative path assumes launching from CNTK's binary folder, e.g. x64\Release
const std::string modelWorkingDirectory = path + "/../../Examples/Image/MNIST/Data/"; const std::string modelWorkingDirectory = path + "/../../Examples/Image/GettingStarted";
#else // on Linux #else // on Linux
pos = app.rfind("/"); pos = app.rfind("/");
path = (pos == std::string::npos) ? "." : app.substr(0, pos); path = (pos == std::string::npos) ? "." : app.substr(0, pos);
// This relative path assumes launching from CNTK's binary folder, e.g. build/cpu/release/bin/ // This relative path assumes launching from CNTK's binary folder, e.g. build/cpu/release/bin/
const std::string modelWorkingDirectory = path + "/../../../../Examples/Image/MNIST/Data/"; const std::string modelWorkingDirectory = path + "/../../../../Examples/Image/GettingStarted";
#endif #endif
const std::string modelFilePath = modelWorkingDirectory + "../Output/Models/01_OneHidden"; const std::string modelFilePath = modelWorkingDirectory + "/Output/Models/01_OneHidden";
try try
{ {
struct stat statBuf; struct stat statBuf;
if (stat(modelFilePath.c_str(), &statBuf) != 0) if (stat(modelFilePath.c_str(), &statBuf) != 0)
{ {
fprintf(stderr, "Error: The model %s does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/MNIST to create the model.\n", modelFilePath.c_str()); fprintf(stderr, "Error: The model %s does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/GettingStarted to create the model.\n", modelFilePath.c_str());
return(1); return(1);
} }

Просмотреть файл

@ -1,3 +1,9 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// EvalMultithreads.cpp : Sample application shows how to evaluate a model in multiple threading environment.
//
#include <functional> #include <functional>
#include <thread> #include <thread>
#include <iostream> #include <iostream>
@ -5,108 +11,23 @@
using namespace CNTK; using namespace CNTK;
FunctionPtr FullyConnectedDNNLayerWithSharedParameters(Variable input, void OutputFunctionInfo(FunctionPtr);
const Parameter& timesParam, FunctionPtr FullyConnectedDNNLayerWithSharedParameters(Variable, const Parameter&, const Parameter&, const std::function<FunctionPtr(const FunctionPtr&)>&);
const Parameter& plusParam, void CreateFunctionAndEvaluateWithSharedParameters(size_t, size_t, size_t, const Parameter&, const Parameter&, const Parameter[], const Parameter[], const Parameter&, const DeviceDescriptor&);
const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity) FunctionPtr SetupFullyConnectedLinearLayer(Variable, size_t, const DeviceDescriptor&, const std::wstring&);
{ FunctionPtr SetupFullyConnectedDNNLayer(Variable, size_t, const DeviceDescriptor& device, const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity);
assert(input.Shape().Rank() == 1); void RunEvaluationClassifier(FunctionPtr, const DeviceDescriptor&);
void RunEvaluationOneHidden(FunctionPtr, const DeviceDescriptor&);
// Todo: assume that timesParam has matched outputDim and inputDim /// <summary>
auto timesFunction = Times(timesParam, input); /// Shows how to create Function whose parameters can be shared by multi evaluation threads.
/// </summary>
// Todo: assume that timesParam has matched outputDim /// <description>
auto plusFunction = Plus(plusParam, timesFunction); /// It first creates all parameters needed for the Function, and then spawns multi threads.
/// Althought each thread creates a new instance of function, all threads share the same parameters.
return nonLinearity(plusFunction); /// After that, each thread runs evaluation independently.
} /// </description>
void MultiThreadsEvaluationWithNewFunction(const DeviceDescriptor& device, const int threadCount)
FunctionPtr FullyConnectedFeedForwardClassifierNetWithSharedParameters(Variable input,
size_t numHiddenLayers,
const Parameter& inputTimesParam,
const Parameter& inputPlusParam,
const Parameter hiddenLayerTimesParam[],
const Parameter hiddenLayerPlusParam[],
const Parameter& outputTimesParam,
const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
assert(numHiddenLayers >= 1);
auto classifierRoot = FullyConnectedDNNLayerWithSharedParameters(input, inputTimesParam, inputPlusParam, nonLinearity);
for (size_t i = 1; i < numHiddenLayers; ++i)
classifierRoot = FullyConnectedDNNLayerWithSharedParameters(classifierRoot, hiddenLayerTimesParam[i - 1], hiddenLayerPlusParam[i - 1], nonLinearity);
// Todo: assume that outputTimesParam has matched output dim and hiddenLayerDim
classifierRoot = Times(outputTimesParam, classifierRoot);
return classifierRoot;
}
void EvaluationNewNetworkWithSharedParameters(size_t inputDim,
size_t numOutputClasses,
size_t numHiddenLayers,
const Parameter& inputTimesParam,
const Parameter& inputPlusParam,
const Parameter hiddenLayerTimesParam[],
const Parameter hiddenLayerPlusParam[],
const Parameter& outputTimesParam,
const DeviceDescriptor& computeDevice)
{
using namespace std::placeholders;
// Create network using shared parameters
auto inputVar = InputVariable({inputDim}, DataType::Float, L"Features");
auto classifierOutputFunction = FullyConnectedFeedForwardClassifierNetWithSharedParameters(inputVar,
numHiddenLayers,
inputTimesParam,
inputPlusParam,
hiddenLayerTimesParam,
hiddenLayerPlusParam,
outputTimesParam,
std::bind(Sigmoid, _1, L""));
auto labelsVar = InputVariable({numOutputClasses}, DataType::Float, L"Labels");
auto trainingLossFunction = CNTK::CrossEntropyWithSoftmax(classifierOutputFunction, labelsVar, L"LossFunction");
auto predictionFunction = CNTK::ClassificationError(classifierOutputFunction, labelsVar, L"ClassificationError");
auto ffNet = CNTK::Combine({trainingLossFunction, predictionFunction, classifierOutputFunction}, L"ClassifierModel");
if (ffNet->Parameters().size() != ((numHiddenLayers * 2) + 1))
throw std::runtime_error("EvaluationNewNetworkWithSharedParameters: Function does not have expected Parameter count");
if (ffNet->Arguments().size() != 2)
throw std::runtime_error("EvaluationNewNetworkWithSharedParameters: Function does not have expected Argument count");
if (ffNet->Outputs().size() != 3)
throw std::runtime_error("EvaluationNewNetworkWithSharedParameters: Function does not have expected Output count");
// Evaluate the network in several runs
size_t iterationCount = 4;
unsigned int randSeed = 2;
srand(randSeed);
size_t numSamples = 3;
for (size_t t = 0; t < iterationCount; ++t)
{
std::vector<float> inputData(inputDim * numSamples);
for (size_t i = 0; i < inputData.size(); ++i)
inputData[i] = ((float)rand()) / RAND_MAX;
NDShape inputShape = {inputDim, 1, numSamples};
ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true));
std::vector<float> labelData(numOutputClasses * numSamples, 0);
for (size_t i = 0; i < numSamples; ++i)
labelData[(i*numOutputClasses) + (rand() % numOutputClasses)] = 1;
NDShape labelShape = {numOutputClasses, 1, numSamples};
ValuePtr labelValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(labelShape, labelData.data(), labelData.size(), DeviceDescriptor::CPUDevice(), true));
ValuePtr outputValue, predictionErrorValue;
std::unordered_map<Variable, ValuePtr> outputs = {{classifierOutputFunction->Output(), outputValue}, {predictionFunction->Output(), predictionErrorValue}};
ffNet->Forward({{inputVar, inputValue}, {labelsVar, labelValue}}, outputs, computeDevice);
}
}
void EvalMultiThreadsWithNewNetwork(const DeviceDescriptor& device, const int threadCount)
{ {
const size_t inputDim = 937; const size_t inputDim = 937;
const size_t numOutputClasses = 9304; const size_t numOutputClasses = 9304;
@ -136,7 +57,7 @@ void EvalMultiThreadsWithNewNetwork(const DeviceDescriptor& device, const int th
std::vector<std::thread> threadList(threadCount); std::vector<std::thread> threadList(threadCount);
for (int th = 0; th < threadCount; ++th) for (int th = 0; th < threadCount; ++th)
{ {
threadList[th] = std::thread(EvaluationNewNetworkWithSharedParameters, inputDim, numOutputClasses, numHiddenLayers, inputTimesParam, inputPlusParam, hiddenLayerTimesParam, hiddenLayerPlusParam, outputTimesParam, device); threadList[th] = std::thread(CreateFunctionAndEvaluateWithSharedParameters, inputDim, numOutputClasses, numHiddenLayers, inputTimesParam, inputPlusParam, hiddenLayerTimesParam, hiddenLayerPlusParam, outputTimesParam, device);
} }
for (int th = 0; th < threadCount; ++th) for (int th = 0; th < threadCount; ++th)
@ -146,3 +67,433 @@ void EvalMultiThreadsWithNewNetwork(const DeviceDescriptor& device, const int th
fflush(stderr); fflush(stderr);
} }
} }
/// <summary>
/// Shows how to use Clone() to share function parameters among multi evaluation threads.
/// </summary>
/// <description>
/// A feed-forward classifier is assembled once on the given device; each worker thread then
/// evaluates its own Clone() of that function. All clones share the same parameter values.
/// </description>
void MultiThreadsEvaluationWithClone(const DeviceDescriptor& device, const int threadCount)
{
    using namespace std::placeholders;

    const size_t inputDim = 937;
    const size_t numOutputClasses = 9304;
    const size_t numHiddenLayers = 6;
    const size_t hiddenLayersDim = 2048;

    auto inputVar = InputVariable({inputDim}, DataType::Float, L"features");

    assert(numHiddenLayers >= 1);
    // Stack the hidden sigmoid layers.
    auto hiddenRoot = SetupFullyConnectedDNNLayer(inputVar, hiddenLayersDim, device, std::bind(Sigmoid, _1, L""));
    for (size_t layer = 1; layer < numHiddenLayers; ++layer)
    {
        hiddenRoot = SetupFullyConnectedDNNLayer(hiddenRoot, hiddenLayersDim, device, std::bind(Sigmoid, _1, L""));
    }

    // Final linear projection onto the output classes.
    auto outputWeights = Parameter(NDArrayView::RandomUniform<float>({numOutputClasses, hiddenLayersDim}, -0.5, 0.5, 1, device));
    auto classifierFunc = Times(outputWeights, hiddenRoot, 1, L"classifierOutput");

    // Sanity check: one (W, b) parameter pair per hidden layer plus the output weights.
    if (classifierFunc->Parameters().size() != ((numHiddenLayers * 2) + 1))
    {
        throw std::runtime_error("MultiThreadsEvaluationWithClone: Function does not have expected Parameter count");
    }

    OutputFunctionInfo(classifierFunc);
    fprintf(stderr, "MultiThreadsEvaluationWithClone on device=%d\n", device.Id());

    // Each worker thread evaluates its own clone of the function.
    std::vector<std::thread> workers(threadCount);
    for (int th = 0; th < threadCount; ++th)
    {
        workers[th] = std::thread(RunEvaluationClassifier, classifierFunc->Clone(), device);
    }

    for (int th = 0; th < threadCount; ++th)
    {
        workers[th].join();
        fprintf(stderr, "thread %d joined.\n", th);
        fflush(stderr);
    }
}
/// <summary>
/// Shows how to use LoadLegacyModel() and Clone() to share function parameters among multi evaluation threads.
/// </summary>
/// <description>
/// Loads a previously trained model once, then gives every worker thread its own Clone()
/// of the loaded function; all clones share the loaded parameter values.
/// </description>
void MultiThreadsEvaluationWithLoadModel(const DeviceDescriptor& device, const int threadCount)
{
    // The model file will be trained and copied to the current runtime directory first.
    auto modelFuncPtr = CNTK::LoadLegacyModel(DataType::Float, L"01_OneHidden", device);

    OutputFunctionInfo(modelFuncPtr);
    fprintf(stderr, "MultiThreadsEvaluationWithLoadModel on device=%d\n", device.Id());

    // Spawn the workers, each evaluating its own clone of the loaded model.
    std::vector<std::thread> workers;
    workers.reserve(threadCount);
    for (int th = 0; th < threadCount; ++th)
    {
        workers.emplace_back(RunEvaluationOneHidden, modelFuncPtr->Clone(), device);
    }

    for (int th = 0; th < threadCount; ++th)
    {
        workers[th].join();
        fprintf(stderr, "thread %d joined.\n", th);
        fflush(stderr);
    }
}
/// Builds one fully connected layer, nonLinearity(plusParam + timesParam * input),
/// from pre-created parameters so multiple graphs can share the same weights.
inline FunctionPtr FullyConnectedDNNLayerWithSharedParameters(Variable input,
                                                              const Parameter& timesParam,
                                                              const Parameter& plusParam,
                                                              const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
    // Only rank-1 (vector) inputs are supported here.
    assert(input.Shape().Rank() == 1);

    // Todo: assume that timesParam has matched outputDim and inputDim,
    // and that plusParam has matched outputDim.
    return nonLinearity(Plus(plusParam, Times(timesParam, input)));
}
/// Assembles a feed-forward classifier net from pre-created parameters so that every
/// thread can build its own graph instance over the same shared parameter objects.
inline FunctionPtr FullyConnectedFeedForwardClassifierNetWithSharedParameters(Variable input,
                                                                              size_t numHiddenLayers,
                                                                              const Parameter& inputTimesParam,
                                                                              const Parameter& inputPlusParam,
                                                                              const Parameter hiddenLayerTimesParam[],
                                                                              const Parameter hiddenLayerPlusParam[],
                                                                              const Parameter& outputTimesParam,
                                                                              const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
    assert(numHiddenLayers >= 1);

    // Input layer, then the remaining hidden layers, each reusing its shared (W, b) pair.
    auto net = FullyConnectedDNNLayerWithSharedParameters(input, inputTimesParam, inputPlusParam, nonLinearity);
    for (size_t layer = 1; layer < numHiddenLayers; ++layer)
    {
        net = FullyConnectedDNNLayerWithSharedParameters(net, hiddenLayerTimesParam[layer - 1], hiddenLayerPlusParam[layer - 1], nonLinearity);
    }

    // Todo: assume that outputTimesParam has matched output dim and hiddenLayerDim
    return Times(outputTimesParam, net);
}
/// <summary>
/// Thread body: builds the shared-parameter classifier, wraps it with loss and error
/// nodes, sanity-checks the resulting graph, and runs several forward passes with
/// random input/label data on 'computeDevice'.
/// </summary>
void CreateFunctionAndEvaluateWithSharedParameters(size_t inputDim,
    size_t numOutputClasses,
    size_t numHiddenLayers,
    const Parameter& inputTimesParam,
    const Parameter& inputPlusParam,
    const Parameter hiddenLayerTimesParam[],
    const Parameter hiddenLayerPlusParam[],
    const Parameter& outputTimesParam,
    const DeviceDescriptor& computeDevice)
{
    using namespace std::placeholders;

    // Create network using shared parameters
    auto inputVar = InputVariable({inputDim}, DataType::Float, L"Features");

    auto classifierOutputFunction = FullyConnectedFeedForwardClassifierNetWithSharedParameters(inputVar,
        numHiddenLayers,
        inputTimesParam,
        inputPlusParam,
        hiddenLayerTimesParam,
        hiddenLayerPlusParam,
        outputTimesParam,
        std::bind(Sigmoid, _1, L""));

    auto labelsVar = InputVariable({numOutputClasses}, DataType::Float, L"Labels");
    auto trainingLossFunction = CNTK::CrossEntropyWithSoftmax(classifierOutputFunction, labelsVar, L"LossFunction");
    auto predictionFunction = CNTK::ClassificationError(classifierOutputFunction, labelsVar, L"ClassificationError");

    // Combine loss, error and classifier output into one multi-output Function.
    auto ffNet = CNTK::Combine({trainingLossFunction, predictionFunction, classifierOutputFunction}, L"ClassifierModel");

    // Structural sanity checks: one (W, b) pair per hidden layer plus the output weights;
    // two arguments (features, labels); three outputs (loss, error, classifier output).
    if (ffNet->Parameters().size() != ((numHiddenLayers * 2) + 1))
    {
        throw std::runtime_error("CreateFunctionAndEvaluateWithSharedParameters: Function does not have expected Parameter count");
    }

    if (ffNet->Arguments().size() != 2)
    {
        throw std::runtime_error("CreateFunctionAndEvaluateWithSharedParameters: Function does not have expected Argument count");
    }

    if (ffNet->Outputs().size() != 3)
    {
        throw std::runtime_error("CreateFunctionAndEvaluateWithSharedParameters: Function does not have expected Output count");
    }

    // Evaluate the network in several runs
    size_t iterationCount = 4;
    unsigned int randSeed = 2;
    srand(randSeed); // fixed seed so every run produces the same random data stream
    size_t numSamples = 3;
    for (size_t t = 0; t < iterationCount; ++t)
    {
        // Random features in [0, 1].
        std::vector<float> inputData(inputDim * numSamples);
        for (size_t i = 0; i < inputData.size(); ++i)
        {
            inputData[i] = ((float)rand()) / RAND_MAX;
        }

        // Shape is (inputDim, sequenceLength=1, numSamples); the data buffer is wrapped
        // read-only (last arg true) without copying.
        NDShape inputShape = {inputDim, 1, numSamples};
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true));

        // One-hot labels: one random class per sample.
        std::vector<float> labelData(numOutputClasses * numSamples, 0);
        for (size_t i = 0; i < numSamples; ++i)
        {
            labelData[(i*numOutputClasses) + (rand() % numOutputClasses)] = 1;
        }

        NDShape labelShape = {numOutputClasses, 1, numSamples};
        ValuePtr labelValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(labelShape, labelData.data(), labelData.size(), DeviceDescriptor::CPUDevice(), true));

        // Null ValuePtrs ask Forward() to allocate the output Values.
        ValuePtr outputValue, predictionErrorValue;
        std::unordered_map<Variable, ValuePtr> outputs = {{classifierOutputFunction->Output(), outputValue}, {predictionFunction->Output(), predictionErrorValue}};
        ffNet->Forward({{inputVar, inputValue}, {labelsVar, labelValue}}, outputs, computeDevice);
    }
}
/// Creates a linear layer (W * input + b) with freshly allocated, randomly
/// initialized parameters on 'device'.
inline FunctionPtr SetupFullyConnectedLinearLayer(Variable input, size_t outputDim, const DeviceDescriptor& device, const std::wstring& outputName = L"")
{
    // Only rank-1 (vector) inputs are supported here.
    assert(input.Shape().Rank() == 1);
    const size_t inputDim = input.Shape()[0];

    // Uniform random init in [-0.05, 0.05], fixed seed 1.
    auto weights = CNTK::Parameter(CNTK::NDArrayView::RandomUniform<float>({outputDim, inputDim}, -0.05, 0.05, 1, device));
    auto bias = CNTK::Parameter(CNTK::NDArrayView::RandomUniform<float>({outputDim}, -0.05, 0.05, 1, device));
    return CNTK::Plus(bias, CNTK::Times(weights, input), outputName);
}
/// Creates a fully connected layer with fresh parameters and applies 'nonLinearity' to it.
inline FunctionPtr SetupFullyConnectedDNNLayer(Variable input, size_t outputDim, const DeviceDescriptor& device, const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
    auto linear = SetupFullyConnectedLinearLayer(input, outputDim, device);
    return nonLinearity(linear);
}
/// Prints the name and kind of every argument (input) and output variable of 'func'
/// to stderr, for diagnostic purposes.
void OutputFunctionInfo(FunctionPtr func)
{
    auto inputVariables = func->Arguments();
    // %zu is the portable conversion for size_t; the original %lu is wrong on
    // LLP64 platforms (64-bit Windows), where size_t is unsigned long long.
    fprintf(stderr, "Function %S: Input Variables (count=%zu)\n", func->Name().c_str(), inputVariables.size());
    for (const auto& v : inputVariables)
    {
        // VariableKind is a scoped enum; cast explicitly so it matches the %d conversion.
        fprintf(stderr, "    name=%S, kind=%d\n", v.Name().c_str(), static_cast<int>(v.Kind()));
    }

    auto outputVariables = func->Outputs();
    fprintf(stderr, "Function %S: Output Variables (count=%zu)\n", func->Name().c_str(), outputVariables.size());
    for (const auto& v : outputVariables)
    {
        fprintf(stderr, "    name=%S, kind=%d\n", v.Name().c_str(), static_cast<int>(v.Kind()));
    }
}
bool GetVariableByName(std::vector<Variable> variableLists, std::wstring varName, Variable& var)
{
for (std::vector<Variable>::iterator it = variableLists.begin(); it != variableLists.end(); ++it)
{
if (it->Name().compare(varName) == 0)
{
var = *it;
return true;
}
}
return false;
}
/// Convenience wrapper: looks up an input (argument) variable of 'evalFunc' by name.
inline bool GetInputVariableByName(FunctionPtr evalFunc, std::wstring varName, Variable& var)
{
    return GetVariableByName(evalFunc->Arguments(), varName, var);
}
/// Convenience wrapper: looks up an output variable of 'evalFunc' by name.
/// NOTE(review): "Varaiable" is a typo in the function name; it is kept because
/// callers in this file use it — renaming would break them.
inline bool GetOutputVaraiableByName(FunctionPtr evalFunc, std::wstring varName, Variable& var)
{
    return GetVariableByName(evalFunc->Outputs(), varName, var);
}
/// <summary>
/// Worker-thread body: feeds random inputs through 'evalFunc' on 'device' for several
/// iterations and prints up to the first 10 output values of each sample.
/// </summary>
void RunEvaluationClassifier(FunctionPtr evalFunc, const DeviceDescriptor& device)
{
    const std::wstring inputNodeName = L"features";

    Variable inputVar;
    if (!GetInputVariableByName(evalFunc, inputNodeName, inputVar))
    {
        fprintf(stderr, "Input variable %S is not available.\n", inputNodeName.c_str());
        // NOTE(review): this throws a const char*, not a std::exception subclass;
        // callers must catch(...) or catch(const char*). Kept for compatibility.
        throw("Input variable not found error.");
    }

    // Evaluate the network in several runs
    size_t iterationCount = 4;
    unsigned int randSeed = 2;
    srand(randSeed); // fixed seed: same pseudo-random inputs on every run
    size_t numSamples = 3;
    std::vector<float> inputData(inputVar.Shape().TotalSize() * numSamples);
    for (size_t t = 0; t < iterationCount; ++t)
    {
        // Random features in [0, 1].
        for (size_t i = 0; i < inputData.size(); ++i)
        {
            inputData[i] = ((float)rand()) / RAND_MAX;
        }

        // Create input data shape. Adding sequence length and numSamples as axes.
        // Todo: remove sequence length when only numSamples is supported.
        // Todo: add convenience APIs to simplify data preparation here.
        NDShape inputShape = inputVar.Shape().AppendShape({1, numSamples});
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData, true));

        // Define output: a null ValuePtr asks Forward() to allocate it.
        ValuePtr outputValue;
        auto outputVar = evalFunc->Output();
        std::unordered_map<Variable, ValuePtr> outputs = {{outputVar, outputValue}};

        // Evaluate the model
        evalFunc->Forward({{inputVar, inputValue}}, outputs, device);

        // Get output value
        outputValue = outputs[outputVar];

        // Copy the (possibly GPU-resident) result into a CPU-side buffer for printing.
        // Todo: remove sequence length when only numSamples is supported.
        // Todo: add convenience APIs to simplify retrieval of output results.
        NDShape outputShape = outputVar.Shape().AppendShape({1, numSamples});
        std::vector<float> outputData(outputShape.TotalSize());
        NDArrayViewPtr cpuArrayOutput = MakeSharedObject<NDArrayView>(outputShape, outputData, false);
        cpuArrayOutput->CopyFrom(*outputValue->Data());

        assert(outputData.size() == outputVar.Shape()[0] * numSamples);
        fprintf(stderr, "Evaluation result:\n");
        size_t dataIndex = 0;
        auto outputDim = outputVar.Shape()[0];
        for (size_t i = 0; i < numSamples; i++)
        {
            // %zu is the correct conversion for size_t; the original %lu mismatches
            // on LLP64 platforms (64-bit Windows).
            fprintf(stderr, "Iteration:%zu, Sample %zu:\n", t, i);
            fprintf(stderr, "    ");
            dataIndex = i * outputDim;
            for (size_t j = 0; j < std::min((size_t)10, outputDim); j++)
            {
                fprintf(stderr, "%f ", outputData[dataIndex++]);
            }
            if (outputDim > 10)
            {
                fprintf(stderr, "...");
            }
            fprintf(stderr, "\n");
        }
    }
}
/// <summary>
/// Worker-thread body for the loaded 01_OneHidden model: feeds deterministic inputs
/// through 'evalFunc' on 'device' for several iterations and prints every output value
/// of the "out.z_output" node.
/// </summary>
void RunEvaluationOneHidden(FunctionPtr evalFunc, const DeviceDescriptor& device)
{
    const std::wstring inputNodeName = L"features";
    const std::wstring outputNodeName = L"out.z_output";

    Variable inputVar;
    if (!GetInputVariableByName(evalFunc, inputNodeName, inputVar))
    {
        fprintf(stderr, "Input variable %S is not available.\n", inputNodeName.c_str());
        // NOTE(review): throws a const char*, not a std::exception subclass; kept for compatibility.
        throw("Input variable not found error.");
    }

    Variable outputVar;
    if (!GetOutputVaraiableByName(evalFunc, outputNodeName, outputVar))
    {
        fprintf(stderr, "Output variable %S is not available.\n", outputNodeName.c_str());
        throw("Output variable not found error.");
    }

    // Evaluate the network in several runs
    size_t iterationCount = 4;
    size_t numSamples = 3;
    for (size_t t = 0; t < iterationCount; ++t)
    {
        // Deterministic input: values cycle through 0..254, mimicking pixel intensities.
        std::vector<float> inputData(inputVar.Shape().TotalSize() * numSamples);
        for (size_t i = 0; i < inputData.size(); ++i)
        {
            inputData[i] = static_cast<float>(i % 255);
        }

        // Shape gains (sequenceLength=1, numSamples) axes; buffer is wrapped read-only.
        NDShape inputShape = inputVar.Shape().AppendShape({1, numSamples});
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData, true));

        // A null ValuePtr asks Forward() to allocate the output Value.
        ValuePtr outputValue;
        std::unordered_map<Variable, ValuePtr> outputs = {{outputVar, outputValue}};
        evalFunc->Forward({{inputVar, inputValue}}, outputs, device);
        outputValue = outputs[outputVar];

        // Copy the (possibly GPU-resident) result into a CPU-side buffer for printing.
        NDShape outputShape = outputVar.Shape().AppendShape({1, numSamples});
        std::vector<float> outputData(outputShape.TotalSize());
        NDArrayViewPtr cpuArrayOutput = MakeSharedObject<NDArrayView>(outputShape, outputData, false);
        cpuArrayOutput->CopyFrom(*outputValue->Data());

        assert(outputData.size() == outputVar.Shape()[0] * numSamples);
        fprintf(stderr, "Evaluation result:\n");
        size_t dataIndex = 0;
        auto outputDim = outputVar.Shape()[0];
        for (size_t i = 0; i < numSamples; i++)
        {
            // %zu is the correct conversion for size_t; the original %lu mismatches
            // on LLP64 platforms (64-bit Windows).
            fprintf(stderr, "Iteration:%zu, Sample %zu:\n", t, i);
            // Fixed "Ouput:" typo in the original message.
            fprintf(stderr, "Output:");
            for (size_t j = 0; j < outputDim; j++)
            {
                fprintf(stderr, "%f ", outputData[dataIndex++]);
            }
            fprintf(stderr, "\n");
        }
    }
}
void MultiThreadsEvaluation(bool isGPUAvailable)
{
#ifndef CPUONLY
if (isGPUAvailable)
{
fprintf(stderr, "Run evaluation on GPU device using GPU build.\n");
}
else
{
fprintf(stderr, "Run evaluation on CPU device using GPU build.\n");
}
#else
fprintf(stderr, "Run evaluation using CPU-only build.\n");
#endif
// Test multi-threads evaluation with new function
fprintf(stderr, "Test multi-threaded evaluation with new function on CPU.\n");
MultiThreadsEvaluationWithNewFunction(DeviceDescriptor::CPUDevice(), 2);
if (isGPUAvailable)
{
fprintf(stderr, "Test multi-threaded evaluation with new function on GPU\n");
MultiThreadsEvaluationWithNewFunction(DeviceDescriptor::GPUDevice(0), 2);
}
// Test multi-threads evaluation using clone.
fprintf(stderr, "Test multi-threaded evaluation using clone on CPU.\n");
MultiThreadsEvaluationWithClone(DeviceDescriptor::CPUDevice(), 2);
if (isGPUAvailable)
{
fprintf(stderr, "Test multi-threaded evaluation using clone on GPU.\n");
MultiThreadsEvaluationWithClone(DeviceDescriptor::GPUDevice(0), 2);
}
// test multi-threads evaluation with loading existing models
fprintf(stderr, "Test multi-threaded evaluation with loading existing models on CPU.\n");
MultiThreadsEvaluationWithLoadModel(DeviceDescriptor::CPUDevice(), 2);
if (isGPUAvailable)
{
fprintf(stderr, "Test multi-threaded evaluation with loading existing models on GPU.\n");
MultiThreadsEvaluationWithLoadModel(DeviceDescriptor::GPUDevice(0), 2);
}
fflush(stderr);
}

Просмотреть файл

@ -49,7 +49,7 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<Reference Include="EvalWrapper, Version=0.0.0.0, Culture=neutral, processorArchitecture=AMD64"> <Reference Include="EvalWrapper, Version=0.0.0.0, Culture=neutral, processorArchitecture=AMD64">
<HintPath>..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.1\lib\net45\x64\EvalWrapper.dll</HintPath> <HintPath>..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.2\lib\net45\x64\EvalWrapper.dll</HintPath>
<Private>True</Private> <Private>True</Private>
</Reference> </Reference>
<Reference Include="System" /> <Reference Include="System" />
@ -85,11 +85,11 @@
</BootstrapperPackage> </BootstrapperPackage>
</ItemGroup> </ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<Import Project="..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.1\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets" Condition="Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.1\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" /> <Import Project="..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.2\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets" Condition="Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.2\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" />
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild"> <Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup> <PropertyGroup>
<ErrorText>This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText> <ErrorText>This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup> </PropertyGroup>
<Error Condition="!Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.1\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.1\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets'))" /> <Error Condition="!Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.2\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.2\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets'))" />
</Target> </Target>
</Project> </Project>

Просмотреть файл

@ -30,8 +30,8 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
/// ///
/// EvaluateModelSingleLayer and EvaluateModelMultipleLayers /// EvaluateModelSingleLayer and EvaluateModelMultipleLayers
/// -------------------------------------------------------- /// --------------------------------------------------------
/// These two cases require the 01_OneHidden model which is part of the <CNTK>/Examples/Image/MNIST example. /// These two cases require the 01_OneHidden model which is part of the <CNTK>/Examples/Image/GettingStarted example.
/// Refer to <see cref="https://github.com/Microsoft/CNTK/blob/master/Examples/Image/MNIST/README.md"/> for how to train /// Refer to <see cref="https://github.com/Microsoft/CNTK/blob/master/Examples/Image/GettingStarted/README.md"/> for how to train
/// the model used in these examples. /// the model used in these examples.
/// ///
/// EvaluateNetworkSingleLayer and EvaluateNetworkSingleLayerNoInput /// EvaluateNetworkSingleLayer and EvaluateNetworkSingleLayerNoInput
@ -41,8 +41,8 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
/// ///
/// EvaluateMultipleModels /// EvaluateMultipleModels
/// ---------------------- /// ----------------------
/// This case requires the 02_Convolution model and the Test-28x28_cntk_text.txt test file which are part of the <CNTK>/Examples/Image/MNIST example. /// This case requires the 02_Convolution model and the Test-28x28_cntk_text.txt test file which are part of the <CNTK>/Examples/Image/GettingStarted example.
/// Refer to <see cref="https://github.com/Microsoft/CNTK/blob/master/Examples/Image/MNIST/README.md"/> for how to train /// Refer to <see cref="https://github.com/Microsoft/CNTK/blob/master/Examples/Image/GettingStarted/README.md"/> for how to train
/// the model used in this example. /// the model used in this example.
/// ///
/// EvaluateImageClassificationModel /// EvaluateImageClassificationModel
@ -142,15 +142,15 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
// The examples assume the executable is running from the data folder // The examples assume the executable is running from the data folder
// We switch the current directory to the data folder (assuming the executable is in the <CNTK>/x64/Debug|Release folder // We switch the current directory to the data folder (assuming the executable is in the <CNTK>/x64/Debug|Release folder
Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\MNIST\Data\"); Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\GettingStarted");
List<float> outputs; List<float> outputs;
using (var model = new IEvaluateModelManagedF()) using (var model = new IEvaluateModelManagedF())
{ {
// Load model // Load model
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @"..\Output\Models\01_OneHidden"); string modelFilePath = Path.Combine(Environment.CurrentDirectory, @".\Output\Models\01_OneHidden");
ThrowIfFileNotExist(modelFilePath, ThrowIfFileNotExist(modelFilePath,
string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/MNIST to create the model.", modelFilePath)); string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/GettingStarted to create the model.", modelFilePath));
model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId: -1); model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId: -1);
@ -189,7 +189,7 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
{ {
// The examples assume the executable is running from the data folder // The examples assume the executable is running from the data folder
// We switch the current directory to the data folder (assuming the executable is in the <CNTK>/x64/Debug|Release folder // We switch the current directory to the data folder (assuming the executable is in the <CNTK>/x64/Debug|Release folder
Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\MNIST\Data\"); Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\GettingStarted");
Dictionary<string, List<float>> outputs; Dictionary<string, List<float>> outputs;
@ -200,9 +200,9 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
const string outputLayerName = "out.z"; const string outputLayerName = "out.z";
// Load model // Load model
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @"..\Output\Models\01_OneHidden"); string modelFilePath = Path.Combine(Environment.CurrentDirectory, @".\Output\Models\01_OneHidden");
ThrowIfFileNotExist(modelFilePath, ThrowIfFileNotExist(modelFilePath,
string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/MNIST to create the model.", modelFilePath)); string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/GettingStarted to create the model.", modelFilePath));
var desiredOutputLayers = new List<string>() { hiddenLayerName, outputLayerName }; var desiredOutputLayers = new List<string>() { hiddenLayerName, outputLayerName };
model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId: -1, outputNodeNames: desiredOutputLayers); model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId: -1, outputNodeNames: desiredOutputLayers);
@ -395,19 +395,19 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
// The examples assume the executable is running from the data folder // The examples assume the executable is running from the data folder
// We switch the current directory to the data folder (assuming the executable is in the <CNTK>/x64/Debug|Release folder // We switch the current directory to the data folder (assuming the executable is in the <CNTK>/x64/Debug|Release folder
Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\MNIST\Data\"); Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\GettingStarted");
// Load model // Load model
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @"..\Output\Models\02_Convolution"); string modelFilePath = Path.Combine(Environment.CurrentDirectory, @".\Output\Models\02_OneConv");
ThrowIfFileNotExist(modelFilePath, ThrowIfFileNotExist(modelFilePath,
string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/MNIST to create the model.", modelFilePath)); string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/GettingStarted to create the model.", modelFilePath));
// Initializes the model instances // Initializes the model instances
ModelEvaluator.Initialize(numConcurrentModels, modelFilePath); ModelEvaluator.Initialize(numConcurrentModels, modelFilePath);
string testfile = Path.Combine(Environment.CurrentDirectory, @"Test-28x28_cntk_text.txt"); string testfile = Path.Combine(Environment.CurrentDirectory, @"..\DataSets\MNIST\Test-28x28_cntk_text.txt");
ThrowIfFileNotExist(testfile, ThrowIfFileNotExist(testfile,
string.Format("Error: The test file '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/MNIST to download the data.", testfile)); string.Format("Error: The test file '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/GettingStarted to download the data.", testfile));
Stopwatch sw = new Stopwatch(); Stopwatch sw = new Stopwatch();
sw.Start(); sw.Start();
@ -475,9 +475,9 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
{ {
// This example requires the RestNet_18 model. // This example requires the RestNet_18 model.
// The model can be downloaded from <see cref="https://www.cntk.ai/resnet/ResNet_18.model"/> // The model can be downloaded from <see cref="https://www.cntk.ai/resnet/ResNet_18.model"/>
// The model is assumed to be located at: <CNTK>\Examples\Image\Miscellaneous\ImageNet\ResNet // The model is assumed to be located at: <CNTK>\Examples\Image\Classification\ResNet
// along with a sample image file named "zebra.jpg". // along with a sample image file named "zebra.jpg".
string workingDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\Miscellaneous\ImageNet\ResNet"); string workingDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\Classification\ResNet");
Environment.CurrentDirectory = initialDirectory; Environment.CurrentDirectory = initialDirectory;
List<float> outputs; List<float> outputs;
@ -486,7 +486,7 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
{ {
string modelFilePath = Path.Combine(workingDirectory, "ResNet_18.model"); string modelFilePath = Path.Combine(workingDirectory, "ResNet_18.model");
ThrowIfFileNotExist(modelFilePath, ThrowIfFileNotExist(modelFilePath,
string.Format("Error: The model '{0}' does not exist. Please download the model from https://www.cntk.ai/resnet/ResNet_18.model and save it under ..\\..\\Examples\\Image\\Miscellaneous\\ImageNet\\ResNet.", modelFilePath)); string.Format("Error: The model '{0}' does not exist. Please download the model from https://www.cntk.ai/resnet/ResNet_18.model and save it under ..\\..\\Examples\\Image\\Classification\\ResNet.", modelFilePath));
model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId: -1); model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId: -1);

Просмотреть файл

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<packages> <packages>
<package id="Microsoft.Research.CNTK.CpuEval-mkl" version="1.7.1" targetFramework="net45" /> <package id="Microsoft.Research.CNTK.CpuEval-mkl" version="1.7.2" targetFramework="net45" />
</packages> </packages>

Просмотреть файл

@ -1,14 +1,15 @@
# Simple CIFAR-10 convnet, without and with BatchNormalization. # ConvNet applied on CIFAR-10 dataset, with no data augmentation.
command = TrainConvNet:Eval command = TrainConvNet:Eval
makeMode = false ; traceLevel = 1 ; deviceId = 0 precision = "float"; traceLevel = 1 ; deviceId = "auto"
RootDir = "." ; DataDir = "$RootDir$" ; ModelDir = "$RootDir$/Output/Models" rootDir = "../.." ; dataDir = "$rootDir$/DataSets/CIFAR-10" ;
outputDir = "./Output" ;
modelPath = "$ModelDir$/ConvNet" modelPath = "$outputDir$/Models/ConvNet_CIFAR10"
#stderr = "$outputDir$/ConvNet_CIFAR10_bs_out"
# Training without BN
TrainConvNet = { TrainConvNet = {
action = "train" action = "train"
@ -16,19 +17,21 @@ TrainConvNet = {
imageShape = 32:32:3 imageShape = 32:32:3
labelDim = 10 labelDim = 10
Subtract128 (x) = x - Constant (128) featMean = 128
featScale = 1/256
Normalize{m,f} = x => f .* (x - m)
model = Sequential ( model = Sequential (
Subtract128 : Normalize {featMean, featScale} :
ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU, init = 'glorotUniform', initValueScale=0.00390625} : ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} : MaxPoolingLayer {(3:3), stride = (2:2)} :
ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU, init = 'glorotUniform'} : ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} : MaxPoolingLayer {(3:3), stride = (2:2)} :
ConvolutionalLayer {64, (5:5), pad = true, activation = ReLU, init = 'glorotUniform'} : DenseLayer {256} : Dropout : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} : DenseLayer {128} : Dropout : ReLU :
DenseLayer {64, activation = ReLU, init = 'glorotUniform', initValueScale=0.1} : LinearLayer {labelDim}
Dropout :
LinearLayer {labelDim, init = 'glorotUniform', initValueScale=0.1}
) )
# inputs # inputs
@ -51,20 +54,23 @@ TrainConvNet = {
} }
SGD = { SGD = {
epochSize = 49984 ; minibatchSize = 64 epochSize = 0
minibatchSize = 64
learningRatesPerSample = 0.00015625*10:0.000046875*10:0.000015625 learningRatesPerSample = 0.0015625*10:0.00046875*10:0.00015625
momentumAsTimeConstant = 600*20:6400 momentumAsTimeConstant = 0*20:6400
maxEpochs = 30 maxEpochs = 30
L2RegWeight = 0.03 L2RegWeight = 0.002
dropoutRate = 0*5:0.5 dropoutRate = 0*5:0.5
firstMBsToShowResult = 10 ; numMBsToShowResult = 500 numMBsToShowResult = 100
} }
reader = { reader = {
readerType = "CNTKTextFormatReader" readerType = "CNTKTextFormatReader"
file = "$DataDir$/Train_cntk_text.txt" file = "$DataDir$/Train_cntk_text.txt"
randomize = true
keepDataInMemory = true # cache all data in memory
input = { input = {
features = { dim = 3072 ; format = "dense" } features = { dim = 3072 ; format = "dense" }
labels = { dim = 10 ; format = "dense" } labels = { dim = 10 ; format = "dense" }

Просмотреть файл

@ -0,0 +1,109 @@
# ConvNet applied on CIFAR-10 dataset, with data augmentation (translation and flipping).
command = TrainConvNet:Eval
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = "../.." ; dataDir = "$rootDir$/DataSets/CIFAR-10" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/ConvNet_CIFAR10_DataAug"
#stderr = "$outputDir$/ConvNet_CIFAR10_DataAug_bs_out"
TrainConvNet = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 32:32:3
labelDim = 10
featMean = 128
featScale = 1/256
Normalize{m,f} = x => Constant(f) .* (x - Constant(m))
model = Sequential (
Normalize {featMean, featScale} :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} :
DenseLayer {256} : Dropout : ReLU :
DenseLayer {128} : Dropout : ReLU :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
z = model (features)
# connect to system
ce = CrossEntropyWithSoftmax (labels, z)
errs = ClassificationError (labels, z)
top5Errs = ClassificationError (labels, z, topN=5) # only used in Eval action
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs) # top5Errs only used in Eval
outputNodes = (z)
}
SGD = {
epochSize = 0
minibatchSize = 64
learningRatesPerSample = 0.0015625*20:0.00046875*20:0.00015625*20:0.000046875*10:0.000015625
momentumAsTimeConstant = 0*20:600*20:6400
maxEpochs = 80
L2RegWeight = 0.002
dropoutRate = 0*5:0.5
numMBsToShowResult = 100
}
reader = {
verbosity = 0 ; randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }
)}
labels = { labelDim = 10 }
}
})
}
}
# Eval action
Eval = {
action = "eval"
evalNodeNames = errs:top5Errs # also test top-5 error rate
# Set minibatch size for testing.
minibatchSize = 512
reader = {
verbosity = 0 ; randomize = false
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/test_map.txt"
input = {
features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean"; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }
)}
labels = { labelDim = 10 }
}
})
}
}

Просмотреть файл

@ -0,0 +1,90 @@
# ConvNet on MNIST dataset.
command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = "../.." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/ConvNet_MNIST"
#stderr = "$outputDir$/ConvNet_MNIST_bs_out"
# TRAINING CONFIG
trainNetwork = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 28:28:1 # image dimensions, 1 channel only
labelDim = 10 # number of distinct labels
featScale = 1/256
Scale{f} = x => Constant(f) .* x
model = Sequential (
Scale {featScale} :
ConvolutionalLayer {32, (5:5), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride=(2:2)} :
ConvolutionalLayer {48, (3:3), pad = false} : ReLU :
MaxPoolingLayer {(3:3), stride=(2:2)} :
ConvolutionalLayer {64, (3:3), pad = false} : ReLU :
DenseLayer {96} : Dropout : ReLU :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
ol = model (features)
# loss and error computation
ce = CrossEntropyWithSoftmax (labels, ol)
errs = ClassificationError (labels, ol)
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (ol)
}
SGD = {
epochSize = 60000
minibatchSize = 64
maxEpochs = 40
learningRatesPerSample = 0.001*10:0.0005*10:0.0001
dropoutRate = 0.5
momentumAsTimeConstant = 0*5:1024
numMBsToShowResult = 500
}
reader = {
readerType = "CNTKTextFormatReader"
# See ../README.md for details on getting the data (Train-28x28_cntk_text.txt).
file = "$DataDir$/Train-28x28_cntk_text.txt"
randomize = true
keepDataInMemory = true
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}
# TEST CONFIG
testNetwork = {
action = test
minibatchSize = 1024 # reduce this if you run out of memory
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Test-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}

Просмотреть файл

@ -0,0 +1,51 @@
# CNTK Examples: Image/Classification/ConvNet
## Overview
|Data: |The MNIST dataset (http://yann.lecun.com/exdb/mnist/) of handwritten digits and the CIFAR-10 dataset (http://www.cs.toronto.edu/~kriz/cifar.html) for image classification.
|:---------|:---
|Purpose |This folder contains a number of examples that demonstrate the usage of BrainScript to define convolutional neural networks for image classification.
|Network |convolutional neural networks.
|Training |Stochastic gradient descent with momentum.
|Comments |See below.
## Running the example
### Getting the data
We use the MNIST and CIFAR-10 datasets to demonstrate how to train a `convolutional neural network (CNN)`. CNN has been one of the most popular neural networks for image-related tasks. A very well-known early work on CNN is the [LeNet](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf). In 2012 Alex Krizhevsky, Ilya Sutskever, and Geoffrey Hinton won the ILSVRC-2012 competition using a [CNN architecture](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf). Most state-of-the-art neural networks for image classification tasks today adopt a modified CNN architecture, such as [VGG](../VGG), [GoogLeNet](../GoogLeNet), [ResNet](../ResNet), etc.
The MNIST and CIFAR-10 datasets are not included in the CNTK distribution but can be easily downloaded and converted by following the instructions in [DataSets/MNIST](../../DataSets/MNIST) and [DataSets/CIFAR-10](../../DataSets/CIFAR-10). We recommend keeping the downloaded data in the respective folders, as the configuration files in this folder assume that location by default.
## Details
### ConvNet_MNIST.cntk
Our first example applies CNN on the MNIST dataset. The network we use contains three convolution layers and two dense layers. Dropout is applied after the first dense layer. No data augmentation is used in this example. We start the training with no momentum, and add momentum after training for 5 epochs. Please refer to the cntk configuration file [ConvNet_MNIST.cntk](./ConvNet_MNIST.cntk) for more details.
Run the example from the current folder using:
`cntk configFile=ConvNet_MNIST.cntk`
The network achieves an error rate of `0.5%`, which is very good considering no data augmentation is used. This accuracy is comparable, if not better, than many other vanilla CNN implementations (http://yann.lecun.com/exdb/mnist/).
### ConvNet_CIFAR10.cntk
The second example applies CNN on the CIFAR-10 dataset. The network contains four convolution layers and three dense layers. Max pooling is conducted for every two convolution layers. Dropout is applied after the first two dense layers. No data augmentation is used. Please refer to the cntk configuration file [ConvNet_CIFAR10.cntk](./ConvNet_CIFAR10.cntk) for more details.
Run the example from the current folder using:
`cntk configFile=ConvNet_CIFAR10.cntk`
The network achieves an error rate of `18.51%` after 30 epochs. This is comparable to the network published by [cuda-convnet](https://code.google.com/p/cuda-convnet/), which has 18% error with no data augmentation. One difference is that we do not use a `local response normalization layer`. This layer type is now rarely used in most state-of-the-art deep learning networks.
### ConvNet_CIFAR10_DataAug.cntk
The third example uses the same CNN as the previous example, but it improves by adding data augmentation to training. For this purpose, we use the `ImageReader` instead of the `CNTKTextFormatReader` to load the data. The ImageReader currently supports crop, flip, scale, color jittering, and mean subtraction.
For a reference on image reader and transforms, please check [here](https://github.com/Microsoft/CNTK/wiki/Image-reader).
Run the example from the current folder using:
`cntk configFile=ConvNet_CIFAR10_DataAug.cntk`
As seen in the cntk configuration file [ConvNet_CIFAR10_DataAug.cntk](./ConvNet_CIFAR10_DataAug.cntk), we use a fixed crop ratio of `0.8` and scale the image to `32x32` pixels for training. Since all training images are pre-padded to `40x40` pixels, effectively we only perform a translation transform without scaling. The error rate of the network on test data is `14.21%`, which is a lot better than the previous model.

Просмотреть файл

@ -0,0 +1,85 @@
# Multi-layer perceptron (MLP) on MNIST dataset.
command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = "../.." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/MLP_MNIST"
#stderr = "$outputDir$/MLP_MNIST_bs_out"
# TRAINING CONFIG
trainNetwork = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 28:28:1 # image dimensions, 1 channel only
labelDim = 10 # number of distinct labels
featScale = 1/256
Scale{f} = x => Constant(f) .* x
model = Sequential (
Scale {featScale} :
DenseLayer {768, init="gaussian", initValueScale=1.5} : Dropout: ReLU :
DenseLayer {512, init="gaussian", initValueScale=1.5} : Dropout: ReLU :
DenseLayer {256, init="gaussian", initValueScale=1.5} : Dropout: ReLU :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
z = model (features)
# loss and error computation
ce = CrossEntropyWithSoftmax (labels, z)
errs = ClassificationError (labels, z)
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (z)
}
SGD = {
epochSize = 60000
minibatchSize = 64
maxEpochs = 40
learningRatesPerSample = 0.001*10:0.0005*10:0.0001
dropoutRate = 0.5
momentumAsTimeConstant = 600*10:4096
numMBsToShowResult = 500
}
reader = {
readerType = "CNTKTextFormatReader"
# See ../README.md for details on getting the data (Train-28x28_cntk_text.txt).
file = "$DataDir$/Train-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}
# TEST CONFIG
testNetwork = {
action = test
minibatchSize = 1024 # reduce this if you run out of memory
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Test-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}

Просмотреть файл

@ -0,0 +1,30 @@
# CNTK Examples: Image/Classification/MLP
## Overview
|Data: |The MNIST dataset (http://yann.lecun.com/exdb/mnist/) of handwritten digits.
|:---------|:---
|Purpose |This folder contains a number of examples that demonstrate the usage of BrainScript to define multi-layer perceptron (MLP) networks for image classification.
|Network |Multi-layer perceptron.
|Training |Stochastic gradient descent with momentum.
|Comments |See below.
## Running the example
### Getting the data
We use the MNIST dataset to demonstrate how to train a `multi-layer perceptron (MLP)` network. An MLP is a feed-forward neural network that consists of multiple layers of nodes in a directed graph, where each layer is fully connected to the next one. This is arguably one of the simplest neural networks.
The MNIST dataset is not included in the CNTK distribution but can be easily downloaded and converted by following the instructions in [DataSets/MNIST](../../DataSets/MNIST). We recommend keeping the downloaded data in that folder, as the configuration files in this folder assume that location by default.
## Details
### MLP_MNIST.cntk
Similar to the `01_OneHidden.cntk` network in [GettingStarted](../../GettingStarted), MLP is "permutation invariant". In this particular example, we use 3 hidden layers, each containing `768`, `512` and `256` nodes, respectively. Dropout is applied after each hidden layer, with `dropoutRate=0.5`. The learning rate is gradually adjusted from `0.001` per sample to `0.0001`, and momentum as time constant is adjusted from `600` (effective momentum = `0.898824`) to `4096` (effective momentum = `0.984495`).
Run the example from the current folder using:
`cntk configFile=MLP_MNIST.cntk`
The network achieves an error rate of `1.45%`, which is about as good as one can have with MLP and no data augmentation (http://yann.lecun.com/exdb/mnist/).

Просмотреть файл

До

Ширина:  |  Высота:  |  Размер: 92 KiB

После

Ширина:  |  Высота:  |  Размер: 92 KiB

Просмотреть файл

@ -0,0 +1,21 @@
# CIFAR-10 Dataset
The CIFAR-10 dataset (http://www.cs.toronto.edu/~kriz/cifar.html) is a popular dataset for image classification, collected by Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton. It is a labeled subset of the [80 million tiny images](http://people.csail.mit.edu/torralba/tinyimages/) dataset.
The CIFAR-10 dataset consists of 60,000 32x32 color images in 10 classes, with 6,000 images per class. There are 50,000 training images and 10,000 test images. The 10 classes are: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck.
The CIFAR-10 dataset is not included in the CNTK distribution but can be easily downloaded and converted to CNTK-supported format by running the following Python command:
```
python install_cifar10.py
```
After running `install_cifar10.py`, you will see the original CIFAR-10 data are copied in a folder named `cifar-10-batches-py`. Meanwhile, two text files `Train_cntk_text.txt` and `Test_cntk_text.txt` are created in the current folder. These text files can be read directly by CNTK.
In addition, the script will create a `train` and a `test` folder that store train and test images in png format. It will also create appropriate mapping files (`train_map.txt` and `test_map.txt`) for the CNTK `ImageReader` as well as mean file `CIFAR-10_mean.xml`.
The total amount of disk space required for both the text version and the png version for CIFAR-10 is around `950`MB.
We provide multiple examples in the [Classification](../../Classification) folder to train classifiers for CIFAR-10 with CNTK. Please refer there for more details.
If you are curious about how well computers can perform on CIFAR-10 today, Rodrigo Benenson maintains a [blog](http://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#43494641522d3130) on the state-of-the-art performance of various algorithms.

Просмотреть файл

@ -0,0 +1,132 @@
from __future__ import print_function
try:
from urllib.request import urlretrieve
except ImportError:
from urllib import urlretrieve
import sys
import tarfile
import shutil
import os
import struct
import numpy as np
import pickle as cp
from PIL import Image
import xml.etree.cElementTree as et
import xml.dom.minidom
import getopt
ImgSize = 32
NumFeat = ImgSize * ImgSize * 3
def readBatch(src):
    """Read one pickled CIFAR-10 batch file.

    Parameters:
        src: path to a `data_batch_*` / `test_batch` pickle file.

    Returns:
        An int ndarray with one row per image: the raw pixel values
        followed by the class label in the last column.
    """
    with open(src, 'rb') as f:
        # The CIFAR-10 batches were pickled under Python 2; 'latin1' is
        # required to decode the byte strings when unpickling on Python 3.
        if sys.version_info[0] < 3:
            d = cp.load(f)
        else:
            d = cp.load(f, encoding='latin1')
        # Append the label as an extra trailing column.
        labels = np.reshape(d['labels'], (len(d['labels']), 1))
        res = np.hstack((d['data'], labels))
    # Use the builtin `int`: the `np.int` alias was deprecated in NumPy 1.20
    # and removed in NumPy 1.24, so astype(np.int) crashes on modern NumPy.
    return res.astype(int)
def loadData(src):
    """Download and unpack the CIFAR-10 python archive from `src`.

    Returns:
        (trn, tst): int ndarrays of shape (50000, NumFeat + 1) and
        (10000, NumFeat + 1); each row holds the raw pixels followed by
        the class label in the last column.
    """
    print ('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print ('Done.')
    try:
        print ('Extracting files...')
        # NOTE(review): extractall trusts member paths inside the archive;
        # acceptable for this well-known dataset URL, but do not reuse this
        # pattern for untrusted archives.
        with tarfile.open(fname) as tar:
            tar.extractall()
        print ('Done.')
        print ('Preparing train set...')
        # Builtin `int` instead of `np.int`: that alias was deprecated in
        # NumPy 1.20 and removed in 1.24, so dtype=np.int crashes today.
        trn = np.empty((0, NumFeat + 1), dtype=int)
        # The training data ships as five batches of 10000 images each.
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName)))
        print ('Done.')
        print ('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch')
        print ('Done.')
    finally:
        # Always delete the downloaded archive, even if extraction failed.
        os.remove(fname)
    return (trn, tst)
def saveTxt(filename, ndarray):
    """Write a label+feature matrix to `filename` in CNTKTextFormat.

    Each row of `ndarray` holds the feature values followed by the class
    label in its last column; the label is expanded into a one-hot vector
    of dimension 10 on the `|labels` field.
    """
    # Pre-render the ten possible one-hot label strings ('1 0 0 ...', etc.).
    oneHot = [' '.join(vec) for vec in np.eye(10, dtype=np.uint).astype(str)]
    with open(filename, 'w') as out:
        for sample in ndarray:
            asText = sample.astype(str)
            features = ' '.join(asText[:-1])
            out.write('|labels {} |features {}\n'.format(oneHot[sample[-1]], features))
def saveImage(fname, data, label, mapFile, regrFile, pad, **key_parms):
    """Save one CIFAR-10 image as a png and record it in the side files.

    Parameters:
        fname: destination png path.
        data: flat pixel array for one image (CHW order).
        label: integer class label, appended to `mapFile` with the path.
        mapFile: open text file receiving "<path>\\t<label>" lines.
        regrFile: open text file receiving per-channel-mean regression labels.
        pad: if > 0, pad each spatial side by this many pixels (value 128).
        key_parms: if a 'mean' ndarray is supplied, the unpadded image is
            accumulated into it (running pixel-wise sum for the dataset mean).
    """
    # data in the CIFAR-10 dataset is in CHW format.
    pixData = data.reshape((3, ImgSize, ImgSize))
    # Accumulate the *unpadded* image into the caller-provided running mean.
    if ('mean' in key_parms):
        key_parms['mean'] += pixData
    if pad > 0:
        pixData = np.pad(pixData, ((0, 0), (pad, pad), (pad, pad)), mode='constant', constant_values=128) # can also use mode='edge'
    # Convert CHW -> HWC uint8 and let PIL build the RGB image in one call;
    # this replaces the original per-pixel Python loop (one loop iteration
    # per pixel) with a single vectorized conversion.
    img = Image.fromarray(np.transpose(pixData, (1, 2, 0)).astype(np.uint8), 'RGB')
    img.save(fname)
    mapFile.write("%s\t%d\n" % (fname, label))
    # Per-channel mean of the (possibly padded) image, normalized to [0,1],
    # stored for the regression example.
    channelMean = np.mean(pixData, axis=(1,2))
    regrFile.write("|regrLabels\t%f\t%f\t%f\n" % (channelMean[0]/255.0, channelMean[1]/255.0, channelMean[2]/255.0))
def saveMean(fname, data):
    """Write the dataset mean image to `fname` as an OpenCV-style XML matrix.

    The element layout (opencv_storage / MeanImg with rows/cols/dt/data)
    matches the mean-file format consumed by the CNTK ImageReader 'Mean'
    transform.
    """
    flat = np.reshape(data, (ImgSize * ImgSize * 3))
    root = et.Element('opencv_storage')
    et.SubElement(root, 'Channel').text = '3'
    et.SubElement(root, 'Row').text = str(ImgSize)
    et.SubElement(root, 'Col').text = str(ImgSize)
    matrix = et.SubElement(root, 'MeanImg', type_id='opencv-matrix')
    et.SubElement(matrix, 'rows').text = '1'
    et.SubElement(matrix, 'cols').text = str(ImgSize * ImgSize * 3)
    et.SubElement(matrix, 'dt').text = 'f'
    et.SubElement(matrix, 'data').text = ' '.join('%e' % v for v in flat)
    # Serialize once, then re-parse with minidom purely to pretty-print
    # the file in place.
    et.ElementTree(root).write(fname)
    pretty = xml.dom.minidom.parse(fname).toprettyxml(indent = '  ')
    with open(fname, 'w') as f:
        f.write(pretty)
def saveTrainImages(filename, foldername):
    """Convert the five CIFAR-10 training batches to individual png files.

    Writes the images into `foldername`, appends one tab-separated
    "path label" line per image to train_map.txt, one per-channel-mean line
    to train_regrLabels.txt, and finally stores the accumulated dataset mean
    in CIFAR-10_mean.xml. Note: the `filename` parameter is unused here.
    """
    if not os.path.exists(foldername):
        os.makedirs(foldername)
    data = {}
    dataMean = np.zeros((3, ImgSize, ImgSize)) # mean is in CHW format.
    with open('train_map.txt', 'w') as mapFile:
        with open('train_regrLabels.txt', 'w') as regrFile:
            # The training set ships as five pickled batches of 10000 images each.
            for ifile in range(1, 6):
                with open(os.path.join('./cifar-10-batches-py', 'data_batch_' + str(ifile)), 'rb') as f:
                    # Batches were pickled under Python 2; latin1 is needed on Python 3.
                    if sys.version_info[0] < 3:
                        data = cp.load(f)
                    else:
                        data = cp.load(f, encoding='latin1')
                    for i in range(10000):
                        # Images are numbered 00000..49999 across all five batches.
                        fname = os.path.join(os.path.abspath(foldername), ('%05d.png' % (i + (ifile - 1) * 10000)))
                        # pad=4 pre-pads each image to 40x40 for crop augmentation;
                        # mean=dataMean accumulates the running pixel-wise sum.
                        saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, regrFile, 4, mean=dataMean)
    # Turn the accumulated sum into the mean over all 50000 training images.
    dataMean = dataMean / (50 * 1000)
    saveMean('CIFAR-10_mean.xml', dataMean)
def saveTestImages(filename, foldername):
    """Convert the CIFAR-10 test batch to individual png files.

    Writes the images into `foldername` and appends map / regression-label
    lines to test_map.txt and test_regrLabels.txt. Unlike the training
    conversion, no padding is applied (pad=0) and no mean is accumulated.
    Note: the `filename` parameter is unused here.
    """
    if not os.path.exists(foldername):
        os.makedirs(foldername)
    with open('test_map.txt', 'w') as mapFile:
        with open('test_regrLabels.txt', 'w') as regrFile:
            with open(os.path.join('./cifar-10-batches-py', 'test_batch'), 'rb') as f:
                # The batch was pickled under Python 2; latin1 is needed on Python 3.
                if sys.version_info[0] < 3:
                    data = cp.load(f)
                else:
                    data = cp.load(f, encoding='latin1')
                for i in range(10000):
                    fname = os.path.join(os.path.abspath(foldername), ('%05d.png' % i))
                    saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, regrFile, 0)

Просмотреть файл

@ -0,0 +1,18 @@
from __future__ import print_function
import cifar_utils as ut
if __name__ == "__main__":
    # Download and unpack the CIFAR-10 python archive; trn/tst are arrays
    # with one row per image (raw pixels plus a trailing label column).
    trn, tst= ut.loadData('http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz')
    print ('Writing train text file...')
    ut.saveTxt(r'./Train_cntk_text.txt', trn)
    print ('Done.')
    print ('Writing test text file...')
    ut.saveTxt(r'./Test_cntk_text.txt', tst)
    print ('Done.')
    # Also emit png versions plus map/mean files for the CNTK ImageReader.
    print ('Converting train data to png images...')
    ut.saveTrainImages(r'./Train_cntk_text.txt', 'train')
    print ('Done.')
    print ('Converting test data to png images...')
    ut.saveTestImages(r'./Test_cntk_text.txt', 'test')
    print ('Done.')

Просмотреть файл

@ -0,0 +1,14 @@
# MNIST Dataset
The MNIST dataset (http://yann.lecun.com/exdb/mnist/) for handwritten digits recognition is one of the most widely used image datasets for experimenting with different classification algorithms. MNIST has a training set of 60,000 examples, and a test set of 10,000 examples. Each example contains one digit that has been size-normalized and centered in a grayscale image at 28x28 pixel resolution.
The MNIST dataset is not included in the CNTK distribution but can be easily
downloaded and converted to CNTK-supported format by running the following Python command:
`python install_mnist.py`
After running the script, you will see two output files in the current folder: Train-28x28_cntk_text.txt and Test-28x28_cntk_text.txt. The total amount of disk space required is around `124`MB. You may now proceed to the [`GettingStarted`](../../GettingStarted) folder to play with this dataset.
Further, we provide two advanced examples with MNIST. The first one is a [`Multi-Layer Perceptron network (MLP)`](../../Classification/MLP), which achieves about 1.5% error rate. The second one is a [`Convolutional Neural Network (ConvNet)`](../../Classification/ConvNet), which achieves about 0.5% error rate. These results are comparable to the best published results using these types of networks.
If you are curious about how well computers can perform on MNIST today, Rodrigo Benenson maintains a [blog](http://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#4d4e495354) on the state-of-the-art performance of various algorithms.

Просмотреть файл

@ -0,0 +1,14 @@
from __future__ import print_function
import mnist_utils as ut
if __name__ == "__main__":
    # Download the MNIST training images and labels (60000 examples) and
    # write them out in CNTK text format.
    train = ut.load('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
                    'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 60000)
    print ('Writing train text file...')
    ut.savetxt(r'./Train-28x28_cntk_text.txt', train)
    print ('Done.')
    # Same for the 10000-example test set.
    test = ut.load('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
                   'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', 10000)
    print ('Writing test text file...')
    ut.savetxt(r'./Test-28x28_cntk_text.txt', test)
    print ('Done.')

Просмотреть файл

@ -1,13 +1,18 @@
from __future__ import print_function
try:
from urllib.request import urlretrieve
except ImportError:
from urllib import urlretrieve
import sys import sys
import urllib.request
import gzip import gzip
import shutil
import os import os
import struct import struct
import numpy as np import numpy as np
def loadData(src, cimg): def loadData(src, cimg):
print ('Downloading ' + src) print ('Downloading ' + src)
gzfname, h = urllib.request.urlretrieve(src, './delete.me') gzfname, h = urlretrieve(src, './delete.me')
print ('Done.') print ('Done.')
try: try:
with gzip.open(gzfname) as gz: with gzip.open(gzfname) as gz:
@ -31,7 +36,7 @@ def loadData(src, cimg):
def loadLabels(src, cimg): def loadLabels(src, cimg):
print ('Downloading ' + src) print ('Downloading ' + src)
gzfname, h = urllib.request.urlretrieve(src, './delete.me') gzfname, h = urlretrieve(src, './delete.me')
print ('Done.') print ('Done.')
try: try:
with gzip.open(gzfname) as gz: with gzip.open(gzfname) as gz:
@ -49,29 +54,16 @@ def loadLabels(src, cimg):
os.remove(gzfname) os.remove(gzfname)
return res.reshape((cimg, 1)) return res.reshape((cimg, 1))
def load(dataSrc, labelsSrc, cimg): def load(dataSrc, labelsSrc, cimg):
data = loadData(dataSrc, cimg) data = loadData(dataSrc, cimg)
labels = loadLabels(labelsSrc, cimg) labels = loadLabels(labelsSrc, cimg)
return np.hstack((data, labels)) return np.hstack((data, labels))
def savetxt(filename, ndarray): def savetxt(filename, ndarray):
with open(filename, 'w', encoding="ascii") as f: with open(filename, 'w') as f:
labels = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str))) labels = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))
for row in ndarray: for row in ndarray:
row_str = row.astype(str) row_str = row.astype(str)
label_str = labels[row[-1]] label_str = labels[row[-1]]
feature_str = ' '.join(row_str[:-1]) feature_str = ' '.join(row_str[:-1])
f.write('|labels {} |features {}\n'.format(label_str, feature_str)) f.write('|labels {} |features {}\n'.format(label_str, feature_str))
if __name__ == "__main__":
train = load('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 60000)
print ('Writing train text file...')
savetxt(r'./../Data/Train-28x28_cntk_text.txt', train)
print ('Done.')
test = load('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', 10000)
print ('Writing test text file...')
savetxt(r'./../Data/Test-28x28_cntk_text.txt', test)
print ('Done.')

Просмотреть файл

@ -0,0 +1,188 @@
# Fast-RCNN configuration for CNTK
# For algorithm and details see http://arxiv.org/abs/1504.08083

# Overview:
# The Fast-RCNN algorithm uses a DNN that takes as inputs a set of images
# and for each image a set of ROIs (Regions of interest). It first computes
# a convolutional feature map for the entire image using a series
# of convolutional layers (usually from a pretrained network). Then it
# employs ROI pooling to crop out the part of the conv feature map
# that corresponds to an ROI and resizes it to the input size expected
# by the following layer (usually a set of pretrained fully connected layers).
# Classification error and evaluation criterion are computed for each ROI.

command = Train:Test
#command = Write

deviceId = "Auto"
precision = "float"
parallelTrain = "false"
traceLevel = 1

rootDir = "."
dataDir = "$rootDir$/data/"
outputDir = "$rootDir$/Output"

modelPath = "$outputDir$/Fast-RCNN"
stderr = "$outputDir$/Fast-RCNN.log"

# Input geometry and ROI counts. The *Dim values are the flattened
# per-image sizes consumed by the text-format deserializers below.
ImageH = 1000
ImageW = 1000
ImageC = 3
NumLabels = 21

NumTrainROIs = 64
TrainROIDim = 256 # $NumTrainROIs$ * 4
TrainROILabelDim = 1344 # $NumTrainROIs$ * $NumLabels$

NumTestROIs = 200
TestROIDim = 800 # $NumTestROIs$ * 4
TestROILabelDim = 4200 # $NumTestROIs$ * $NumLabels$

# For training we load a pretrained AlexNet model (AlexNet.89) and clone three parts of it.
# For the first part (up to pool1) we keep the weights fixed. The middle part contains the
# remaining convolutional and pooling layers and the last part are the FC layers.
# In the model we apply the first two cloned parts, then an ROI pooling layer and
# finally the pretrained FC layers followed by a new FC layer that maps to the new
# label dimensionality of 21 classes.
# The inputs are images (1000 x 1000 x 3), ROIs (64 ROIs x 4 coordinates (x, y, w, h))
# and ground truth labels per ROI (64 ROIs x 21 classes).
# Training: fine-tune the cloned AlexNet parts plus a fresh 21-class FC head.
Train = {
    action = "train"

    BrainScriptNetworkBuilder = {
        imageShape = $ImageH$:$ImageW$:$ImageC$ # 1000:1000:3
        labelShape = $NumLabels$:$NumTrainROIs$ # 21:64
        ROIShape = 4:$NumTrainROIs$ # 4:64

        # Clone three parts of the pretrained AlexNet:
        # frozen conv stem (up to pool1), trainable conv body, pretrained FC stack.
        network = BS.Network.Load ("AlexNet.89")
        pool1 = BS.Network.CloneFunction(network.features, network.pool1, parameters = "constant")
        convLayers = BS.Network.CloneFunction(network.pool1, network.conv5_y)
        fcLayers = BS.Network.CloneFunction(network.pool3, network.h2_d)

        model (features, rois) = {
            featNorm = features - 114                     # subtract mean pixel value
            pool1Out = pool1 (featNorm)
            conv5Out = convLayers (pool1Out)
            roiOut = ROIPooling (conv5Out, rois, (6:6))   # crop + resize conv map per ROI
            fcOut = fcLayers (roiOut)
            W = ParameterTensor{(21:4096)}                # new FC head: 21 classes
            b = ParameterTensor{21, init = 'zero'}
            z = W * fcOut + b
        }.z

        features = Input {imageShape}
        roiLabels = Input {labelShape}
        rois = Input {ROIShape}

        z = model (features, rois)
        # axis = 1 computes the criteria per ROI (first axis of labelShape).
        ce = CrossEntropyWithSoftmax(roiLabels, z, axis = 1)
        errs = ClassificationError(roiLabels, z, axis = 1)

        featureNodes = (features:rois)
        labelNodes = (roiLabels)
        criterionNodes = (ce)
        evaluationNodes = (errs)
        outputNodes = (z)
    }

    SGD = {
        epochSize = 0
        minibatchSize = 2
        maxEpochs = 15
        learningRatesPerSample = 0.00005
        momentumAsTimeConstant = 0*5:1024 # was: 0.9 per MB
        L2RegWeight = 0.0001
        dropoutRate = 0.5

        numMBsToShowResult = 50
    }

    reader = {
        randomize = false
        verbosity = 2
        deserializers = ({
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = "$dataDir$/tv2012pad.rois.txt"
            input = { rois = { dim = $TrainROIDim$ ; format = "dense" } }
        }:{
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = "$dataDir$/tv2012pad.roilabels.txt"
            input = { roiLabels = { dim = $TrainROILabelDim$ ; format = "dense" } }
        }:{
            type = "ImageDeserializer" ; module = "ImageReader"
            file = "$dataDir$/tv2012pad.txt"
            input = {
                features = { transforms = (
                    # BUGFIX: height now uses $ImageH$ (was $ImageW$; identical today
                    # only because both dimensions are 1000).
                    { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
                    { type = "Transpose" }
                )}
                ignored = {labelDim = 1000}
            }
        })
    }
}
# For testing we load the trained Fast-RCNN model and modify the input size,
# such that the network accepts 200 ROIs per image. To this end we load and
# clone the entire network and define new inputs with the desired size
# corresponding to 200 ROIs.
Test = {
    action = "test"
    minibatchSize = 1

    # use this for write action
    # action = "write"
    # outputPath = "$OutputDir$/fastrcnnNetOutput"

    BrainScriptNetworkBuilder = {
        imageShape = $ImageH$:$ImageW$:$ImageC$ # 1000:1000:3
        labelShape = $NumLabels$:$NumTestROIs$ # 21:200
        ROIShape = 4:$NumTestROIs$ # 4:200

        # load the trained network and clone it with frozen parameters
        network = BS.Network.Load ("$modelPath$")
        clonedNet = BS.Network.CloneFunction ((network.features:network.rois), { z = network.z }, parameters = "constant")

        features = Input {imageShape}
        roiLabels = Input {labelShape}
        rois = Input {ROIShape}

        z = clonedNet(features, rois).z
        ce = CrossEntropyWithSoftmax (roiLabels, z, axis = 1)
        # NOTE(review): argument order differs from the Train block
        # (roiLabels, z there) — harmless for the error count, but worth unifying.
        errs = ClassificationError(z, roiLabels, axis = 1)

        featureNodes = (features:rois)
        labelNodes = (roiLabels)
        criterionNodes = (ce)
        evaluationNodes = (errs)
        outputNodes = (z)
    }

    reader = {
        randomize = false
        verbosity = 2
        deserializers = ({
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = "$dataDir$/test2007pad_all.rois.txt"
            input = { rois = { dim = $TestROIDim$ ; format = "dense" } }
        }:{
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = "$dataDir$/test2007pad_all.roilabels.txt"
            input = { roiLabels = { dim = $TestROILabelDim$ ; format = "dense" } }
        }:{
            type = "ImageDeserializer" ; module = "ImageReader"
            file = "$dataDir$/test2007pad_all.txt"
            input = {
                features = { transforms = (
                    # BUGFIX: height now uses $ImageH$ (was $ImageW$; identical today
                    # only because both dimensions are 1000).
                    { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
                    { type = "Transpose" }
                )}
                ignored = {labelDim = 1000}
            }
        })
    }
}

Просмотреть файл

@ -0,0 +1,115 @@
# Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>
command = trainNetwork:testNetwork

precision = "float"; traceLevel = 1 ; deviceId = "auto"

rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;

modelPath = "$outputDir$/Models/01_OneHidden"
#stderr = "$outputDir$/01_OneHidden_bs_out"

# TRAINING CONFIG
trainNetwork = {
    action = "train"

    BrainScriptNetworkBuilder = {
        imageShape = 28:28:1 # image dimensions, 1 channel only
        labelDim = 10 # number of distinct labels
        featScale = 1/256 # rescale pixels from [0,256) to [0,1)

        # This model returns multiple nodes as a record, which
        # can be accessed using .x syntax.
        model(x) = {
            s1 = x * featScale
            h1 = DenseLayer {200, activation=ReLU} (s1)
            z = LinearLayer {labelDim} (h1)
        }

        # inputs
        features = Input {imageShape}
        labels = Input {labelDim}

        # apply model to features
        out = model (features)

        # loss and error computation
        ce = CrossEntropyWithSoftmax (labels, out.z)
        errs = ClassificationError (labels, out.z)

        # declare special nodes
        featureNodes = (features)
        labelNodes = (labels)
        criterionNodes = (ce)
        evaluationNodes = (errs)
        outputNodes = (out.z)

        # Alternatively, you can use the Sequential keyword and write the model
        # as follows. We keep the previous format because EvalClientTest needs
        # to access the internal nodes, which is not doable yet with Sequential
        #
        # Scale{f} = x => Constant(f) .* x
        # model = Sequential (
        #     Scale {featScale} :
        #     DenseLayer {200} : ReLU :
        #     LinearLayer {labelDim}
        # )
        # # inputs
        # features = Input {imageShape}
        # labels = Input (labelDim)
        # # apply model to features
        # ol = model (features)
        # # loss and error computation
        # ce = CrossEntropyWithSoftmax (labels, ol)
        # errs = ClassificationError (labels, ol)
        # # declare special nodes
        # featureNodes = (features)
        # labelNodes = (labels)
        # criterionNodes = (ce)
        # evaluationNodes = (errs)
        # outputNodes = (ol)
    }

    SGD = {
        epochSize = 60000
        minibatchSize = 64
        maxEpochs = 10
        learningRatesPerSample = 0.01*5:0.005 # 0.01 for 5 epochs, then 0.005
        momentumAsTimeConstant = 0
        numMBsToShowResult = 500
    }

    reader = {
        readerType = "CNTKTextFormatReader"
        # See ../README.md for details on getting the data (Train-28x28_cntk_text.txt).
        file = "$DataDir$/Train-28x28_cntk_text.txt"
        input = {
            features = { dim = 784 ; format = "dense" }
            labels = { dim = 10 ; format = "dense" }
        }
    }
}

# TEST CONFIG
testNetwork = {
    action = "test"
    minibatchSize = 1024 # reduce this if you run out of memory

    reader = {
        readerType = "CNTKTextFormatReader"
        file = "$DataDir$/Test-28x28_cntk_text.txt"
        input = {
            features = { dim = 784 ; format = "dense" }
            labels = { dim = 10 ; format = "dense" }
        }
    }
}

Просмотреть файл

@ -1,5 +1,5 @@
# Parameters can be overwritten on the command line # Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile rootDir=../.. # for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add # For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder> # currentDirectory=$(SolutionDir)/<path to corresponding data folder>
@ -7,11 +7,11 @@ command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto" precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = ".." ; configDir = "$rootDir$/Config" ; dataDir = "$rootDir$/Data" ; rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "$rootDir$/Output" ; outputDir = "./Output" ;
modelPath = "$outputDir$/Models/02_Convolution" modelPath = "$outputDir$/Models/02_OneConv"
stderr = "$outputDir$/02_Convolution_bs_out" #stderr = "$outputDir$/02_OneConv_bs_out"
# TRAINING CONFIG # TRAINING CONFIG
trainNetwork = { trainNetwork = {
@ -27,15 +27,13 @@ trainNetwork = {
Scale {featScale} : Scale {featScale} :
ConvolutionalLayer {16, (5:5), pad = true} : ReLU : ConvolutionalLayer {16, (5:5), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} : MaxPoolingLayer {(2:2), stride=(2:2)} :
ConvolutionalLayer {32, (5:5), pad = true} : ReLU : DenseLayer {64} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
DenseLayer {128, activation=Sigmoid} :
LinearLayer {labelDim} LinearLayer {labelDim}
) )
# inputs # inputs
features = Input {imageShape} features = Input {imageShape}
labels = Input {labelDim} labels = Input (labelDim)
# apply model to features # apply model to features
ol = model (features) ol = model (features)
@ -57,8 +55,7 @@ trainNetwork = {
minibatchSize = 64 minibatchSize = 64
maxEpochs = 15 maxEpochs = 15
learningRatesPerSample = 0.001*5:0.0005 learningRatesPerSample = 0.001*5:0.0005
momentumAsTimeConstant = 0*5:1024 momentumAsTimeConstant = 0
numMBsToShowResult = 500 numMBsToShowResult = 500
} }
@ -75,7 +72,7 @@ trainNetwork = {
# TEST CONFIG # TEST CONFIG
testNetwork = { testNetwork = {
action = test action = "test"
minibatchSize = 1024 # reduce this if you run out of memory minibatchSize = 1024 # reduce this if you run out of memory
reader = { reader = {

Просмотреть файл

@ -7,11 +7,11 @@ command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto" precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = ".." ; configDir = "$rootDir$/Config" ; dataDir = "$rootDir$/Data" ; rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "$rootDir$/Output" ; outputDir = "./Output" ;
modelPath = "$outputDir$/Models/01_OneHidden" modelPath = "$outputDir$/Models/03_OneConvDropout"
stderr = "$outputDir$/01_OneHidden_bs_out" #stderr = "$outputDir$/03_OneConvDropout_bs_out"
# TRAINING CONFIG # TRAINING CONFIG
trainNetwork = { trainNetwork = {
@ -21,41 +21,42 @@ trainNetwork = {
imageShape = 28:28:1 # image dimensions, 1 channel only imageShape = 28:28:1 # image dimensions, 1 channel only
labelDim = 10 # number of distinct labels labelDim = 10 # number of distinct labels
featScale = 1/256 featScale = 1/256
Scale{f} = x => Constant(f) .* x
# This model returns multiple nodes as a record, which model = Sequential (
# can be accessed using .x syntax. Scale {featScale} :
model(x) = { ConvolutionalLayer {16, (5:5), pad = true} : ReLU :
s1 = x * featScale MaxPoolingLayer {(2:2), stride=(2:2)} : Dropout :
h1 = DenseLayer {200, activation=Sigmoid} (s1) DenseLayer {64} : ReLU :
z = LinearLayer {labelDim} (h1) LinearLayer {labelDim}
} )
# inputs # inputs
features = Input {imageShape} features = Input {imageShape}
labels = Input (labelDim) labels = Input (labelDim)
# apply model to features # apply model to features
out = model (features) ol = model (features)
# loss and error computation # loss and error computation
ce = CrossEntropyWithSoftmax (labels, out.z) ce = CrossEntropyWithSoftmax (labels, ol)
errs = ClassificationError (labels, out.z) errs = ClassificationError (labels, ol)
# declare special nodes # declare special nodes
featureNodes = (features) featureNodes = (features)
labelNodes = (labels) labelNodes = (labels)
criterionNodes = (ce) criterionNodes = (ce)
evaluationNodes = (errs) evaluationNodes = (errs)
outputNodes = (out.z) outputNodes = (ol)
} }
SGD = { SGD = {
epochSize = 60000 epochSize = 60000
minibatchSize = 64 minibatchSize = 64
maxEpochs = 30 maxEpochs = 15
learningRatesPerSample = 0.01*5:0.005 learningRatesPerSample = 0.001*5:0.0005
momentumAsTimeConstant = 0 momentumAsTimeConstant = 0
dropoutRate = 0.5
numMBsToShowResult = 500 numMBsToShowResult = 500
} }

Просмотреть файл

@ -1,5 +1,5 @@
# Parameters can be overwritten on the command line # Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile rootDir=../.. # for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add # For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder> # currentDirectory=$(SolutionDir)/<path to corresponding data folder>
@ -7,11 +7,11 @@ command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto" precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = ".." ; configDir = "$rootDir$/Config" ; dataDir = "$rootDir$/Data" ; rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "$rootDir$/Output" ; outputDir = "./Output" ;
modelPath = "$outputDir$/Models/03_ConvBatchNorm" modelPath = "$outputDir$/Models/04_OneConvBN"
stderr = "$outputDir$/03_ConvBatchNorm_bs_out" #stderr = "$outputDir$/04_OneConvBN_bs_out"
# TRAINING CONFIG # TRAINING CONFIG
trainNetwork = { trainNetwork = {
@ -24,20 +24,22 @@ trainNetwork = {
Scale{f} = x => Constant(f) .* x Scale{f} = x => Constant(f) .* x
# define a custom layer with 5x5 convolution, batch norm, relu and 2x2 max pooling # define a custom layer with 5x5 convolution, batch norm, relu and 2x2 max pooling
ConvBnReluPoolLayer {outChannels} = Sequential ( ConvBnReluPoolLayer {outChannels, filterShape} = Sequential (
ConvolutionalLayer {outChannels, (5:5), pad=true, bias=false} : ConvolutionalLayer {outChannels, filterShape, pad=true, bias=false} :
BatchNormalizationLayer {spatialRank = 2} : BatchNormalizationLayer {spatialRank = 2} :
ReLU : ReLU :
MaxPoolingLayer {(2:2), stride = (2:2)} MaxPoolingLayer {(2:2), stride = (2:2)}
) )
DenseBnReluLayer {outDim} = Sequential (
LinearLayer {outDim} :
BatchNormalizationLayer {spatialRank = 1} : ReLU
)
model = Sequential ( model = Sequential (
Scale {featScale} : Scale {featScale} :
ConvBnReluPoolLayer {16} : ConvBnReluPoolLayer {16, (5:5)} :
ConvBnReluPoolLayer {32} : DenseBnReluLayer {64} :
LinearLayer {128} :
BatchNormalizationLayer {} :
ReLU :
LinearLayer {labelDim} LinearLayer {labelDim}
) )
@ -63,10 +65,9 @@ trainNetwork = {
SGD = { SGD = {
epochSize = 60000 epochSize = 60000
minibatchSize = 64 minibatchSize = 64
maxEpochs = 3 maxEpochs = 10
learningRatesPerSample = 0.02:0.005 learningRatesPerSample = 0.01*5:0.001
momentumAsTimeConstant = 0 momentumAsTimeConstant = 0
numMBsToShowResult = 500 numMBsToShowResult = 500
} }
@ -83,7 +84,7 @@ trainNetwork = {
# TEST CONFIG # TEST CONFIG
testNetwork = { testNetwork = {
action = test action = "test"
minibatchSize = 1024 # reduce this if you run out of memory minibatchSize = 1024 # reduce this if you run out of memory
reader = { reader = {

Просмотреть файл

@ -0,0 +1,86 @@
# Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>
command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/05_OneConvRegr"
#stderr = "$outputDir$/05_OneConvRegr_bs_out"
# TRAINING CONFIG
trainNetwork = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 28:28:1 # image dimensions, 1 channel only
labelDim = 10 # number of distinct labels
featScale = 1/256
Scale{f} = x => Constant(f) .* x
model = Sequential (
Scale {featScale} :
ConvolutionalLayer {16, (5:5), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
DenseLayer {64} : ReLU :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
z = model (features)
# loss and error computation
sqErr = SquareError (labels, z)
rmse = Sqrt (sqErr / labelDim)
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (rmse)
evaluationNodes = (rmse)
outputNodes = (z)
}
SGD = {
epochSize = 0
minibatchSize = 64
maxEpochs = 15
learningRatesPerSample = 0.001*5:0.0005
momentumAsTimeConstant = 1024
numMBsToShowResult = 500
}
reader = {
readerType = "CNTKTextFormatReader"
# See ../REAMDE.md for details on getting the data (Train-28x28_cntk_text.txt).
file = "$DataDir$/Train-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}
# TEST CONFIG
testNetwork = {
action = "test"
minibatchSize = 1024 # reduce this if you run out of memory
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Test-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}

Просмотреть файл

@ -0,0 +1,103 @@
# CNTK Examples: Image/Getting Started
## Overview
|Data: |The MNIST dataset (http://yann.lecun.com/exdb/mnist/) of handwritten digits.
|:---------|:---
|Purpose |This folder contains a number of examples that demonstrate the usage of BrainScript to define basic networks for deep learning on image tasks.
|Network |Simple feed-forward networks including dense layers, convolution layers, dropout and batch normalization for classification and regression tasks.
|Training |Stochastic gradient descent both with and without momentum.
|Comments |There are five configuration files, details are provided below.
## Running the example
### Getting the data
These examples use the MNIST dataset to demonstrate various network configurations. The MNIST dataset is not included in the CNTK distribution but can be easily downloaded and converted by following the instructions in [DataSets/MNIST](../DataSets/MNIST). We recommend keeping the downloaded data in the respective folder, as the configuration files in this folder assume that location by default.
### Setup
Compile the sources to generate the cntk executable (not required if you downloaded the binaries).
__Windows:__ Add the folder of the cntk executable to your path
(e.g. `set PATH=%PATH%;c:/src/cntk/x64/Release/;`)
or prefix the call to the cntk executable with the corresponding folder.
__Linux:__ Add the folder of the cntk executable to your path
(e.g. `export PATH=$PATH:$HOME/src/cntk/build/Release/bin/`)
or prefix the call to the cntk executable with the corresponding folder.
### Run
Run the example from the current folder (recommended) using:
`cntk configFile=01_OneHidden.cntk`
or run from any folder and specify the `GettingStarted` folder as the `currentDirectory`,
e.g. running from the `Image` folder using:
`cntk configFile=GettingStarted/01_OneHidden.cntk currentDirectory=GettingStarted`
An Output folder will be created in the `Image/GettingStarted` folder, which is used to store intermediate results and trained models.
## Details
There are five cntk configuration files in the current folder. These cntk configuration files use BrainScript, a custom script language for CNTK. To learn more about BrainScript, please follow the introduction of [BrainScript Basic Concepts](https://github.com/Microsoft/CNTK/wiki/BS-Basic-concepts).
### 01_OneHidden.cntk
This is a simple, one hidden layer network that produces a `1.76%` error rate. Since this model does not assume any spatial relationships between the pixels, it is often referred to as "permutation invariant".
To run this example, use the following command:
`cntk configFile=01_OneHidden.cntk`
In this example, the MNIST images are first normalized to the range `[0,1)`, followed by a single dense hidden layer with 200 nodes. A [rectified linear unit (ReLU)](http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_NairH10.pdf) activation function is added for nonlinearity. Afterwards, another dense linear layer is added to generate the output label. The training adopts cross entropy as the cost function after softmax.
In the `SGD` block, `learningRatesPerSample = 0.01*5:0.005` indicates using 0.01 as learning rate per sample for 5 epochs and then 0.005 for the rest. More details about the SGD block are explained [here](https://github.com/Microsoft/CNTK/wiki/SGD-Block).
The MNIST data is loaded with a simple CNTK text format reader. The train and test datasets are converted by running the Python script in [DataSets/MNIST](../DataSets/MNIST). For more information on the reader block, please refer [here](https://github.com/Microsoft/CNTK/wiki/Reader-block).
### 02_OneConv.cntk
In the second example, we add a convolution layer to the network. Convolution layers were inspired by biological process, and has been extremely popular in image-related tasks, where neighboring pixels have high correlation. One of the earliest papers on convolution neural networks can be found [here](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf).
To run this example, use the following command:
`cntk configFile=02_OneConv.cntk`
After normalization, a convolution layer with `16` kernels at size `(5,5)` is added, followed by a ReLU nonlinearity. Then, we perform max pooling on the output feature map, with size `(2,2)` and stride `(2,2)`. A dense layer of 64 hidden nodes is then added, followed by another ReLU, and another dense layer to generate the output. This network achieves `1.22%` error rate, which is better than the previous network.
In practice, one would be stacking multiple convolution layers to improve classification accuracy. State-of-the-art convolution neural networks can achieve lower than 0.5% error rate on MNIST. Interested readers can find more examples in [Classification/ConvNet](../Classification/ConvNet).
### 03_OneConvDropout.cntk
In the third example, we demonstrate the use of dropout layers. Dropout is a network regularization technique that helps combat overfitting, in particular when the network contains many parameters. Dropout, together with ReLU activation, are the two key techniques that enabled Alex Krizhevsky, Ilya Sutskever, and Geoffrey Hinton to win the ILSVRC-2012 competition, which has arguably changed the course of computer vision research. Their paper can be found [here](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf).
To run this example, use the following command:
`cntk configFile=03_OneConvDropout.cntk`
Compared with the previous example, we added a dropout layer after max pooling. Dropout can also be added after dense layer if needed. The dropout rate is specified in the SGD block, as `dropoutRate = 0.5`.
With dropout, the accuracy of the network improves slightly to `1.10%` error rate.
### 04_OneConvBN.cntk
In the fourth example, we add [batch normalization](https://arxiv.org/abs/1502.03167) to the network. Batch normalization was designed to address the internal covariate shift problem caused by input and parameter changes during training. The technique has been proven to be very useful in training very deep and complicated networks.
In this example, we simply added a batch normalization layer to the `02_OneConv.cntk` network. To run this example, use the following command:
`cntk configFile=04_OneConvBN.cntk`
The network achieves around `0.96%` error rate, which is better than the previous examples. Due to the small training dataset and the extremely simple network, we have to stop the training early (10 epochs) in order to avoid overfitting.
This cntk configuration file also demonstrates the use of custom layer definition in BrainScript. Note `ConvBnReluPoolLayer` and `DenseBnReluLayer` are both custom layers that contains different basic layer types.
### 05_OneConvRegr.cntk
In the fifth example, we show how CNTK can be used to perform a regression task. To simplify our task and not introduce any new datasets, we assume the digit labels of MNIST is a regression target rather than a classification target. We then reuse the same network architecture in `02_OneConv`, only to replace the cost function with squared error. To run this example, use the following command:
`cntk configFile=05_OneConvRegr.cntk`
The trained network achieves root-mean-square error (RMSE) of 0.0039. To see more sophisticated examples on regression tasks, please refer to [Regression](../Regression).

Просмотреть файл

@ -1,77 +0,0 @@
import sys
import urllib
import gzip
import shutil
import os
import struct
import numpy as np
def loadData(src, cimg):
    """Download a gzipped MNIST image file from 'src' and return the pixels.

    Validates the magic number, the entry count ('cimg') and the 28x28 image
    geometry, then returns a (cimg, 784) uint8 numpy array (one image per row).
    The temporary download './delete.me' is always removed afterwards.
    """
    # urlretrieve moved in Python 3; this fallback keeps the code 2/3 compatible
    # (the original 'urllib.urlretrieve' is Python-2-only).
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve
    print('Downloading ' + src)
    gzfname, h = urlretrieve(src, './delete.me')
    print('Done.')
    try:
        with gzip.open(gzfname) as gz:
            n = struct.unpack('I', gz.read(4))
            # Read magic number.
            if n[0] != 0x3080000:
                raise Exception('Invalid file: unexpected magic number.')
            # Read number of entries.
            n = struct.unpack('>I', gz.read(4))[0]
            if n != cimg:
                raise Exception('Invalid file: expected {0} entries.'.format(cimg))
            crow = struct.unpack('>I', gz.read(4))[0]
            ccol = struct.unpack('>I', gz.read(4))[0]
            if crow != 28 or ccol != 28:
                raise Exception('Invalid file: expected 28 rows/cols per image.')
            # Read data (np.frombuffer replaces the deprecated np.fromstring).
            res = np.frombuffer(gz.read(cimg * crow * ccol), dtype=np.uint8)
    finally:
        os.remove(gzfname)
    return res.reshape((cimg, crow * ccol))
def loadLabels(src, cimg):
    """Download a gzipped MNIST label file from 'src' and return the labels.

    Validates the magic number and the entry count ('cimg'), then returns a
    (cimg, 1) uint8 numpy array of digit labels. The temporary download
    './delete.me' is always removed afterwards.
    """
    # Fixes Python-2-only constructs: 'print x' statements are a SyntaxError
    # on Python 3, and urlretrieve moved to urllib.request.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve
    print('Downloading ' + src)
    gzfname, h = urlretrieve(src, './delete.me')
    print('Done.')
    try:
        with gzip.open(gzfname) as gz:
            n = struct.unpack('I', gz.read(4))
            # Read magic number.
            if n[0] != 0x1080000:
                raise Exception('Invalid file: unexpected magic number.')
            # Read number of entries.
            n = struct.unpack('>I', gz.read(4))
            if n[0] != cimg:
                raise Exception('Invalid file: expected {0} rows.'.format(cimg))
            # Read labels (np.frombuffer replaces the deprecated np.fromstring).
            res = np.frombuffer(gz.read(cimg), dtype=np.uint8)
    finally:
        os.remove(gzfname)
    return res.reshape((cimg, 1))
def load(dataSrc, labelsSrc, cimg):
    # Fetch the image and label files for one split and join them into a
    # single matrix: each row is the 784 pixel values with the digit label
    # appended as the final column.
    pixel_rows = loadData(dataSrc, cimg)
    label_column = loadLabels(labelsSrc, cimg)
    return np.hstack((pixel_rows, label_column))
def savetxt(filename, ndarray):
    """Write 'ndarray' to 'filename' in CNTK text format.

    Each row of 'ndarray' must hold the pixel features followed by the digit
    label (0-9) in the last column. Each output line looks like
    '|labels 0 0 1 0 0 0 0 0 0 0 |features 3 255 ...' with the label one-hot.
    """
    with open(filename, 'w') as f:
        # list() so the one-hot rows stay indexable on Python 3 as well:
        # there, map() returns a lazy iterator and labels[row[-1]] would fail.
        labels = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))
        for row in ndarray:
            row_str = row.astype(str)
            label_str = labels[row[-1]]
            feature_str = ' '.join(row_str[:-1])
            f.write('|labels {} |features {}\n'.format(label_str, feature_str))
if __name__ == "__main__":
    # Convert both MNIST splits into CNTK text format under ../Data.
    # Fixed: the bare Python-2 'print ...' statements were a SyntaxError on
    # Python 3; print() calls behave identically on both versions.
    train = load('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
                 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 60000)
    print('Writing train text file...')
    savetxt(r'./../Data/Train-28x28_cntk_text.txt', train)
    print('Done.')
    test = load('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
                'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', 10000)
    print('Writing test text file...')
    savetxt(r'./../Data/Test-28x28_cntk_text.txt', test)
    print('Done.')

Просмотреть файл

@ -1,10 +0,0 @@
0
1
2
3
4
5
6
7
8
9

Просмотреть файл

@ -1,85 +0,0 @@
# CNTK example: MNIST
## Overview
|Data: |The MNIST database (http://yann.lecun.com/exdb/mnist/) of handwritten digits.
|:---------|:---
|Purpose |This example demonstrates usage of the NDL (Network Description Language) to define networks.
|Network |NDLNetworkBuilder, simple feed forward and convolutional networks, cross entropy with softmax.
|Training |Stochastic gradient descent both with and without momentum.
|Comments |There are four config files, details are provided below.
## Running the example
### Getting the data
The MNIST dataset is not included in the CNTK distribution but can be easily
downloaded and converted by running the following command from the 'AdditionalFiles' folder:
`python mnist_convert.py`
The script will download all required files and convert them to CNTK-supported format.
The resulting files (Train-28x28_cntk_text.txt and Test-28x28_cntk_text.txt) will be stored in the 'Data' folder.
In case you don't have Python installed, there are 2 options:
1. Download and install latest version of Python 2.7 from: https://www.python.org/downloads/
Then install the numpy package by following instruction from: http://www.scipy.org/install.html#individual-packages
2. Alternatively install the Python Anaconda distribution which contains most of the
popular Python packages including numpy: http://continuum.io/downloads
### Setup
Compile the sources to generate the cntk executable (not required if you downloaded the binaries).
__Windows:__ Add the folder of the cntk executable to your path
(e.g. `set PATH=%PATH%;c:/src/cntk/x64/Debug/;`)
or prefix the call to the cntk executable with the corresponding folder.
__Linux:__ Add the folder of the cntk executable to your path
(e.g. `export PATH=$PATH:$HOME/src/cntk/build/debug/bin/`)
or prefix the call to the cntk executable with the corresponding folder.
### Run
Run the example from the Image/MNIST/Data folder using:
`cntk configFile=../Config/01_OneHidden_ndl_deprecated.cntk`
or run from any folder and specify the Data folder as the `currentDirectory`,
e.g. running from the Image/MNIST folder using:
`cntk configFile=Config/01_OneHidden_ndl_deprecated.cntk currentDirectory=Data`
The output folder will be created inside Image/MNIST/.
## Details
### Config files
There are four config files and the corresponding network description files in the 'Config' folder:
1. 01_OneHidden.ndl is a simple, one hidden layer network that produces 2.3% of error.
To run the sample, navigate to the Data folder and run the following command:
`cntk configFile=../Config/01_OneHidden_ndl_deprecated.cntk`
2. 02_Convolution.ndl is a more interesting, convolutional network which has 2 convolutional and 2 max pooling layers.
The network produces 0.87% of error after training for about 2 minutes on GPU.
To run the sample, navigate to the Data folder and run the following command:
`cntk configFile=../Config/02_Convolution_ndl_deprecated.cntk`
3. 03_ConvBatchNorm.ndl is almost identical to 02_Convolution.ndl
except that it uses batch normalization for the convolutional and fully connected layers.
As a result, it achieves around 0.8% of error after training for just 2 epochs (and less than 30 seconds).
To run the sample, navigate to the Data folder and run the following command:
`cntk configFile=../Config/03_ConvBatchNorm_ndl_deprecated.cntk`
4. 04_DeConv.ndl illustrates the usage of Deconvolution and Unpooling. It is a network with one Convolution, one Pooling, one Unpooling and one Deconvolution layer. In fact it is an auto-encoder network where Rectified Linear Unit (ReLU) or Sigmoid layer is now replaced with Convolutional ReLU (for encoding) and Deconvolutional ReLU (for decoding) layers. The network goal is to reconstruct the original signal, with Mean Squared Error (MSE) used to minimize the reconstruction error. Generally such networks are used in semantic segmentation.
To run the sample, navigate to the Data folder and run the following command:
`cntk configFile=../Config/04_DeConv_ndl_deprecated.cntk`
For more details, refer to .ndl and the corresponding .cntk files.
### Additional files
The 'AdditionalFiles' folder contains the python script to download and convert the data.

Просмотреть файл

@ -1,80 +0,0 @@
import os
import sys
import struct
import cPickle as cp
from PIL import Image
import numpy as np
import xml.etree.cElementTree as et
import xml.dom.minidom
imgSize = 32  # CIFAR-10 images are 32x32 pixels

def saveImage(fname, data, label, mapFile, regrFile, pad, **key_parms):
    """Save one CIFAR-10 record as a PNG and log it for the CNTK readers.

    fname     -- output image path.
    data      -- flat pixel vector in CHW channel order (3 * imgSize * imgSize values).
    label     -- integer class label, written to mapFile.
    mapFile   -- open text handle; receives one "<fname><TAB><label>" line per image.
    regrFile  -- open text handle; receives per-channel mean RGB in [0, 1]
                 as "|regrLabels" lines for the regression example.
    pad       -- border width in pixels; the border is filled with value 128.
    key_parms -- optional 'mean' ndarray, accumulated in place with the
                 unpadded pixel data so the caller can build a dataset mean.
    """
    # data in CIFAR-10 dataset is in CHW format.
    pixData = data.reshape((3, imgSize, imgSize))
    if ('mean' in key_parms):
        # Accumulate the (unpadded) image into the caller-supplied running sum.
        key_parms['mean'] += pixData
    if pad > 0:
        pixData = np.pad(pixData, ((0, 0), (pad, pad), (pad, pad)), mode='constant', constant_values=128) # can also use mode='edge'
    img = Image.new('RGB', (imgSize + 2 * pad, imgSize + 2 * pad))
    pixels = img.load()
    # Copy the channel-major planes into the HWC pixel-access object.
    for x in range(img.size[0]):
        for y in range(img.size[1]):
            pixels[x, y] = (pixData[0][y][x], pixData[1][y][x], pixData[2][y][x])
    img.save(fname)
    mapFile.write("%s\t%d\n" % (fname, label))
    # compute per channel mean and store for regression example
    # NOTE(review): the mean is computed AFTER padding, so the 128-valued
    # border is included when pad > 0 -- confirm this is intended.
    channelMean = np.mean(pixData, axis=(1,2))
    regrFile.write("|regrLabels\t%f\t%f\t%f\n" % (channelMean[0]/255.0, channelMean[1]/255.0, channelMean[2]/255.0))
def saveMean(fname, data, img_size=32):
    """Write a dataset mean image to an OpenCV-storage XML file.

    fname    -- output .xml path.
    data     -- mean-image array with img_size * img_size * 3 elements.
    img_size -- image side length; the default 32 matches the module-level
                imgSize used by the rest of this script (the parameter
                generalizes the previously hard-coded global).

    The file is written once with ElementTree, then re-parsed and
    pretty-printed with minidom so it is human-readable.
    """
    root = et.Element('opencv_storage')
    et.SubElement(root, 'Channel').text = '3'
    et.SubElement(root, 'Row').text = str(img_size)
    et.SubElement(root, 'Col').text = str(img_size)
    meanImg = et.SubElement(root, 'MeanImg', type_id='opencv-matrix')
    et.SubElement(meanImg, 'rows').text = '1'
    et.SubElement(meanImg, 'cols').text = str(img_size * img_size * 3)
    et.SubElement(meanImg, 'dt').text = 'f'
    # Flatten to a single space-separated row in scientific notation.
    et.SubElement(meanImg, 'data').text = ' '.join(['%e' % n for n in np.reshape(data, (img_size * img_size * 3))])
    tree = et.ElementTree(root)
    tree.write(fname)
    # Re-parse and rewrite pretty-printed.
    x = xml.dom.minidom.parse(fname)
    with open(fname, 'w') as f:
        f.write(x.toprettyxml(indent = '  '))
if __name__ == "__main__":
    # Entry point: convert the pickled CIFAR-10 python batches found in the
    # given directory into individual PNGs plus map files (for ImageReader)
    # and regression-label files, and write the dataset mean image.
    if len(sys.argv) != 2:
        print "Usage: CifarConverter.py <path to CIFAR-10 dataset directory>\nCIFAR-10 dataset (Python version) can be downloaded from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
        sys.exit(1)
    rootDir = sys.argv[1]
    trainDir = os.path.join(rootDir, os.path.join('data', 'train'))
    if not os.path.exists(trainDir):
        os.makedirs(trainDir)
    testDir = os.path.join(rootDir, os.path.join('data', 'test'))
    if not os.path.exists(testDir):
        os.makedirs(testDir)
    data = {}
    dataMean = np.zeros((3, imgSize, imgSize)) # mean is in CHW format.
    # Training set: five pickled batches of 10000 images each; pad by 4 and
    # accumulate the running mean in dataMean.
    with open(os.path.join(rootDir, 'train_map.txt'), 'w') as mapFile:
        with open(os.path.join(rootDir, 'train_regrLabels.txt'), 'w') as regrFile:
            for ifile in range(1, 6):
                with open(os.path.join(rootDir, 'data_batch_' + str(ifile)), 'rb') as f:
                    data = cp.load(f)
                for i in range(10000):
                    fname = os.path.join(trainDir, ('%05d.png' % (i + (ifile - 1) * 10000)))
                    saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, regrFile, 4, mean=dataMean)
    # 50 * 1000 == 50000 images total (5 batches x 10000).
    dataMean = dataMean / (50 * 1000)
    saveMean(os.path.join(rootDir, 'CIFAR-10_mean.xml'), dataMean)
    # Test set: single batch, no padding, no mean accumulation.
    with open(os.path.join(rootDir, 'test_map.txt'), 'w') as mapFile:
        with open(os.path.join(rootDir, 'test_regrLabels.txt'), 'w') as regrFile:
            with open(os.path.join(rootDir, 'test_batch'), 'rb') as f:
                data = cp.load(f)
            for i in range(10000):
                fname = os.path.join(testDir, ('%05d.png' % i))
                saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, regrFile, 0)

Просмотреть файл

@ -1,73 +0,0 @@
import os
import sys
import struct
import pickle as cp
from PIL import Image
import numpy as np
import xml.etree.cElementTree as et
import xml.dom.minidom
imgSize = 32  # CIFAR-10 images are 32x32 pixels

def saveImage(fname, data, label, mapFile, pad, **key_parms):
    """Write one CIFAR-10 record to fname as an RGB PNG and append a
    "<path><TAB><label>" line to mapFile.

    An optional keyword argument 'mean' receives the unpadded pixel data
    added in place, letting the caller accumulate a dataset mean across
    calls.  When pad > 0, the image gets a constant-valued (128) border
    of that width on every side.
    """
    # CIFAR-10 stores pixels channel-major (CHW).
    chw = data.reshape((3, imgSize, imgSize))
    if 'mean' in key_parms:
        key_parms['mean'] += chw
    if pad > 0:
        chw = np.pad(chw, ((0, 0), (pad, pad), (pad, pad)), mode='constant', constant_values=128) # can also use mode='edge'
    side = imgSize + 2 * pad
    canvas = Image.new('RGB', (side, side))
    px = canvas.load()
    width, height = canvas.size
    # Transpose the CHW planes into the HWC pixel-access object, pixel by pixel.
    for col in range(width):
        for row in range(height):
            px[col, row] = (chw[0][row][col], chw[1][row][col], chw[2][row][col])
    canvas.save(fname)
    mapFile.write("%s\t%d\n" % (fname, label))
def saveMean(fname, data, img_size=32):
    """Write a dataset mean image to an OpenCV-storage XML file.

    fname    -- output .xml path.
    data     -- mean-image array with img_size * img_size * 3 elements.
    img_size -- image side length; the default 32 matches the module-level
                imgSize used by the rest of this script (the parameter
                generalizes the previously hard-coded global).

    The file is written once with ElementTree, then re-parsed and
    pretty-printed with minidom so it is human-readable.
    """
    root = et.Element('opencv_storage')
    et.SubElement(root, 'Channel').text = '3'
    et.SubElement(root, 'Row').text = str(img_size)
    et.SubElement(root, 'Col').text = str(img_size)
    meanImg = et.SubElement(root, 'MeanImg', type_id='opencv-matrix')
    et.SubElement(meanImg, 'rows').text = '1'
    et.SubElement(meanImg, 'cols').text = str(img_size * img_size * 3)
    et.SubElement(meanImg, 'dt').text = 'f'
    # Flatten to a single space-separated row in scientific notation.
    et.SubElement(meanImg, 'data').text = ' '.join(['%e' % n for n in np.reshape(data, (img_size * img_size * 3))])
    tree = et.ElementTree(root)
    tree.write(fname)
    # Re-parse and rewrite pretty-printed.
    x = xml.dom.minidom.parse(fname)
    with open(fname, 'w') as f:
        f.write(x.toprettyxml(indent = '  '))
if __name__ == "__main__":
    # Entry point (Python 3 variant): convert the pickled CIFAR-10 batches
    # into individual PNGs plus ImageReader map files, and write the
    # dataset mean image.  Unlike the py2 variant, no regression-label
    # files are produced here.
    if len(sys.argv) != 2:
        print ("Usage: CifarConverter.py <path to CIFAR-10 dataset directory>\nCIFAR-10 dataset (Python version) can be downloaded from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")
        sys.exit(1)
    rootDir = sys.argv[1]
    trainDir = os.path.join(rootDir, os.path.join('data', 'train'))
    if not os.path.exists(trainDir):
        os.makedirs(trainDir)
    testDir = os.path.join(rootDir, os.path.join('data', 'test'))
    if not os.path.exists(testDir):
        os.makedirs(testDir)
    data = {}
    dataMean = np.zeros((3, imgSize, imgSize)) # mean is in CHW format.
    # Training set: five pickled batches of 10000 images each; pad by 4 and
    # accumulate the running mean in dataMean.  latin1 decodes py2 pickles.
    with open(os.path.join(rootDir, 'train_map.txt'), 'w') as mapFile:
        for ifile in range(1, 6):
            with open(os.path.join(rootDir, 'data_batch_' + str(ifile)), 'rb') as f:
                data = cp.load(f, encoding='latin1')
            for i in range(10000):
                fname = os.path.join(trainDir, ('%05d.png' % (i + (ifile - 1) * 10000)))
                saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, 4, mean=dataMean)
    # 50 * 1000 == 50000 images total (5 batches x 10000).
    dataMean = dataMean / (50 * 1000)
    saveMean(os.path.join(rootDir, 'CIFAR-10_mean.xml'), dataMean)
    # Test set: single batch, no padding, no mean accumulation.
    with open(os.path.join(rootDir, 'test_map.txt'), 'w') as mapFile:
        with open(os.path.join(rootDir, 'test_batch'), 'rb') as f:
            data = cp.load(f, encoding='latin1')
        for i in range(10000):
            fname = os.path.join(testDir, ('%05d.png' % i))
            saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, 0)

Просмотреть файл

@ -1,105 +0,0 @@
import sys
import urllib
import tarfile
import shutil
import os
import struct
import numpy as np
import cPickle as cp
import getopt
ImgSize = 32                     # CIFAR-10 images are 32x32 pixels
NumFeat = ImgSize * ImgSize * 3  # 3072 raw pixel features per image

def readBatch(src, outFmt):
    """Load one pickled CIFAR-10 batch and return an int array of shape
    (N, NumFeat + 1): pixel features followed by a final label column.

    src    -- path to a pickled batch file (a dict with 'data' and 'labels').
    outFmt -- 'cudnn' keeps the native channel-major (NCHW) pixel layout;
              'legacy' interleaves the R/G/B planes per pixel for the
              pre-cuDNN CNTK format.  Anything else prints usage and exits.
    """
    with open(src, 'rb') as f:
        d = cp.load(f)
    # Note: most of the frameworks use spatial-major (aka NCHW) input format:
    # R0..RN,G0..GN,B0..BN
    # There are 2 possible options in CNTK:
    # 1. If CNTK is built with cuDNN then 'cudnn' (i.e. NCHW format) should be used.
    # 2. Otherwise, legacy CNTK 'NHWC' format should be used. As CIFAR-10 dataset comes in
    # NCHW format, it has to be converted to CNTK legacy format first.
    data = d['data']
    if outFmt == 'cudnn':
        feat = data
    elif outFmt == 'legacy':
        # Split the three channel planes and interleave them per pixel.
        r = data[:, : ImgSize * ImgSize]
        g = data[:, ImgSize * ImgSize : 2 * ImgSize * ImgSize]
        b = data[:, 2 * ImgSize * ImgSize : 3 * ImgSize * ImgSize]
        feat = np.empty_like(data)
        feat[:, ::3] = r
        feat[:, 1::3] = g
        feat[:, 2::3] = b
    else:
        print ('Format not supported: ' + outFmt)
        usage()
        sys.exit(1)
    # Append the labels as the final column.
    res = np.hstack((feat, np.reshape(d['labels'], (len(d['labels']), 1))))
    # Fix: np.int was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin int is the documented, behavior-identical replacement.
    return res.astype(int)
def loadData(src, outFmt):
    """Download the CIFAR-10 python tarball from src, unpack it into the
    current directory, and return (train, test) arrays in the requested
    output format (see readBatch).  The downloaded archive is deleted
    afterwards, even if extraction or conversion fails.
    """
    print ('Downloading ' + src)
    fname, h = urllib.urlretrieve(src, './delete.me')
    print ('Done.')
    try:
        print ('Extracting files...')
        with tarfile.open(fname) as tar:
            # NOTE(review): extractall() trusts archive member paths; fine
            # for this known source, unsafe for untrusted tarballs.
            tar.extractall()
        print ('Done.')
        print ('Preparing train set...')
        # NOTE(review): np.int was removed in NumPy 1.24; needs updating
        # before this runs on a modern NumPy.
        trn = np.empty((0, NumFeat + 1), dtype=np.int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName, outFmt)))
        print ('Done.')
        print ('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch', outFmt)
        print ('Done.')
    finally:
        # Remove the downloaded archive whether or not extraction succeeded.
        os.remove(fname)
    return (trn, tst)
def usage():
    """Print command-line help for this downloader script."""
    message = 'Usage: CifarDownload.py [-f <format>] \n where format can be either cudnn or legacy. Default is cudnn.'
    print (message)
def parseCmdOpt(argv):
    """Parse command-line options and return the output format string.

    Recognized options: -h/--help (print usage and exit 0) and
    -f/--outFormat with value 'cudnn' or 'legacy'.  Returns 'cudnn' when
    no format option is supplied.  Exits with status 1 on an unknown
    option or an invalid format value.
    """
    if len(argv) == 0:
        print ("Using cudnn output format.")
        return "cudnn"
    try:
        opts, args = getopt.getopt(argv, 'hf:', ['help', 'outFormat='])
    except getopt.GetoptError:
        usage()
        sys.exit(1)
    # Fix: 'fmt' was previously only bound inside the -f branch, so argv
    # without -f (e.g. stray positional arguments) raised NameError at the
    # final return.  Default to 'cudnn', matching the empty-argv case.
    fmt = 'cudnn'
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit()
        elif opt in ('-f', '--outFormat'):
            fmt = arg
            if fmt != 'cudnn' and fmt != 'legacy':
                print ('Invalid output format option.')
                usage()
                sys.exit(1)
    return fmt
def savetxt(filename, ndarray):
    """Write each [features..., label] row of ndarray to filename in CNTK
    text format: "|labels <one-hot of last column> |features <values>".
    """
    with open(filename, 'w') as f:
        # Fix: wrap map() in list() so the one-hot strings can be indexed;
        # a bare map() is a one-shot, non-subscriptable iterator under
        # Python 3 (this also matches the _py3 variant of this script and
        # is harmless under Python 2).
        labels = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))
        for row in ndarray:
            row_str = row.astype(str)
            label_str = labels[row[-1]]
            feature_str = ' '.join(row_str[:-1])
            f.write('|labels {} |features {}\n'.format(label_str, feature_str))
if __name__ == "__main__":
    # Entry point: parse the output format, download and unpack CIFAR-10,
    # and write the train/test sets as CNTK text files in the current
    # directory.
    fmt = parseCmdOpt(sys.argv[1:])
    trn, tst = loadData('http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz', fmt)
    print ('Writing train text file...')
    savetxt(r'./Train_cntk_text.txt', trn)
    print ('Done.')
    print ('Writing test text file...')
    savetxt(r'./Test_cntk_text.txt', tst)
    print ('Done.')

Просмотреть файл

@ -1,105 +0,0 @@
import sys
import urllib.request as ul
import pickle as cp
import tarfile
import shutil
import os
import struct
import numpy as np
import getopt
ImgSize = 32                     # CIFAR-10 images are 32x32 pixels
NumFeat = ImgSize * ImgSize * 3  # 3072 raw pixel features per image

def readBatch(src, outFmt):
    """Load one pickled CIFAR-10 batch and return an int array of shape
    (N, NumFeat + 1): pixel features followed by a final label column.

    src    -- path to a pickled batch file (a dict with 'data' and 'labels');
              decoded with latin1 since the batches were pickled by Python 2.
    outFmt -- 'cudnn' keeps the native channel-major (NCHW) pixel layout;
              'legacy' interleaves the R/G/B planes per pixel for the
              pre-cuDNN CNTK format.  Anything else prints usage and exits.
    """
    with open(src, 'rb') as f:
        d = cp.load(f, encoding="latin1")
    # Note: most of the frameworks use spatial-major (aka NCHW) input format:
    # R0..RN,G0..GN,B0..BN
    # There are 2 possible options in CNTK:
    # 1. If CNTK is built with cuDNN then 'cudnn' (i.e. NCHW format) should be used.
    # 2. Otherwise, legacy CNTK 'NHWC' format should be used. As CIFAR-10 dataset comes in
    # NCHW format, it has to be converted to CNTK legacy format first.
    data = d['data']
    if outFmt == 'cudnn':
        feat = data
    elif outFmt == 'legacy':
        # Split the three channel planes and interleave them per pixel.
        r = data[:, : ImgSize * ImgSize]
        g = data[:, ImgSize * ImgSize : 2 * ImgSize * ImgSize]
        b = data[:, 2 * ImgSize * ImgSize : 3 * ImgSize * ImgSize]
        feat = np.empty_like(data)
        feat[:, ::3] = r
        feat[:, 1::3] = g
        feat[:, 2::3] = b
    else:
        print ('Format not supported: ' + outFmt)
        usage()
        sys.exit(1)
    # Append the labels as the final column.
    res = np.hstack((feat, np.reshape(d['labels'], (len(d['labels']), 1))))
    # Fix: np.int was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin int is the documented, behavior-identical replacement.
    return res.astype(int)
def loadData(src, outFmt):
    """Download the CIFAR-10 python tarball from src, unpack it into the
    current directory, and return (train, test) arrays in the requested
    output format (see readBatch).  The downloaded archive is deleted
    afterwards, even if extraction or conversion fails.
    """
    print ('Downloading ' + src)
    fname, h = ul.urlretrieve(src, './delete.me')
    print ('Done.')
    try:
        print ('Extracting files...')
        with tarfile.open(fname) as tar:
            # NOTE(review): extractall() trusts archive member paths; fine
            # for this known source, unsafe for untrusted tarballs.
            tar.extractall()
        print ('Done.')
        print ('Preparing train set...')
        # NOTE(review): np.int was removed in NumPy 1.24; needs updating
        # before this runs on a modern NumPy.
        trn = np.empty((0, NumFeat + 1), dtype=np.int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName, outFmt)))
        print ('Done.')
        print ('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch', outFmt)
        print ('Done.')
    finally:
        # Remove the downloaded archive whether or not extraction succeeded.
        os.remove(fname)
    return (trn, tst)
def usage():
    """Print command-line help for this downloader script."""
    message = 'Usage: CifarDownload_py3.py [-f <format>] \n where format can be either cudnn or legacy. Default is cudnn.'
    print (message)
def parseCmdOpt(argv):
    """Parse command-line options and return the output format string.

    Recognized options: -h/--help (print usage and exit 0) and
    -f/--outFormat with value 'cudnn' or 'legacy'.  Returns 'cudnn' when
    no format option is supplied.  Exits with status 1 on an unknown
    option or an invalid format value.
    """
    if len(argv) == 0:
        print ("Using cudnn output format.")
        return "cudnn"
    try:
        opts, args = getopt.getopt(argv, 'hf:', ['help', 'outFormat='])
    except getopt.GetoptError:
        usage()
        sys.exit(1)
    # Fix: 'fmt' was previously only bound inside the -f branch, so argv
    # without -f (e.g. stray positional arguments) raised NameError at the
    # final return.  Default to 'cudnn', matching the empty-argv case.
    fmt = 'cudnn'
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit()
        elif opt in ('-f', '--outFormat'):
            fmt = arg
            if fmt != 'cudnn' and fmt != 'legacy':
                print ('Invalid output format option.')
                usage()
                sys.exit(1)
    return fmt
def savetxt(filename, ndarray):
    """Write each [features..., label] row of ndarray to filename in CNTK
    text format: "|labels <one-hot of last column> |features <values>".
    """
    # Precompute the ten one-hot label strings ('1 0 0 ...', '0 1 0 ...', ...).
    one_hot = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))
    with open(filename, 'w') as out:
        for row in ndarray:
            cells = row.astype(str)
            out.write('|labels {} |features {}\n'.format(one_hot[row[-1]], ' '.join(cells[:-1])))
if __name__ == "__main__":
    # Entry point: parse the output format, download and unpack CIFAR-10,
    # and write the train/test sets as CNTK text files in the current
    # directory.
    fmt = parseCmdOpt(sys.argv[1:])
    trn, tst = loadData('http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz', fmt)
    print ('Writing train text file...')
    savetxt(r'./Train_cntk_text.txt', trn)
    print ('Done.')
    print ('Writing test text file...')
    savetxt(r'./Test_cntk_text.txt', tst)
    print ('Done.')

Просмотреть файл

@ -1,91 +0,0 @@
# Simple CIFAR-10 convnet, without and with BatchNormalization.
# Top-level plan: run the batch-norm training action, then evaluate on the test set.
command = TrainConvNetWithBN:Eval
makeMode = false ; traceLevel = 1 ; deviceId = 0
RootDir = "." ; DataDir = "$RootDir$" ; ModelDir = "$RootDir$/Output/Models"
modelPath = "$ModelDir$/ConvNetBN"

# Training with BN
TrainConvNetWithBN = {
    action = "train"
    BrainScriptNetworkBuilder = {
        imageShape = 32:32:3   # 32x32 RGB input
        labelDim = 10          # 10 label classes

        # Shift pixel values by -128 before the first convolution.
        Subtract128 (x) = x - Constant (128)

        # Three conv -> BN -> ReLU -> max-pool stages, then a BN'd hidden
        # linear layer and the output linear layer.
        model = Sequential (
            Subtract128 :
            ConvolutionalLayer {32, (5:5), pad = true, bias = false, init = "heNormal", initValueScale=0.00390625} :
            BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} : ReLU :
            MaxPoolingLayer {(3:3), stride = (2:2)} :
            ConvolutionalLayer {32, (5:5), pad = true, bias = false, init = "heNormal"} :
            BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} : ReLU :
            MaxPoolingLayer {(3:3), stride = (2:2)} :
            ConvolutionalLayer {64, (5:5), pad = true, bias = false, init = "heNormal"} :
            BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} : ReLU :
            MaxPoolingLayer {(3:3), stride = (2:2)} :
            LinearLayer {64, bias = false, init = "heNormal", initValueScale=0.1} :
            BatchNormalizationLayer {normalizationTimeConstant = 4096} : ReLU :
            LinearLayer {labelDim, init = "heNormal", initValueScale=0.1}
        )

        # inputs
        features = Input {imageShape}
        labels = Input {labelDim}

        # apply model to features
        z = model (features)

        # connect to system
        ce = CrossEntropyWithSoftmax (labels, z)
        errs = ClassificationError (labels, z)
        top5Errs = ClassificationError (labels, z, topN=5)

        featureNodes = (features)
        labelNodes = (labels)
        criterionNodes = (ce)
        evaluationNodes = (errs)
        outputNodes = (z)
    }

    SGD = {
        epochSize = 49984 ; minibatchSize = 64
        learningRatesPerSample = 0.00046875*5:0.00015625   # drop rate after 5 epochs
        momentumAsTimeConstant = 0
        maxEpochs = 10
        L2RegWeight = 0.003
        dropoutRate = 0
        firstMBsToShowResult = 10 ; numMBsToShowResult = 500
    }

    reader = {
        readerType = "CNTKTextFormatReader"
        file = "$DataDir$/Train_cntk_text.txt"
        input = {
            features = { dim = 3072 ; format = "dense" }   # 32*32*3 flattened pixels
            labels = { dim = 10 ; format = "dense" }
        }
    }
}

# Eval action
Eval = {
    action = "eval"
    minibatchSize = 16
    evalNodeNames = errs:top5Errs # also test top-5 error rate
    reader = {
        readerType = "CNTKTextFormatReader"
        file = "$DataDir$/Test_cntk_text.txt"
        input = {
            features = { dim = 3072 ; format = "dense" }
            labels = { dim = 10 ; format = "dense" }
        }
    }
}

Просмотреть файл

@ -1,57 +0,0 @@
# CNTK example: CIFAR-10
## Overview
|Data: |The CIFAR-10 dataset (http://www.cs.toronto.edu/~kriz/cifar.html) of small images.
|:---------|:---
|Purpose |This example demonstrates usage of the NDL (Network Description Language) to define networks.
|Network |NDLNetworkBuilder, convolutional networks with batch normalization (including ResNet), cross entropy with softmax.
|Training |Stochastic gradient descent with momentum.
|Comments |See below.
## Running the example
### Getting the data
CIFAR-10 dataset is not included in CNTK distribution but can be easily downloaded and converted by running the following commands from this folder:
```
python CifarDownload.py [-f <format: cudnn|legacy>]
python CifarConverter.py <path to CIFAR-10 dataset>
```
The scripts will download all required files and convert them to CNTK-supported format.
In case you don't have Python installed (Python 2.7 and numpy are required), we recommend installing the Python Anaconda distribution, which contains most of the popular Python packages including numpy:
http://continuum.io/downloads
The download script has an optional `-f` parameter which specifies output format of the datasets. `cudnn` option (default) saves dataset in a spatial-major format used by cuDNN, while `legacy` - in CNTK legacy format. Use `cudnn` if CNTK is compiled with cuDNN and `legacy` otherwise.
The converter script takes a full path to the original CIFAR-10 dataset (in Python pickle format). The script will create `data` folder inside of provided path where it will store both train and test images (in `train` and `test` folders). It will also create appropriate mapping files for the CNTK ImageReader as well as mean file.
## Details
### Config files
1. 01_Convolution.ndl is a convolutional network which has 3 convolutional and 3 max pooling layers and resembles the network described here:
https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-80sec.cfg
(main differences are usage of max pooling layers everywhere rather than mix of max and average pooling, as well as dropout in fully-connected layer).
The network produces 20.5% of error after training for about 3 minutes on GPU.
To run the sample, navigate to the sample folder and run the following command:
```
cntk configFile=01_Conv_ndl_deprecated.cntk
```
2. 02_BatchNormConv.ndl is a convolutional network which uses batch normalization technique (http://arxiv.org/abs/1502.03167).
To run the sample, navigate to the sample folder and run the following command:
```
cntk configFile=02_BatchNormConv_ndl_deprecated.cntk
```
3. 03_ResNet.ndl and 04_ResNet_56.ndl are very deep convolutional networks that use ResNet architecture and have 20 and 56 layers respectively (http://arxiv.org/abs/1512.03385).
With 03_ResNet_ndl_deprecated.cntk you should get around 8.2% of error after training for about 50 minutes. 04_ResNet_56_ndl_deprecated.cntk should produce around 6.4% of error after training for about 3 hours (see log files in the Output directory).
4. 05_ConvLocal_ndl_deprecated.cntk uses locally-connected convolution layers (see `conv_local3` and `conv_local4` in `05_ConvLocal_ndl_deprecated.cntk`) and resembles a network described here: https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-conv-local-11pct.cfg
5. 06_RegressionSimple.cntk shows how to train a regression model on image data. It uses a very simple network and a composite reader using both the ImageReader and CNTKTextFormatReader and defines the RMSE (root mean square error) as the loss function. The values that the network learns to predict are simply the average rgb values of an image normalized to [0, 1]. To generate the ground truth labels for regression you need to run the CifarConverter.py script (since this example was added later you might need to rerun it to generate the regression files). See also here: https://github.com/Microsoft/CNTK/wiki/Train-a-regression-model-on-images
For more details, refer to .ndl and corresponding .cntk files.

Просмотреть файл

@ -1,12 +0,0 @@
0
1
2
3
4
5
6
7
8
9

Просмотреть файл

@ -0,0 +1,30 @@
# CNTK Examples: Image/Regression
## Overview
|Data: |The CIFAR-10 dataset (http://www.cs.toronto.edu/~kriz/cifar.html) of small images.
|:---------|:---
|Purpose |This folder contains a number of examples that demonstrate the usage of BrainScript to define deep learning networks for image regression tasks.
|Network |Convolution neural networks.
|Training |Stochastic gradient descent with momentum.
|Comments |See below.
## Running the example
### Getting the data
We use the CIFAR-10 dataset to demonstrate how to perform regression on images. The CIFAR-10 dataset is not included in the CNTK distribution but can be easily downloaded and converted by following the instructions in [DataSets/CIFAR-10](../DataSets/CIFAR-10). We recommend you keep the downloaded data in the respective folder, as the configuration files in this folder assume that location by default.
## Details
### RegrSimple_CIFAR10.cntk
In this example, we set up a very simple task to have a neural network predict the average RGB values of images normalized to [0,1). To generate the ground truth labels for this regression task, the CIFAR-10 installation script in [DataSets/CIFAR-10](../DataSets/CIFAR-10) will generate two additional files, `train_regrLabels.txt` and `test_regrLabels.txt`, for train and test respectively.
Run the example from the current folder using:
`cntk configFile=RegrSimple_CIFAR10.cntk`
The network produces a root-mean-square error (RMSE) of around 0.00098, which indicates that the regression accuracy is very high for this simple task.
You may examine the cntk configuration file [RegrSimple_CIFAR10.cntk](./RegrSimple_CIFAR10.cntk) for more details. Note the network is a linear one without nonlinearity. This is intended as we know that computing the average RGB values of images is a linear operation. The reader is a composite reader that uses the `ImageReader` to read images and the `CNTKTextFormatReader` to read the regression ground truth labels. The configuration file also demonstrates how to write the network prediction for the test data into an output file.

Просмотреть файл

@ -1,21 +1,17 @@
# 06_RegressionSimple.cntk shows how to train a regression model on image data. # RegrSimple_CIFAR10.cntk shows how to train a regression model on CIFAR-10 image data.
# It uses a very simple network and a composite reader using both the ImageReader # It uses a very simple network and a composite reader using both the ImageReader
# and CNTKTextFormatReader and defines the RMSE (root mean square error) as the # and CNTKTextFormatReader and defines the RMSE (root mean square error) as the
# loss function. The value that the network learns to predict are simply the # loss function. The value that the network learns to predict are simply the
# average rgb values of an image normalized to [0, 1]. # average rgb values of an image normalized to [0, 1].
# The network consists simply of two linear layers, i.e. two fully connected layers
# with no non-linear activation function, simply LinearLayer{...} (v) = W * v + b
# See https://github.com/Microsoft/CNTK/wiki/Layers-Reference for details on CNTK layers.
# See README.md for instructions on how to generate data and regression labels for this example.
command = TrainConvNet:Write command = TrainConvNet:Write:Test
makeMode = false ; traceLevel = 1 ; deviceId = "auto" makeMode = false ; traceLevel = 1 ; deviceId = "auto"
rootDir = "." ; configDir = "$rootDir$" ; dataDir = "$rootDir$" ; rootDir = ".." ; dataDir = "$rootDir$/DataSets/CIFAR10" ;
outputDir = "$rootDir$/Output" ; modelDir = "$outputDir$/Models" outputDir = "Output" ; modelDir = "$outputDir$/Models"
modelPath = "$ModelDir$/06_RegressionSimple.cmf" modelPath = "$ModelDir$/RegrSimple_CIFAR10.cmf"
# Training action for a convolutional network # Training action for a convolutional network
TrainConvNet = { TrainConvNet = {
@ -70,7 +66,7 @@ TrainConvNet = {
verbosity = 0 ; randomize = true verbosity = 0 ; randomize = true
deserializers = ({ deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader" type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/cifar-10-batches-py/train_map.txt" file = "$dataDir$/train_map.txt"
input = { input = {
features = { transforms = ( features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
@ -80,7 +76,7 @@ TrainConvNet = {
} }
} : { } : {
type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader" type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
file = "$dataDir$/cifar-10-batches-py/train_regrLabels.txt" file = "$dataDir$/train_regrLabels.txt"
input = { input = {
regrLabels = { dim = 3 ; format = "dense" } regrLabels = { dim = 3 ; format = "dense" }
} }
@ -93,13 +89,13 @@ Write = {
action = "write" action = "write"
minibatchSize = 1 minibatchSize = 1
outputNodeNames = (ol, regrLabels, rmse) outputNodeNames = (ol, regrLabels, rmse)
outputPath = "$OutputDir$/06_RegressionSimple" outputPath = "$OutputDir$/RegrSimple_CIFAR10"
reader = { reader = {
verbosity = 0 ; randomize = false verbosity = 0 ; randomize = false
deserializers = ({ deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader" type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/cifar-10-batches-py/test_map.txt" file = "$dataDir$/test_map.txt"
input = { input = {
features = { transforms = ( features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
@ -109,7 +105,7 @@ Write = {
} }
} : { } : {
type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader" type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
file = "$dataDir$/cifar-10-batches-py/test_regrLabels.txt" file = "$dataDir$/test_regrLabels.txt"
input = { input = {
regrLabels = { dim = 3 ; format = "dense" } regrLabels = { dim = 3 ; format = "dense" }
} }
@ -122,13 +118,13 @@ Test = {
action = "test" action = "test"
minibatchSize = 512 minibatchSize = 512
outputNodeNames = (ol, regrLabels, rmse) outputNodeNames = (ol, regrLabels, rmse)
outputPath = "$OutputDir$/06_RegressionSimple" outputPath = "$OutputDir$/RegrSimple_CIFAR10"
reader = { reader = {
verbosity = 0 ; randomize = false verbosity = 0 ; randomize = false
deserializers = ({ deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader" type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/cifar-10-batches-py/test_map.txt" file = "$dataDir$/test_map.txt"
input = { input = {
features = { transforms = ( features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } : { type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
@ -138,7 +134,7 @@ Test = {
} }
} : { } : {
type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader" type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
file = "$dataDir$/cifar-10-batches-py/test_regrLabels.txt" file = "$dataDir$/test_regrLabels.txt"
input = { input = {
regrLabels = { dim = 3 ; format = "dense" } regrLabels = { dim = 3 ; format = "dense" }
} }

Просмотреть файл

@ -16,5 +16,5 @@ Please refer to the Readme file in the corresponding folder for further details.
|:------------------------|:-------------------------------------------------|:----------------| |:------------------------|:-------------------------------------------------|:----------------|
|Other/Simple2d | Synthetic 2d data | FF (CPU and GPU) |Other/Simple2d | Synthetic 2d data | FF (CPU and GPU)
|Speech/AN4 | Speech data (CMU AN4) | FF and LSTM |Speech/AN4 | Speech data (CMU AN4) | FF and LSTM
|Image/MNIST | Image data (MNIST handwritten digit recognition) | CNN |Image/GettingStarted | Image data (MNIST handwritten digit recognition) | CNN
|Text/PennTreebank | Text data (penn treebank) | RNN |Text/PennTreebank | Text data (penn treebank) | RNN

Просмотреть файл

@ -412,6 +412,7 @@ $(CNTKLIBRARY_LIB): $(CNTKLIBRARY_OBJ) | $(CNTKMATH_LIB)
CNTKLIBRARY_TESTS_SRC =\ CNTKLIBRARY_TESTS_SRC =\
Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp \ Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp \
Tests/UnitTests/V2LibraryTests/Main.cpp \ Tests/UnitTests/V2LibraryTests/Main.cpp \
Tests/UnitTests/V2LibraryTests/Common.cpp \
Tests/UnitTests/V2LibraryTests/NDArrayViewTests.cpp \ Tests/UnitTests/V2LibraryTests/NDArrayViewTests.cpp \
Tests/UnitTests/V2LibraryTests/RecurrentFunctionTests.cpp \ Tests/UnitTests/V2LibraryTests/RecurrentFunctionTests.cpp \
Tests/UnitTests/V2LibraryTests/TensorTests.cpp \ Tests/UnitTests/V2LibraryTests/TensorTests.cpp \
@ -422,6 +423,8 @@ CNTKLIBRARY_TESTS_SRC =\
Tests/UnitTests/V2LibraryTests/FunctionTests.cpp \ Tests/UnitTests/V2LibraryTests/FunctionTests.cpp \
Tests/UnitTests/V2LibraryTests/SequenceClassification.cpp \ Tests/UnitTests/V2LibraryTests/SequenceClassification.cpp \
Tests/UnitTests/V2LibraryTests/Seq2Seq.cpp \ Tests/UnitTests/V2LibraryTests/Seq2Seq.cpp \
Tests/UnitTests/V2LibraryTests/TruncatedLSTMAcousticModel.cpp \
Tests/UnitTests/V2LibraryTests/DeviceSelectionTests.cpp \
Examples/Evaluation/CPPEvalV2Client/EvalMultithreads.cpp \ Examples/Evaluation/CPPEvalV2Client/EvalMultithreads.cpp \
CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests
@ -474,7 +477,7 @@ $(EVAL_LIB): $(EVAL_OBJ) | $(CNTKMATH_LIB) $(MULTIVERSO_LIB)
@echo $(SEPARATOR) @echo $(SEPARATOR)
@mkdir -p $(dir $@) @mkdir -p $(dir $@)
@echo Building $(EVAL_LIB) for $(ARCH) with build type $(BUILDTYPE) @echo Building $(EVAL_LIB) for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) -l$(MULTIVERSO) $(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) $(lMULTIVERSO)
######################################## ########################################
# Eval Sample client # Eval Sample client
@ -493,7 +496,7 @@ $(EVAL_SAMPLE_CLIENT): $(EVAL_SAMPLE_CLIENT_OBJ) | $(EVAL_LIB) $(MULTIVERSO_LIB)
@echo $(SEPARATOR) @echo $(SEPARATOR)
@mkdir -p $(dir $@) @mkdir -p $(dir $@)
@echo building $(EVAL_SAMPLE_CLIENT) for $(ARCH) with build type $(BUILDTYPE) @echo building $(EVAL_SAMPLE_CLIENT) for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(EVAL) -l$(CNTKMATH) -l$(MULTIVERSO) $(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(EVAL) -l$(CNTKMATH) $(lMULTIVERSO)
######################################## ########################################
# BinaryReader plugin # BinaryReader plugin
@ -733,8 +736,11 @@ IMAGE_READER_LIBS += -lopencv_core -lopencv_imgproc -lopencv_imgcodecs
ifdef LIBZIP_PATH ifdef LIBZIP_PATH
CPPFLAGS += -DUSE_ZIP CPPFLAGS += -DUSE_ZIP
#both directories are needed for building libzip
INCLUDEPATH += $(LIBZIP_PATH)/include
INCLUDEPATH += $(LIBZIP_PATH)/lib/libzip/include INCLUDEPATH += $(LIBZIP_PATH)/lib/libzip/include
IMAGE_READER_LIBS += -lzip IMAGE_READER_LIBS += -lzip
LIBPATH += $(LIBZIP_PATH)/lib
endif endif
IMAGEREADER_SRC =\ IMAGEREADER_SRC =\
@ -788,7 +794,7 @@ ifeq (,$(wildcard Source/Multiverso/include/multiverso/*.h))
$(error Build with Multiverso was requested but cannot find the code. Please check https://github.com/Microsoft/DMTK to learn more.) $(error Build with Multiverso was requested but cannot find the code. Please check https://github.com/Microsoft/DMTK to learn more.)
endif endif
MULTIVERSO:=multiverso lMULTIVERSO:=-lmultiverso
INCLUDEPATH += $(SOURCEDIR)/Multiverso/include INCLUDEPATH += $(SOURCEDIR)/Multiverso/include
COMMON_FLAGS += -DMULTIVERSO_SUPPORT COMMON_FLAGS += -DMULTIVERSO_SUPPORT
@ -813,6 +819,26 @@ $(MULTIVERSO_LIB):
-B./Source/Multiverso/build -H./Source/Multiverso -B./Source/Multiverso/build -H./Source/Multiverso
@make VERBOSE=1 -C ./Source/Multiverso/build/ -j multiverso @make VERBOSE=1 -C ./Source/Multiverso/build/ -j multiverso
UNITTEST_MULTIVERSO_SRC = \
$(SOURCEDIR)/Multiverso/Test/unittests/test_array.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_blob.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_kv.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_message.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_multiverso.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_node.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_sync.cpp \
UNITTEST_MULTIVERSO_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_MULTIVERSO_SRC))
UNITTEST_MULTIVERSO := $(BINDIR)/multiversotests
ALL += $(UNITTEST_MULTIVERSO)
$(UNITTEST_MULTIVERSO): $(UNITTEST_MULTIVERSO_OBJ) | $(MULTIVERSO_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(lMULTIVERSO) -ldl
endif endif
######################################## ########################################
@ -849,7 +875,7 @@ $(CNTK): $(CNTK_OBJ) | $(CNTKMATH_LIB) $(MULTIVERSO_LIB)
@echo $(SEPARATOR) @echo $(SEPARATOR)
@mkdir -p $(dir $@) @mkdir -p $(dir $@)
@echo building output for $(ARCH) with build type $(BUILDTYPE) @echo building output for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) -l$(MULTIVERSO) -fopenmp $(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) $(lMULTIVERSO) -fopenmp
# deployable resources: standard library of BS # deployable resources: standard library of BS
CNTK_CORE_BS:=$(BINDIR)/cntk.core.bs CNTK_CORE_BS:=$(BINDIR)/cntk.core.bs
@ -886,7 +912,7 @@ $(UNITTEST_EVAL) : $(UNITTEST_EVAL_OBJ) | $(EVAL_LIB) $(CNTKMATH_LIB)
@echo $(SEPARATOR) @echo $(SEPARATOR)
@mkdir -p $(dir $@) @mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE) @echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -l$(EVAL) -l$(CNTKMATH) -l$(MULTIVERSO) $(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -l$(EVAL) -l$(CNTKMATH) $(lMULTIVERSO)
#TODO: create project specific makefile or rules to avoid adding project specific path to the global path #TODO: create project specific makefile or rules to avoid adding project specific path to the global path
INCLUDEPATH += $(SOURCEDIR)/Readers/CNTKTextFormatReader INCLUDEPATH += $(SOURCEDIR)/Readers/CNTKTextFormatReader
@ -944,7 +970,7 @@ $(UNITTEST_NETWORK): $(UNITTEST_NETWORK_OBJ) | $(CNTKMATH_LIB) $(CNTKTEXTFORMATR
@echo $(SEPARATOR) @echo $(SEPARATOR)
@mkdir -p $(dir $@) @mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE) @echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -l$(MULTIVERSO) -l$(CNTKMATH) -fopenmp $(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) $(lMULTIVERSO) -l$(CNTKMATH) -fopenmp
UNITTEST_MATH_SRC = \ UNITTEST_MATH_SRC = \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/BatchNormalizationEngineTests.cpp \ $(SOURCEDIR)/../Tests/UnitTests/MathTests/BatchNormalizationEngineTests.cpp \
@ -1003,27 +1029,6 @@ $(UNITTEST_BRAINSCRIPT): $(UNITTEST_BRAINSCRIPT_OBJ) | $(CNTKMATH_LIB)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE) @echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -ldl -l$(CNTKMATH) $(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -ldl -l$(CNTKMATH)
UNITTEST_MULTIVERSO_SRC = \
$(SOURCEDIR)/Multiverso/Test/unittests/test_array.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_blob.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_kv.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_message.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_multiverso.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_node.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_sync.cpp \
UNITTEST_MULTIVERSO_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_MULTIVERSO_SRC))
UNITTEST_MULTIVERSO := $(BINDIR)/multiversotests
ALL += $(UNITTEST_MULTIVERSO)
$(UNITTEST_MULTIVERSO): $(UNITTEST_MULTIVERSO_OBJ) | $(MULTIVERSO_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) -l$(MULTIVERSO) -ldl
unittests: $(UNITTEST_EVAL) $(UNITTEST_READER) $(UNITTEST_NETWORK) $(UNITTEST_MATH) $(UNITTEST_BRAINSCRIPT) unittests: $(UNITTEST_EVAL) $(UNITTEST_READER) $(UNITTEST_NETWORK) $(UNITTEST_MATH) $(UNITTEST_BRAINSCRIPT)
endif endif

Просмотреть файл

@ -1,6 +1,14 @@
# CNTK # CNTK
## Latest news ## Latest news
*2016-10-03.* V 1.7.2 Binary release
**This is a Hot Fix Release. It affects all users of Model Evaluation Library**
If you are NOT using Model Evaluation Library you may skip this release.
If you ARE using Model Evaluation Library we **strongly recommend** installing version 1.7.2 instead of **any** previous version you might be using.
See [Release Notes](https://github.com/Microsoft/CNTk/wiki/CNTK_1_7_2_Release_Notes) for details.
*2016-09-28.* V 1.7.1 Binary release *2016-09-28.* V 1.7.1 Binary release
Highlights of this Release: Highlights of this Release:
* Two Breaking Changes related to Layers library default initialization and ```fsAdagrad``` gradient-normalization scheme * Two Breaking Changes related to Layers library default initialization and ```fsAdagrad``` gradient-normalization scheme
@ -31,9 +39,6 @@ Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/
*2016-08-10.* We have significantly simplified handling of **Gated Recurrent Units (GRU)**. Read more in the [corresponding article](https://github.com/Microsoft/CNTK/wiki/GRUs-on-CNTK-with-BrainScript). *2016-08-10.* We have significantly simplified handling of **Gated Recurrent Units (GRU)**. Read more in the [corresponding article](https://github.com/Microsoft/CNTK/wiki/GRUs-on-CNTK-with-BrainScript).
*2016-07-15.* V 1.6 Binary release
CNTK v.1.6 binaries are on the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases)
See [all news](https://github.com/Microsoft/CNTK/wiki/News). See [all news](https://github.com/Microsoft/CNTK/wiki/News).
## What is CNTK ## What is CNTK

Просмотреть файл

@ -90,22 +90,24 @@ if __name__ == "__main__":
##################################################################################################### #####################################################################################################
try: try:
import StringIO import StringIO
stringio = StringIO.StringIO
except ImportError: except ImportError:
from io import StringIO from io import StringIO
stringio = StringIO
try: try:
import pytest import pytest
except ImportError: except ImportError:
pass pass
def test_simpleSanityCheck(): def test_simpleSanityCheck():
dictionary1 = StringIO.StringIO("hello\nmy\nworld\nof\nnothing\n") dictionary1 = stringio("hello\nmy\nworld\nof\nnothing\n")
dictionary2 = StringIO.StringIO("let\nme\nbe\nclear\nabout\nit\n") dictionary2 = stringio("let\nme\nbe\nclear\nabout\nit\n")
input = StringIO.StringIO("hello my\tclear about\nworld of\tit let clear\n") input = stringio("hello my\tclear about\nworld of\tit let clear\n")
output = StringIO.StringIO() output = stringio()
convert([dictionary1, dictionary2], [input], output, None, False) convert([dictionary1, dictionary2], [input], output, None, False)
expectedOutput = StringIO.StringIO() expectedOutput = stringio()
expectedOutput.write("0\t|S0 0:1\t|S1 3:1\n") expectedOutput.write("0\t|S0 0:1\t|S1 3:1\n")
expectedOutput.write("0\t|S0 1:1\t|S1 4:1\n") expectedOutput.write("0\t|S0 1:1\t|S1 4:1\n")
expectedOutput.write("1\t|S0 2:1\t|S1 5:1\n") expectedOutput.write("1\t|S0 2:1\t|S1 5:1\n")
@ -115,10 +117,10 @@ def test_simpleSanityCheck():
assert expectedOutput.getvalue() == output.getvalue() assert expectedOutput.getvalue() == output.getvalue()
def test_nonExistingWord(): def test_nonExistingWord():
dictionary1 = StringIO.StringIO("hello\nmy\nworld\nof\nnothing\n") dictionary1 = stringio("hello\nmy\nworld\nof\nnothing\n")
input = StringIO.StringIO("hello my\nworld of nonexistent\n") input = stringio("hello my\nworld of nonexistent\n")
output = StringIO.StringIO() output = stringio()
with pytest.raises(Exception) as info: with pytest.raises(Exception) as info:
convert([dictionary1], [input], output, None, False) convert([dictionary1], [input], output, None, False)
assert info.value.message == "Token 'nonexistent' cannot be found in the dictionary for stream 0" assert str(info.value) == "Token 'nonexistent' cannot be found in the dictionary for stream 0"

Просмотреть файл

@ -39,6 +39,12 @@ using namespace std;
using namespace Microsoft::MSR; using namespace Microsoft::MSR;
using namespace Microsoft::MSR::CNTK; using namespace Microsoft::MSR::CNTK;
bool GetDistributedMBReadingDefaultValue(const ConfigParameters& config, const IDataReader& reader)
{
// Return 'true' if we're running a parallel training with a v2 reader, 'false' otherwise.
return (MPIWrapper::GetInstance() != nullptr && !reader.IsLegacyReader());
}
// =========================================================================== // ===========================================================================
// DoEvalBase() - implements CNTK "eval" command // DoEvalBase() - implements CNTK "eval" command
// =========================================================================== // ===========================================================================
@ -62,7 +68,7 @@ static void DoEvalBase(const ConfigParameters& config, IDataReader& reader)
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX); size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1); size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
bool enableDistributedMBReading = config(L"distributedMBReading", false); bool enableDistributedMBReading = config(L"distributedMBReading", GetDistributedMBReadingDefaultValue(config, reader));
vector<wstring> evalNodeNamesVector; vector<wstring> evalNodeNamesVector;
@ -104,7 +110,7 @@ static void DoEvalBNBase(const ConfigParameters& config, IDataReader& reader)
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX); size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1); size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
bool enableDistributedMBReading = config(L"distributedMBReading", false); bool enableDistributedMBReading = config(L"distributedMBReading", GetDistributedMBReadingDefaultValue(config, reader));
vector<wstring> evalNodeNamesVector; vector<wstring> evalNodeNamesVector;
@ -189,8 +195,6 @@ void DoCrossValidate(const ConfigParameters& config)
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX); size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1); size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
bool enableDistributedMBReading = config(L"distributedMBReading", false);
ConfigArray evalNodeNames = config(L"evalNodeNames", ""); ConfigArray evalNodeNames = config(L"evalNodeNames", "");
vector<wstring> evalNodeNamesVector; vector<wstring> evalNodeNamesVector;
for (int i = 0; i < evalNodeNames.size(); ++i) for (int i = 0; i < evalNodeNames.size(); ++i)
@ -203,6 +207,8 @@ void DoCrossValidate(const ConfigParameters& config)
DataReader cvDataReader(readerConfig); DataReader cvDataReader(readerConfig);
bool enableDistributedMBReading = config(L"distributedMBReading", GetDistributedMBReadingDefaultValue(config, cvDataReader));
bool finalModelEvaluated = false; bool finalModelEvaluated = false;
for (size_t i = cvInterval[0]; i <= cvInterval[2]; i += cvInterval[1]) for (size_t i = cvInterval[0]; i <= cvInterval[2]; i += cvInterval[1])
{ {

Просмотреть файл

@ -269,6 +269,16 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
nodePtr = builder.LegacyReshape(NULL, num_rows, ImageDimensions::AsTensorShape(img_width, img_height, img_channels, imageLayoutKind), name); nodePtr = builder.LegacyReshape(NULL, num_rows, ImageDimensions::AsTensorShape(img_width, img_height, img_channels, imageLayoutKind), name);
} }
} }
else if (cnNodeType == OperationNameOf(ReconcileDynamicAxisNode))
{
nodeParamCount = 2;
nodeParamStart = 0;
if (pass == ndlPassInitial)
{
nodePtr = builder.ReconcileDynamicAxis(NULL, NULL, name);
}
}
else if (cnNodeType == OperationNameOf(PastValueNode) || else if (cnNodeType == OperationNameOf(PastValueNode) ||
cnNodeType == OperationNameOf(FutureValueNode)) cnNodeType == OperationNameOf(FutureValueNode))
{ {

Просмотреть файл

@ -208,8 +208,10 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
else if (EqualInsensitive(nodeType, OperationNameOf(PerDimMeanVarNormalizationNode), L"PerDimMVNorm")) ret = true; else if (EqualInsensitive(nodeType, OperationNameOf(PerDimMeanVarNormalizationNode), L"PerDimMVNorm")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(PlusNode))) ret = true; else if (EqualInsensitive(nodeType, OperationNameOf(PlusNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ReciprocalNode))) ret = true; else if (EqualInsensitive(nodeType, OperationNameOf(ReciprocalNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ReconcileDynamicAxisNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(RectifiedLinearNode), L"ReLU")) ret = true; else if (EqualInsensitive(nodeType, OperationNameOf(RectifiedLinearNode), L"ReLU")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ReshapeNode))) ret = true; else if (EqualInsensitive(nodeType, OperationNameOf(ReshapeNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ROIPoolingNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(RowRepeatNode))) ret = true; else if (EqualInsensitive(nodeType, OperationNameOf(RowRepeatNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(RowStackNode))) ret = true; else if (EqualInsensitive(nodeType, OperationNameOf(RowStackNode))) ret = true;
#ifdef COMING_SOON #ifdef COMING_SOON

Просмотреть файл

@ -455,8 +455,22 @@ CNTK2 = [
// 11. Criterion nodes // 11. Criterion nodes
// No changes here - we said the default input would be the label sequence here, against which the // No changes here - we said the default input would be the label sequence here, against which the
// empirical sequence is compared to. Keeping this for now. // empirical sequence is compared to. Keeping this for now.
CrossEntropyWithSoftmax(_, outProbVectorSequence, tag='') = new ComputationNode [ operation = 'CrossEntropyWithSoftmax' ; inputs = _AsNodes (_ : outProbVectorSequence) /*plus the function args*/ ] CrossEntropyWithSoftmax(labelSequence, outProbVectorSequence, axis=0, tag='') =
ClassificationError(_, outVectorSequence, topN=1, tag='') = new ComputationNode [ operation = 'ClassificationError' ; inputs = _AsNodes (if topN == 1 then (_ : outVectorSequence) else (_ : outVectorSequence : Constant (topN))) /*plus the function args*/ ] if axis==0 then new ComputationNode [ operation = 'CrossEntropyWithSoftmax' ; inputs = _AsNodes (labelSequence : outProbVectorSequence) /*plus the function args*/ ]
else [ tag1 = tag; out = Minus (ReduceLogSum (outProbVectorSequence, axis=axis), ReduceSum (labelSequence .* outProbVectorSequence, axis=axis), tag=tag1) ].out
# Classification error along a specific axis: account only for missed labels, i.e.
# strictly check whether at the one “1” location in labels we find a value equal to the max
ClassificationError(labelSequence, outVectorSequence, topN=1, axis=0, tag='') =
if axis==0 then new ComputationNode [ operation = 'ClassificationError' ; inputs = _AsNodes (if topN == 1 then (labelSequence : outVectorSequence) else (labelSequence : outVectorSequence : Constant (topN))) /*plus the function args*/ ]
else if topN != 1 then Fail ("ClassificationError() along a specific axis does not support topN.")
else {
axMax = ReduceMax (outVectorSequence, axis=axis) # max value along competition axis
pred = outVectorSequence == axMax # 1 for all values that are max
wrongPred = labelSequence != pred # look up all wrong predictions {label index}
axErr = ReduceSum (wrongPred, axis=axis) # sum up wrong predictions along competition axis
capErr = axErr >= 1 # only count maximally one error per prediction
err = ReduceMean (capErr, tag=tag) # average into a single number per sample
}.err
ErrorPrediction = ClassificationError # legacy ErrorPrediction = ClassificationError # legacy
# TODO: replace with this (need to deal with topN thing): # TODO: replace with this (need to deal with topN thing):
# (_new will be removed once the change is made) # (_new will be removed once the change is made)
@ -547,6 +561,7 @@ MaxUnpooling(unpoolInput, poolInput, kernelDims, stride=1, autoPadding = true, l
# 2D pooling # 2D pooling
MaxPooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = _AsNodes (input) /*plus the function args*/ ] MaxPooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = _AsNodes (input) /*plus the function args*/ ]
AveragePooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'AveragePooling' ; inputs = _AsNodes (input) /*plus the function args*/ ] AveragePooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'AveragePooling' ; inputs = _AsNodes (input) /*plus the function args*/ ]
ROIPooling (input, ROIs, shape) = new ComputationNode { operation = 'ROIPooling' ; inputs = _AsNodes (input : ROIs) ; outputShape = new TensorShape [ dims = shape ] ; tag='' /*plus the function args*/ }
ColumnwiseCrossProduct = KhatriRaoProduct // deprecated ColumnwiseCrossProduct = KhatriRaoProduct // deprecated
ErrorPrediction = ClassificationError # legacy name ErrorPrediction = ClassificationError # legacy name
Delay = PastValue Delay = PastValue
@ -560,8 +575,6 @@ CosDistance(aVectorSequence, anotherVectorSequence, tag='') = new ComputationNod
CosDistanceWithNegativeSamples(aVectorSequence, anotherVectorSequence, numShifts, numNegSamples, tag='') = new ComputationNode [ operation = 'CosDistanceWithNegativeSamples' ; inputs = _AsNodes (aVectorSequence : anotherVectorSequence : numShifts : numNegSamples) /*plus the function args*/ ] CosDistanceWithNegativeSamples(aVectorSequence, anotherVectorSequence, numShifts, numNegSamples, tag='') = new ComputationNode [ operation = 'CosDistanceWithNegativeSamples' ; inputs = _AsNodes (aVectorSequence : anotherVectorSequence : numShifts : numNegSamples) /*plus the function args*/ ]
Cosine(x, tag='') = new ComputationNode [ operation = 'Cosine' ; inputs = _AsNodes (x) /*plus the function args*/ ] Cosine(x, tag='') = new ComputationNode [ operation = 'Cosine' ; inputs = _AsNodes (x) /*plus the function args*/ ]
CrossEntropy(refProbVectorSequence, outProbVectorSequence, tag='') = new ComputationNode [ operation = 'CrossEntropy' ; inputs = _AsNodes (refProbVectorSequence : outProbVectorSequence) /*plus the function args*/ ] CrossEntropy(refProbVectorSequence, outProbVectorSequence, tag='') = new ComputationNode [ operation = 'CrossEntropy' ; inputs = _AsNodes (refProbVectorSequence : outProbVectorSequence) /*plus the function args*/ ]
# once ReduceLogSum becomes proper C++, CrossEntropyWithSoftmax() will become this:
NewCrossEntropyWithSoftmax (labelSequence, z, tag='') = [ tag1 = tag; out = Minus (ReduceLogSum (z), ReduceSum (labelSequence .* z), tag=tag1) ].out
DiagTimes(diagonalMatrixAsColumnVector, matrix, tag='') = new ComputationNode [ operation = 'DiagTimes' ; inputs = _AsNodes (diagonalMatrixAsColumnVector : matrix) /*plus the function args*/ ] DiagTimes(diagonalMatrixAsColumnVector, matrix, tag='') = new ComputationNode [ operation = 'DiagTimes' ; inputs = _AsNodes (diagonalMatrixAsColumnVector : matrix) /*plus the function args*/ ]
// TODO: DiagTimes = ElementTimes // TODO: DiagTimes = ElementTimes
GatherPacked(indexSequence, sourceData, tag='') = new ComputationNode [ operation = 'GatherPacked' ; inputs = _AsNodes (indexSequence : sourceData) /*plus the function args*/ ] GatherPacked(indexSequence, sourceData, tag='') = new ComputationNode [ operation = 'GatherPacked' ; inputs = _AsNodes (indexSequence : sourceData) /*plus the function args*/ ]

Просмотреть файл

@ -553,9 +553,14 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
// parallel training // parallel training
shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> mpi; shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> mpi;
auto ensureMPIWrapperCleanup = MakeScopeExit(&MPIWrapper::DeleteInstance); auto ensureMPIWrapperCleanup = MakeScopeExit(&MPIWrapper::DeleteInstance);
bool paralleltrain = config(L"parallelTrain", false); // when running under MPI with more than one node, use 'true' as the default value for parallelTrain,
// 'false' otherwise.
bool paralleltrain = config(L"parallelTrain", (MPIWrapper::GetTotalNumberOfMPINodes() > 1));
if (paralleltrain) if (paralleltrain)
{
mpi = MPIWrapper::GetInstance(true /*create*/); mpi = MPIWrapper::GetInstance(true /*create*/);
}
g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false); g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false);
@ -638,7 +643,7 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
static void PrintBanner(int argc, wchar_t* argv[], const string& timestamp) static void PrintBanner(int argc, wchar_t* argv[], const string& timestamp)
{ {
fprintf(stderr, "CNTK 1.7.1+ ("); fprintf(stderr, "CNTK 1.7.2+ (");
#ifdef _GIT_EXIST #ifdef _GIT_EXIST
fprintf(stderr, "%s %.6s, ", _BUILDBRANCH_, _BUILDSHA1_); fprintf(stderr, "%s %.6s, ", _BUILDBRANCH_, _BUILDSHA1_);
#endif #endif
@ -687,9 +692,15 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
// The top-level 'parallelTrain' is a bool, not to be confused with the parallelTrain block inside SGD. // The top-level 'parallelTrain' is a bool, not to be confused with the parallelTrain block inside SGD.
shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> mpi; shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> mpi;
auto ensureMPIWrapperCleanup = MakeScopeExit(&MPIWrapper::DeleteInstance); auto ensureMPIWrapperCleanup = MakeScopeExit(&MPIWrapper::DeleteInstance);
bool paralleltrain = config(L"parallelTrain", "false");
// when running under MPI with more than one node, use 'true' as the default value for parallelTrain,
// 'false' otherwise.
bool paralleltrain = config(L"parallelTrain", (MPIWrapper::GetTotalNumberOfMPINodes() > 1));
if (paralleltrain) if (paralleltrain)
{
mpi = MPIWrapper::GetInstance(true /*create*/); mpi = MPIWrapper::GetInstance(true /*create*/);
}
g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false); g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false);

Просмотреть файл

@ -208,21 +208,21 @@ namespace CNTK
NDShape() {} NDShape() {}
/// ///
/// Contruct a NDShape instance with the specified number of axes and dimensionality in each axis. /// Construct a NDShape instance with the specified rank and dimensionality in each axis.
/// ///
explicit NDShape(size_t numAxes, size_t dimension = InferredDimension) explicit NDShape(size_t numAxes, size_t dimension = InferredDimension)
: m_shapeDims(numAxes, dimension) : m_shapeDims(numAxes, dimension)
{} {}
/// ///
/// Contruct a NDShape instance with specified dimensions. /// Construct a NDShape instance with specified dimensions.
/// ///
NDShape(const std::vector<size_t>& dimensions) NDShape(const std::vector<size_t>& dimensions)
: m_shapeDims(dimensions) : m_shapeDims(dimensions)
{} {}
/// ///
/// Contruct a NDShape instance with specified dimensions. /// Construct a NDShape instance with specified dimensions.
/// ///
NDShape(const std::initializer_list<size_t>& dimensions) NDShape(const std::initializer_list<size_t>& dimensions)
: m_shapeDims(dimensions) : m_shapeDims(dimensions)
@ -234,7 +234,7 @@ namespace CNTK
const std::vector<size_t>& Dimensions() const { return m_shapeDims; } const std::vector<size_t>& Dimensions() const { return m_shapeDims; }
/// ///
/// Returns the number of axes of 'this' shape. /// Returns the rank of 'this' shape.
/// ///
size_t Rank() const { return m_shapeDims.size(); } size_t Rank() const { return m_shapeDims.size(); }
@ -255,7 +255,7 @@ namespace CNTK
{ {
endAxisId = (endAxisId == SIZE_MAX) ? Rank() : endAxisId; endAxisId = (endAxisId == SIZE_MAX) ? Rank() : endAxisId;
if ((endAxisId < beginAxisId) || (endAxisId > Rank())) if ((endAxisId < beginAxisId) || (endAxisId > Rank()))
InvalidArgument("NDShape::SubShape : The specified endAxisId (%d) cannot exceed the number of axes (%d) of 'this' NDShape and must be >= than the specified beginAxisId (%d)", (int)endAxisId, (int)Rank(), (int)beginAxisId); InvalidArgument("NDShape::SubShape : The specified endAxisId (%d) cannot exceed the rank (%d) of 'this' NDShape and must be >= than the specified beginAxisId (%d)", (int)endAxisId, (int)Rank(), (int)beginAxisId);
std::vector<size_t> subShapeDims(m_shapeDims.begin() + beginAxisId, m_shapeDims.begin() + endAxisId); std::vector<size_t> subShapeDims(m_shapeDims.begin() + beginAxisId, m_shapeDims.begin() + endAxisId);
return subShapeDims; return subShapeDims;
@ -343,6 +343,7 @@ namespace CNTK
friend class CompositeFunction; friend class CompositeFunction;
friend class LearnerBase; friend class LearnerBase;
friend class Variable; friend class Variable;
friend class PackedValue;
template <typename T, typename ...CtorArgTypes> template <typename T, typename ...CtorArgTypes>
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs); friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
@ -593,6 +594,13 @@ namespace CNTK
std::shared_ptr<void> m_tensorView; // Microsoft::MSR::CNTK::TensorView<ElemType>* std::shared_ptr<void> m_tensorView; // Microsoft::MSR::CNTK::TensorView<ElemType>*
}; };
enum class MaskKind : char
{
Invalid = 0,
Valid = 1,
SequenceBegin = 2,
};
/// ///
/// Denotes a multi-dimensional mask used for specifying specific sections of a NDArrayView object as masked/invalid. /// Denotes a multi-dimensional mask used for specifying specific sections of a NDArrayView object as masked/invalid.
/// This type denotes a view and there may be multiple simultaneous views of the data underlying a NDMask instance. /// This type denotes a view and there may be multiple simultaneous views of the data underlying a NDMask instance.
@ -603,6 +611,7 @@ namespace CNTK
template <typename T, typename ...CtorArgTypes> template <typename T, typename ...CtorArgTypes>
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs); friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
public: public:
/// ///
/// Construct a new Mask object of specified shape /// Construct a new Mask object of specified shape
@ -615,12 +624,32 @@ namespace CNTK
CNTK_API ~NDMask(); CNTK_API ~NDMask();
/// ///
/// Mask out the specified sub-section of 'this' mask /// Mask out (i.e. mark Invalid) the specified sub-section of 'this' mask
/// ///
CNTK_API void MaskSection(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape); void InvalidateSection(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape)
{
MarkSectionAs(sectionOffset, sectionShape, MaskKind::Invalid);
}
/// ///
/// Clear the mask; i.e. unmask all currently masked values /// Mark the specified position in 'this' mask as sequence begin
///
void MarkSequenceBegin(const std::vector<size_t>& offset)
{
NDShape sectionShape = NDShape(Shape().Rank(), 1);
MarkSectionAs(offset, sectionShape, MaskKind::SequenceBegin);
}
///
/// Mark the specified sub-section of 'this' mask as sequence begin
///
void MarkSequenceBegin(const std::vector<size_t>& offset, const NDShape& sectionShape)
{
MarkSectionAs(offset, sectionShape, MaskKind::SequenceBegin);
}
///
/// Clear the mask; i.e. unmask or mark Valid all currently masked (i.e. Invalid) values
/// ///
CNTK_API void Clear(); CNTK_API void Clear();
@ -642,12 +671,20 @@ namespace CNTK
/// ///
/// Returns a read-only pointer to the data buffer underlying 'this' Mask object /// Returns a read-only pointer to the data buffer underlying 'this' Mask object
/// ///
CNTK_API const char* DataBuffer() const; CNTK_API const MaskKind* DataBuffer() const;
///
/// Creates a new NDArrayView with newly allocated storage on the specified device and copies 'this' view's contents into the newly allocated view.
///
CNTK_API NDMaskPtr DeepClone(const DeviceDescriptor& device) const;
/// ///
/// Creates a new NDMask with newly allocated storage on the same device as 'this' mask and copies 'this' mask's contents into the newly allocated mask. /// Creates a new NDMask with newly allocated storage on the same device as 'this' mask and copies 'this' mask's contents into the newly allocated mask.
/// ///
CNTK_API NDMaskPtr DeepClone() const; NDMaskPtr DeepClone() const
{
return DeepClone(this->Device());
}
/// ///
/// Creates a new NDMask which is an alias of 'this' mask. /// Creates a new NDMask which is an alias of 'this' mask.
@ -662,6 +699,9 @@ namespace CNTK
private: private:
NDMask(const NDShape& shape, Microsoft::MSR::CNTK::Matrix<char>* matrix); NDMask(const NDShape& shape, Microsoft::MSR::CNTK::Matrix<char>* matrix);
CNTK_API void MarkSectionAs(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape, MaskKind maskKind);
Microsoft::MSR::CNTK::Matrix<char>* GetMatrix() const; Microsoft::MSR::CNTK::Matrix<char>* GetMatrix() const;
// Disallow copy and move construction and assignment // Disallow copy and move construction and assignment
@ -710,41 +750,82 @@ namespace CNTK
/// ///
/// Destruct 'this' Value object. /// Destruct 'this' Value object.
/// ///
CNTK_API virtual ~Value(); virtual ~Value();
///
/// Returns the descriptor of the device that 'this' Value resides on
///
virtual DeviceDescriptor Device() const { return m_data->Device(); }
///
/// Returns the data type of 'this' Value's contents.
///
virtual DataType GetDataType() const { return m_data->GetDataType(); }
///
/// Returns the storage format of 'this' Value.
///
virtual StorageFormat GetStorageFormat() const { return m_data->GetStorageFormat(); }
///
/// Returns the shape 'this' Value.
///
virtual const NDShape& Shape() const { return m_data->Shape(); }
///
/// Returns a boolean indicating if 'this' Value contains data in sparse storage format.
///
bool IsSparse() const
{
return (GetStorageFormat() != StorageFormat::Dense);
}
///
/// Returns a boolean indicating if 'this' Value is read-only.
///
virtual bool IsReadOnly() const { return m_data->IsReadOnly(); }
///
/// Returns the number of masked/invalid values
///
virtual size_t MaskedCount() const
{
return m_mask ? m_mask->MaskedCount() : 0;
}
/// ///
/// Returns the NDArrayView object corresponding to the data contents of 'this value object. /// Returns the NDArrayView object corresponding to the data contents of 'this value object.
/// ///
CNTK_API virtual NDArrayViewPtr Data() const; virtual NDArrayViewPtr Data() const;
/// ///
/// Returns the NDMask object corresponding to the mask associated with 'this value object. /// Returns the NDMask object corresponding to the mask associated with 'this value object.
/// ///
CNTK_API virtual NDMaskPtr Mask() const; virtual NDMaskPtr Mask() const;
/// ///
/// Creates a new Value with newly allocated storage on the same device as 'this' Value and copies 'this' Value's contents into the newly allocated Value. /// Creates a new Value with newly allocated storage on the same device as 'this' Value and copies 'this' Value's contents into the newly allocated Value.
/// ///
CNTK_API virtual ValuePtr DeepClone(bool readOnly = false) const; virtual ValuePtr DeepClone(bool readOnly = false) const;
/// ///
/// Creates a new Value which is an alias of 'this' Value. /// Creates a new Value which is an alias of 'this' Value.
/// ///
CNTK_API virtual ValuePtr Alias(bool readOnly = false) const; virtual ValuePtr Alias(bool readOnly = false) const;
/// ///
/// Copies the contents of the 'source' Value to 'this' Value. /// Copies the contents of the 'source' Value to 'this' Value.
/// The shapes of the 'source' Value's data and mask must be identical to 'this' Value's data and mask. /// The shapes of the 'source' Value's data and mask must be identical to 'this' Value's data and mask.
/// ///
CNTK_API virtual void CopyFrom(const Value& source); virtual void CopyFrom(const Value& source);
private: private:
// Disallow copy and move construction and assignment // Disallow copy and move construction and assignment
Value(const Value&) = delete; Value& operator=(const Value&) = delete; Value(Value&&) = delete; Value& operator=(Value&&) = delete; Value(const Value&) = delete; Value& operator=(const Value&) = delete; Value(Value&&) = delete; Value& operator=(Value&&) = delete;
private: protected:
NDArrayViewPtr m_data; mutable NDArrayViewPtr m_data;
NDMaskPtr m_mask; mutable NDMaskPtr m_mask;
}; };
/// ///
@ -758,6 +839,7 @@ namespace CNTK
{ {
CNTK_API static const std::wstring StaticAxisNamePrefix; CNTK_API static const std::wstring StaticAxisNamePrefix;
static const size_t SentinelStaticAxisIndexValueForDynamicAxes = SIZE_MAX; static const size_t SentinelStaticAxisIndexValueForDynamicAxes = SIZE_MAX;
static const size_t SentinelStaticAxisIndexValueForAllStaticAxes = SIZE_MAX - 1;
class UniqueDynamicAxesNames class UniqueDynamicAxesNames
{ {
@ -839,15 +921,20 @@ namespace CNTK
} }
/// ///
/// Static Axis object representing the default dynamic axis. /// Axis object representing the default dynamic axis.
/// ///
CNTK_API static const Axis& DefaultDynamicAxis(); CNTK_API static const Axis& DefaultDynamicAxis();
/// ///
/// Static Axis object representing the batch axis. /// Axis object representing the batch axis.
/// ///
CNTK_API static const Axis& DefaultBatchAxis(); CNTK_API static const Axis& DefaultBatchAxis();
///
/// Axis object representing all the static axes of an operand
///
CNTK_API static const Axis& AllStaticAxes();
/// ///
/// Returns a new unique Dynamic axis /// Returns a new unique Dynamic axis
/// ///
@ -1282,6 +1369,8 @@ namespace CNTK
return Contains(key.c_str()); return Contains(key.c_str());
} }
CNTK_API void Add(const Dictionary& other);
CNTK_API bool operator==(const Dictionary& other) const; CNTK_API bool operator==(const Dictionary& other) const;
CNTK_API bool operator!=(const Dictionary& other) const; CNTK_API bool operator!=(const Dictionary& other) const;
@ -1335,7 +1424,7 @@ namespace CNTK
typedef Dictionary ParameterInitializer; typedef Dictionary ParameterInitializer;
// Forward declarations // Forward declarations
inline Variable PlaceholderVariable(const NDShape& shape, const std::vector<Axis>& dynamicAxes = Axis::DefaultInputVariableDynamicAxes); inline Variable PlaceholderVariable(const NDShape& shape, const std::wstring& name, const std::vector<Axis>& dynamicAxes = Axis::DefaultInputVariableDynamicAxes);
inline Variable InputVariable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes = Axis::DefaultInputVariableDynamicAxes); inline Variable InputVariable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes = Axis::DefaultInputVariableDynamicAxes);
inline Variable OutputVariable(const NDShape& shape, CNTK::DataType dataType, Function* ownerFunction, const std::vector<Axis>& dynamicAxes, const std::wstring& name = L""); inline Variable OutputVariable(const NDShape& shape, CNTK::DataType dataType, Function* ownerFunction, const std::vector<Axis>& dynamicAxes, const std::wstring& name = L"");
@ -1362,7 +1451,7 @@ namespace CNTK
#ifndef SWIG #ifndef SWIG
private: private:
friend inline Variable PlaceholderVariable(const NDShape& shape, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/); friend inline Variable PlaceholderVariable(const NDShape& shape, const std::wstring& name, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/);
friend inline Variable InputVariable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/); friend inline Variable InputVariable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/);
friend inline Variable OutputVariable(const NDShape& shape, CNTK::DataType dataType, Function* ownerFunction, const std::vector<Axis>& dynamicAxes, const std::wstring& name /*= L""*/); friend inline Variable OutputVariable(const NDShape& shape, CNTK::DataType dataType, Function* ownerFunction, const std::vector<Axis>& dynamicAxes, const std::wstring& name /*= L""*/);
#endif #endif
@ -1481,6 +1570,7 @@ namespace CNTK
: m_dataFields(MakeSharedObject<VariableFields>(shape, varType, dataType, ownerFunction, value, needsGradient, dynamicAxes, isSparse, name, uid)) : m_dataFields(MakeSharedObject<VariableFields>(shape, varType, dataType, ownerFunction, value, needsGradient, dynamicAxes, isSparse, name, uid))
{} {}
private:
Variable Clone() const Variable Clone() const
{ {
Variable clonedVariable; Variable clonedVariable;
@ -1544,17 +1634,7 @@ namespace CNTK
Internal::GenerateUid(m_varKind)); Internal::GenerateUid(m_varKind));
} }
void SetValueInitialization(const ParameterInitializer& initializationConfig, const DeviceDescriptor& device) CNTK_API void SetValueInitialization(const ParameterInitializer& initializationConfig, const DeviceDescriptor& device);
{
if (m_value != nullptr)
LogicError("Value initialization config cannot be set if a value already exists");
assert(!m_valueInitializer);
assert(!m_valueInitializationDevice);
m_valueInitializer.reset(new ParameterInitializer(initializationConfig));
m_valueInitializationDevice.reset(new DeviceDescriptor(device));
}
private: private:
// Disallow copy and move construction and assignment // Disallow copy and move construction and assignment
@ -1580,10 +1660,19 @@ namespace CNTK
/// Create a Placeholder variable to be used as a temporary/placeholder input to a Function. /// Create a Placeholder variable to be used as a temporary/placeholder input to a Function.
/// All placeholder inputs of a Function must be replaced with non-placeholder Variables before Forward evaluation of the Function. /// All placeholder inputs of a Function must be replaced with non-placeholder Variables before Forward evaluation of the Function.
/// ///
inline Variable PlaceholderVariable(const NDShape& shape, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/) inline Variable PlaceholderVariable(const NDShape& shape, const std::wstring& name, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/)
{ {
auto varKind = VariableKind::Placeholder; auto varKind = VariableKind::Placeholder;
return Variable(shape, varKind, DataType::Unknown, nullptr, false, dynamicAxes, L"", Internal::GenerateUid(varKind)); return Variable(shape, varKind, DataType::Unknown, nullptr, false, dynamicAxes, name, Internal::GenerateUid(varKind));
}
///
/// Create a Placeholder variable to be used as a temporary/placeholder input to a Function.
/// All placeholder inputs of a Function must be replaced with non-placeholder Variables before Forward evaluation of the Function.
///
inline Variable PlaceholderVariable(const NDShape& shape, const std::vector<Axis>& dynamicAxes = Axis::DefaultInputVariableDynamicAxes)
{
return PlaceholderVariable(shape, L"", dynamicAxes);
} }
/// ///
@ -1765,7 +1854,7 @@ namespace CNTK
public: public:
/// ///
/// Contruct a Constant whose initial contents are a copy of the specified value /// Construct a Constant whose initial contents are a copy of the specified value
/// ///
Constant(const NDArrayViewPtr& value, const std::wstring& name = L"") Constant(const NDArrayViewPtr& value, const std::wstring& name = L"")
: Constant(value, name, Internal::GenerateUid(VariableKind::Constant)) : Constant(value, name, Internal::GenerateUid(VariableKind::Constant))
@ -1946,7 +2035,7 @@ namespace CNTK
/// and the user is responsible for ensuring that the contents of the inputs and outputs are unchanged until after any uses of the BackPropState instance /// and the user is responsible for ensuring that the contents of the inputs and outputs are unchanged until after any uses of the BackPropState instance
/// for backpropagating gradients through this function. /// for backpropagating gradients through this function.
/// ///
CNTK_API virtual BackPropStatePtr Forward(const std::unordered_map<Variable, ValuePtr>& arguments, virtual BackPropStatePtr Forward(const std::unordered_map<Variable, ValuePtr>& arguments,
std::unordered_map<Variable, ValuePtr>& outputs, std::unordered_map<Variable, ValuePtr>& outputs,
const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice(), const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice(),
const std::unordered_set<Variable>& outputsToRetainBackwardStateFor = {}) = 0; const std::unordered_set<Variable>& outputsToRetainBackwardStateFor = {}) = 0;
@ -1960,10 +2049,15 @@ namespace CNTK
/// The 'state' parameter is an instance of an BackPropState instance obtained from a previous call to the Forward method on 'this; Function for the /// The 'state' parameter is an instance of an BackPropState instance obtained from a previous call to the Forward method on 'this; Function for the
/// computation that this gradient backpropagation corresponds to. /// computation that this gradient backpropagation corresponds to.
/// ///
CNTK_API virtual void Backward(const BackPropStatePtr& state, virtual void Backward(const BackPropStatePtr& state,
const std::unordered_map<Variable, ValuePtr>& rootGradientValues, const std::unordered_map<Variable, ValuePtr>& rootGradientValues,
std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) = 0; std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) = 0;
///
/// Returns the name of the operation that this Function denotes
///
virtual const std::wstring& OpName() = 0;
public: public:
// Optional overrides // Optional overrides
@ -2074,6 +2168,11 @@ namespace CNTK
/// ///
CNTK_API FunctionPtr ReplacePlaceholder(const Variable& placeholderReplacement); CNTK_API FunctionPtr ReplacePlaceholder(const Variable& placeholderReplacement);
///
/// Restore the models parameters from a saved model file
///
CNTK_API void RestoreFromLegacyModel(const std::wstring& modelFilePath);
private: private:
template <typename VariableType, typename FilterFunction> template <typename VariableType, typename FilterFunction>
@ -2144,9 +2243,6 @@ namespace CNTK
} }
} }
private:
void RestoreFromLegacyModel(const std::wstring& modelFilePath);
private: private:
std::vector<Variable> m_inputs; std::vector<Variable> m_inputs;
@ -2501,7 +2597,7 @@ namespace CNTK
/// E.g. When creating a classification model, typically the CrossEntropy loss Function and the ClassificationError Function comprise the two roots /// E.g. When creating a classification model, typically the CrossEntropy loss Function and the ClassificationError Function comprise the two roots
/// of the computation graph which can be "Combine"d to create a single Function with 2 outputs; viz. CrossEntropy loss and ClassificationError output. /// of the computation graph which can be "Combine"d to create a single Function with 2 outputs; viz. CrossEntropy loss and ClassificationError output.
/// ///
CNTK_API FunctionPtr Combine(const std::vector<FunctionPtr>& operands, const std::wstring& name = L""); CNTK_API FunctionPtr Combine(const std::vector<Variable>& operands, const std::wstring& name = L"");
namespace Sequence namespace Sequence
{ {
@ -2535,12 +2631,14 @@ namespace CNTK
/// ///
class Learner : public std::enable_shared_from_this<Learner> class Learner : public std::enable_shared_from_this<Learner>
{ {
static const std::wstring LearningRateAttributeName;
public: public:
// //
// Method to update the parameters associated with this learner. By returning false, this method indicates that // Method to update the parameters associated with this learner. By returning false, this method indicates that
// learning has stopped for all of the parameters associated with this learner // learning has stopped for all of the parameters associated with this learner
// //
CNTK_API virtual bool Update(const std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount) = 0; virtual bool Update(const std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount) = 0;
/// ///
/// Returns the set of parameters associated with this learner. /// Returns the set of parameters associated with this learner.
@ -2552,32 +2650,50 @@ namespace CNTK
/// ///
// TODO: move the following two methods into ISerializable interface, make // TODO: move the following two methods into ISerializable interface, make
// Learner (and all other entities that need checkpointing capability) implement it. // Learner (and all other entities that need checkpointing capability) implement it.
CNTK_API virtual Dictionary GetCheckpointState() const { return Dictionary(); } virtual Dictionary GetCheckpointState() const
{
Dictionary baseCheckpointState;
baseCheckpointState[LearningRateAttributeName] = m_learningRate;
return baseCheckpointState;
}
/// ///
/// Optionally overridable method to restore the learner's state from a previous checkpoint. /// Optionally overridable method to restore the learner's state from a previous checkpoint.
/// ///
CNTK_API virtual void RestoreFromCheckpoint(const Dictionary& /*checkpoint*/) {} virtual void RestoreFromCheckpoint(const Dictionary& checkpoint)
{
if (checkpoint.Contains(LearningRateAttributeName))
m_learningRate = checkpoint[LearningRateAttributeName].Value<double>();
}
/// ///
/// Destruct this Learner. /// Destruct this Learner.
/// ///
virtual ~Learner() {} virtual ~Learner() {}
virtual void ResetLearningRate(double learningRate) { m_learningRate = learningRate; }
virtual double LearningRate() const { return m_learningRate; }
protected: protected:
Learner(const std::vector<Parameter>& parameters) Learner(const std::vector<Parameter>& parameters, double learningRate)
: m_parameters(parameters.begin(), parameters.end()) : m_parameters(parameters.begin(), parameters.end()), m_learningRate(learningRate)
{} {}
std::unordered_set<Parameter> m_parameters; std::unordered_set<Parameter> m_parameters;
double m_learningRate;
}; };
/// ///
/// A collection of key-value pairs that represents training parameter schedule in /// A collection of key-value pairs that represents a training parameter schedule in
/// terms of the number of processed samples. /// terms of the number of processed samples (e.g., learning rate and momentum schedules).
/// This class provides a number of convenience constructors to allow easy conversion /// This class is designed to simplify Learner's factory methods and provides a number of
/// from a single value, a vector of values and a list of pairs to the training schedule. /// convenience constructors to allow easy conversion from a single value, a vector of values
/// and a list of pairs to the training schedule. For example, a learning rate schedule
/// { { 10, 0.5 }, { 100, 0.3 }, { 20, 0.2 } } indicates that the rate of 0.5 should be
/// used for the first 10 units (equivalently, samples if the default unit = 1 is used)
/// followed by 0.3 for the next 100 units, and then 0.2 for the remaining 20 units or
/// until the end of training if it takes longer.
/// ///
template <typename T> template <typename T>
class TrainingParameterSchedule class TrainingParameterSchedule
@ -2586,31 +2702,14 @@ namespace CNTK
/// ///
/// Create a schedule with a constant parameter value. /// Create a schedule with a constant parameter value.
/// ///
TrainingParameterSchedule(T value) CNTK_API TrainingParameterSchedule(T value);
: m_schedule({ std::make_pair(0, value) }), m_unit(1)
{}
/// ///
/// Create a schedule where the parameter changes its value every 'unit' samples: /// Create a schedule where the parameter changes its value every 'unit' samples:
/// schedule[0] is used for the first 'unit' samples, schedule[1] -- for the second, /// schedule[0] is used for the first 'unit' samples, schedule[1] -- for the second,
/// and so on. The last value is then used repeatedly until the end of training. /// and so on. The last value is then used repeatedly until the end of training.
/// ///
TrainingParameterSchedule(const std::vector<T>& schedule, size_t unit = 1) CNTK_API TrainingParameterSchedule(const std::vector<T>& schedule, size_t unit = 1);
: m_unit(unit)
{
// TODO: 0 will be used to mean "the entire sweep"
if (unit == 0)
RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
if (schedule.size() == 0)
RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
size_t i = 1;
for (const auto& value : schedule)
{
m_schedule[m_unit * i++] = value;
}
}
/// ///
/// Create a schedule using the list of key-value pairs, where the key specifies /// Create a schedule using the list of key-value pairs, where the key specifies
@ -2621,74 +2720,104 @@ namespace CNTK
/// '0.1' is used for the second 200 samples, after which the values is switched /// '0.1' is used for the second 200 samples, after which the values is switched
/// to '0.005'. /// to '0.005'.
/// ///
TrainingParameterSchedule(const std::initializer_list<std::pair<const size_t, T>>& schedule, size_t unit = 1) CNTK_API TrainingParameterSchedule(const std::vector<std::pair<size_t, T>>& schedule, size_t unit = 1);
: m_unit(unit)
{
// TODO: 0 will be used to mean "the entire sweep"
if (unit == 0)
RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
if (schedule.size() == 0)
RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
size_t i = 0;
for (const auto& it : schedule)
{
if (it.first == 0)
RuntimeError("TrainingParameterSchedule::constructor : unit count cannot be 0.");
i += it.first;
m_schedule[m_unit * i] = it.second;
}
}
/// ///
/// Returns a value corresponding to the absolute sample count from the beginning of training. /// Returns a value corresponding to the absolute sample count from the beginning of training.
/// ///
CNTK_API const T& operator[](size_t samleCount) const; CNTK_API virtual const T& operator[](size_t sampleCount) const;
CNTK_API virtual ~TrainingParameterSchedule();
CNTK_API TrainingParameterSchedule(const TrainingParameterSchedule<T>&);
CNTK_API TrainingParameterSchedule(TrainingParameterSchedule<T>&&);
CNTK_API TrainingParameterSchedule<T>& operator=(const TrainingParameterSchedule<T>&);
CNTK_API TrainingParameterSchedule<T>& operator=(TrainingParameterSchedule<T>&&);
private: private:
CNTK_API void ConstructSchedule(const std::vector<std::pair<size_t, T>>& schedule);
protected:
std::map<size_t, T> m_schedule; std::map<size_t, T> m_schedule;
size_t m_unit; size_t m_unit;
}; };
typedef TrainingParameterSchedule<double> LearningRatesPerSample; typedef TrainingParameterSchedule<double> LearningRatesPerSample;
typedef TrainingParameterSchedule<double> MomentumsPerSample; typedef TrainingParameterSchedule<double> MomentumValuesPerSample;
///
/// This class allows to specify momentum as time constant in place of momentum per sample in
/// all of Learners factory methods. The specified values are then automatically converted into
/// per sample values.
///
class MomentumValuesAsTimeConstants: public MomentumValuesPerSample
{
public:
MomentumValuesAsTimeConstants(double value)
: MomentumValuesPerSample(value)
{
ConvertToPerSampleValues();
}
MomentumValuesAsTimeConstants(const std::vector<double>& schedule, size_t unit = 1)
: MomentumValuesPerSample(schedule, unit)
{
ConvertToPerSampleValues();
}
MomentumValuesAsTimeConstants(const std::vector<std::pair<size_t, double>>& schedule, size_t unit = 1)
: MomentumValuesPerSample(schedule, unit)
{
ConvertToPerSampleValues();
}
private:
CNTK_API void ConvertToPerSampleValues();
};
/// A collection of additional options that affect parameter updates and
/// are applicable for all standard learners
struct AdditionalLearningOptions
{
double l1RegularizationWeight = 0.0;
double l2RegularizationWeight = 0.0;
double gaussianNoiseInjectionStdDev = 0.0;
double gradientClippingThresholdPerSample = std::numeric_limits<double>::infinity();
bool gradientClippingWithTruncation = true;
};
/// ///
/// Create an instance of the CNTK built-in SGD learner. /// Create an instance of the CNTK built-in SGD learner.
/// ///
CNTK_API LearnerPtr SGDLearner(const std::vector<Parameter>& parameters, CNTK_API LearnerPtr SGDLearner(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
bool gradientClippingWithTruncation = true);
/// ///
/// Create an instance of the CNTK built-in Momentum SGD learner. /// Create an instance of the CNTK built-in Momentum SGD learner.
/// ///
CNTK_API LearnerPtr MomentumSGDLearner(const std::vector<Parameter>& parameters, CNTK_API LearnerPtr MomentumSGDLearner(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums, const MomentumValuesPerSample& momentumValues,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
bool gradientClippingWithTruncation = true);
/// ///
/// Create an instance of the CNTK built-in Nesterov's accelerated SGD learner. /// Create an instance of the CNTK built-in Nesterov's accelerated SGD learner.
/// ///
CNTK_API LearnerPtr NesterovLearner(const std::vector<Parameter>& parameters, CNTK_API LearnerPtr NesterovLearner(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums, const MomentumValuesPerSample& momentumValues,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
bool gradientClippingWithTruncation = true);
/// ///
/// Create an instance of the CNTK built-in FSAdaGrad (improved AdaGrad) learner. /// Create an instance of the CNTK built-in FSAdaGrad (improved AdaGrad) learner.
/// ///
CNTK_API LearnerPtr FSAdaGradLearner(const std::vector<Parameter>& parameters, CNTK_API LearnerPtr FSAdaGradLearner(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums, const MomentumValuesPerSample& momentumValues,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), const double targetAdagradAvDenom = 0.0025, // 1/400 magic constant
bool gradientClippingWithTruncation = true); const size_t adagradT = 2 * 3600 * 100,
AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
/// ///
/// Create an instance of the CNTK built-in AdaGrad learner. /// Create an instance of the CNTK built-in AdaGrad learner.
@ -2696,8 +2825,7 @@ namespace CNTK
CNTK_API LearnerPtr AdaGradLearner(const std::vector<Parameter>& parameters, CNTK_API LearnerPtr AdaGradLearner(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
bool needAveMultiplier = true, bool needAveMultiplier = true,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
bool gradientClippingWithTruncation = true);
/// ///
/// Create an instance of the CNTK built-in RMSProp learner. /// Create an instance of the CNTK built-in RMSProp learner.
@ -2710,8 +2838,7 @@ namespace CNTK
double max, double max,
double min, double min,
bool needAveMultiplier = true, bool needAveMultiplier = true,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
bool gradientClippingWithTruncation = true);
/// ///
/// Trainer is the top-level abstraction responsible for the orchestration of the training of a model /// Trainer is the top-level abstraction responsible for the orchestration of the training of a model
@ -2805,7 +2932,9 @@ namespace CNTK
FunctionPtr m_combinedTrainingFunction; FunctionPtr m_combinedTrainingFunction;
FunctionPtr m_model; FunctionPtr m_model;
FunctionPtr m_lossFunction; FunctionPtr m_lossFunction;
FunctionPtr m_aggregatedLossFunction;
FunctionPtr m_evaluationFunction; FunctionPtr m_evaluationFunction;
FunctionPtr m_aggregatedEvaluationFunction;
std::unordered_set<LearnerPtr> m_parameterLearners; std::unordered_set<LearnerPtr> m_parameterLearners;
@ -2930,11 +3059,14 @@ namespace CNTK
/// ///
/// Instantiate the CNTK built-in test format minibatch source /// Instantiate the CNTK built-in test format minibatch source
/// ///
inline MinibatchSourcePtr TextFormatMinibatchSource(const std::wstring& dataFilePath, const std::vector<StreamConfiguration>& streamConfigs, size_t epochSize = SIZE_MAX) inline MinibatchSourcePtr TextFormatMinibatchSource(const std::wstring& dataFilePath, const std::vector<StreamConfiguration>& streamConfigs, size_t epochSize = SIZE_MAX, bool randomize = true)
{ {
CNTK::Dictionary minibatchSourceConfiguration; CNTK::Dictionary minibatchSourceConfiguration;
minibatchSourceConfiguration[L"epochSize"] = epochSize; minibatchSourceConfiguration[L"epochSize"] = epochSize;
if (randomize)
minibatchSourceConfiguration[L"randomize"] = true;
CNTK::Dictionary deserializerConfiguration; CNTK::Dictionary deserializerConfiguration;
deserializerConfiguration[L"type"] = L"CNTKTextFormatDeserializer"; deserializerConfiguration[L"type"] = L"CNTKTextFormatDeserializer";
deserializerConfiguration[L"file"] = dataFilePath; deserializerConfiguration[L"file"] = dataFilePath;
@ -2968,4 +3100,17 @@ namespace CNTK
CNTK_API void ComputeInputPerDimMeansAndInvStdDevs(const MinibatchSourcePtr& minibatchSource, CNTK_API void ComputeInputPerDimMeansAndInvStdDevs(const MinibatchSourcePtr& minibatchSource,
std::unordered_map<StreamInformation, std::pair<NDArrayViewPtr, NDArrayViewPtr>>& computedMeanAndVariances, std::unordered_map<StreamInformation, std::pair<NDArrayViewPtr, NDArrayViewPtr>>& computedMeanAndVariances,
const DeviceDescriptor& device = DeviceDescriptor::CPUDevice()); const DeviceDescriptor& device = DeviceDescriptor::CPUDevice());
///
/// Set the process-wide setting for maximum number of CPU threads to be used by any individual compute operation
/// Note that this is a per compute operation limit and if the user performs multiple compute operations concurrently
/// by launching multiple threads and performing a compute operation inside, it will result in each of those concurrently
/// executing operations to use the specified number of CPU threads limit.
///
CNTK_API void SetMaxNumCPUThreads(size_t numCPUThreads);
///
/// Returns the current process-wide setting for maximum number of CPU threads to be used by any individual compute operation
///
CNTK_API size_t GetMaxNumCPUThreads();
} }

Просмотреть файл

@ -186,9 +186,6 @@ namespace CNTK
namespace Internal namespace Internal
{ {
// Create a new Function instance which just passes through specified list of 'operands'.
CNTK_API FunctionPtr Combine(const std::vector<Variable>& operands, const std::wstring& name = L"");
CNTK_API FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name = L""); CNTK_API FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name = L"");
CNTK_API FunctionPtr PackedIndex(const Variable& operand, const Variable& index, const std::wstring& name = L""); CNTK_API FunctionPtr PackedIndex(const Variable& operand, const Variable& index, const std::wstring& name = L"");
CNTK_API FunctionPtr GatherPacked(const Variable& operand, const Variable& packedIndex, const std::wstring& name = L""); CNTK_API FunctionPtr GatherPacked(const Variable& operand, const Variable& packedIndex, const std::wstring& name = L"");
@ -202,10 +199,15 @@ namespace CNTK
CNTK_API size_t NewUniqueId(); CNTK_API size_t NewUniqueId();
// Internal hooks for testing and higher-level bindings
// These should not be directly called by C++ API users
CNTK_API void EnableReversingTensorShapesInErrorMessages(); CNTK_API void EnableReversingTensorShapesInErrorMessages();
bool IsReversingTensorShapesInErrorMessagesEnabled(); bool IsReversingTensorShapesInErrorMessagesEnabled();
CNTK_API void AlwaysAllowSettingDefaultDevice(); CNTK_API void AlwaysAllowSettingDefaultDevice();
bool IsSettingDefaultDeviceAlwaysAllowed(); bool IsSettingDefaultDeviceAlwaysAllowed();
CNTK_API void DisableAutomaticUnpackingOfPackedValues();
bool IsAutomaticUnpackingOfPackedValuesDisabled();
} }
} }

Просмотреть файл

@ -36,8 +36,11 @@ namespace CNTK
if (node->IsLeaf()) if (node->IsLeaf())
{ {
std::wstring varUid, varName;
if (node->Is<InputValueBase<ElementType>>()) if (node->Is<InputValueBase<ElementType>>())
{ {
std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), VariableKind::Input);
bool isSparse = node->Is<SparseInputValue<ElementType>>(); bool isSparse = node->Is<SparseInputValue<ElementType>>();
if (node->HasMBLayout()) if (node->HasMBLayout())
{ {
@ -45,12 +48,12 @@ namespace CNTK
auto inputNodeInternalDynamicAxisName = node->GetMBLayout()->GetAxisName(); auto inputNodeInternalDynamicAxisName = node->GetMBLayout()->GetAxisName();
std::vector<Axis> inputVarDynamicAxes = DynamicAxesFromInternalDynamicAxisName(inputNodeInternalDynamicAxisName); std::vector<Axis> inputVarDynamicAxes = DynamicAxesFromInternalDynamicAxisName(inputNodeInternalDynamicAxisName);
var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, node->NodeName(), inputVarDynamicAxes, node->NodeName()); var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, varName, inputVarDynamicAxes, varUid);
} }
else else
{ {
// TODO: Allow creating inputs without a dynamic axis // TODO: Allow creating inputs without a dynamic axis
LogicError("Found InputNode with no dynamic axis which is currently unsupported"); LogicError("Found InputNode with no dynamic axes which is currently unsupported");
} }
} }
else if (node->Is<LearnableParameter<ElementType>>()) else if (node->Is<LearnableParameter<ElementType>>())
@ -60,9 +63,15 @@ namespace CNTK
auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorViewShape(node->GetSampleLayout())); auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorViewShape(node->GetSampleLayout()));
NDArrayViewPtr value = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), varShape, false, tensorView); NDArrayViewPtr value = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), varShape, false, tensorView);
if (isConstant) if (isConstant)
var = Constant(value, node->NodeName(), node->NodeName()); {
std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), VariableKind::Constant);
var = Constant(value, varName, varUid);
}
else else
var = Parameter(value, node->NodeName(), node->NodeName()); {
std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), VariableKind::Parameter);
var = Parameter(value, varName, varUid);
}
} }
else else
LogicError("CNTK::LoadLegacyModel: Unsupported legacy CNTK node named '%S'", node->NodeName().c_str()); LogicError("CNTK::LoadLegacyModel: Unsupported legacy CNTK node named '%S'", node->NodeName().c_str());
@ -299,17 +308,17 @@ namespace CNTK
std::unordered_map<ComputationNodeBasePtr, Variable> nodeToVariableMap; std::unordered_map<ComputationNodeBasePtr, Variable> nodeToVariableMap;
std::unordered_map<Variable, Variable> placeholderReplacements; std::unordered_map<Variable, Variable> placeholderReplacements;
std::unordered_set<FunctionPtr> allPrimitiveFunctions; std::unordered_set<FunctionPtr> allPrimitiveFunctions;
std::vector<FunctionPtr> rootFunctions; std::vector<Variable> rootVariables;
auto& networkRoots = net->RootNodes(); auto& networkRoots = net->RootNodes();
for (auto& rootNode : networkRoots) for (auto& rootNode : networkRoots)
{ {
if (rootNode->IsLeaf()) if (rootNode->IsLeaf())
continue; continue;
rootFunctions.push_back(GetVariable<ElementType>(rootNode, nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions).Owner()); rootVariables.push_back(GetVariable<ElementType>(rootNode, nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions).Owner());
} }
auto rootComposite = Combine(rootFunctions); auto rootComposite = Combine(rootVariables);
rootComposite->ReplacePlaceholders(placeholderReplacements); rootComposite->ReplacePlaceholders(placeholderReplacements);
return rootComposite; return rootComposite;
@ -350,8 +359,5 @@ namespace CNTK
} }
computationNetwork->Save(modelFile); computationNetwork->Save(modelFile);
if (!compositeFunction->NetworkMatricesAllocated())
compositeFunction->PurgeComputationNetwork();
} }
} }

Просмотреть файл

@ -5,9 +5,12 @@
#include "stdafx.h" #include "stdafx.h"
#include "CNTKLibrary.h" #include "CNTKLibrary.h"
#include "Utils.h"
#include "BestGpu.h" #include "BestGpu.h"
#include <mutex> #include <mutex>
#include <algorithm> #include <algorithm>
#include <CPUMatrix.h> // For CPUMatrix::SetNumThreads
#include <thread>
namespace CNTK namespace CNTK
{ {
@ -40,6 +43,17 @@ namespace CNTK
{ {
return s_alwaysAllowSettingDefaultDevice.load(); return s_alwaysAllowSettingDefaultDevice.load();
} }
std::atomic<bool> s_disableAutomaticUnpackingOfPackedValues(false);
void DisableAutomaticUnpackingOfPackedValues()
{
s_disableAutomaticUnpackingOfPackedValues.store(true);
}
bool IsAutomaticUnpackingOfPackedValuesDisabled()
{
return s_disableAutomaticUnpackingOfPackedValues.load();
}
} }
/*static*/ std::atomic<bool> DeviceDescriptor::s_defaultDeviceFrozen(false); /*static*/ std::atomic<bool> DeviceDescriptor::s_defaultDeviceFrozen(false);
@ -62,7 +76,7 @@ namespace CNTK
auto selectedDevice = DefaultDevice(); auto selectedDevice = DefaultDevice();
if (!alreadyFrozen) if (!alreadyFrozen)
{ {
Microsoft::MSR::CNTK::OnDeviceSelected(selectedDevice.Id()); Microsoft::MSR::CNTK::OnDeviceSelected(AsCNTKImplDeviceId(selectedDevice));
} }
return selectedDevice; return selectedDevice;
} }
@ -74,7 +88,7 @@ namespace CNTK
RuntimeError("Process wide default device cannot be changed since it has been frozen by being implicitly used as the default device in a CNTK API call"); RuntimeError("Process wide default device cannot be changed since it has been frozen by being implicitly used as the default device in a CNTK API call");
std::call_once(s_initDefaultDeviceFlag, []{ std::call_once(s_initDefaultDeviceFlag, []{
// do nothing. This will set the flag above, in case the DefaultDevice() was never called before. // do nothing. This will set the flag above, in case when DefaultDevice() was never called before.
}); });
s_defaultDevice.reset(new DeviceDescriptor(newDefaultDevice)); s_defaultDevice.reset(new DeviceDescriptor(newDefaultDevice));
@ -82,7 +96,9 @@ namespace CNTK
/*static*/ DeviceDescriptor DeviceDescriptor::BestDevice() /*static*/ DeviceDescriptor DeviceDescriptor::BestDevice()
{ {
// TODO: add unit tests for this. //TODO: BestDevice remains locked if UseDefaultDevice is never executed
// or if BestDevice() is invoked after UseDefaultDevice().
// Should we do anything about it?
auto id = Microsoft::MSR::CNTK::GetBestDevice(); auto id = Microsoft::MSR::CNTK::GetBestDevice();
return id >= 0 ? DeviceDescriptor::GPUDevice(id) : DeviceDescriptor::CPUDevice(); return id >= 0 ? DeviceDescriptor::GPUDevice(id) : DeviceDescriptor::CPUDevice();
} }
@ -140,6 +156,12 @@ namespace CNTK
return s_defaultBatchAxis; return s_defaultBatchAxis;
} }
/*static*/ const Axis& Axis::AllStaticAxes()
{
static const Axis s_allStaticAxes(SentinelStaticAxisIndexValueForAllStaticAxes);
return s_allStaticAxes;
}
/*static*/ Axis Axis::NewUniqueDynamicAxis(const std::wstring& axisNamePrefix, bool isOrderedDynamicAxis /*= true*/) /*static*/ Axis Axis::NewUniqueDynamicAxis(const std::wstring& axisNamePrefix, bool isOrderedDynamicAxis /*= true*/)
{ {
return Axis(s_uniqueDynamicAxisNames.NewUniqueDynamicAxisName(axisNamePrefix), isOrderedDynamicAxis); return Axis(s_uniqueDynamicAxisNames.NewUniqueDynamicAxisName(axisNamePrefix), isOrderedDynamicAxis);
@ -149,4 +171,16 @@ namespace CNTK
{ {
s_uniqueDynamicAxisNames.RegisterAxisName(axisName); s_uniqueDynamicAxisNames.RegisterAxisName(axisName);
} }
std::atomic<size_t> s_maxNumCPUThreads(std::thread::hardware_concurrency());
void SetMaxNumCPUThreads(size_t numCPUThreads)
{
s_maxNumCPUThreads.store(numCPUThreads);
Microsoft::MSR::CNTK::CPUMatrix<float>::SetNumThreads((int)numCPUThreads);
}
size_t GetMaxNumCPUThreads()
{
return s_maxNumCPUThreads.load();
}
} }

Просмотреть файл

@ -16,6 +16,7 @@
#include "InputAndParamNodes.h" #include "InputAndParamNodes.h"
#include "NonlinearityNodes.h" #include "NonlinearityNodes.h"
#include "RecurrentNodes.h" #include "RecurrentNodes.h"
#include "Value.h"
using namespace Microsoft::MSR::CNTK; using namespace Microsoft::MSR::CNTK;
@ -81,7 +82,7 @@ namespace CNTK
} }
} }
auto outputsUsingNewInputs = PrimitiveFunction::GetOutputVariables(primitiveFunction->OpType(), m_inputs, this, primitiveFunction->Attributes()); auto outputsUsingNewInputs = PrimitiveFunction::GetOutputVariables(primitiveFunction->OpType(), m_inputs, this, primitiveFunction->Attributes(), primitiveFunction->Name());
auto currentOutputs = Outputs(); auto currentOutputs = Outputs();
for (size_t i = 0; i < currentOutputs.size(); ++i) for (size_t i = 0; i < currentOutputs.size(); ++i)
{ {
@ -197,7 +198,7 @@ namespace CNTK
{ {
auto placeholders = Placeholders(); auto placeholders = Placeholders();
if (placeholders.size() != 1) if (placeholders.size() != 1)
InvalidArgument("Function::ReplacePlaceholders called with a single replacement variable but this Function has none or more than 1 placeholders"); InvalidArgument("Function::ReplacePlaceholders called with a single replacement variable but this Function has %d placeholders", (int)placeholders.size());
return ReplacePlaceholders({ { *(placeholders.begin()), placeholderReplacement } }); return ReplacePlaceholders({ { *(placeholders.begin()), placeholderReplacement } });
} }
@ -413,26 +414,52 @@ namespace CNTK
/*static*/ const std::wstring PrimitiveFunction::AttributeNameEndIndex = L"endIndex"; /*static*/ const std::wstring PrimitiveFunction::AttributeNameEndIndex = L"endIndex";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameReductionOpName = L"reductionOpName"; /*static*/ const std::wstring PrimitiveFunction::AttributeNameReductionOpName = L"reductionOpName";
/*static*/ std::vector<Variable> PrimitiveFunction::GetOutputVariables(PrimitiveOpType op, const std::vector<Variable>& inputs, Function* owner, const Dictionary& functionConfig) /*static*/ std::vector<Variable> PrimitiveFunction::GetOutputVariables(PrimitiveOpType op, const std::vector<Variable>& inputs, Function* owner, const Dictionary& functionConfig, const std::wstring& functionName)
{ {
if (op == PrimitiveOpType::Combine) if (op == PrimitiveOpType::Combine)
return inputs; return inputs;
// TODO: We are just using the primary operand's DataType as output node's DataType. Is this always correct? // We use the first non-constant input operand's DataType as the output DataType
// In case there are no non-constant known DataTypes, we just pick the first known operand DataType
// Also, all the known DataTypes of operands should match except for constants where coercion is allowed
DataType firstKnownInputDataType = DataType::Unknown;
DataType outputDataType = DataType::Unknown; DataType outputDataType = DataType::Unknown;
NDShape outputShape; NDShape outputShape;
size_t i = 0; size_t i = 0;
while ((outputDataType == DataType::Unknown) && (i < inputs.size())) while (i < inputs.size())
outputDataType = inputs[i++].GetDataType(); {
auto input = inputs[i++];
auto inputDataType = input.GetDataType();
if (inputDataType != DataType::Unknown)
{
if (firstKnownInputDataType == DataType::Unknown)
firstKnownInputDataType = inputDataType;
if (outputDataType == DataType::Unknown) if (outputDataType == DataType::Unknown)
InvalidArgument("The DataType of all the input operands of primitive function with op type %s are unknown", PrimitiveOpTypeName(op)); {
if (!input.IsConstant())
outputDataType = inputDataType;
}
else
{
// The DataType of all operands should match except for Constants where we allow coercion
if ((inputDataType != DataType::Unknown) && (inputDataType != outputDataType) && !input.IsConstant())
InvalidArgument("Primitive function with op type %S has operands with different DataTypes %s and %s", PrimitiveOpTypeName(op).c_str(), DataTypeName(outputDataType), DataTypeName(inputDataType));
}
}
}
if (outputDataType == DataType::Unknown)
outputDataType = firstKnownInputDataType;
if (outputDataType == DataType::Unknown)
InvalidArgument("The DataType of all the input operands of primitive function with op type %S are unknown", PrimitiveOpTypeName(op).c_str());
// We currently require that the inputs' dynamic axes if any match // We currently require that the inputs' dynamic axes if any match
std::vector<Axis> outputDynamicAxes; std::vector<Axis> outputDynamicAxes;
if ((op == PrimitiveOpType::SumAll) || (op == PrimitiveOpType::SquaredError) || (op == PrimitiveOpType::CrossEntropyWithSoftmax) || (op == PrimitiveOpType::ClassificationError)) if ((op == PrimitiveOpType::SumAll) || (op == PrimitiveOpType::SquaredError) || (op == PrimitiveOpType::CrossEntropyWithSoftmax) || (op == PrimitiveOpType::ClassificationError))
outputDynamicAxes = std::vector<Axis>({}); outputDynamicAxes = std::vector<Axis>({});
if (op == PrimitiveOpType::Where) else if (op == PrimitiveOpType::Where)
outputDynamicAxes = AsVector<Axis>(functionConfig[PrimitiveFunction::AttributeNameNewDynamicAxes].Value<std::vector<DictionaryValue>>()); outputDynamicAxes = AsVector<Axis>(functionConfig[PrimitiveFunction::AttributeNameNewDynamicAxes].Value<std::vector<DictionaryValue>>());
else if (op == PrimitiveOpType::ScatterPacked) else if (op == PrimitiveOpType::ScatterPacked)
outputDynamicAxes = inputs[2].DynamicAxes(); outputDynamicAxes = inputs[2].DynamicAxes();
@ -598,18 +625,18 @@ namespace CNTK
assert(inputs.size() == 2); assert(inputs.size() == 2);
if ((inputs[0].Shape().Rank() > 2) || ((inputs[0].Shape().Rank() > 1) && (inputs[0].Shape()[1] != 1))) if ((inputs[0].Shape().Rank() > 2) || ((inputs[0].Shape().Rank() > 1) && (inputs[0].Shape()[1] != 1)))
InvalidArgument("The shape of input operands for the %s operation should have at most one axis", PrimitiveOpTypeName(op)); InvalidArgument("The shape of input operands for the %S operation should have at most one axis", PrimitiveOpTypeName(op).c_str());
auto predictionShape = inputs[0].Shape(); auto predictionShape = inputs[0].Shape();
auto labelsShape = inputs[1].Shape(); auto labelsShape = inputs[1].Shape();
if (predictionShape != labelsShape) if (predictionShape != labelsShape)
RuntimeError("Prediction output operand's shape %S is incompatible with label operand's shape %S for the %s operation", AsStringForErrorReporting(predictionShape).c_str(), AsStringForErrorReporting(labelsShape).c_str(), PrimitiveOpTypeName(op)); RuntimeError("Prediction output operand's shape %S is incompatible with label operand's shape %S for the %S operation", AsStringForErrorReporting(predictionShape).c_str(), AsStringForErrorReporting(labelsShape).c_str(), PrimitiveOpTypeName(op).c_str());
std::vector<size_t> reductionAxes; std::vector<size_t> reductionAxes;
for (size_t i = 0; i < inputs[0].Shape().Rank(); ++i) for (size_t i = 0; i < inputs[0].Shape().Rank(); ++i)
reductionAxes.push_back(i); reductionAxes.push_back(i);
outputShape = ReductionOpOutputShape(op, predictionShape, reductionAxes); outputShape = ReductionOpOutputShape(op, predictionShape, reductionAxes, /*preserveReductionAxes =*/ false);
break; break;
} }
case PrimitiveOpType::PastValue: case PrimitiveOpType::PastValue:
@ -630,9 +657,13 @@ namespace CNTK
{ {
assert(inputs.size() == 1); assert(inputs.size() == 1);
auto reductionAxis = functionConfig[PrimitiveFunction::AttributeNameAxis].Value<Axis>(); auto reductionAxis = functionConfig[PrimitiveFunction::AttributeNameAxis].Value<Axis>();
if (reductionAxis == Axis::AllStaticAxes())
outputShape = {};
else
{
std::vector<size_t> reductionAxes = { reductionAxis.StaticAxisIndex() }; std::vector<size_t> reductionAxes = { reductionAxis.StaticAxisIndex() };
outputShape = ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes, /*preserveReductionAxes =*/ true);
outputShape = ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes); }
break; break;
} }
case PrimitiveOpType::BatchNormalization: case PrimitiveOpType::BatchNormalization:
@ -664,9 +695,6 @@ namespace CNTK
if (inputs[0].DynamicAxes().empty() || inputs[1].DynamicAxes().empty() || inputs[2].DynamicAxes().empty()) if (inputs[0].DynamicAxes().empty() || inputs[1].DynamicAxes().empty() || inputs[2].DynamicAxes().empty())
InvalidArgument("ScatterPacked requires all its operands to have dynamic axes"); InvalidArgument("ScatterPacked requires all its operands to have dynamic axes");
if (inputs[1].Shape().Rank() != 1)
InvalidArgument("ScatterPacked requires the packedIndex operand to be a scalar sequence");
outputShape = inputs[0].Shape(); outputShape = inputs[0].Shape();
break; break;
} }
@ -686,13 +714,14 @@ namespace CNTK
break; break;
} }
default: default:
LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op)); LogicError("Specified op %S not yet supported", PrimitiveOpTypeName(op).c_str());
break; break;
} }
return{ OutputVariable(outputShape, outputDataType, owner, outputDynamicAxes) }; return{ OutputVariable(outputShape, outputDataType, owner, outputDynamicAxes, functionName.empty() ? L"" : functionName + L"_output") };
} }
/*static*/ const std::wstring CompositeFunction::CompositeFunctionOpName = L"CompositeFunctionOpName";
/*static*/ std::atomic<unsigned int> CompositeFunction::s_nextAutoGeneratedDynamicAxis(0); /*static*/ std::atomic<unsigned int> CompositeFunction::s_nextAutoGeneratedDynamicAxis(0);
// Names of the dynamic axes in the CNTK engine for some special sets of dynamic axes values // Names of the dynamic axes in the CNTK engine for some special sets of dynamic axes values
@ -746,9 +775,10 @@ namespace CNTK
variableToNodeMap[variable] = nullptr; variableToNodeMap[variable] = nullptr;
std::shared_ptr<ComputationNode<ElementType>> computationNodePtr; std::shared_ptr<ComputationNode<ElementType>> computationNodePtr;
auto internalNodeName = CNTKInternalNodeNameFromUidAndName(variable.Uid(), variable.Name());
if (variable.IsParameter() || variable.IsConstant()) if (variable.IsParameter() || variable.IsConstant())
{ {
computationNodePtr = builder.CreateLearnableParameter(variable.Uid(), AsTensorShape(variable.Shape())); computationNodePtr = builder.CreateLearnableParameter(internalNodeName, AsTensorShape(variable.Shape()));
network->InitLearnableParameters(computationNodePtr, L"fixedValue", 0); // must call this to follow protocol; can overwrite later network->InitLearnableParameters(computationNodePtr, L"fixedValue", 0); // must call this to follow protocol; can overwrite later
if (!variable.NeedsGradient()) if (!variable.NeedsGradient())
computationNodePtr->SetLearningRateMultiplier(0.0); computationNodePtr->SetLearningRateMultiplier(0.0);
@ -786,9 +816,9 @@ namespace CNTK
network->AddNodeToNetAndAttachInputs(New<DynamicAxisNode<ElementType>>(network->GetDeviceId(), internalDynamicAxisName), {}); network->AddNodeToNetAndAttachInputs(New<DynamicAxisNode<ElementType>>(network->GetDeviceId(), internalDynamicAxisName), {});
if (IsSparseInput(variable)) if (IsSparseInput(variable))
computationNodePtr = builder.CreateSparseInputNode(variable.Uid(), AsTensorShape(variable.Shape()), internalDynamicAxisName); computationNodePtr = builder.CreateSparseInputNode(internalNodeName, AsTensorShape(variable.Shape()), internalDynamicAxisName);
else else
computationNodePtr = builder.CreateInputNode(variable.Uid(), AsTensorShape(variable.Shape()), internalDynamicAxisName); computationNodePtr = builder.CreateInputNode(internalNodeName, AsTensorShape(variable.Shape()), internalDynamicAxisName);
if (variable.NeedsGradient()) if (variable.NeedsGradient())
{ {
@ -1033,7 +1063,7 @@ namespace CNTK
break; break;
} }
default: default:
LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op)); LogicError("Specified op %S not yet supported", PrimitiveOpTypeName(op).c_str());
break; break;
} }
@ -1047,8 +1077,8 @@ namespace CNTK
{ {
auto computationNodeExpectedInputCount = computationNodePtr->As<INumInputs>()->GetExpectedNumInputs(); auto computationNodeExpectedInputCount = computationNodePtr->As<INumInputs>()->GetExpectedNumInputs();
if (computationNodeExpectedInputCount != inputNodesBasePtrs.size()) if (computationNodeExpectedInputCount != inputNodesBasePtrs.size())
LogicError("Input count mismatch: The Primitive function for op %s has %d inputs while the corresponding ComputationNode has %d inputs", LogicError("Input count mismatch: The Primitive function for op %S has %d inputs while the corresponding ComputationNode has %d inputs",
PrimitiveOpTypeName(op), PrimitiveOpTypeName(op).c_str(),
(int)inputNodesBasePtrs.size(), (int)inputNodesBasePtrs.size(),
(int)computationNodeExpectedInputCount); (int)computationNodeExpectedInputCount);
} }
@ -1128,8 +1158,8 @@ namespace CNTK
// TODO: Support changing the device across different invocations of the forward method on a Function instance // TODO: Support changing the device across different invocations of the forward method on a Function instance
if (AsDeviceDescriptor(m_computationNetwork->GetDeviceId()) != device) if (AsDeviceDescriptor(m_computationNetwork->GetDeviceId()) != device)
LogicError("Changing device across different Forward calls on a CNTK composite Function is currently unsupported"); LogicError("Changing device across different Forward calls on a CNTK composite Function is currently unsupported");
}
}
else else
{ {
m_computationNetwork = std::make_shared<ComputationNetwork>(AsCNTKImplDeviceId(device)); m_computationNetwork = std::make_shared<ComputationNetwork>(AsCNTKImplDeviceId(device));
@ -1140,20 +1170,11 @@ namespace CNTK
if (backpropRoots.size() > 1) if (backpropRoots.size() > 1)
LogicError("More than one backprop roots is currently unsupported"); LogicError("More than one backprop roots is currently unsupported");
ComputationNodeBasePtr backpropRootNode;
// Now recursively create the network in a top-down fashion // Now recursively create the network in a top-down fashion
auto rootFunction = RootFunction(); auto rootFunction = RootFunction();
auto rootFunctionOutputs = rootFunction->Outputs(); auto rootFunctionOutputs = rootFunction->Outputs();
std::vector<ComputationNodeBasePtr> forwardRootNodes;
for (auto rootOutput : rootFunctionOutputs) for (auto rootOutput : rootFunctionOutputs)
{ GetNode(rootOutput, m_computationNetwork, builder, m_variableToNodeMap, m_isVariableRootMap);
auto currentRootNode = GetNode(rootOutput, m_computationNetwork, builder, m_variableToNodeMap, m_isVariableRootMap);
forwardRootNodes.push_back(currentRootNode);
if (backpropRoots.find(rootOutput) != backpropRoots.end())
backpropRootNode = m_variableToNodeMap[rootOutput];
}
// If any of the function outputs is not a root node, we need to explicitly add it to the 'output' group of the ComputationNetwork // If any of the function outputs is not a root node, we need to explicitly add it to the 'output' group of the ComputationNetwork
for (auto rootOutput : rootFunctionOutputs) for (auto rootOutput : rootFunctionOutputs)
@ -1212,8 +1233,26 @@ namespace CNTK
} }
} }
} }
}
if (!m_networkMatricesAllocated && allocateNetworkMatrices)
{
ComputationNodeBasePtr backpropRootNode;
// Now recursively create the network in a top-down fashion
auto rootFunction = RootFunction();
auto rootFunctionOutputs = rootFunction->Outputs();
std::vector<ComputationNodeBasePtr> forwardRootNodes;
for (auto rootOutput : rootFunctionOutputs)
{
auto currentRootNode = m_variableToNodeMap[rootOutput];
forwardRootNodes.push_back(currentRootNode);
if (m_currentBackpropRoots.find(rootOutput) != m_currentBackpropRoots.end())
backpropRootNode = currentRootNode;
}
if (allocateNetworkMatrices)
m_computationNetwork->AllocateAllMatrices(forwardRootNodes, {}, backpropRootNode); m_computationNetwork->AllocateAllMatrices(forwardRootNodes, {}, backpropRootNode);
m_networkMatricesAllocated = allocateNetworkMatrices; m_networkMatricesAllocated = allocateNetworkMatrices;
} }
@ -1224,107 +1263,180 @@ namespace CNTK
template <typename ElementType> template <typename ElementType>
/*static*/ std::pair<std::shared_ptr<const Matrix<ElementType>>, MBLayoutPtr> CompositeFunction::GetCNTKImplMatrixAndMBLayoutFromValueObject(Variable var, const ValuePtr& value) /*static*/ std::pair<std::shared_ptr<const Matrix<ElementType>>, MBLayoutPtr> CompositeFunction::GetCNTKImplMatrixAndMBLayoutFromValueObject(Variable var, const ValuePtr& value)
{ {
if (var.GetDataType() != value->Data()->GetDataType()) if (var.GetDataType() != value->GetDataType())
LogicError("The Variable's DataType %s does not match the corresponding Value's DataType %s", DataTypeName(var.GetDataType()), DataTypeName(value->Data()->GetDataType())); LogicError("The Variable's DataType %s does not match the corresponding Value's DataType %s", DataTypeName(var.GetDataType()), DataTypeName(value->GetDataType()));
if (AsDataType<ElementType>() != value->Data()->GetDataType()) if (AsDataType<ElementType>() != value->GetDataType())
LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(value->Data()->GetDataType())); LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(value->GetDataType()));
// TODO: Is supplying dense data for an Input variable tagged as sparse, a fatal error? // TODO: Is supplying dense data for an Input variable tagged as sparse, a fatal error?
if (IsSparseInput(var) && !value->Data()->IsSparse()) if (IsSparseInput(var) && !value->IsSparse())
InvalidArgument("Dense input data supplied for a sparse input Variable"); InvalidArgument("Dense input data supplied for a sparse input Variable");
if (IsSparseInput(var) && (value->Data()->GetStorageFormat() != StorageFormat::SparseCSC)) if (IsSparseInput(var) && (value->GetStorageFormat() != StorageFormat::SparseCSC))
InvalidArgument("Sparse Input data must be in SparseCSC format"); InvalidArgument("Sparse Input data must be in SparseCSC format");
if (value->Data()->Shape().Rank() == var.Shape().Rank()) auto varShape = var.Shape();
return{ value->Data()->GetMatrix<ElementType>(), nullptr }; auto valueShape = value->Shape();
if (valueShape.Rank() < varShape.Rank())
InvalidArgument("Value's rank should be >= the Variable's rank");
if (value->Data()->Shape().Rank() < (var.Shape().Rank() + var.DynamicAxes().size())) size_t maxAddionalValueAxes = std::max<size_t>(2, var.DynamicAxes().size());
InvalidArgument("Value's number of axes should be larger than the Variable's number of axes by number of dynamic axes"); if (valueShape.Rank() > (varShape.Rank() + maxAddionalValueAxes))
InvalidArgument("Value rank should be larger than the Variable%S rank at most by number of dynamic axes", ParanthesizedName(var.Name()).c_str());
if (valueShape.SubShape(0, varShape.Rank()) != varShape)
{
InvalidArgument("The %s dimensions of the Value shape %S do not match the shape of the variable %S that it corresponds to!",
Internal::IsReversingTensorShapesInErrorMessagesEnabled() ? "trailing" : "leading",
AsStringForErrorReporting(valueShape).c_str(),
AsStringForErrorReporting(varShape).c_str());
}
if (var.DynamicAxes().empty())
return{ value->Data()->GetMatrix<ElementType>(), nullptr };
if (var.DynamicAxes().size() > 2) if (var.DynamicAxes().size() > 2)
LogicError("More than 2 dynamic axis for a variable is currently unsupported"); LogicError("More than 2 dynamic axis for a variable is currently unsupported");
if (value->Data()->Shape().SubShape(0, var.Shape().Rank()) != var.Shape())
{
InvalidArgument("The %s dimensions of the Value shape %S do not match the shape of the variable %S that it corresponds to!",
Internal::IsReversingTensorShapesInErrorMessagesEnabled() ? "trailing" : "leading",
AsStringForErrorReporting(value->Data()->Shape()).c_str(),
AsStringForErrorReporting(var.Shape()).c_str());
}
size_t maxNumTimeSteps = value->Data()->Shape()[var.Shape().Rank()];
size_t numSequences = value->Data()->Shape()[var.Shape().Rank() + 1];
auto mask = value->Mask(); auto mask = value->Mask();
if ((mask != nullptr) && ((var.Shape().Rank() + mask->Shape().Rank()) != value->Data()->Shape().Rank())) if ((mask != nullptr) && ((varShape.Rank() + mask->Shape().Rank()) != valueShape.Rank()))
InvalidArgument("Invalid Value object; the sum of the rank of the mask and data does not equal the Variable's rank + number of dynamic axes"); InvalidArgument("Invalid Value object; the sum of the rank of the mask and data does not equal the Variable's rank + number of dynamic axes");
if ((numSequences == 1) || (maxNumTimeSteps == 1)) auto getNumTimeStepsAndSequencesFunc = [](const NDShape& maskShape) {
{ size_t maxNumTimeSteps = 1;
// The data need not be shuffled size_t numSequences = 1;
std::shared_ptr<const Matrix<ElementType>> matrixData = value->Data()->GetMatrix<ElementType>(var.Shape().Rank()); if (maskShape.Rank() > 0)
auto layout = std::make_shared<MBLayout>(); maxNumTimeSteps = maskShape[0];
if (maxNumTimeSteps == 1)
layout->InitAsFrameMode(numSequences); if (maskShape.Rank() > 1)
else numSequences = maskShape[1];
{
layout->Init(1, maxNumTimeSteps); return std::pair<size_t, size_t>(maxNumTimeSteps, numSequences);
layout->AddSequence(0, 0, 0, maxNumTimeSteps); };
}
size_t maxNumTimeSteps, numSequences;
std::tie(maxNumTimeSteps, numSequences) = getNumTimeStepsAndSequencesFunc(valueShape.SubShape(varShape.Rank()));
auto getSequenceStartsAndLengthsFunc = [&getNumTimeStepsAndSequencesFunc](const NDMaskPtr& mask, std::vector<ptrdiff_t>& sequenceBeginIndices, std::vector<size_t>& sequenceLengths) {
auto cpuMask = mask;
if (mask->Device() != DeviceDescriptor::CPUDevice())
cpuMask = mask->DeepClone(DeviceDescriptor::CPUDevice());
const MaskKind* maskBuffer = cpuMask->DataBuffer();
size_t maxNumTimeSteps, numSequences;
std::tie(maxNumTimeSteps, numSequences) = getNumTimeStepsAndSequencesFunc(mask->Shape());
return{ matrixData , layout};
}
else
{
std::vector<size_t> sequenceLengths(numSequences, maxNumTimeSteps);
if (mask != nullptr)
{
// Determine the sequence lengths from the mask
std::unique_ptr<char[]> maskData(mask->GetMatrix()->CopyToArray());
for (size_t i = 0; i < numSequences; ++i) for (size_t i = 0; i < numSequences; ++i)
{ {
size_t currentSequenceLength = 0; MaskKind firstMaskEntry = maskBuffer[i * maxNumTimeSteps];
if (firstMaskEntry == MaskKind::SequenceBegin)
sequenceBeginIndices[i] = 0;
else if (firstMaskEntry == MaskKind::Valid)
sequenceBeginIndices[i] = Microsoft::MSR::CNTK::SentinelValueIndicatingUnspecifedSequenceBeginIdx;
else
LogicError("The first entry of a mask should be Valid or SequenceBegin");
size_t currentSequenceLength = 1;
bool currentSequenceEndAlreadyFound = false; bool currentSequenceEndAlreadyFound = false;
for (size_t j = 0; j < maxNumTimeSteps; ++j) for (size_t j = 1; j < maxNumTimeSteps; ++j)
{ {
if (maskData[(i * maxNumTimeSteps) + j] == 1) if (maskBuffer[(i * maxNumTimeSteps) + j] == MaskKind::Invalid)
currentSequenceEndAlreadyFound = true;
else
{ {
if (currentSequenceEndAlreadyFound) if (currentSequenceEndAlreadyFound)
InvalidArgument("Invalid Value object; only trailing steps of a sequence can be masked"); InvalidArgument("Invalid Value object; only trailing steps of a sequence can be masked");
currentSequenceLength++; currentSequenceLength++;
} }
else
currentSequenceEndAlreadyFound = true;
} }
sequenceLengths[i] = currentSequenceLength; sequenceLengths[i] = currentSequenceLength;
} }
};
if ((numSequences == 1) || (maxNumTimeSteps == 1))
{
// The data need not be shuffled
std::shared_ptr<const Matrix<ElementType>> matrixData = value->Data()->GetMatrix<ElementType>(varShape.Rank());
auto layout = std::make_shared<MBLayout>();
if (!mask)
{
if (maxNumTimeSteps == 1)
layout->InitAsFrameMode(numSequences);
else
{
layout->Init(numSequences, maxNumTimeSteps);
layout->AddSequence(0, 0, 0, maxNumTimeSteps);
}
}
else
{
layout->Init(numSequences, maxNumTimeSteps);
std::vector<ptrdiff_t> sequenceBeginIndices(numSequences, 0);
std::vector<size_t> sequenceLengths(numSequences, maxNumTimeSteps);
getSequenceStartsAndLengthsFunc(mask, sequenceBeginIndices, sequenceLengths);
for (size_t i = 0; i < numSequences; ++i)
layout->AddSequence(i, i, sequenceBeginIndices[i], sequenceLengths[i]);
} }
// The data needs to be rearranged since CNTK requires sequences to be interleaved across timesteps return{ matrixData , layout};
std::vector<MBLayout::SequenceInfo> sequences; }
for (size_t i = 0; i < numSequences; ++i) else
sequences.push_back({ i, SIZE_MAX, 0, sequenceLengths[i]}); {
std::vector<ptrdiff_t> sequenceBeginIndices(numSequences, 0);
std::vector<size_t> sequenceLengths(numSequences, maxNumTimeSteps);
if (mask != nullptr)
getSequenceStartsAndLengthsFunc(mask, sequenceBeginIndices, sequenceLengths);
bool hasTruncatedSequences = std::find_if(sequenceBeginIndices.begin(), sequenceBeginIndices.end(), [](const int& val) { return (val < 0); }) != sequenceBeginIndices.end();
auto layout = std::make_shared<MBLayout>(); auto layout = std::make_shared<MBLayout>();
std::vector<std::pair<size_t, size_t>> placement; std::vector<std::pair<size_t, size_t>> placement;
if (!hasTruncatedSequences)
{
std::vector<MBLayout::SequenceInfo> sequences;
for (size_t i = 0; i < numSequences; ++i)
sequences.push_back({ i, SIZE_MAX, sequenceBeginIndices[i], sequenceLengths[i] });
std::vector<size_t> rowAllocations; std::vector<size_t> rowAllocations;
layout->InitAsPackedSequences(sequences, placement, rowAllocations); layout->InitAsPackedSequences(sequences, placement, rowAllocations);
}
else
{
layout->Init(numSequences, maxNumTimeSteps);
// We cannot pack as some of the sequences are truncated and thus all sequences have to be
// kept in their original parallel streams
placement.resize(numSequences);
for (size_t i = 0; i < numSequences; ++i)
{
layout->AddSequence(i, i, sequenceBeginIndices[i], sequenceLengths[i]);
// Add the gap if there is one
if (sequenceLengths[i] < maxNumTimeSteps)
layout->AddSequence(GAP_SEQUENCE_ID, i, sequenceLengths[i], maxNumTimeSteps);
placement[i] = std::make_pair(i, 0);
}
}
if (maxNumTimeSteps != layout->GetNumTimeSteps()) if (maxNumTimeSteps != layout->GetNumTimeSteps())
LogicError("The number of time steps in the packed MBLayout does not match the longest sequence's length in the Value object"); LogicError("The number of time steps in the packed MBLayout does not match the longest sequence's length in the Value object");
if (numSequences != layout->GetNumSequences()) if (numSequences != layout->GetNumSequences())
LogicError("The number of sequences in the packed MBLayout does not match the sequence count in the Value object"); LogicError("The number of sequences in the packed MBLayout does not match the sequence count in the Value object");
// The data needs to be rearranged since CNTK requires sequences to be interleaved across timesteps
// Now generate the gather indices // Now generate the gather indices
auto matrixData = std::make_shared<Matrix<ElementType>>(var.Shape().TotalSize(), auto matrixData = std::make_shared<Matrix<ElementType>>(varShape.TotalSize(),
layout->GetNumCols(), layout->GetNumCols(),
AsCNTKImplDeviceId(value->Data()->Device()), AsCNTKImplDeviceId(value->Device()),
value->Data()->IsSparse() ? MatrixType::SPARSE : MatrixType::DENSE, value->IsSparse() ? MatrixType::SPARSE : MatrixType::DENSE,
AsCNTKImplMatrixFormat(value->Data()->GetStorageFormat())); AsCNTKImplMatrixFormat(value->GetStorageFormat()));
std::vector<size_t> sequencesShorterThanLongestSequence; std::vector<size_t> sequencesShorterThanLongestSequence;
for (size_t i = 0; i < numSequences; ++i) for (size_t i = 0; i < numSequences; ++i)
@ -1342,8 +1454,8 @@ namespace CNTK
gatherIndicesVector[((targetStartIdxInParallelStream + j) * layout->GetNumParallelSequences()) + targetParallelStreamIdx] = (ElementType)((i * maxNumTimeSteps) + j); gatherIndicesVector[((targetStartIdxInParallelStream + j) * layout->GetNumParallelSequences()) + targetParallelStreamIdx] = (ElementType)((i * maxNumTimeSteps) + j);
} }
auto gatherIdxMatrix = std::make_shared<Matrix<ElementType>>(1, layout->GetNumCols(), gatherIndicesVector.data(), AsCNTKImplDeviceId(value->Data()->Device())); auto gatherIdxMatrix = std::make_shared<Matrix<ElementType>>(1, layout->GetNumCols(), gatherIndicesVector.data(), AsCNTKImplDeviceId(value->Device()));
matrixData->DoGatherColumnsOf(0, *gatherIdxMatrix, *(value->Data()->GetMatrix<ElementType>(var.Shape().Rank())), 1); matrixData->DoGatherColumnsOf(0, *gatherIdxMatrix, *(value->Data()->GetMatrix<ElementType>(varShape.Rank())), 1);
return{ matrixData, layout }; return{ matrixData, layout };
} }
} }
@ -1352,53 +1464,111 @@ namespace CNTK
/*static*/ ValuePtr CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(const NDShape& sampleShape, const Matrix<ElementType>& matrix, const MBLayoutPtr& layout, bool readOnly /*= true*/) /*static*/ ValuePtr CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(const NDShape& sampleShape, const Matrix<ElementType>& matrix, const MBLayoutPtr& layout, bool readOnly /*= true*/)
{ {
NDShape valueDataShape = sampleShape; NDShape valueDataShape = sampleShape;
size_t maxNumTimeSteps = 1;
size_t numSequences = 1;
if (layout != nullptr) if (layout != nullptr)
valueDataShape = valueDataShape.AppendShape({ layout->GetNumTimeSteps(), layout->GetNumSequences() }); {
maxNumTimeSteps = layout->GetNumTimeSteps();
numSequences = layout->GetNumSequences();
valueDataShape = valueDataShape.AppendShape({ maxNumTimeSteps, numSequences });
}
auto createMaskFunc = [](const MBLayoutPtr& layout, const DeviceDescriptor& device, std::vector<size_t>& sequencesShorterThanLongestSequence) {
std::vector<bool> sequenceBeginFlags;
std::vector<size_t> sequenceLengths;
sequencesShorterThanLongestSequence.clear();
size_t maxNumTimeSteps = layout->GetNumTimeSteps();
size_t numSequences = layout->GetNumSequences();
auto& layoutSequences = layout->GetAllSequences();
size_t sequenceIdx = 0;
bool allSequencesStartInThisMB = true;
bool allSequencesSameLength = true;
for (auto sequenceInfo : layoutSequences)
{
if (sequenceInfo.seqId != GAP_SEQUENCE_ID)
{
auto currentSequenceBeginIdx = std::max<ptrdiff_t>(0, sequenceInfo.tBegin);
auto currentSequenceEndIdx = std::min(maxNumTimeSteps, sequenceInfo.tEnd);
auto currentSequenceLength = (currentSequenceEndIdx - currentSequenceBeginIdx);
auto isCurrentSequenceBeginningInsideThisMB = sequenceInfo.tBegin >= 0;
allSequencesStartInThisMB = allSequencesStartInThisMB && isCurrentSequenceBeginningInsideThisMB;
allSequencesSameLength = allSequencesSameLength && (currentSequenceLength == maxNumTimeSteps);
sequenceBeginFlags.push_back(isCurrentSequenceBeginningInsideThisMB);
sequenceLengths.push_back(currentSequenceLength);
if (currentSequenceLength != maxNumTimeSteps)
sequencesShorterThanLongestSequence.push_back(sequenceIdx);
sequenceIdx++;
}
}
if (!allSequencesStartInThisMB && (numSequences != layout->GetNumParallelSequences()))
LogicError("Cannot create an unpacked Value object from packed data where one or more sequences are truncated");
bool maskNeeded = !allSequencesSameLength || !allSequencesStartInThisMB;
NDMaskPtr mask;
if (maskNeeded)
{
mask = MakeSharedObject<NDMask>(NDShape({ maxNumTimeSteps, numSequences }), device);
for (size_t i = 0; i < numSequences; ++i)
if (sequenceBeginFlags[i])
mask->MarkSequenceBegin({0, i});
for (auto shortSequenceIdx : sequencesShorterThanLongestSequence)
mask->InvalidateSection({ sequenceLengths[shortSequenceIdx], shortSequenceIdx }, { NDShape::InferredDimension, 1 });
}
return mask;
};
// No data shuffling needed if no layout or the layout has just one time-step or just one sequence // No data shuffling needed if no layout or the layout has just one time-step or just one sequence
if ((layout == nullptr) || (layout->GetNumTimeSteps() == 1) || (layout->GetNumSequences() == 1)) std::vector<size_t> sequencesShorterThanLongestSequence;
if ((maxNumTimeSteps == 1) || (numSequences == 1))
{ {
// Just create a view over the existing matrix itself // Just create a view over the existing matrix itself
auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorViewShape(valueDataShape)); auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorViewShape(valueDataShape));
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), valueDataShape, readOnly, tensorView); auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), valueDataShape, readOnly, tensorView);
if (layout == nullptr)
return MakeSharedObject<Value>(data); return MakeSharedObject<Value>(data);
else
{
auto mask = createMaskFunc(layout, AsDeviceDescriptor(matrix.GetDeviceId()), sequencesShorterThanLongestSequence);
return MakeSharedObject<Value>(data, mask);
}
} }
if (layout->GetNumCols() != matrix.GetNumCols()) if (layout->GetNumCols() != matrix.GetNumCols())
LogicError("Bad MBLayout: The number of columns in the MBLayout does not match the number of columns in the data matrix!"); LogicError("Bad MBLayout: The number of columns in the MBLayout does not match the number of columns in the data matrix!");
size_t maxNumTimeSteps = layout->GetNumTimeSteps();
size_t numSequences = layout->GetNumSequences();
std::vector<size_t> sequenceLengths;
auto& layoutSequences = layout->GetAllSequences();
for (auto sequenceInfo : layoutSequences)
{
if (sequenceInfo.seqId != GAP_SEQUENCE_ID)
sequenceLengths.push_back(sequenceInfo.GetNumTimeSteps());
}
// Reshuffle to data to unpack and uninterleave the CNTK form packed data // Reshuffle to data to unpack and uninterleave the CNTK form packed data
// Now generate the scatter indices // Now generate the scatter indices
auto shuffledMatrixData = std::make_shared<Matrix<ElementType>>(matrix.GetNumRows(), maxNumTimeSteps * numSequences, matrix.GetDeviceId(), matrix.GetMatrixType(), matrix.GetFormat()); auto shuffledMatrixData = std::make_shared<Matrix<ElementType>>(matrix.GetNumRows(), maxNumTimeSteps * numSequences, matrix.GetDeviceId(), matrix.GetMatrixType(), matrix.GetFormat());
auto mask = createMaskFunc(layout, AsDeviceDescriptor(matrix.GetDeviceId()), sequencesShorterThanLongestSequence);
std::vector<size_t> sequencesShorterThanLongestSequence;
for (size_t i = 0; i < numSequences; ++i)
if (sequenceLengths[i] != maxNumTimeSteps)
sequencesShorterThanLongestSequence.push_back(i);
// Set the target location of all gaps to be the last step of the first sequence that is shorter than the longest sequence in the batch // Set the target location of all gaps to be the last step of the first sequence that is shorter than the longest sequence in the batch
size_t targetColIdxForInvalidColumns = sequencesShorterThanLongestSequence.empty() ? 0 : (((sequencesShorterThanLongestSequence[0] + 1) * maxNumTimeSteps) - 1); size_t targetColIdxForInvalidColumns = sequencesShorterThanLongestSequence.empty() ? 0 : (((sequencesShorterThanLongestSequence[0] + 1) * maxNumTimeSteps) - 1);
std::vector<ElementType> scatterIndicesVector(layout->GetNumCols(), (ElementType)targetColIdxForInvalidColumns); std::vector<ElementType> scatterIndicesVector(layout->GetNumCols(), (ElementType)targetColIdxForInvalidColumns);
size_t i = 0; size_t i = 0;
auto& layoutSequences = layout->GetAllSequences();
for (auto sequenceInfo : layoutSequences) for (auto sequenceInfo : layoutSequences)
{ {
if (sequenceInfo.seqId != GAP_SEQUENCE_ID) if (sequenceInfo.seqId != GAP_SEQUENCE_ID)
{ {
size_t targetParallelStreamIdx = sequenceInfo.s; size_t targetParallelStreamIdx = sequenceInfo.s;
size_t targetStartIdxInParallelStream = sequenceInfo.tBegin; auto currentSequenceBeginIdx = std::max<ptrdiff_t>(0, sequenceInfo.tBegin);
for (size_t j = 0; j < sequenceInfo.GetNumTimeSteps(); ++j) auto currentSequenceEndIdx = std::min(maxNumTimeSteps, sequenceInfo.tEnd);
scatterIndicesVector[((targetStartIdxInParallelStream + j) * layout->GetNumParallelSequences()) + targetParallelStreamIdx] = (ElementType)((i * maxNumTimeSteps) + j); size_t currentSequenceLength = (currentSequenceEndIdx - currentSequenceBeginIdx);
for (size_t j = 0; j < currentSequenceLength; ++j)
scatterIndicesVector[((currentSequenceBeginIdx + j) * layout->GetNumParallelSequences()) + targetParallelStreamIdx] = (ElementType)((i * maxNumTimeSteps) + j);
i++; i++;
} }
@ -1407,17 +1577,6 @@ namespace CNTK
auto scatterIdxMatrix = std::make_shared<Matrix<ElementType>>(1, layout->GetNumCols(), scatterIndicesVector.data(), matrix.GetDeviceId()); auto scatterIdxMatrix = std::make_shared<Matrix<ElementType>>(1, layout->GetNumCols(), scatterIndicesVector.data(), matrix.GetDeviceId());
shuffledMatrixData->DoScatterColumnsOf(0, *scatterIdxMatrix, matrix, 1); shuffledMatrixData->DoScatterColumnsOf(0, *scatterIdxMatrix, matrix, 1);
// Create the mask if needed
NDMaskPtr mask;
if (!sequencesShorterThanLongestSequence.empty())
{
mask = MakeSharedObject<NDMask>(NDShape({ maxNumTimeSteps, numSequences }), AsDeviceDescriptor(matrix.GetDeviceId()));
for (auto shortSequenceIdx : sequencesShorterThanLongestSequence)
{
mask->MaskSection({ sequenceLengths[shortSequenceIdx], shortSequenceIdx }, { NDShape::InferredDimension, 1 });
}
}
auto tensorView = new TensorView<ElementType>(shuffledMatrixData, AsTensorViewShape(valueDataShape)); auto tensorView = new TensorView<ElementType>(shuffledMatrixData, AsTensorViewShape(valueDataShape));
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(shuffledMatrixData->GetFormat()), valueDataShape, readOnly, tensorView); auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(shuffledMatrixData->GetFormat()), valueDataShape, readOnly, tensorView);
return MakeSharedObject<Value>(data, mask); return MakeSharedObject<Value>(data, mask);
@ -1441,7 +1600,13 @@ namespace CNTK
template <typename ElementType> template <typename ElementType>
/*static*/ void CompositeFunction::PopulateComputationNodeValue(const std::pair<Variable, ValuePtr>& variableValue, ComputationNodeBasePtr& computationNode) /*static*/ void CompositeFunction::PopulateComputationNodeValue(const std::pair<Variable, ValuePtr>& variableValue, ComputationNodeBasePtr& computationNode)
{ {
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableValue.first, variableValue.second); std::pair<std::shared_ptr<const Matrix<ElementType>>, MBLayoutPtr> CNTKMatrixAndMBLayout;
auto packedValue = dynamic_cast<PackedValue*>(variableValue.second.get());
if (packedValue)
CNTKMatrixAndMBLayout = packedValue->PackedData<ElementType>();
else
CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableValue.first, variableValue.second);
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second; MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
auto& nodeData = computationNode->As<ComputationNode<ElementType>>()->Value(); auto& nodeData = computationNode->As<ComputationNode<ElementType>>()->Value();
@ -1464,7 +1629,7 @@ namespace CNTK
ValuePtr argumentValue = arguments.at(argument); ValuePtr argumentValue = arguments.at(argument);
MBLayoutPtr layout; MBLayoutPtr layout;
switch (argumentValue->Data()->GetDataType()) switch (argumentValue->GetDataType())
{ {
case DataType::Float: case DataType::Float:
PopulateComputationNodeValue<float>({ argument, argumentValue }, argumentComputationNode); PopulateComputationNodeValue<float>({ argument, argumentValue }, argumentComputationNode);
@ -1473,7 +1638,7 @@ namespace CNTK
PopulateComputationNodeValue<double>({ argument, argumentValue }, argumentComputationNode); PopulateComputationNodeValue<double>({ argument, argumentValue }, argumentComputationNode);
break; break;
default: default:
LogicError("Unsupported DataType %s", DataTypeName(argumentValue->Data()->GetDataType())); LogicError("Unsupported DataType %s", DataTypeName(argumentValue->GetDataType()));
break; break;
} }
} }
@ -1484,7 +1649,13 @@ namespace CNTK
template <typename ElementType> template <typename ElementType>
/*static*/ void CompositeFunction::PopulateComputationNodeGradient(const std::pair<Variable, ValuePtr>& variableGradient, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode) /*static*/ void CompositeFunction::PopulateComputationNodeGradient(const std::pair<Variable, ValuePtr>& variableGradient, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode)
{ {
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableGradient.first, variableGradient.second); std::pair<std::shared_ptr<const Matrix<ElementType>>, MBLayoutPtr> CNTKMatrixAndMBLayout;
auto packedValue = dynamic_cast<PackedValue*>(variableGradient.second.get());
if (packedValue)
CNTKMatrixAndMBLayout = packedValue->PackedData<ElementType>();
else
CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableGradient.first, variableGradient.second);
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second; MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
auto nodeLayout = computationNode->GetMBLayout(); auto nodeLayout = computationNode->GetMBLayout();
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout))) if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
@ -1505,7 +1676,7 @@ namespace CNTK
auto outputComputationNode = m_variableToNodeMap[gradientVarValuePair.first]; auto outputComputationNode = m_variableToNodeMap[gradientVarValuePair.first];
ValuePtr gradientValue = gradientVarValuePair.second; ValuePtr gradientValue = gradientVarValuePair.second;
switch (gradientValue->Data()->GetDataType()) switch (gradientValue->GetDataType())
{ {
case DataType::Float: case DataType::Float:
PopulateComputationNodeGradient<float>(gradientVarValuePair, outputComputationNode); PopulateComputationNodeGradient<float>(gradientVarValuePair, outputComputationNode);
@ -1514,7 +1685,7 @@ namespace CNTK
PopulateComputationNodeGradient<double>(gradientVarValuePair, outputComputationNode); PopulateComputationNodeGradient<double>(gradientVarValuePair, outputComputationNode);
break; break;
default: default:
LogicError("Unsupported DataType %s", DataTypeName(gradientValue->Data()->GetDataType())); LogicError("Unsupported DataType %s", DataTypeName(gradientValue->GetDataType()));
break; break;
} }
} }
@ -1547,23 +1718,32 @@ namespace CNTK
if (varValue != nullptr) if (varValue != nullptr)
{ {
// TODO: The shape of the specified output Value object must match the actual output shape // TODO: The shape of the specified output Value object must match the actual output shape
if (varValue->Data()->Shape() != valueShape) if (varValue->Shape() != valueShape)
InvalidArgument("The shape %S of the specified Value object for %s does not match the actual shape %S", AsStringForErrorReporting(varValue->Data()->Shape()).c_str(), getGradient ? "gradient" : "output", AsStringForErrorReporting(valueShape).c_str()); InvalidArgument("The shape %S of the specified Value object for %s does not match the actual shape %S", AsStringForErrorReporting(varValue->Shape()).c_str(), getGradient ? "gradient" : "output", AsStringForErrorReporting(valueShape).c_str());
} }
ValuePtr nodeValue; ValuePtr nodeValue;
auto layout = computationNode->GetMBLayout();
switch (var.GetDataType()) switch (var.GetDataType())
{ {
case DataType::Float: case DataType::Float:
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(var, {
getGradient ? computationNode->As<ComputationNode<float>>()->Gradient() : computationNode->As<ComputationNode<float>>()->Value(), auto& matrix = getGradient ? computationNode->As<ComputationNode<float>>()->Gradient() : computationNode->As<ComputationNode<float>>()->Value();
computationNode->GetMBLayout()); if (varValue == nullptr)
nodeValue = MakeSharedObject<PackedValue>(var.Shape(), std::make_shared<Matrix<float>>(matrix.AsReference()), layout, /*readOnly =*/ false);
else
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(var, matrix, layout);
break; break;
}
case DataType::Double: case DataType::Double:
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(var, {
getGradient ? computationNode->As<ComputationNode<double>>()->Gradient() : computationNode->As<ComputationNode<double>>()->Value(), auto& matrix = getGradient ? computationNode->As<ComputationNode<double>>()->Gradient() : computationNode->As<ComputationNode<double>>()->Value();
computationNode->GetMBLayout()); if (varValue == nullptr)
nodeValue = MakeSharedObject<PackedValue>(var.Shape(), std::make_shared<Matrix<double>>(matrix.AsReference()), layout, /*readOnly =*/ false);
else
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(var, matrix, layout);
break; break;
}
default: default:
LogicError("Unsupported DataType %s", DataTypeName(var.GetDataType())); LogicError("Unsupported DataType %s", DataTypeName(var.GetDataType()));
break; break;
@ -1605,6 +1785,20 @@ namespace CNTK
} }
} }
const std::vector<Variable>& CompositeFunction::GetArgumentDependencies(const Variable& output)
{
assert(output.IsOutput());
auto iter = m_perOutputVarArgumentDependencies.find(output);
if (iter != m_perOutputVarArgumentDependencies.end())
return iter->second;
auto wrappedComposite = CompositeFunction::Create(output.Owner());
m_perOutputVarArgumentDependencies[output] = wrappedComposite->Arguments();
return m_perOutputVarArgumentDependencies[output];
}
/*virtual*/ BackPropStatePtr CompositeFunction::Forward(const std::unordered_map<Variable, ValuePtr>& arguments, /*virtual*/ BackPropStatePtr CompositeFunction::Forward(const std::unordered_map<Variable, ValuePtr>& arguments,
std::unordered_map<Variable, ValuePtr>& outputs, std::unordered_map<Variable, ValuePtr>& outputs,
const DeviceDescriptor& computeDevice, const DeviceDescriptor& computeDevice,
@ -1641,8 +1835,31 @@ namespace CNTK
else else
InvalidArgument("Unsupported DataType %s", DataTypeName(dataType)); InvalidArgument("Unsupported DataType %s", DataTypeName(dataType));
std::unordered_set<Variable> functionOutputs(this->Outputs().begin(), this->Outputs().end());
std::vector<ComputationNodeBasePtr> outputsToEvaluate;
std::unordered_set<Variable> requiredArguments;
for (auto outputVarValuePair : outputs)
{
// Ensure that only a subset of this function's outputs are being asked to be evaluated
if (functionOutputs.find(outputVarValuePair.first) == functionOutputs.end())
InvalidArgument("Requested output is not an Ouptut of the Function");
auto& requiredArgumentsForCurrentOutput = GetArgumentDependencies(outputVarValuePair.first);
requiredArguments.insert(requiredArgumentsForCurrentOutput.begin(), requiredArgumentsForCurrentOutput.end());
auto outputComputationNode = m_variableToNodeMap[outputVarValuePair.first];
outputsToEvaluate.push_back(outputComputationNode);
}
// TODO: Avoid copying the data when possible // TODO: Avoid copying the data when possible
// We should have argument values supplied for all required argument dependencies for the requested outputs
for (auto requiredArgument : requiredArguments)
{
if (arguments.find(requiredArgument) == arguments.end())
InvalidArgument("Function::Forward: Required argument's (%S) value that the requested output(s) depend on has not been provided", requiredArgument.Name().c_str());
}
// Feed data into the arguments of the network // Feed data into the arguments of the network
PopulateNetworkInputs(arguments); PopulateNetworkInputs(arguments);
@ -1653,19 +1870,6 @@ namespace CNTK
for (auto& nodeIter : dropoutNodes) for (auto& nodeIter : dropoutNodes)
nodeIter->SetEvalTimeStampOutdatedWrtAll(); nodeIter->SetEvalTimeStampOutdatedWrtAll();
std::unordered_set<Variable> functionOutputs(this->Outputs().begin(), this->Outputs().end());
std::vector<ComputationNodeBasePtr> outputsToEvaluate;
for (auto outputVarValuePair : outputs)
{
// Ensure that only a subset of this function's outputs are being asked to be evaluated
if (functionOutputs.find(outputVarValuePair.first) == functionOutputs.end())
InvalidArgument("Requested output is not an Ouptut of the Function");
auto outputComputationNode = m_variableToNodeMap[outputVarValuePair.first];
outputsToEvaluate.push_back(outputComputationNode);
}
// The 'outputsToRetainBackwardStateFor' nodes also need to be evaluated if not already specified in 'outputs' // The 'outputsToRetainBackwardStateFor' nodes also need to be evaluated if not already specified in 'outputs'
for (auto rootVarForBackprop : outputsToRetainBackwardStateFor) for (auto rootVarForBackprop : outputsToRetainBackwardStateFor)
{ {
@ -1879,7 +2083,7 @@ namespace CNTK
newDynamicAxes.push_back(operandAxis); newDynamicAxes.push_back(operandAxis);
} }
return Internal::Gather(operand, flags, newDynamicAxes); return Internal::Gather(operand, flags, newDynamicAxes, name);
} }
FunctionPtr Dropout(const Variable& operand, double dropoutRate, const std::wstring& name /*= L""*/) FunctionPtr Dropout(const Variable& operand, double dropoutRate, const std::wstring& name /*= L""*/)
@ -1968,23 +2172,25 @@ namespace CNTK
FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name/* = L""*/) FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name/* = L""*/)
{ {
return BinaryOp(PrimitiveOpType::SquaredError, prediction, targets, Dictionary(), name); auto difference = Minus(prediction, targets);
auto squaredDifference = ElementTimes(difference, difference);
return Internal::ReduceElements(squaredDifference, PrimitiveFunction::InternalSumReductionOpName, Axis::AllStaticAxes(), name);
} }
FunctionPtr CrossEntropyWithSoftmax(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/) FunctionPtr CrossEntropyWithSoftmax(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{ {
return ReduceSum(Minus(ReduceLogSum(prediction, Axis(0)), TransposeTimes(labels, prediction)), name); return Minus(ReduceLogSum(prediction, Axis(0)), TransposeTimes(labels, prediction), name);
} }
FunctionPtr ClassificationError(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/) FunctionPtr ClassificationError(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{ {
return ReduceSum(Minus(Constant::Scalar(prediction.GetDataType(), 1.0), TransposeTimes(labels, Hardmax(prediction))), name); return Minus(Constant::Scalar(prediction.GetDataType(), 1.0), TransposeTimes(labels, Hardmax(prediction)), name);
} }
FunctionPtr PastValue(const Variable& operand, const Variable& initialState, size_t offset, const std::wstring& name) FunctionPtr PastValue(const Variable& operand, const Variable& initialState, size_t offset, const std::wstring& name)
{ {
if (operand.DynamicAxes().size() != 2) if (operand.DynamicAxes().size() != 2)
InvalidArgument("PastValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic sequence-axis"); InvalidArgument("PastValue can only be used for operands with exactly one dynamic sequence-axis and one dynamic batch axis");
auto additionalProperties = Dictionary(); auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameOffset] = DictionaryValue(offset); additionalProperties[PrimitiveFunction::AttributeNameOffset] = DictionaryValue(offset);
@ -1994,7 +2200,7 @@ namespace CNTK
FunctionPtr FutureValue(const Variable& operand, const Variable& initialState, size_t offset, const std::wstring& name) FunctionPtr FutureValue(const Variable& operand, const Variable& initialState, size_t offset, const std::wstring& name)
{ {
if (operand.DynamicAxes().size() != 2) if (operand.DynamicAxes().size() != 2)
InvalidArgument("FutureValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic sequence-axis"); InvalidArgument("FutureValue can only be used for operands with exactly one dynamic sequence-axis and one dynamic batch axis");
auto additionalProperties = Dictionary(); auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameOffset] = DictionaryValue(offset); additionalProperties[PrimitiveFunction::AttributeNameOffset] = DictionaryValue(offset);
@ -2035,7 +2241,7 @@ namespace CNTK
Constant meanVar(mean); Constant meanVar(mean);
Constant invStdDevVar(invStdDev); Constant invStdDevVar(invStdDev);
return ElementTimes(Minus(operand, meanVar), invStdDevVar); return ElementTimes(Minus(operand, meanVar), invStdDevVar, name);
} }
FunctionPtr Convolution(const Variable& convolutionMap, FunctionPtr Convolution(const Variable& convolutionMap,
@ -2049,6 +2255,12 @@ namespace CNTK
size_t maxTempMemSizeInSamples, size_t maxTempMemSizeInSamples,
const std::wstring& name) const std::wstring& name)
{ {
// Currently we require that the Convolution function's operand have a dynamic axis since otherwise
// the internal implementation incorrectly infers the batch axis dimension by picking up the first axis as
// the sample shape and considering the rest to be part of the batch axis
if (operand.DynamicAxes().empty())
LogicError("Convolution currently requires the main operand to have dynamic axes");
auto additionalProperties = Dictionary(); auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameStrides] = strides; additionalProperties[PrimitiveFunction::AttributeNameStrides] = strides;
additionalProperties[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing); additionalProperties[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing);
@ -2129,16 +2341,18 @@ namespace CNTK
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Splice, operands, std::move(additionalProperties), name), name); return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Splice, operands, std::move(additionalProperties), name), name);
} }
FunctionPtr Combine(const std::vector<FunctionPtr>& operands, const std::wstring& name/* = L""*/) FunctionPtr Combine(const std::vector<Variable>& operands, const std::wstring& name /*= L""*/)
{ {
std::vector<Variable> inputs; std::unordered_set<Variable> uniqueOperands;
for (auto operand : operands) for (auto operand : operands)
{ {
auto currentFunctionOutputs = operand->Outputs(); if (uniqueOperands.find(operand) != uniqueOperands.end())
std::copy(currentFunctionOutputs.begin(), currentFunctionOutputs.end(), std::back_inserter(inputs)); LogicError("All operands specified to Combine must be unique");
uniqueOperands.insert(operand);
} }
return Internal::Combine(inputs, name); return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Combine, operands, Dictionary(), name), name);
} }
namespace Sequence namespace Sequence
@ -2153,25 +2367,25 @@ namespace CNTK
FunctionPtr IsFirst(const Variable& operand, const std::wstring& name /*= L""*/) FunctionPtr IsFirst(const Variable& operand, const std::wstring& name /*= L""*/)
{ {
VerifyIsSequence(operand); VerifyIsSequence(operand);
return Internal::IsWithin(operand, 1); return Internal::IsWithin(operand, 1, name);
} }
FunctionPtr IsLast(const Variable& operand, const std::wstring& name /*= L""*/) FunctionPtr IsLast(const Variable& operand, const std::wstring& name /*= L""*/)
{ {
VerifyIsSequence(operand); VerifyIsSequence(operand);
return Internal::IsWithin(operand, -1); return Internal::IsWithin(operand, -1, name);
} }
FunctionPtr First(const Variable& operand, const std::wstring& name /*= L""*/) FunctionPtr First(const Variable& operand, const std::wstring& name /*= L""*/)
{ {
VerifyIsSequence(operand); VerifyIsSequence(operand);
return Slice(operand, operand.DynamicAxes()[0], 0, 1); return Slice(operand, operand.DynamicAxes()[0], 0, 1, name);
} }
FunctionPtr Last(const Variable& operand, const std::wstring& name /*= L""*/) FunctionPtr Last(const Variable& operand, const std::wstring& name /*= L""*/)
{ {
VerifyIsSequence(operand); VerifyIsSequence(operand);
return Slice(operand, operand.DynamicAxes()[0], -1, 0); return Slice(operand, operand.DynamicAxes()[0], -1, 0, name);
} }
std::vector<Axis> WhereOpDynamicAxes(const Variable& operand) std::vector<Axis> WhereOpDynamicAxes(const Variable& operand)
@ -2211,20 +2425,6 @@ namespace CNTK
namespace Internal namespace Internal
{ {
FunctionPtr Combine(const std::vector<Variable>& operands, const std::wstring& name /*= L""*/)
{
std::unordered_set<Variable> uniqueOperands;
for (auto operand : operands)
{
if (uniqueOperands.find(operand) != uniqueOperands.end())
LogicError("All operands specified to Combine must be unique");
uniqueOperands.insert(operand);
}
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Combine, operands, Dictionary(), name), name);
}
FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name /*= L""*/) FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name /*= L""*/)
{ {
Sequence::VerifyIsSequence(operand); Sequence::VerifyIsSequence(operand);
@ -2266,14 +2466,8 @@ namespace CNTK
} }
else else
{ {
auto rowSliceFunc = Internal::Slice(operand, Axis(0), 0, 1); auto reduceAllStaticAxesFunc = Internal::ReduceElements(operand, PrimitiveFunction::InternalSumReductionOpName, Axis::AllStaticAxes());
auto result = Minus(rowSliceFunc, rowSliceFunc); return Minus(reduceAllStaticAxesFunc, reduceAllStaticAxesFunc);
// Reduce away all but the static axis 0
for (size_t i = 1; i < result->Output().Shape().Rank(); ++i)
result = ReduceSum(result, Axis(i));
return result;
} }
} }
@ -2286,12 +2480,12 @@ namespace CNTK
FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/) FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/)
{ {
return Internal::GatherPacked(operand, Internal::PackedIndex(operand, Where(condition, newDynamicAxes))); return Internal::GatherPacked(operand, Internal::PackedIndex(/*layout of*/ operand, Where(condition, newDynamicAxes)), name);
} }
FunctionPtr Scatter(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/) FunctionPtr Scatter(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/)
{ {
return Internal::ScatterPacked(operand, Internal::PackedIndex(operand, Where(condition, newDynamicAxes)), condition); return Internal::ScatterPacked(operand, Internal::PackedIndex(/*layout of*/ condition, Where(condition, newDynamicAxes)), /*layout of*/ condition, name);
} }
FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name /*= L""*/) FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name /*= L""*/)
@ -2308,7 +2502,7 @@ namespace CNTK
{ {
using namespace std::placeholders; using namespace std::placeholders;
if (axis.IsStaticAxis()) if (axis.IsStaticAxis() || (axis == Axis::AllStaticAxes()))
{ {
auto additionalProperties = Dictionary(); auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameAxis] = axis; additionalProperties[PrimitiveFunction::AttributeNameAxis] = axis;
@ -2332,7 +2526,7 @@ namespace CNTK
auto cumulativeSumFunction = reductionFunctor(prevAccumulatedValuesFunction, operand); auto cumulativeSumFunction = reductionFunctor(prevAccumulatedValuesFunction, operand);
cumulativeSumFunction->ReplacePlaceholders({ { cumulativeSumFunctionPlaceholder, cumulativeSumFunction } }); cumulativeSumFunction->ReplacePlaceholders({ { cumulativeSumFunctionPlaceholder, cumulativeSumFunction } });
return CNTK::Slice(cumulativeSumFunction, axis, -1, 0); return CNTK::Slice(cumulativeSumFunction, axis, -1, 0, name);
} }
} }
} }

Просмотреть файл

@ -77,54 +77,54 @@ namespace std
namespace CNTK namespace CNTK
{ {
inline const char* PrimitiveOpTypeName(PrimitiveOpType opType) inline const std::wstring& PrimitiveOpTypeName(PrimitiveOpType opType)
{ {
static const std::unordered_map<PrimitiveOpType, const char*> primitiveOpNames = { static const std::unordered_map<PrimitiveOpType, std::wstring> primitiveOpNames = {
{ PrimitiveOpType::Negate, "Negate" }, { PrimitiveOpType::Negate, L"Negate" },
{ PrimitiveOpType::Sigmoid, "Sigmoid" }, { PrimitiveOpType::Sigmoid, L"Sigmoid" },
{ PrimitiveOpType::Tanh, "Tanh" }, { PrimitiveOpType::Tanh, L"Tanh" },
{ PrimitiveOpType::ReLU, "ReLU" }, { PrimitiveOpType::ReLU, L"ReLU" },
{ PrimitiveOpType::Exp, "Exp" }, { PrimitiveOpType::Exp, L"Exp" },
{ PrimitiveOpType::Log, "Log" }, { PrimitiveOpType::Log, L"Log" },
{ PrimitiveOpType::Sqrt, "Sqrt" }, { PrimitiveOpType::Sqrt, L"Sqrt" },
{ PrimitiveOpType::Floor, "Floor" }, { PrimitiveOpType::Floor, L"Floor" },
{ PrimitiveOpType::Abs, "Abs" }, { PrimitiveOpType::Abs, L"Abs" },
{ PrimitiveOpType::Reciprocal, "Reciprocal" }, { PrimitiveOpType::Reciprocal, L"Reciprocal" },
{ PrimitiveOpType::Softmax, "Softmax" }, { PrimitiveOpType::Softmax, L"Softmax" },
{ PrimitiveOpType::Hardmax, "Hardmax" }, { PrimitiveOpType::Hardmax, L"Hardmax" },
{ PrimitiveOpType::TransposeAxes, "TransposeAxes" }, { PrimitiveOpType::TransposeAxes, L"TransposeAxes" },
{ PrimitiveOpType::Where, "Where" }, { PrimitiveOpType::Where, L"Where" },
{ PrimitiveOpType::Slice, "Slice" }, { PrimitiveOpType::Slice, L"Slice" },
{ PrimitiveOpType::Dropout, "Dropout" }, { PrimitiveOpType::Dropout, L"Dropout" },
{ PrimitiveOpType::Reshape, "Reshape" }, { PrimitiveOpType::Reshape, L"Reshape" },
{ PrimitiveOpType::Pooling, "Pooling" }, { PrimitiveOpType::Pooling, L"Pooling" },
{ PrimitiveOpType::SumAll, "SumAll" }, { PrimitiveOpType::SumAll, L"SumAll" },
{ PrimitiveOpType::Plus, "Plus" }, { PrimitiveOpType::Plus, L"Plus" },
{ PrimitiveOpType::Minus, "Minus" }, { PrimitiveOpType::Minus, L"Minus" },
{ PrimitiveOpType::ElementTimes, "ElementTimes" }, { PrimitiveOpType::ElementTimes, L"ElementTimes" },
{ PrimitiveOpType::Equal, "Equal" }, { PrimitiveOpType::Equal, L"Equal" },
{ PrimitiveOpType::NotEqual, "NotEqual" }, { PrimitiveOpType::NotEqual, L"NotEqual" },
{ PrimitiveOpType::Less, "Less" }, { PrimitiveOpType::Less, L"Less" },
{ PrimitiveOpType::LessEqual, "LessEqual" }, { PrimitiveOpType::LessEqual, L"LessEqual" },
{ PrimitiveOpType::Greater, "Greater" }, { PrimitiveOpType::Greater, L"Greater" },
{ PrimitiveOpType::GreaterEqual, "GreaterEqual" }, { PrimitiveOpType::GreaterEqual, L"GreaterEqual" },
{ PrimitiveOpType::PackedIndex, "PackedIndex" }, { PrimitiveOpType::PackedIndex, L"PackedIndex" },
{ PrimitiveOpType::GatherPacked, "GatherPacked" }, { PrimitiveOpType::GatherPacked, L"GatherPacked" },
{ PrimitiveOpType::ScatterPacked, "ScatterPacked" }, { PrimitiveOpType::ScatterPacked, L"ScatterPacked" },
{ PrimitiveOpType::Times, "Times" }, { PrimitiveOpType::Times, L"Times" },
{ PrimitiveOpType::TransposeTimes, "TransposeTimes" }, { PrimitiveOpType::TransposeTimes, L"TransposeTimes" },
{ PrimitiveOpType::Convolution, "Convolution" }, { PrimitiveOpType::Convolution, L"Convolution" },
{ PrimitiveOpType::SquaredError, "SquaredError" }, { PrimitiveOpType::SquaredError, L"SquaredError" },
{ PrimitiveOpType::CrossEntropyWithSoftmax, "CrossEntropyWithSoftmax" }, { PrimitiveOpType::CrossEntropyWithSoftmax, L"CrossEntropyWithSoftmax" },
{ PrimitiveOpType::ClassificationError, "ClassificationError" }, { PrimitiveOpType::ClassificationError, L"ClassificationError" },
{ PrimitiveOpType::PastValue, "PastValue" }, { PrimitiveOpType::PastValue, L"PastValue" },
{ PrimitiveOpType::FutureValue, "FutureValue" }, { PrimitiveOpType::FutureValue, L"FutureValue" },
{ PrimitiveOpType::ReduceElements, "ReduceElements" }, { PrimitiveOpType::ReduceElements, L"ReduceElements" },
{ PrimitiveOpType::BatchNormalization, "BatchNormalization" }, { PrimitiveOpType::BatchNormalization, L"BatchNormalization" },
{ PrimitiveOpType::Clip, "Clip" }, { PrimitiveOpType::Clip, L"Clip" },
{ PrimitiveOpType::Select, "Select" }, { PrimitiveOpType::Select, L"Select" },
{ PrimitiveOpType::Splice, "Splice" }, { PrimitiveOpType::Splice, L"Splice" },
{ PrimitiveOpType::Combine, "Combine" } { PrimitiveOpType::Combine, L"Combine" },
}; };
if (primitiveOpNames.find(opType) == primitiveOpNames.end()) if (primitiveOpNames.find(opType) == primitiveOpNames.end())
@ -220,7 +220,7 @@ namespace CNTK
public: public:
PrimitiveFunction(PrimitiveOpType op, const std::vector<Variable>& inputs, Dictionary&& functionConfig, const std::wstring& functionName = L"") PrimitiveFunction(PrimitiveOpType op, const std::vector<Variable>& inputs, Dictionary&& functionConfig, const std::wstring& functionName = L"")
: Function(inputs, GetOutputVariables(op, inputs, this, functionConfig), std::move(functionConfig), nullptr, functionName), m_op(op) : Function(inputs, GetOutputVariables(op, inputs, this, functionConfig, functionName), std::move(functionConfig), nullptr, functionName), m_op(op)
{ {
} }
@ -239,6 +239,11 @@ namespace CNTK
NOT_IMPLEMENTED; NOT_IMPLEMENTED;
} }
virtual const std::wstring& OpName() override
{
return PrimitiveOpTypeName(OpType());
}
public: public:
PrimitiveOpType OpType() const PrimitiveOpType OpType() const
{ {
@ -343,7 +348,10 @@ namespace CNTK
else else
{ {
if (leftOperandShape[i] != rightOperandShape[i]) if (leftOperandShape[i] != rightOperandShape[i])
RuntimeError("Left operand's shape %S is not compatible with right operand's shape %S for the binary elementwise operation %s", AsStringForErrorReporting(leftOperandShape).c_str(), AsStringForErrorReporting(rightOperandShape).c_str(), PrimitiveOpTypeName(op)); RuntimeError("Left operand's shape %S is not compatible with right operand's shape %S for the binary elementwise operation %S",
AsStringForErrorReporting(leftOperandShape).c_str(),
AsStringForErrorReporting(rightOperandShape).c_str(),
PrimitiveOpTypeName(op).c_str());
outputDims[i] = leftOperandShape[i]; outputDims[i] = leftOperandShape[i];
} }
@ -399,19 +407,25 @@ namespace CNTK
return leftOperandShape.SubShape(0, outputRank).AppendShape(rightOperandShape.SubShape(numReductionAxes)); return leftOperandShape.SubShape(0, outputRank).AppendShape(rightOperandShape.SubShape(numReductionAxes));
} }
static NDShape ReductionOpOutputShape(PrimitiveOpType op, const NDShape& operandShape, const std::vector<size_t>& reductionAxes) static NDShape ReductionOpOutputShape(PrimitiveOpType op, const NDShape& operandShape, const std::vector<size_t>& reductionAxes, bool preserveReductionAxes)
{ {
if (reductionAxes.size() > operandShape.Rank()) if (reductionAxes.size() > operandShape.Rank())
RuntimeError("The number of reduction axes %d exceeds the number of axes in the operand shape %S of the reduction operation %s", (int)reductionAxes.size(), AsStringForErrorReporting(operandShape).c_str(), PrimitiveOpTypeName(op)); RuntimeError("The number of reduction axes %d exceeds the rank in the operand shape %S of the reduction operation %S",
(int)reductionAxes.size(),
AsStringForErrorReporting(operandShape).c_str(),
PrimitiveOpTypeName(op).c_str());
size_t numOutputAxes = operandShape.Rank() - reductionAxes.size(); size_t numOutputAxes = operandShape.Rank() - (preserveReductionAxes ? 0 : reductionAxes.size());
std::vector<size_t> outputDims(numOutputAxes); std::vector<size_t> outputDims(numOutputAxes);
for (size_t i = 0, j = 0; i < operandShape.Rank(); ++i) for (size_t i = 0, j = 0; i < operandShape.Rank(); ++i)
{ {
// Skip axes being reduced over // Skip axes being reduced over
if (std::find(reductionAxes.begin(), reductionAxes.end(), i) != reductionAxes.end()) if (std::find(reductionAxes.begin(), reductionAxes.end(), i) != reductionAxes.end())
continue; {
if (preserveReductionAxes)
outputDims[j++] = 1;
}
else
outputDims[j++] = operandShape[i]; outputDims[j++] = operandShape[i];
} }
@ -433,7 +447,7 @@ namespace CNTK
} }
// TODO: Reconcile this with the ComputationNode::Validate functionality in core CNTK to avoid duplication of inference logic // TODO: Reconcile this with the ComputationNode::Validate functionality in core CNTK to avoid duplication of inference logic
static std::vector<Variable> GetOutputVariables(PrimitiveOpType op, const std::vector<Variable>& inputs, Function* owner, const Dictionary& functionConfig); static std::vector<Variable> GetOutputVariables(PrimitiveOpType op, const std::vector<Variable>& inputs, Function* owner, const Dictionary& functionConfig, const std::wstring& functionName);
private: private:
PrimitiveOpType m_op; PrimitiveOpType m_op;
@ -464,6 +478,7 @@ namespace CNTK
friend class Function; friend class Function;
friend class Trainer; friend class Trainer;
friend class CompositeMinibatchSource; friend class CompositeMinibatchSource;
friend class PackedValue;
template <typename T, typename ...CtorArgTypes> template <typename T, typename ...CtorArgTypes>
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs); friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
@ -476,6 +491,8 @@ namespace CNTK
static std::atomic<unsigned int> s_nextAutoGeneratedDynamicAxis; static std::atomic<unsigned int> s_nextAutoGeneratedDynamicAxis;
static const std::wstring CompositeFunctionOpName;
public: public:
static const std::wstring InternalDefaultDynamicAxisName; static const std::wstring InternalDefaultDynamicAxisName;
static const std::wstring InternalNoSequenceAxisName; static const std::wstring InternalNoSequenceAxisName;
@ -506,15 +523,9 @@ namespace CNTK
const std::unordered_map<Variable, ValuePtr>& rootGradientValues, const std::unordered_map<Variable, ValuePtr>& rootGradientValues,
std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) override; std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) override;
public: virtual const std::wstring& OpName() override
bool NetworkMatricesAllocated() const
{ {
return (m_computationNetwork != nullptr) && m_networkMatricesAllocated; return CompositeFunctionOpName;
}
void PurgeComputationNetwork()
{
m_computationNetwork = nullptr;
} }
private: private:
@ -523,7 +534,7 @@ namespace CNTK
std::unordered_set<Variable>& replacedPlaceholders) override; std::unordered_set<Variable>& replacedPlaceholders) override;
CompositeFunction(const FunctionPtr& rootFunction, std::unordered_set<FunctionPtr>&& allPrimitiveFunctions, const std::wstring& name) CompositeFunction(const FunctionPtr& rootFunction, std::unordered_set<FunctionPtr>&& allPrimitiveFunctions, const std::wstring& name)
: Function({}, rootFunction->Outputs(), Dictionary(), rootFunction, name), m_allPrimitiveFunctions(std::move(allPrimitiveFunctions)) : Function({}, rootFunction->Outputs(), Dictionary(), rootFunction, name), m_allPrimitiveFunctions(std::move(allPrimitiveFunctions)), m_networkMatricesAllocated(false)
{} {}
std::vector<Variable> DetermineInputs() const std::vector<Variable> DetermineInputs() const
@ -597,6 +608,8 @@ namespace CNTK
template <typename ElementType> template <typename ElementType>
static ValuePtr GetValueObjectFromCNTKImplMatrixAndMBLayout(Variable var, const Microsoft::MSR::CNTK::Matrix<ElementType>& matrix, const Microsoft::MSR::CNTK::MBLayoutPtr& layout, bool readOnly = true); static ValuePtr GetValueObjectFromCNTKImplMatrixAndMBLayout(Variable var, const Microsoft::MSR::CNTK::Matrix<ElementType>& matrix, const Microsoft::MSR::CNTK::MBLayoutPtr& layout, bool readOnly = true);
const std::vector<Variable>& GetArgumentDependencies(const Variable& output);
private: private:
// Set of all primitive functions in the graph underlying 'this' Function. Also keeps the primitive Function objects alive // Set of all primitive functions in the graph underlying 'this' Function. Also keeps the primitive Function objects alive
@ -617,6 +630,8 @@ namespace CNTK
// the next 'Backward' call. // the next 'Backward' call.
std::unordered_set<Variable> m_currentBackpropRoots; std::unordered_set<Variable> m_currentBackpropRoots;
std::unordered_map<Variable, std::vector<Variable>> m_perOutputVarArgumentDependencies;
bool m_networkMatricesAllocated; bool m_networkMatricesAllocated;
}; };

Просмотреть файл

@ -26,6 +26,9 @@ using namespace std;
namespace CNTK namespace CNTK
{ {
/*static*/ const std::wstring Learner::LearningRateAttributeName = L"learningRate";
/*static*/ const std::wstring LearnerBase::WasLearningRateResetAttributeName = L"wasLearningRateReset";
template <typename ElementType> template <typename ElementType>
/*static*/ shared_ptr<const Matrix<ElementType>> LearnerBase::GetMatrix(const NDArrayViewPtr& arrayView) /*static*/ shared_ptr<const Matrix<ElementType>> LearnerBase::GetMatrix(const NDArrayViewPtr& arrayView)
{ {
@ -141,7 +144,7 @@ namespace CNTK
// L1 regularizer with proximal gradient descent method // L1 regularizer with proximal gradient descent method
if (m_additionalOptions.l1RegularizationWeight > 0) if (m_additionalOptions.l1RegularizationWeight > 0)
{ {
auto learningRate = ElementType(m_learningRates[m_sampleCount]); auto learningRate = ElementType(LearningRate());
// multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample // multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize); auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize);
parameterValue->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight); parameterValue->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight);
@ -156,17 +159,15 @@ namespace CNTK
LearnerBase::LearnerBase(const vector<Parameter>& parameters, LearnerBase::LearnerBase(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients /* = true */, AdditionalLearningOptions additionalOptions,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/, bool allocateSmoothGradients /* = true */)
bool gradientClippingWithTruncation /*= true*/) : Learner(parameters, learningRates[0]),
: Learner(parameters), m_wasLearningRateReset(false),
m_learningRates(learningRates), m_learningRateSchedule(learningRates),
m_sampleCount(0), m_sampleCount(0),
m_minibatchCount(0) m_minibatchCount(0),
m_additionalOptions(additionalOptions)
{ {
m_additionalOptions.gradientClippingThresholdPerSample = clippingThresholdPerSample;
m_additionalOptions.gradientClippingWithTruncation = gradientClippingWithTruncation;
for (const auto& parameter : parameters) for (const auto& parameter : parameters)
{ {
if (!allocateSmoothGradients) if (!allocateSmoothGradients)
@ -225,8 +226,8 @@ namespace CNTK
#endif #endif
#if DUMPOUTPUT #if DUMPOUTPUT
auto learningRate = ElementType(m_learningRates[m_sampleCount]); auto learningRate = ElementType(LearningRate());
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount)); auto momentum = ElementType(MomentumValueForMB(m_momentumValues[m_sampleCount], trainingSampleCount));
LOGPRINTF(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n", LOGPRINTF(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n",
learningRate, momentum, trainingSampleCount); learningRate, momentum, trainingSampleCount);
LOGPRINTF(stderr, "GradUpdateType()=%s, GradientUpdateNoiseStd()=%0.8f\n", LOGPRINTF(stderr, "GradUpdateType()=%s, GradientUpdateNoiseStd()=%0.8f\n",
@ -280,6 +281,9 @@ namespace CNTK
checkpoint[L"sampleCount"] = m_sampleCount; checkpoint[L"sampleCount"] = m_sampleCount;
checkpoint[L"minibatchCount"] = m_minibatchCount; checkpoint[L"minibatchCount"] = m_minibatchCount;
if (m_wasLearningRateReset)
checkpoint[WasLearningRateResetAttributeName] = m_wasLearningRateReset;
// TODO: should we also save learning rate schedule into the checkpoint? // TODO: should we also save learning rate schedule into the checkpoint?
// If that is the case, need to be able to override this method in subclasses // If that is the case, need to be able to override this method in subclasses
// and save momentum schedule as well. // and save momentum schedule as well.
@ -294,11 +298,19 @@ namespace CNTK
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter); const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
checkpoint[parameter.Uid()] = *smoothedGradientValue; checkpoint[parameter.Uid()] = *smoothedGradientValue;
} }
// Add the base Learner's checkpoint state
auto baseCheckpointState = Learner::GetCheckpointState();
checkpoint.Add(baseCheckpointState);
return checkpoint; return checkpoint;
} }
/*virtual*/ void LearnerBase::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/ /*virtual*/ void LearnerBase::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/
{ {
// Restore the base learner's checkpoint state
Learner::RestoreFromCheckpoint(checkpoint);
m_sampleCount = checkpoint[L"sampleCount"].Value<size_t>(); m_sampleCount = checkpoint[L"sampleCount"].Value<size_t>();
m_minibatchCount = checkpoint[L"minibatchCount"].Value<size_t>(); m_minibatchCount = checkpoint[L"minibatchCount"].Value<size_t>();
@ -309,6 +321,9 @@ namespace CNTK
LogicError("Unsupported checkpoint version."); LogicError("Unsupported checkpoint version.");
} }
if (checkpoint.Contains(WasLearningRateResetAttributeName))
m_wasLearningRateReset = checkpoint[WasLearningRateResetAttributeName].Value<bool>();
for (const auto& parameter : Parameters()) for (const auto& parameter : Parameters())
{ {
if (!checkpoint.Contains(parameter.Uid())) if (!checkpoint.Contains(parameter.Uid()))
@ -348,25 +363,16 @@ namespace CNTK
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue); const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue); const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = ElementType(m_learningRates[m_sampleCount]); auto learningRate = ElementType(LearningRate());
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount)); auto momentum = ElementType(MomentumValueForMB(m_momentumValues[m_sampleCount], trainingSampleCount));
// TODO: break up the NormalGrad into 3 different functions, each with its own set of parameters // TODO: break up the NormalGrad into 3 different functions, each with its own set of parameters
// Also, come up with a better name for NormalGrad (Default? Regular? Plain?).
// (one for vanilla SGD, the other for momentum SGD, and the third one for NAG). // (one for vanilla SGD, the other for momentum SGD, and the third one for NAG).
smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix, smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix,
learningRate, momentum, m_useNesterovAcceleration); learningRate, momentum, m_useNesterovAcceleration);
} }
LearnerAdaGrad::LearnerAdaGrad(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
: LearnerBase(parameters, learningRates, true, clippingThresholdPerSample, gradientClippingWithTruncation),
m_needAveMultiplier(needAveMultiplier)
{
}
/*virtual*/ void LearnerAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/ /*virtual*/ void LearnerAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
{ {
UPDATE_FUNCTION; UPDATE_FUNCTION;
@ -382,7 +388,7 @@ namespace CNTK
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue); const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue); const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = ElementType(m_learningRates[m_sampleCount]); auto learningRate = ElementType(LearningRate());
auto aveMultiplier = smoothedGradientMatrix->Adagrad(*gradientMatrix, m_needAveMultiplier); auto aveMultiplier = smoothedGradientMatrix->Adagrad(*gradientMatrix, m_needAveMultiplier);
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix); Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
@ -390,16 +396,20 @@ namespace CNTK
LearnerFSAdaGrad::LearnerFSAdaGrad(const vector<Parameter>& parameters, LearnerFSAdaGrad::LearnerFSAdaGrad(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums, const MomentumValuesPerSample& momentumValues,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/, const double targetAdagradAvDenom,
bool gradientClippingWithTruncation /*= true*/) const size_t adagradT,
: LearnerMomentumSGD(parameters, learningRates, momentums, /*allocateSmoothGradients*/ false, clippingThresholdPerSample, gradientClippingWithTruncation) AdditionalLearningOptions additionalOptions)
: LearnerMomentumSGD(parameters, learningRates, momentumValues, additionalOptions, /*allocateSmoothGradients*/ false),
m_targetAdagradAvDenom(targetAdagradAvDenom),
m_adagradT(adagradT)
{ {
for (const auto& parameter : parameters) for (const auto& parameter : parameters)
{ {
auto shape = GetMatrixShape(parameter); auto shape = GetMatrixShape(parameter);
NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], 2 * shape[1]}); NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], 2 * shape[1]});
m_smoothedGradientValues.insert(make_pair(parameter, view)); m_smoothedGradientValues.insert(make_pair(parameter, view));
m_smoothedCounts.insert(make_pair(parameter, 0.0));
} }
} }
@ -411,36 +421,31 @@ namespace CNTK
template <typename ElementType> template <typename ElementType>
void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{ {
UNUSED(trainingSampleCount);
const auto& parameterValue = parameter.Value(); const auto& parameterValue = parameter.Value();
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue); const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue); const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue); const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = m_learningRates[m_sampleCount]; auto learningRate = LearningRate();
auto momentum = MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount); auto momentum = MomentumValueForMB(m_momentumValues[m_sampleCount], trainingSampleCount);
const double targetAdagradAvDenom = 0.0025; // 1/400 magic constant const double varMomentum = (exp(-1.0 * trainingSampleCount / m_adagradT));
const size_t adagradT = 2 * 3600 * 100; double& smoothedCount = m_smoothedCounts.at(parameter);
const double varMomentum = (exp(-1.0 * trainingSampleCount / adagradT)); smoothedGradientMatrix->FSAdagradUpdate(trainingSampleCount, *gradientMatrix, *parameterMatrix, smoothedCount, learningRate, m_targetAdagradAvDenom, momentum, varMomentum);
static double smoothedCount = 0; // BUGBUG!!! Carried over from Alexey's original implementation, needs to be fixed.
smoothedGradientMatrix->FSAdagradUpdate(trainingSampleCount, *gradientMatrix, *parameterMatrix, smoothedCount, learningRate, targetAdagradAvDenom, momentum, varMomentum);
} }
LearnerRMSProp::LearnerRMSProp(const vector<Parameter>& parameters, const LearningRatesPerSample& learningRates, LearnerRMSProp::LearnerRMSProp(const vector<Parameter>& parameters,
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier, const LearningRatesPerSample& learningRates,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/, double gamma, double inc, double dec, double max, double min,
bool gradientClippingWithTruncation /*= true*/) bool needAveMultiplier,
: LearnerBase(parameters, learningRates, /*allocateSmoothGradients*/ false, clippingThresholdPerSample, gradientClippingWithTruncation), AdditionalLearningOptions additionalOptions)
: LearnerBase(parameters, learningRates, additionalOptions, /*allocateSmoothGradients*/ false),
m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min), m_needAveMultiplier(needAveMultiplier) m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min), m_needAveMultiplier(needAveMultiplier)
{ {
for (const auto& parameter : parameters) for (const auto& parameter : parameters)
{ {
// When needAveMultiplier == true, CPU and GPU implementations of RMSProp require different number of columns. // When needAveMultiplier == true, CPU and GPU implementations of RMSProp require different number of columns.
// TODO: verify that this is correct.
size_t factor = 3; size_t factor = 3;
if (needAveMultiplier && parameter.Value()->Device().Type() == DeviceKind::GPU) if (needAveMultiplier && parameter.Value()->Device().Type() == DeviceKind::GPU)
{ {
@ -469,12 +474,15 @@ namespace CNTK
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue); const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue); const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = ElementType(m_learningRates[m_sampleCount]); auto learningRate = ElementType(LearningRate());
auto aveMultiplier = smoothedGradientMatrix->RmsProp(*gradientMatrix, auto aveMultiplier = smoothedGradientMatrix->RmsProp(*gradientMatrix,
ElementType(m_gamma), ElementType(m_inc), ElementType(m_gamma),
ElementType(m_max), ElementType(m_dec), ElementType(m_inc),
ElementType(m_min), m_needAveMultiplier); ElementType(m_max),
ElementType(m_dec),
ElementType(m_min),
m_needAveMultiplier);
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix); Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
} }
@ -484,54 +492,51 @@ namespace CNTK
LearnerPtr SGDLearner(const vector<Parameter>& parameters, LearnerPtr SGDLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/, AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
bool gradientClippingWithTruncation /*= true*/)
{ {
return MakeSharedObject<LearnerSGD>(parameters, learningRates, true, clippingThresholdPerSample, gradientClippingWithTruncation); return MakeSharedObject<LearnerSGD>(parameters, learningRates, additionalOptions);
} }
LearnerPtr MomentumSGDLearner(const vector<Parameter>& parameters, LearnerPtr MomentumSGDLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums, const MomentumValuesPerSample& momentumValues,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/, AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
bool gradientClippingWithTruncation /*= true*/)
{ {
return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRates, momentums, true, clippingThresholdPerSample, gradientClippingWithTruncation); return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRates, momentumValues, additionalOptions);
} }
LearnerPtr NesterovLearner(const vector<Parameter>& parameters, LearnerPtr NesterovLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums, const MomentumValuesPerSample& momentumValues,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/, AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
bool gradientClippingWithTruncation /*= true*/)
{ {
return MakeSharedObject<LearnerNesterov>(parameters, learningRates, momentums, clippingThresholdPerSample, gradientClippingWithTruncation); return MakeSharedObject<LearnerNesterov>(parameters, learningRates, momentumValues, additionalOptions);
} }
LearnerPtr FSAdaGradLearner(const vector<Parameter>& parameters, LearnerPtr FSAdaGradLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums, const MomentumValuesPerSample& momentumValues,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/, const double targetAdagradAvDenom /*= 0.0025*/,
bool gradientClippingWithTruncation /*= true*/) const size_t adagradT /*= 2 * 3600 * 100*/,
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
{ {
return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRates, momentums, clippingThresholdPerSample, gradientClippingWithTruncation); return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRates, momentumValues, targetAdagradAvDenom, adagradT, additionalOptions);
} }
LearnerPtr AdaGradLearner(const vector<Parameter>& parameters, LearnerPtr AdaGradLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
bool needAveMultiplier /*= true*/, bool needAveMultiplier /*= true*/,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/, AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
bool gradientClippingWithTruncation /*= true*/)
{ {
return MakeSharedObject<LearnerAdaGrad>(parameters, learningRates, needAveMultiplier, clippingThresholdPerSample, gradientClippingWithTruncation); return MakeSharedObject<LearnerAdaGrad>(parameters, learningRates, needAveMultiplier, additionalOptions);
} }
LearnerPtr RMSPropLearner(const vector<Parameter>& parameters, const LearningRatesPerSample& learningRates, LearnerPtr RMSPropLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min, double gamma, double inc, double dec, double max, double min,
bool needAveMultiplier /*= true*/, bool needAveMultiplier /*= true*/,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/, AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
bool gradientClippingWithTruncation /*= true*/)
{ {
return MakeSharedObject<LearnerRMSProp>(parameters, learningRates, gamma, inc, dec, max, min, needAveMultiplier, clippingThresholdPerSample, gradientClippingWithTruncation); return MakeSharedObject<LearnerRMSProp>(parameters, learningRates, gamma, inc, dec, max, min, needAveMultiplier, additionalOptions);
} }
} }

Просмотреть файл

@ -3,29 +3,21 @@
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information. // Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
// //
#pragma once
#include "stdafx.h" #include "stdafx.h"
#include "CNTKLibrary.h" #include "CNTKLibrary.h"
#include <numeric> #include <numeric>
namespace CNTK namespace CNTK
{ {
// TODO: Move this to Trainer along with Pre-, PostProcess and ClipGradient.
// A collection of additional options that are applicable for all standard learners
// (after these options are set, they retain their value for the entire lifespan of a learner).
struct AdditionalLearningOptions
{
double l1RegularizationWeight = 0.0;
double l2RegularizationWeight = 0.0;
double gaussianNoiseInjectionStdDev = 0.0;
bool gradientClippingWithTruncation = true;
double gradientClippingThresholdPerSample = std::numeric_limits<double>::infinity();
};
// An abstract base class at the root of the standard learners hierarchy // An abstract base class at the root of the standard learners hierarchy
// It implements most of the learner functionality, except for the actual update function, // It implements most of the learner functionality, except for the actual update function,
// and adds a few pre-/postprocessing methods (which are invoked before and after the update). // and adds a few pre-/postprocessing methods (which are invoked before and after the update).
class LearnerBase : public Learner class LearnerBase : public Learner
{ {
static const std::wstring WasLearningRateResetAttributeName;
public: public:
virtual bool Update(const std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount) override final; virtual bool Update(const std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount) override final;
@ -33,18 +25,36 @@ namespace CNTK
virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override final; virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override final;
virtual void ResetLearningRate(double learningRate) override final
{
m_wasLearningRateReset = true;
Learner::ResetLearningRate(learningRate);
}
virtual double LearningRate() const override final
{
if (m_wasLearningRateReset)
return Learner::LearningRate();
else
return m_learningRateSchedule[m_sampleCount];
}
protected: protected:
// allocateSmoothGradients flag specifies whether NDArrayViews for smoothed gradients can be allocated
// in the base class constructor (in which case they are allocated with the shapes identical to the shapes of
// the corresponding parameters) or if the allocation should be deferred to the subclass constructor (which
// performs allocation that is specific to the particular learner, see FSAdaGrad and RMSProp).
LearnerBase(const std::vector<Parameter>& parameters, LearnerBase(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients = true, AdditionalLearningOptions additionalOptions,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), bool allocateSmoothGradients = true);
bool gradientClippingWithTruncation = true);
virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const = 0; virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const = 0;
std::string LearnerType() const; std::string LearnerType() const;
LearningRatesPerSample m_learningRates; bool m_wasLearningRateReset;
LearningRatesPerSample m_learningRateSchedule;
AdditionalLearningOptions m_additionalOptions; AdditionalLearningOptions m_additionalOptions;
@ -84,6 +94,7 @@ namespace CNTK
// Retrieves the shape of the matrix corresponding to the parameter value. // Retrieves the shape of the matrix corresponding to the parameter value.
static NDShape GetMatrixShape(const Parameter& parameter); static NDShape GetMatrixShape(const Parameter& parameter);
size_t m_sampleCount; size_t m_sampleCount;
size_t m_minibatchCount; size_t m_minibatchCount;
@ -106,11 +117,10 @@ namespace CNTK
public: public:
LearnerSGD(const std::vector<Parameter>& parameters, LearnerSGD(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients = true, AdditionalLearningOptions additionalOptions,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), bool allocateSmoothGradients = true)
bool gradientClippingWithTruncation = true) : LearnerBase(parameters, learningRates, additionalOptions, allocateSmoothGradients),
: LearnerBase(parameters, learningRates, allocateSmoothGradients, clippingThresholdPerSample, gradientClippingWithTruncation), m_momentumValues(0.0),
m_momentums(0.0),
m_useNesterovAcceleration(false) m_useNesterovAcceleration(false)
{} {}
@ -121,8 +131,8 @@ namespace CNTK
template <typename ElementType> template <typename ElementType>
void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const; void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
// TODO: Move m_momentums to LearnerMomentumSGD as soon as NormalGrad is refactored. // TODO: Move m_momentumValues to LearnerMomentumSGD as soon as NormalGrad is refactored.
MomentumsPerSample m_momentums; MomentumValuesPerSample m_momentumValues;
bool m_useNesterovAcceleration; bool m_useNesterovAcceleration;
}; };
@ -132,13 +142,12 @@ namespace CNTK
public: public:
LearnerMomentumSGD(const std::vector<Parameter>& parameters, LearnerMomentumSGD(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums, const MomentumValuesPerSample& momentumValues,
bool allocateSmoothGradients = true, AdditionalLearningOptions additionalOptions,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), bool allocateSmoothGradients = true)
bool gradientClippingWithTruncation = true) : LearnerSGD(parameters, learningRates, additionalOptions, allocateSmoothGradients)
: LearnerSGD(parameters, learningRates, allocateSmoothGradients, clippingThresholdPerSample, gradientClippingWithTruncation)
{ {
m_momentums = momentums; m_momentumValues = momentumValues;
} }
}; };
@ -149,10 +158,9 @@ namespace CNTK
LearnerNesterov(const std::vector<Parameter>& parameters, LearnerNesterov(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums, const MomentumValuesPerSample& momentumValues,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), AdditionalLearningOptions additionalOptions)
bool gradientClippingWithTruncation = true) : LearnerMomentumSGD(parameters, learningRates, momentumValues, additionalOptions, /*allocateSmoothGradients*/ true)
: LearnerMomentumSGD(parameters, learningRates, momentums, true, clippingThresholdPerSample, gradientClippingWithTruncation)
{ {
m_useNesterovAcceleration = true; m_useNesterovAcceleration = true;
} }
@ -165,8 +173,11 @@ namespace CNTK
LearnerAdaGrad(const std::vector<Parameter>& parameters, LearnerAdaGrad(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
bool needAveMultiplier, bool needAveMultiplier,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), AdditionalLearningOptions additionalOptions)
bool gradientClippingWithTruncation = true); : LearnerBase(parameters, learningRates, additionalOptions, /*allocateSmoothGradients*/ true),
m_needAveMultiplier(needAveMultiplier)
{
}
protected: protected:
bool m_needAveMultiplier; bool m_needAveMultiplier;
@ -183,9 +194,10 @@ namespace CNTK
LearnerFSAdaGrad(const std::vector<Parameter>& parameters, LearnerFSAdaGrad(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums, const MomentumValuesPerSample& momentumValues,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), const double targetAdagradAvDenom,
bool gradientClippingWithTruncation = true); const size_t adagradT,
AdditionalLearningOptions additionalOptions);
protected: protected:
@ -193,6 +205,11 @@ namespace CNTK
template <typename ElementType> template <typename ElementType>
void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const; void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
private:
mutable std::unordered_map<Parameter, double> m_smoothedCounts;
double m_targetAdagradAvDenom;
size_t m_adagradT;
}; };
class LearnerRMSProp : public LearnerBase class LearnerRMSProp : public LearnerBase
@ -203,8 +220,7 @@ namespace CNTK
const LearningRatesPerSample& learningRates, const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min, double gamma, double inc, double dec, double max, double min,
bool needAveMultiplier, bool needAveMultiplier,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(), AdditionalLearningOptions additionalOptions);
bool gradientClippingWithTruncation = true);
protected: protected:

Просмотреть файл

@ -12,6 +12,7 @@
#include "ReaderShim.h" #include "ReaderShim.h"
#include "Function.h" #include "Function.h"
#include <tuple> #include <tuple>
#include "Value.h"
using namespace Microsoft::MSR::CNTK; using namespace Microsoft::MSR::CNTK;
@ -78,6 +79,8 @@ namespace CNTK
static const std::unordered_map<std::wstring, std::wstring> deserializerTypeNameToModuleNameMap = { static const std::unordered_map<std::wstring, std::wstring> deserializerTypeNameToModuleNameMap = {
{ L"CNTKTextFormatDeserializer", L"CNTKTextFormatReader" }, { L"CNTKTextFormatDeserializer", L"CNTKTextFormatReader" },
{ L"ImageDeserializer", L"ImageReader" }, { L"ImageDeserializer", L"ImageReader" },
{ L"HTKFeatureDeserializer", L"HTKDeserializers" },
{ L"HTKMLFDeserializer", L"HTKDeserializers" },
}; };
auto& deserializerConfigDict = deserializerConfig.Value<Dictionary>(); auto& deserializerConfigDict = deserializerConfig.Value<Dictionary>();
@ -103,6 +106,10 @@ namespace CNTK
} }
} }
if (deserializerTypeNameToModuleNameMap.find(deserializerTypeName) == deserializerTypeNameToModuleNameMap.end())
InvalidArgument("Unknown deserializer type (%S)", deserializerTypeName.c_str());
deserializerConfigDict[L"module"] = deserializerTypeNameToModuleNameMap.at(deserializerTypeName); deserializerConfigDict[L"module"] = deserializerTypeNameToModuleNameMap.at(deserializerTypeName);
} }
@ -197,7 +204,7 @@ namespace CNTK
// TODO: Eliminate the unnecessary CPU to CPU copy // TODO: Eliminate the unnecessary CPU to CPU copy
ReaderShim<float>::FillMatrixFromStream(currentStreamDesc->m_storageType, dataMatrix.get(), sampleSize, currentStreamMinibatchData, nullptr); ReaderShim<float>::FillMatrixFromStream(currentStreamDesc->m_storageType, dataMatrix.get(), sampleSize, currentStreamMinibatchData, nullptr);
minibatchValuePtr = CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(sampleShape, *dataMatrix, currentStreamMinibatchData->m_layout, false); minibatchValuePtr = MakeSharedObject<PackedValue>(sampleShape, dataMatrix, currentStreamMinibatchData->m_layout, /*readOnly =*/ false);
size_t numSamples = currentStreamMinibatchData->m_layout->GetActualNumSamples(); size_t numSamples = currentStreamMinibatchData->m_layout->GetActualNumSamples();
size_t numSequences = currentStreamMinibatchData->m_layout->GetNumSequences(); size_t numSequences = currentStreamMinibatchData->m_layout->GetNumSequences();

Просмотреть файл

@ -36,19 +36,18 @@ namespace CNTK
} }
NDMask::~NDMask() NDMask::~NDMask()
{ {}
}
void NDMask::MaskSection(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape) void NDMask::MarkSectionAs(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape, MaskKind maskKind)
{ {
// TODO: Implement batching of masking operation for masks residing on GPUs to avoid making // TODO: Implement batching of masking operation for masks residing on GPUs to avoid making
// GPU invocations for each MaskSection call. // GPU invocations for each MaskSection call.
if (sectionOffset.size() > m_maskShape.Rank()) if (sectionOffset.size() > m_maskShape.Rank())
LogicError("NDMask::MaskSection: The sectionOffset cannot have dimensionality higher than the number of axes of 'this' mask"); LogicError("NDMask::MaskSection: The sectionOffset cannot have dimensionality higher than the rank of 'this' mask");
if (sectionShape.Rank() > m_maskShape.Rank()) if (sectionShape.Rank() > m_maskShape.Rank())
LogicError("NDMask::MaskSection: The section shape cannot have an axes count higher than the number of axes of 'this' mask"); LogicError("NDMask::MaskSection: The section shape cannot have an axes count higher than the rank of 'this' mask");
std::vector<size_t> offset(m_maskShape.Rank(), 0); std::vector<size_t> offset(m_maskShape.Rank(), 0);
for (size_t i = 0; i < sectionOffset.size(); ++i) for (size_t i = 0; i < sectionOffset.size(); ++i)
@ -62,7 +61,7 @@ namespace CNTK
size_t sliceRowLength = (shape[0] != NDShape::InferredDimension) ? shape[0] : (maskMatrix->GetNumRows() - rowOffset); size_t sliceRowLength = (shape[0] != NDShape::InferredDimension) ? shape[0] : (maskMatrix->GetNumRows() - rowOffset);
size_t sliceColLength = (shape[1] != NDShape::InferredDimension) ? shape[1] : (maskMatrix->GetNumCols() - colOffset); size_t sliceColLength = (shape[1] != NDShape::InferredDimension) ? shape[1] : (maskMatrix->GetNumCols() - colOffset);
if ((rowOffset == 0) && (sliceRowLength == maskMatrix->GetNumRows())) if ((rowOffset == 0) && (sliceRowLength == maskMatrix->GetNumRows()))
maskMatrix->ColumnSlice(colOffset, sliceColLength).SetValue(0); maskMatrix->ColumnSlice(colOffset, sliceColLength).SetValue((char)maskKind);
else else
{ {
// Since Matrix does not support strides in the row dimension, we will need to create separate slices for each column // Since Matrix does not support strides in the row dimension, we will need to create separate slices for each column
@ -70,15 +69,15 @@ namespace CNTK
{ {
auto column = maskMatrix->ColumnSlice(i, 1); auto column = maskMatrix->ColumnSlice(i, 1);
column.Reshape(1, maskMatrix->GetNumRows()); column.Reshape(1, maskMatrix->GetNumRows());
column.ColumnSlice(rowOffset, sliceRowLength).SetValue(0); column.ColumnSlice(rowOffset, sliceRowLength).SetValue((char)maskKind);
} }
} }
} }
void NDMask::Clear() void NDMask::Clear()
{ {
// Clear the mask by marking all samples as Valid; i.e. a value of 1 // Clear the mask by marking all samples as Valid
GetMatrix()->SetValue(1); GetMatrix()->SetValue((char)MaskKind::Valid);
} }
size_t NDMask::MaskedCount() const size_t NDMask::MaskedCount() const
@ -86,17 +85,17 @@ namespace CNTK
auto maskMatrix = GetMatrix(); auto maskMatrix = GetMatrix();
std::unique_ptr<char[]> maskData(maskMatrix->CopyToArray()); std::unique_ptr<char[]> maskData(maskMatrix->CopyToArray());
return std::count_if(maskData.get(), maskData.get() + maskMatrix->GetNumElements(), [](const char& val) { return std::count_if(maskData.get(), maskData.get() + maskMatrix->GetNumElements(), [](const char& val) {
return val == 0; return val == (char)MaskKind::Invalid;
}); });
} }
// TODO: This could actually be strided? // TODO: This could actually be strided?
const char* NDMask::DataBuffer() const const MaskKind* NDMask::DataBuffer() const
{ {
// First make sure that the underlying matrix is on the right device // First make sure that the underlying matrix is on the right device
auto matrix = GetMatrix(); auto matrix = GetMatrix();
matrix->TransferToDeviceIfNotThere(AsCNTKImplDeviceId(m_device), true); matrix->TransferToDeviceIfNotThere(AsCNTKImplDeviceId(m_device), true);
return matrix->Data(); return (const MaskKind*)(matrix->Data());
} }
Matrix<char>* NDMask::GetMatrix() const Matrix<char>* NDMask::GetMatrix() const
@ -112,9 +111,9 @@ namespace CNTK
GetMatrix()->AssignValuesOf(*source.GetMatrix()); GetMatrix()->AssignValuesOf(*source.GetMatrix());
} }
NDMaskPtr NDMask::DeepClone() const NDMaskPtr NDMask::DeepClone(const DeviceDescriptor& device) const
{ {
NDMaskPtr newMask = MakeSharedObject<NDMask>(this->Shape(), this->Device()); NDMaskPtr newMask = MakeSharedObject<NDMask>(this->Shape(), device);
newMask->CopyFrom(*this); newMask->CopyFrom(*this);
return newMask; return newMask;

Просмотреть файл

@ -13,7 +13,24 @@ namespace CNTK
Trainer::Trainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const FunctionPtr& evaluationFunction, const std::unordered_set<LearnerPtr>& parameterLearners) Trainer::Trainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const FunctionPtr& evaluationFunction, const std::unordered_set<LearnerPtr>& parameterLearners)
: m_model(model), m_lossFunction(lossFunction), m_evaluationFunction(evaluationFunction), m_parameterLearners(parameterLearners), m_prevMinibatchNumSamples(1) : m_model(model), m_lossFunction(lossFunction), m_evaluationFunction(evaluationFunction), m_parameterLearners(parameterLearners), m_prevMinibatchNumSamples(1)
{ {
m_combinedTrainingFunction = Combine({ model, lossFunction, evaluationFunction }); if (m_lossFunction->Output().DynamicAxes().empty())
InvalidArgument("The loss function specified in the Trainer constructor must correspond to minibatch data and have dynamic axes");
if (m_evaluationFunction && m_evaluationFunction->Output().DynamicAxes().empty())
InvalidArgument("The evaluation function specified in the Trainer constructor must correspond to minibatch data and have dynamic axes");
m_aggregatedLossFunction = ReduceSum(lossFunction);
if (m_evaluationFunction)
m_aggregatedEvaluationFunction = ReduceSum(m_evaluationFunction);
std::vector<Variable> combinedFunctionArgs = { m_model, m_aggregatedLossFunction, m_lossFunction };
if (m_evaluationFunction)
{
combinedFunctionArgs.push_back(m_aggregatedEvaluationFunction);
combinedFunctionArgs.push_back(m_evaluationFunction);
}
m_combinedTrainingFunction = Combine(combinedFunctionArgs);
auto modelParameters = m_combinedTrainingFunction->Parameters(); auto modelParameters = m_combinedTrainingFunction->Parameters();
std::unordered_set<Parameter> learnerParameters; std::unordered_set<Parameter> learnerParameters;
@ -66,20 +83,11 @@ namespace CNTK
return scalar; return scalar;
} }
static size_t GetSampleCountFromArguments(const Variable& evalOrLossArgument, const std::unordered_map<Variable, ValuePtr>& arguments) static size_t GetSampleCount(const Variable& var, const ValuePtr& value)
{ {
// Find the argument whose dynamic axes match the criterion operation's dynamic axes (i.e. label dynamic axes) auto valueDataShape = value->Shape();
// Then we determine the actual number of samples contributing to the training loss from the argument's Value object size_t numMaskedSamples = value->MaskedCount();
auto argumentIter = std::find_if(arguments.begin(), arguments.end(), [evalOrLossArgument](const std::pair<Variable, ValuePtr>& currentPair) { size_t numSamplesInDataArrayView = valueDataShape.SubShape(var.Shape().Rank()).TotalSize();
return (currentPair.first.DynamicAxes() == evalOrLossArgument.DynamicAxes());
});
auto argumentValue = argumentIter->second;
auto argumentVar = argumentIter->first;
auto argumentDataShape = argumentValue->Data()->Shape();
auto mask = argumentValue->Mask();
size_t numMaskedSamples = (mask != nullptr) ? mask->MaskedCount() : 0;
size_t numSamplesInDataArrayView = argumentDataShape.SubShape(argumentVar.Shape().Rank()).TotalSize();
if (numMaskedSamples > numSamplesInDataArrayView) if (numMaskedSamples > numSamplesInDataArrayView)
LogicError("Number of masked values cannot exceed the number of samples that the Value object's Data NDArrayView can hold"); LogicError("Number of masked values cannot exceed the number of samples that the Value object's Data NDArrayView can hold");
@ -88,15 +96,15 @@ namespace CNTK
double Trainer::TestMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/) double Trainer::TestMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{ {
if (!m_evaluationFunction) if (!m_aggregatedEvaluationFunction)
InvalidArgument("Trainer::TestMinibatch: Cannot test when no evaluation function was specified during 'this' trainer's construction"); InvalidArgument("Trainer::TestMinibatch: Cannot test when no evaluation function was specified during 'this' trainer's construction");
// TODO: Should we refactor this code that is somewhat similar to the prologue of the TrainMinibatch function // TODO: Should we refactor this code that is somewhat similar to the prologue of the TrainMinibatch function
std::unordered_map<Variable, ValuePtr> outputs = { { m_evaluationFunction, nullptr } }; std::unordered_map<Variable, ValuePtr> outputs = { { m_aggregatedEvaluationFunction, nullptr }, {m_evaluationFunction, nullptr} };
m_combinedTrainingFunction->Forward(arguments, outputs, computeDevice); m_combinedTrainingFunction->Forward(arguments, outputs, computeDevice);
auto sampleCount = GetSampleCountFromArguments(*(m_evaluationFunction->Arguments().begin()), arguments); auto sampleCount = GetSampleCount(m_evaluationFunction, outputs[m_evaluationFunction]);
return (GetScalarValue(outputs[m_evaluationFunction]) / sampleCount); return (GetScalarValue(outputs[m_aggregatedEvaluationFunction]) / sampleCount);
} }
bool Trainer::TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/) bool Trainer::TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
@ -107,16 +115,16 @@ namespace CNTK
bool Trainer::TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/) bool Trainer::TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{ {
std::unordered_map<Variable, ValuePtr> outputs = { { m_lossFunction, nullptr } }; std::unordered_map<Variable, ValuePtr> outputs = { { m_aggregatedLossFunction, nullptr }, { m_lossFunction, nullptr } };
if (m_evaluationFunction) if (m_aggregatedEvaluationFunction)
outputs.insert({ m_evaluationFunction, nullptr }); outputs.insert({ m_aggregatedEvaluationFunction, nullptr });
outputs.insert(outputsToFetch.begin(), outputsToFetch.end()); outputs.insert(outputsToFetch.begin(), outputsToFetch.end());
auto backPropSate = m_combinedTrainingFunction->Forward(arguments, outputs, computeDevice, { m_lossFunction }); auto backPropSate = m_combinedTrainingFunction->Forward(arguments, outputs, computeDevice, { m_aggregatedLossFunction });
m_prevMinibatchAggregateTrainingLossValue = outputs[m_lossFunction]; m_prevMinibatchAggregateTrainingLossValue = outputs[m_aggregatedLossFunction];
if (m_evaluationFunction) if (m_aggregatedEvaluationFunction)
m_prevMinibatchAggregateEvalCriterionValue = outputs[m_evaluationFunction]; m_prevMinibatchAggregateEvalCriterionValue = outputs[m_aggregatedEvaluationFunction];
for (auto outputToFetch : outputsToFetch) for (auto outputToFetch : outputsToFetch)
{ {
@ -124,8 +132,8 @@ namespace CNTK
outputsToFetch[outputToFetch.first] = outputs[outputToFetch.first]; outputsToFetch[outputToFetch.first] = outputs[outputToFetch.first];
} }
ValuePtr rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(m_lossFunction->Output().GetDataType(), m_prevMinibatchAggregateTrainingLossValue->Data()->Shape(), computeDevice), outputs.at(m_lossFunction)->Mask()); ValuePtr rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(m_aggregatedLossFunction->Output().GetDataType(), m_prevMinibatchAggregateTrainingLossValue->Shape(), computeDevice), outputs.at(m_aggregatedLossFunction)->Mask());
if (m_lossFunction->Output().GetDataType() == DataType::Float) if (m_aggregatedLossFunction->Output().GetDataType() == DataType::Float)
rootGradientValue->Data()->SetValue(1.0f); rootGradientValue->Data()->SetValue(1.0f);
else else
rootGradientValue->Data()->SetValue(1.0); rootGradientValue->Data()->SetValue(1.0);
@ -135,9 +143,9 @@ namespace CNTK
for (const auto& parameter : modelParameters) for (const auto& parameter : modelParameters)
parameterGradients[parameter] = nullptr; parameterGradients[parameter] = nullptr;
m_combinedTrainingFunction->Backward(backPropSate, { { m_lossFunction, rootGradientValue } }, parameterGradients); m_combinedTrainingFunction->Backward(backPropSate, { { m_aggregatedLossFunction, rootGradientValue } }, parameterGradients);
m_prevMinibatchNumSamples = GetSampleCountFromArguments(*(m_lossFunction->Arguments().begin()), arguments); m_prevMinibatchNumSamples = GetSampleCount(m_lossFunction, outputs[m_lossFunction]);
bool anyUpdatesPerformed = false; bool anyUpdatesPerformed = false;
for (auto learner : m_parameterLearners) for (auto learner : m_parameterLearners)

Просмотреть файл

@ -13,6 +13,7 @@ using namespace std;
namespace CNTK namespace CNTK
{ {
// This wrapper redefines operator<< in terms of unformatted (binary) write operation. // This wrapper redefines operator<< in terms of unformatted (binary) write operation.
struct BinaryOStreamWrapper struct BinaryOStreamWrapper
{ {
@ -527,6 +528,17 @@ namespace CNTK
return (m_dictionaryData->find(key) != m_dictionaryData->end()); return (m_dictionaryData->find(key) != m_dictionaryData->end());
} }
void Dictionary::Add(const Dictionary& other)
{
for (auto kv : *(other.m_dictionaryData))
{
if (Contains(kv.first))
InvalidArgument("Dictionary::Add: This dictionary already contains an entry with key %S that is being attempted to add from the 'other' dinctionary", kv.first.c_str());
(*this)[kv.first] = kv.second;
}
}
bool Dictionary::operator==(const Dictionary& other) const bool Dictionary::operator==(const Dictionary& other) const
{ {
if (this == &other) if (this == &other)
@ -539,7 +551,7 @@ namespace CNTK
return false; return false;
} }
for (auto& kv : *m_dictionaryData) for (const auto& kv : *m_dictionaryData)
{ {
auto result = other.m_dictionaryData->find(kv.first); auto result = other.m_dictionaryData->find(kv.first);
if (result == other.m_dictionaryData->end() || kv.second != result->second) if (result == other.m_dictionaryData->end() || kv.second != result->second)
@ -561,7 +573,7 @@ namespace CNTK
BinaryOStreamWrapper stream(stdStream); BinaryOStreamWrapper stream(stdStream);
stream << us.version; stream << us.version;
stream << us.m_dictionaryData->size(); stream << us.m_dictionaryData->size();
for (auto& kv : *(us.m_dictionaryData)) for (const auto& kv : *(us.m_dictionaryData))
{ {
stream << kv.first; stream << kv.first;
stream << kv.second; stream << kv.second;
@ -586,10 +598,62 @@ namespace CNTK
return stream; return stream;
} }
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(T value)
: m_schedule({ make_pair(0, value) }), m_unit(1)
{
}
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(const vector<T>& schedule, size_t unit)
: m_unit(unit)
{
std::vector<std::pair<size_t, T>> s(schedule.size());
for (auto i = 0; i < schedule.size(); ++i)
{
s[i].first = 1;
s[i].second = schedule[i];
}
ConstructSchedule(s);
}
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(const vector<std::pair<size_t, T>>& schedule, size_t unit)
: m_unit(unit)
{
ConstructSchedule(schedule);
}
template <typename T>
void TrainingParameterSchedule<T>::ConstructSchedule(const std::vector<std::pair<size_t, T>>& schedule)
{
// TODO: 0 will be used to mean "the entire sweep"
if (m_unit == 0)
RuntimeError("TrainingParameterSchedule::ConstructSchedule : 'unit' cannot be 0.");
if (schedule.size() == 0)
RuntimeError("TrainingParameterSchedule::ConstructSchedule : schedule is empty.");
size_t i = 0;
for (const auto& it : schedule)
{
if (it.first == 0)
RuntimeError("TrainingParameterSchedule::ConstructSchedule : unit count cannot be 0.");
i += it.first;
m_schedule[m_unit * i] = it.second;
}
}
template <typename T>
/*virtual*/ TrainingParameterSchedule<T>::~TrainingParameterSchedule()
{
}
// Returns the element whose key is greater than the required sample count // Returns the element whose key is greater than the required sample count
// or the last element if no such key exists. // or the last element if no such key exists.
template <typename T> template <typename T>
const T& TrainingParameterSchedule<T>::operator[](size_t sampleCount) const /*virtual*/ const T& TrainingParameterSchedule<T>::operator[](size_t sampleCount) const
{ {
assert(m_schedule.size() > 0); assert(m_schedule.size() > 0);
auto it = m_schedule.upper_bound(sampleCount); auto it = m_schedule.upper_bound(sampleCount);
@ -600,6 +664,38 @@ namespace CNTK
return it->second; return it->second;
} }
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(const TrainingParameterSchedule<T>&) = default;
// cannot be defaulted due to a bug in VS2013 (https://connect.microsoft.com/VisualStudio/feedback/details/1255564)
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(TrainingParameterSchedule<T>&& that)
:m_schedule(move(that.m_schedule)), m_unit(that.m_unit)
{
}
template <typename T>
TrainingParameterSchedule<T>& TrainingParameterSchedule<T>::operator=(const TrainingParameterSchedule<T>&) = default;
// cannot be defaulted due to a bug in VS2013 (https://connect.microsoft.com/VisualStudio/feedback/details/1255564)
template <typename T>
TrainingParameterSchedule<T>& TrainingParameterSchedule<T>::operator=(TrainingParameterSchedule<T>&& that)
{
m_schedule = move(that.m_schedule);
m_unit = that.m_unit;
return *this;
}
void MomentumValuesAsTimeConstants::ConvertToPerSampleValues()
{
for (auto& it : m_schedule)
{
double momTC = it.second;
double momPS = momTC == 0.0 ? 0 : exp(-1.0 / momTC);
it.second = momPS;
}
}
template void DictionaryValue::AllocateDataPtr<NDShape>(const NDShape& value); template void DictionaryValue::AllocateDataPtr<NDShape>(const NDShape& value);
template void DictionaryValue::AllocateDataPtr<Axis>(const Axis& value); template void DictionaryValue::AllocateDataPtr<Axis>(const Axis& value);
template void DictionaryValue::AllocateDataPtr<vector<DictionaryValue>>(const vector<DictionaryValue>& value); template void DictionaryValue::AllocateDataPtr<vector<DictionaryValue>>(const vector<DictionaryValue>& value);
@ -614,5 +710,5 @@ namespace CNTK
template void DictionaryValue::FreePtrAsType<Dictionary>(); template void DictionaryValue::FreePtrAsType<Dictionary>();
template void DictionaryValue::FreePtrAsType<NDArrayView>(); template void DictionaryValue::FreePtrAsType<NDArrayView>();
template const double& TrainingParameterSchedule<double>::operator[](size_t key) const; template class TrainingParameterSchedule<double>;
} }

Просмотреть файл

@ -32,7 +32,7 @@ namespace CNTK
inline DEVICEID_TYPE AsCNTKImplDeviceId(const DeviceDescriptor& device) inline DEVICEID_TYPE AsCNTKImplDeviceId(const DeviceDescriptor& device)
{ {
if (device.Type() == DeviceKind::CPU) if (device.Type() == DeviceKind::CPU)
return -1; return CPUDEVICE;
else if (device.Type() == DeviceKind::GPU) else if (device.Type() == DeviceKind::GPU)
return device.Id(); return device.Id();
else else
@ -304,16 +304,20 @@ namespace CNTK
} }
} }
static size_t const CNTKInternalIdxValueForAllStaticAxes = 0;
inline Axis AsAxis(size_t CNTKInternalAxisIdx) inline Axis AsAxis(size_t CNTKInternalAxisIdx)
{ {
if (CNTKInternalAxisIdx == 0) if (CNTKInternalAxisIdx == CNTKInternalIdxValueForAllStaticAxes)
LogicError("CNTK internal axis indices must be > 0"); return Axis::AllStaticAxes();
return Axis(CNTKInternalAxisIdx - 1); return Axis(CNTKInternalAxisIdx - 1);
} }
inline int AsCNTKInternalAxisIdx(const Axis& axis) inline int AsCNTKInternalAxisIdx(const Axis& axis)
{ {
if (axis == Axis::AllStaticAxes())
return CNTKInternalIdxValueForAllStaticAxes;
if (!axis.IsStaticAxis()) if (!axis.IsStaticAxis())
LogicError("Only Axis that represent static indices can be converted to a CNTK internal axis index"); LogicError("Only Axis that represent static indices can be converted to a CNTK internal axis index");
@ -322,19 +326,16 @@ namespace CNTK
inline std::pair<NDShape, NDShape> GetConvolutionOutputMapCountAndKernelShape(const NDShape& convolutionMapShape, const NDShape& operandShape) inline std::pair<NDShape, NDShape> GetConvolutionOutputMapCountAndKernelShape(const NDShape& convolutionMapShape, const NDShape& operandShape)
{ {
auto outputMapCount = convolutionMapShape.SubShape(0, convolutionMapShape.Rank() - operandShape.Rank()); NDShape kernelShape = convolutionMapShape.SubShape(0, operandShape.Rank());
auto outputMapCount = convolutionMapShape.SubShape(kernelShape.Rank());
NDShape paddedOutputMapCount(operandShape.Rank(), 1); NDShape paddedOutputMapCount(operandShape.Rank(), 1);
for (size_t i = 0; i < outputMapCount.Rank(); ++i) for (size_t i = 0; i < outputMapCount.Rank(); ++i)
paddedOutputMapCount[paddedOutputMapCount.Rank() - 1 - i] = outputMapCount[outputMapCount.Rank() - 1 - i]; paddedOutputMapCount[paddedOutputMapCount.Rank() - 1 - i] = outputMapCount[outputMapCount.Rank() - 1 - i];
//for (size_t i = 0; i < outputMapCount.Rank(); ++i)
// paddedOutputMapCount[i] = outputMapCount[i];
NDShape kernelShape = convolutionMapShape.SubShape(outputMapCount.Rank());
return{ paddedOutputMapCount, kernelShape }; return{ paddedOutputMapCount, kernelShape };
} }
inline double MomentumPerMB(double momentumPerSample, size_t minibatchSize) inline double MomentumValueForMB(double momentumPerSample, size_t minibatchSize)
{ {
return std::pow(momentumPerSample, minibatchSize); return std::pow(momentumPerSample, minibatchSize);
} }
@ -369,4 +370,45 @@ namespace CNTK
double* castValue = Copy<float, double>(source->DataBuffer<float>(), sourceSize); double* castValue = Copy<float, double>(source->DataBuffer<float>(), sourceSize);
return MakeSharedObject<NDArrayView>(sourceShape, castValue, sourceSize, DeviceDescriptor::CPUDevice(), readOnly); return MakeSharedObject<NDArrayView>(sourceShape, castValue, sourceSize, DeviceDescriptor::CPUDevice(), readOnly);
} }
inline std::wstring ParanthesizedName(const std::wstring& name)
{
if (name.empty())
return name;
return L"(" + name + L")";
}
static const std::wstring UidPrefix = L"__v2libuid__";
static const std::wstring NamePrefix = L"__v2libname__";
inline std::wstring CNTKInternalNodeNameFromUidAndName(const std::wstring& uid, const std::wstring& name)
{
return UidPrefix + uid + NamePrefix + name;
}
inline std::pair<std::wstring, std::wstring> UidAndNameFromCNTKInternalNodeName(const std::wstring& CNTKInternalNodeName, VariableKind varKind)
{
std::wstring uid, name;
auto uidPrefixBeginPos = CNTKInternalNodeName.find(UidPrefix);
if (uidPrefixBeginPos != std::wstring::npos)
{
auto uidBeginPos = uidPrefixBeginPos + UidPrefix.length();
auto namePrefixBeginPos = CNTKInternalNodeName.find(NamePrefix, uidBeginPos);
if (namePrefixBeginPos == std::wstring::npos)
LogicError("CNTK internal node name found to contain uid but not name!");
auto nameBeginPos = namePrefixBeginPos + NamePrefix.length();
uid = CNTKInternalNodeName.substr(uidBeginPos, namePrefixBeginPos - uidBeginPos);
name = CNTKInternalNodeName.substr(nameBeginPos);
}
else
{
name = CNTKInternalNodeName;
uid = Internal::GenerateUid(varKind);
}
return{ uid, name };
}
} }

Просмотреть файл

@ -11,6 +11,8 @@
#include "CNTKLibrary.h" #include "CNTKLibrary.h"
#include "Utils.h" #include "Utils.h"
#include "Value.h"
#include "Function.h"
namespace CNTK namespace CNTK
{ {
@ -28,7 +30,7 @@ namespace CNTK
auto maskShape = mask->Shape(); auto maskShape = mask->Shape();
if (maskShape.Rank() > dataShape.Rank()) if (maskShape.Rank() > dataShape.Rank())
InvalidArgument("The number of axes (%d) of the mask of a Value object cannot exceed the number of axes (%d) of the data NDArrayView object", (int)maskShape.Rank(), (int)dataShape.Rank()); InvalidArgument("The rank (%d) of the mask of a Value object cannot exceed the rank (%d) of the data NDArrayView object", (int)maskShape.Rank(), (int)dataShape.Rank());
if (dataShape.SubShape(dataShape.Rank() - maskShape.Rank()) != maskShape) if (dataShape.SubShape(dataShape.Rank() - maskShape.Rank()) != maskShape)
InvalidArgument("Invalid Value object; the data and mask are incompatible. The trailing dimensions of the data with shape %S do not match the dimensions of the mask with shape %S", AsStringForErrorReporting(dataShape).c_str(), AsStringForErrorReporting(maskShape).c_str()); InvalidArgument("Invalid Value object; the data and mask are incompatible. The trailing dimensions of the data with shape %S do not match the dimensions of the mask with shape %S", AsStringForErrorReporting(dataShape).c_str(), AsStringForErrorReporting(maskShape).c_str());
@ -60,7 +62,10 @@ namespace CNTK
NDShape valueMaskShape = { maxSequenceLength, numSequences }; NDShape valueMaskShape = { maxSequenceLength, numSequences };
deviceValueMask = MakeSharedObject<NDMask>(valueMaskShape, device); deviceValueMask = MakeSharedObject<NDMask>(valueMaskShape, device);
for (size_t i = 0; i < numSequences; ++i) for (size_t i = 0; i < numSequences; ++i)
deviceValueMask->MaskSection({ sequenceLengths[i], i }, { NDShape::InferredDimension, 1 }); {
deviceValueMask->MarkSequenceBegin({0, i});
deviceValueMask->InvalidateSection({ sequenceLengths[i], i }, { NDShape::InferredDimension, 1 });
}
} }
return deviceValueMask; return deviceValueMask;
@ -179,6 +184,39 @@ namespace CNTK
} }
} }
void PackedValue::Unpack() const
{
if (m_packedDataLayout && (m_packedDataLayout->GetNumTimeSteps() != 1) && (m_packedDataLayout->GetNumSequences() != 1) && Internal::IsAutomaticUnpackingOfPackedValuesDisabled())
LogicError("PackedValue::Unpack: Automatic unpacking of PackedValue objects is disabled");
if (m_isPacked)
{
ValuePtr valueObject;
auto dataType = m_packedData->GetDataType();
switch (dataType)
{
case DataType::Float:
valueObject = CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(m_sampleShape, *(m_packedData->GetMatrix<float>()), m_packedDataLayout, m_isReadOnly);
break;
case DataType::Double:
valueObject = CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(m_sampleShape, *(m_packedData->GetMatrix<double>()), m_packedDataLayout, m_isReadOnly);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(dataType));
}
m_data = valueObject->Data();
m_mask = valueObject->Mask();
m_packedData = nullptr;
m_packedDataLayout = nullptr;
m_isPacked = false;
if (m_unpackedShape != m_data->Shape())
LogicError("The computed unpacked shape of the PackedValue object does not match the actual Data NDArrayView's shape after unpacking");
}
}
// Explicit template instantiations // Explicit template instantiations
template /*static*/ CNTK_API ValuePtr Value::Create<float>(const NDShape& sampleShape, const std::vector<std::vector<float>>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/); template /*static*/ CNTK_API ValuePtr Value::Create<float>(const NDShape& sampleShape, const std::vector<std::vector<float>>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/);
template /*static*/ CNTK_API ValuePtr Value::Create<double>(const NDShape& sampleShape, const std::vector<std::vector<double>>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/); template /*static*/ CNTK_API ValuePtr Value::Create<double>(const NDShape& sampleShape, const std::vector<std::vector<double>>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/);

Просмотреть файл

@ -7,14 +7,112 @@
#include "stdafx.h" #include "stdafx.h"
#include "CNTKLibrary.h" #include "CNTKLibrary.h"
#include "Sequences.h"
#include "Utils.h"
namespace CNTK namespace CNTK
{ {
class CNTKValue final : public Value class PackedValue final : public Value
{ {
template <typename T, typename ...CtorArgTypes>
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
public: public:
template <typename ElementType>
PackedValue(const NDShape& sampleShape, const std::shared_ptr<Microsoft::MSR::CNTK::Matrix<ElementType>>& packedDataMatrix, const std::shared_ptr<Microsoft::MSR::CNTK::MBLayout>& packedDataLayout, bool isReadOnly)
: Value(nullptr), m_isPacked(true), m_sampleShape(sampleShape), m_packedData(nullptr), m_packedDataLayout(packedDataLayout), m_isReadOnly(isReadOnly)
{
NDShape packedMatrixShape({ packedDataMatrix->GetNumRows(), packedDataMatrix->GetNumCols() });
auto tensorView = new Microsoft::MSR::CNTK::TensorView<ElementType>(packedDataMatrix, AsTensorViewShape(packedMatrixShape));
m_packedData = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(packedDataMatrix->GetDeviceId()), AsStorageFormat(packedDataMatrix->GetFormat()), packedMatrixShape, m_isReadOnly, tensorView);
// Determine unpacked shape
m_unpackedShape = sampleShape;
if (packedDataLayout)
m_unpackedShape = m_unpackedShape.AppendShape({ packedDataLayout->GetNumTimeSteps(), packedDataLayout->GetNumSequences() });
}
void Unpack() const;
const NDShape& Shape() const override { return m_unpackedShape; }
DeviceDescriptor Device() const override { return m_isPacked ? m_packedData->Device() : Value::Device(); }
DataType GetDataType() const override { return m_isPacked ? m_packedData->GetDataType() : Value::GetDataType(); }
StorageFormat GetStorageFormat() const override { return m_isPacked? m_packedData->GetStorageFormat() : Value::GetStorageFormat(); }
bool IsReadOnly() const override { return m_isPacked ? m_packedData->IsReadOnly() : Value::IsReadOnly(); }
size_t MaskedCount() const override
{
if (m_isPacked)
// Compute the number of masked samples after the data will be unpacked
return m_packedDataLayout ? ((m_packedDataLayout->GetNumTimeSteps() * m_packedDataLayout->GetNumSequences()) - m_packedDataLayout->GetActualNumSamples()) : 0;
else
return Value::MaskedCount();
}
NDArrayViewPtr Data() const override
{
Unpack();
return Value::Data();
}
NDMaskPtr Mask() const override
{
Unpack();
return Value::Mask();
}
ValuePtr DeepClone(bool /*readOnly = false*/) const override
{
if (m_isPacked)
{
std::shared_ptr<Microsoft::MSR::CNTK::MBLayout> packedLayoutCopy;
if (m_packedDataLayout)
{
packedLayoutCopy = std::make_shared<Microsoft::MSR::CNTK::MBLayout>();
packedLayoutCopy->CopyFrom(m_packedDataLayout);
}
return MakeSharedObject<PackedValue>(m_sampleShape, m_packedData->DeepClone(), packedLayoutCopy, m_isReadOnly);
}
else
return Value::DeepClone();
}
ValuePtr Alias(bool /*readOnly = false*/) const override
{
LogicError("Alias is currently unsupported for PackedValue objects");
}
void CopyFrom(const Value& /*source*/) override
{
LogicError("CopyFrom is currently unsupported for PackedValue objects");
}
template <typename ElementType>
std::pair<std::shared_ptr<const Microsoft::MSR::CNTK::Matrix<ElementType>>, std::shared_ptr<Microsoft::MSR::CNTK::MBLayout>> PackedData()
{
if (!m_isPacked)
InvalidArgument("PackedValue::PackedData called on a Value object that has already been unpacked");
return { m_packedData->GetMatrix<ElementType>(), m_packedDataLayout };
}
private: private:
PackedValue(const NDShape& sampleShape, const NDArrayViewPtr& packedData, const std::shared_ptr<Microsoft::MSR::CNTK::MBLayout>& packedDataLayout, bool isReadOnly)
: Value(nullptr), m_isPacked(true), m_sampleShape(sampleShape), m_packedData(packedData), m_packedDataLayout(packedDataLayout), m_isReadOnly(isReadOnly)
{
// Determine unpacked shape
m_unpackedShape = sampleShape;
if (packedDataLayout)
m_unpackedShape = m_unpackedShape.AppendShape({ packedDataLayout->GetNumTimeSteps(), packedDataLayout->GetNumSequences() });
}
private:
bool m_isReadOnly;
NDShape m_sampleShape;
NDShape m_unpackedShape;
mutable bool m_isPacked;
mutable NDArrayViewPtr m_packedData;
mutable std::shared_ptr<Microsoft::MSR::CNTK::MBLayout> m_packedDataLayout;
}; };
} }

Просмотреть файл

@ -30,7 +30,7 @@ namespace CNTK
if (varOwner) if (varOwner)
return CompositeFunction::Create(varOwner, varOwner->Name()); return CompositeFunction::Create(varOwner, varOwner->Name());
else else
return Internal::Combine({ *this }); return Combine({ *this });
} }
NDArrayViewPtr Variable::Value() const NDArrayViewPtr Variable::Value() const
@ -70,14 +70,24 @@ namespace CNTK
static const std::wstring KernelWidthAttributeName = L"kernelWidth"; static const std::wstring KernelWidthAttributeName = L"kernelWidth";
static const std::wstring KernelHeightAttributeName = L"kernelHeight"; static const std::wstring KernelHeightAttributeName = L"kernelHeight";
ParameterInitializer UniformInitializer(double scale, unsigned long seed) void Variable::VariableFields::SetValueInitialization(const ParameterInitializer& initializationConfig, const DeviceDescriptor& device)
{ {
Dictionary initConfig; if (m_value != nullptr)
initConfig[InitializerTypeAttributeName] = Microsoft::MSR::CNTK::UniformInitializerTypeName; LogicError("Value initialization config cannot be set if a value already exists");
initConfig[ScaleAttributeName] = scale;
initConfig[RandomSeedAttributeName] = (size_t)seed;
return initConfig; assert(!m_valueInitializer);
assert(!m_valueInitializationDevice);
if (initializationConfig.Contains(FilterRankAttributeName))
{
auto filterRank = (int)initializationConfig[FilterRankAttributeName].Value<size_t>();
auto outputRank = (int)initializationConfig[OutputRankAttributeName].Value<size_t>();
if ((filterRank + outputRank) > m_shape.Rank())
InvalidArgument("Sum of filter rank (%d) and output rank (%d) of the parameter initializer cannot exceed the Parameter's rank(%d)", filterRank, outputRank, (int)m_shape.Rank());
}
m_valueInitializer.reset(new ParameterInitializer(initializationConfig));
m_valueInitializationDevice.reset(new DeviceDescriptor(device));
} }
static ParameterInitializer CreateInitializer(const std::wstring& initializerTypeName, int outputRank, int filterRank, double scale, unsigned long seed) static ParameterInitializer CreateInitializer(const std::wstring& initializerTypeName, int outputRank, int filterRank, double scale, unsigned long seed)
@ -92,6 +102,16 @@ namespace CNTK
return initConfig; return initConfig;
} }
ParameterInitializer UniformInitializer(double scale, unsigned long seed)
{
Dictionary initConfig;
initConfig[InitializerTypeAttributeName] = Microsoft::MSR::CNTK::UniformInitializerTypeName;
initConfig[ScaleAttributeName] = scale;
initConfig[RandomSeedAttributeName] = (size_t)seed;
return initConfig;
}
ParameterInitializer GaussianInitializer(int outputRank, int filterRank, double scale, unsigned long seed) ParameterInitializer GaussianInitializer(int outputRank, int filterRank, double scale, unsigned long seed)
{ {
return CreateInitializer(Microsoft::MSR::CNTK::GaussianInitializerTypeName, outputRank, filterRank, scale, seed); return CreateInitializer(Microsoft::MSR::CNTK::GaussianInitializerTypeName, outputRank, filterRank, scale, seed);

Просмотреть файл

@ -124,7 +124,7 @@ private:
}; };
static DEVICEID_TYPE s_bestDeviceId = DEVICEID_NOTYETDETERMINED; static DEVICEID_TYPE s_bestDeviceId = DEVICEID_NOTYETDETERMINED;
static BestGpu* s_bestGpu = nullptr; static std::unique_ptr<BestGpu> s_bestGpu = nullptr;
// DeviceFromConfig - Parse 'deviceId' config parameter to determine what type of behavior is desired // DeviceFromConfig - Parse 'deviceId' config parameter to determine what type of behavior is desired
//Symbol - Meaning //Symbol - Meaning
@ -149,7 +149,7 @@ static DEVICEID_TYPE SelectDevice(DEVICEID_TYPE deviceId, bool bLockGPU, const i
// GPU device to be auto-selected, so init our class // GPU device to be auto-selected, so init our class
if (s_bestGpu == nullptr) if (s_bestGpu == nullptr)
{ {
s_bestGpu = new BestGpu(); s_bestGpu = make_unique<BestGpu>();
for (int i = 0; i < excludedDevices.size(); ++i) for (int i = 0; i < excludedDevices.size(); ++i)
{ {
s_bestGpu->DisallowDevice(excludedDevices[i]); s_bestGpu->DisallowDevice(excludedDevices[i]);
@ -270,6 +270,8 @@ void BestGpu::GetCudaProperties()
if (m_cudaData) if (m_cudaData)
return; return;
int currentDevice, rc;
rc = cudaGetDevice(&currentDevice);
int dev = 0; int dev = 0;
for (ProcessorData* pd : m_procData) for (ProcessorData* pd : m_procData)
@ -284,9 +286,16 @@ void BestGpu::GetCudaProperties()
pd->cudaFreeMem = free; pd->cudaFreeMem = free;
pd->cudaTotalMem = total; pd->cudaTotalMem = total;
dev++; dev++;
cudaDeviceReset(); // cudaDeviceReset() explicitly destroys and cleans up all resources associated with the
// current device in the current process.
// Will result in a segmentation fault is called, for instance, after cudnnCreate, but before cudnnDestroy.
// cudaDeviceReset();
} }
m_cudaData = m_procData.size() > 0; m_cudaData = m_procData.size() > 0;
if (rc == CUDA_SUCCESS)
{
cudaSetDevice(currentDevice);
}
} }
void BestGpu::Init() void BestGpu::Init()
@ -325,8 +334,11 @@ BestGpu::~BestGpu()
if (m_nvmlData) if (m_nvmlData)
{ {
// TODO: Check for error code and throw if !std::uncaught_exception() nvmlReturn_t r = nvmlShutdown();
nvmlShutdown(); if ((r != NVML_SUCCESS) && !std::uncaught_exception())
{
RuntimeError("BestGPU Destructor: failed to shut down NVML. \n");
}
} }
} }

Просмотреть файл

@ -7,6 +7,10 @@
#include <cassert> #include <cassert>
#include <string> #include <string>
#define CLOSEHANDLE_ERROR 0
#define RELEASEMUTEX_ERROR 0
#define FCNTL_ERROR -1
#ifdef WIN32 // --- Windows version #ifdef WIN32 // --- Windows version
#define NOMINMAX #define NOMINMAX
@ -46,7 +50,11 @@ public:
if (::WaitForSingleObject(m_handle, wait ? INFINITE : 0) != WAIT_OBJECT_0) if (::WaitForSingleObject(m_handle, wait ? INFINITE : 0) != WAIT_OBJECT_0)
{ {
// failed to acquire // failed to acquire
::CloseHandle(m_handle); int rc = ::CloseHandle(m_handle);
if ((rc == CLOSEHANDLE_ERROR) && !std::uncaught_exception())
{
RuntimeError("Acquire: Handler close failure with error code %d", ::GetLastError());
}
m_handle = NULL; m_handle = NULL;
return false; return false;
} }
@ -58,9 +66,17 @@ public:
void Release() void Release()
{ {
assert(m_handle != NULL); assert(m_handle != NULL);
// TODO: Check for error code and throw if !std::uncaught_exception() int rc = 0;
::ReleaseMutex(m_handle); rc = ::ReleaseMutex(m_handle);
::CloseHandle(m_handle); if ((rc == RELEASEMUTEX_ERROR) && !std::uncaught_exception())
{
RuntimeError("Mutex Release: Failed to release mutex %s: %d", m_name.c_str(), ::GetLastError());
}
rc = ::CloseHandle(m_handle);
if ((rc == CLOSEHANDLE_ERROR) && !std::uncaught_exception())
{
RuntimeError("Mutex Release: Failed to close handler %s: %d", m_name.c_str(), ::GetLastError());
}
m_handle = NULL; m_handle = NULL;
} }
@ -121,6 +137,8 @@ public:
// Returns false if !wait and lock cannot be acquired, or in case of a system error that prevents us from acquiring the lock. // Returns false if !wait and lock cannot be acquired, or in case of a system error that prevents us from acquiring the lock.
bool Acquire(bool wait) bool Acquire(bool wait)
{ {
mode_t mask = umask(0);
assert(m_fd == -1); assert(m_fd == -1);
for (;;) for (;;)
{ {
@ -146,6 +164,7 @@ public:
{ {
// acquire failed // acquire failed
close(fd); close(fd);
umask(mask);
return false; return false;
} }
// we own the exclusive lock on file descriptor, but we need to double-check // we own the exclusive lock on file descriptor, but we need to double-check
@ -165,6 +184,7 @@ public:
{ {
// lock acquired successfully // lock acquired successfully
m_fd = fd; m_fd = fd;
umask(mask);
return true; return true;
} }
} }
@ -181,8 +201,11 @@ public:
m_lock.l_type = F_UNLCK; m_lock.l_type = F_UNLCK;
// Now removing the lock and closing the file descriptor // Now removing the lock and closing the file descriptor
// waiting processes will be notified // waiting processes will be notified
// TODO: Check for error code and throw if !std::uncaught_exception() int rc = fcntl(m_fd, F_SETLKW, &m_lock);
fcntl(m_fd, F_SETLKW, &m_lock); if (rc == FCNTL_ERROR)
{
RuntimeError("Mutex Release: Failed to release mutex %s", m_fileName.c_str());
}
close(m_fd); close(m_fd);
m_fd = -1; m_fd = -1;
} }

Просмотреть файл

@ -184,6 +184,23 @@ bool DataReader::SupportsDistributedMBRead() const
return supportsDistributedMBRead; return supportsDistributedMBRead;
} }
//IsLegacyReader - Returns true if one of the readers is a legacy reader, false otherwise.
bool DataReader::IsLegacyReader() const
{
for (size_t i = 0; i < m_ioNames.size(); i++)
{
auto currReaderIter = m_dataReaders.find(m_ioNames[i]);
assert(currReaderIter != m_dataReaders.end());
if (currReaderIter->second->IsLegacyReader())
{
return true;
}
}
return false;
}
//StartDistributedMinibatchLoop - Startup a distributed minibatch loop for parallel training //StartDistributedMinibatchLoop - Startup a distributed minibatch loop for parallel training
// mbSize - [in] size of the minibatch (number of frames, etc.) // mbSize - [in] size of the minibatch (number of frames, etc.)
// epoch - [in] epoch number for this loop // epoch - [in] epoch number for this loop
@ -207,6 +224,13 @@ void DataReader::StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size
} }
} }
size_t DataReader::GetCurrentSamplePosition()
{
// BUGBUG: composition of old readers is not supported.
// Returning just for the last reader.
return m_dataReaders[m_ioNames.back()]->GetCurrentSamplePosition();
}
// GetMinibatch - Get the next minibatch (features and labels) // GetMinibatch - Get the next minibatch (features and labels)
// matrices - [in] a map with named matrix types (i.e. 'features', 'labels') mapped to the corresponding matrix, // matrices - [in] a map with named matrix types (i.e. 'features', 'labels') mapped to the corresponding matrix,
// [out] each matrix resized if necessary containing data. // [out] each matrix resized if necessary containing data.

Просмотреть файл

@ -26,6 +26,8 @@
#include <linux/limits.h> // for PATH_MAX #include <linux/limits.h> // for PATH_MAX
#endif #endif
#define PCLOSE_ERROR -1
namespace Microsoft { namespace MSR { namespace CNTK { namespace Microsoft { namespace MSR { namespace CNTK {
// File creation // File creation
@ -255,17 +257,23 @@ bool File::IsTextBased()
// Note: this does not check for errors when the File corresponds to pipe stream. In this case, use Flush() before closing a file you are writing. // Note: this does not check for errors when the File corresponds to pipe stream. In this case, use Flush() before closing a file you are writing.
File::~File(void) File::~File(void)
{ {
int rc = 0;
if (m_pcloseNeeded) if (m_pcloseNeeded)
{ {
// TODO: Check for error code and throw if !std::uncaught_exception() rc = _pclose(m_file);
_pclose(m_file); if ((rc == PCLOSE_ERROR) && !std::uncaught_exception())
{
RuntimeError("File: failed to close file at %S", m_filename.c_str());
}
} }
else if (m_file != stdin && m_file != stdout && m_file != stderr) else if (m_file != stdin && m_file != stdout && m_file != stderr)
{ {
int rc = fclose(m_file); rc = fclose(m_file);
if ((rc != 0) && !std::uncaught_exception()) if ((rc != FCLOSE_SUCCESS) && !std::uncaught_exception())
{
RuntimeError("File: failed to close file at %S", m_filename.c_str()); RuntimeError("File: failed to close file at %S", m_filename.c_str());
} }
}
} }
void File::Flush() void File::Flush()

Просмотреть файл

@ -26,6 +26,7 @@
#define EPSILON 1e-5 #define EPSILON 1e-5
#define ISCLOSE(a, b, threshold) (abs(a - b) < threshold) ? true : false #define ISCLOSE(a, b, threshold) (abs(a - b) < threshold) ? true : false
#define DLCLOSE_SUCCESS 0
#define UNUSED(x) (void)(x) // for variables that are, e.g., only used in _DEBUG builds #define UNUSED(x) (void)(x) // for variables that are, e.g., only used in _DEBUG builds
@ -705,9 +706,14 @@ public:
} }
~Plugin() ~Plugin()
{ {
// TODO: Check for error code and throw if !std::uncaught_exception()
if (handle != NULL) if (handle != NULL)
dlclose(handle); {
int rc = dlclose(handle);
if ((rc != DLCLOSE_SUCCESS) && !std::uncaught_exception())
{
RuntimeError("Plugin: Failed to decrements the reference count.");
}
}
} }
}; };
#endif #endif

Просмотреть файл

@ -239,6 +239,18 @@ public:
return false; return false;
}; };
// old DataReader architecture
virtual bool IsLegacyReader() const
{
return true;
};
// Gets current sample position on the global timeline.
virtual size_t GetCurrentSamplePosition()
{
NOT_IMPLEMENTED;
}
virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize) virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize)
{ {
if (SupportsDistributedMBRead() || (numSubsets != 1) || (subsetNum != 0)) if (SupportsDistributedMBRead() || (numSubsets != 1) || (subsetNum != 0))
@ -410,6 +422,8 @@ public:
} }
virtual ~DataReader(); virtual ~DataReader();
size_t GetCurrentSamplePosition() override;
// StartMinibatchLoop - Startup a minibatch loop // StartMinibatchLoop - Startup a minibatch loop
// mbSize - [in] size of the minibatch (number of frames, etc.) // mbSize - [in] size of the minibatch (number of frames, etc.)
// epoch - [in] epoch number for this loop // epoch - [in] epoch number for this loop
@ -417,6 +431,7 @@ public:
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize); virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
virtual bool SupportsDistributedMBRead() const override; virtual bool SupportsDistributedMBRead() const override;
virtual bool IsLegacyReader() const override;
virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize) override; virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize) override;
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, const std::unordered_set<InputStreamDescription>&, size_t requestedEpochSamples = requestDataSize) override; virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, const std::unordered_set<InputStreamDescription>&, size_t requestedEpochSamples = requestDataSize) override;

Просмотреть файл

@ -14,12 +14,14 @@
#endif #endif
#pragma comment(lib, "msmpi.lib") #pragma comment(lib, "msmpi.lib")
#include <errno.h>
#include <string> #include <string>
#include <array> #include <array>
#include <vector> #include <vector>
#include <memory> #include <memory>
#define FFLUSH_SUCCESS 0
namespace Microsoft { namespace MSR { namespace CNTK { namespace Microsoft { namespace MSR { namespace CNTK {
struct MpiFail : public std::string struct MpiFail : public std::string
@ -138,6 +140,14 @@ public:
MPI_Comm_size(MPI_COMM_WORLD, &m_numMPINodes); MPI_Comm_size(MPI_COMM_WORLD, &m_numMPINodes);
m_numNodesInUse = m_numMPINodes; m_numNodesInUse = m_numMPINodes;
// Verify that the environment variable used by GetTotalNumberOfMPINodes()
// matches what the MPI API says. There're actually two possible cases:
// 1) when we're running with mpiexec both values have to match;
// 2) when we're running without mpiexec, the former will return 0, and
// the later will be set to 1.
assert((GetTotalNumberOfMPINodes() == 0 && m_numNodesInUse == 1) ||
(GetTotalNumberOfMPINodes() == m_numNodesInUse));
// Applying MPI workaround // Applying MPI workaround
s_myRank = m_myRank; s_myRank = m_myRank;
atexit(&MPIWrapper::MPIWorkaroundAtExit); atexit(&MPIWrapper::MPIWorkaroundAtExit);
@ -160,19 +170,50 @@ public:
::Sleep((DWORD)(500 * CurrentNodeRank())); ::Sleep((DWORD)(500 * CurrentNodeRank()));
} }
// Note that specifically, this function is such that it does not require
// MPI initialization. Moreover, it can be used without actually loading any
// MPI libs.
// TODO: Once we move to dynamic loading for MPI libs on Linux, move it to utilities.
static int GetTotalNumberOfMPINodes()
{
#ifdef WIN32
const char* p = std::getenv("PMI_SIZE");
#else
const char* p = std::getenv("OMPI_COMM_WORLD_SIZE");
#endif
if (!p)
{
return 0;
}
else
{
return std::stoi(string(p));
}
}
// Note: we don't clear the sub-communication here although we should, because in case of a crash, this prevents the EXE from terminating. // Note: we don't clear the sub-communication here although we should, because in case of a crash, this prevents the EXE from terminating.
// It's OK since this class is a singleton anyway that gets instantiated exactly once at program startup. // It's OK since this class is a singleton anyway that gets instantiated exactly once at program startup.
~MPIWrapper() ~MPIWrapper()
{ {
fprintf(stderr, "~MPIWrapper\n"); fprintf(stderr, "~MPIWrapper\n");
fflush(stderr);
// TODO: Check for error code and throw if !std::uncaught_exception()
// Do not finalize in event of an exception since calling MPI_Finalize without // Do not finalize in event of an exception since calling MPI_Finalize without
// all pending communications being finished results in a hang // all pending communications being finished results in a hang
int rc = fflush(stderr);
if (!std::uncaught_exception()) if (!std::uncaught_exception())
{
if (rc != FFLUSH_SUCCESS)
{
#ifdef _WIN32
RuntimeError("MPIWrapper: Failed to flush stderr, %d", ::GetLastError());
#else
RuntimeError("MPIWrapper: Failed to flush stderr, %d", errno);
#endif
}
MPI_Finalize(); MPI_Finalize();
} }
}
private: private:
void Ping(const char *msg) const void Ping(const char *msg) const

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше