Merge branch 'qiwye/asgd-dev' of https://github.com/Microsoft/CNTK into qiwye/asgd-dev

This commit is contained in:
feiga 2016-10-10 13:04:06 +08:00
Родитель 5e37b37093 614a51e7fb
Коммит ebefc5ade5
392 изменённых файлов: 22041 добавлений и 12503 удалений

9
.gitignore поставляемый
Просмотреть файл

@ -199,3 +199,12 @@ Tests/EndToEndTests/UnitTests/MathTests/MS.txt
Dependencies/CNTKCustomMKL/Publish
Dependencies/CNTKCustomMKL/CNTKCustomMKL-Linux-*.tgz
Dependencies/CNTKCustomMKL/CNTKCustomMKL-Windows-*.zip
# Python bindings
bindings/python/_cntk_py.pyd
bindings/python/cntk.egg-info/
bindings/python/cntk/cntk_py.py
bindings/python/cntk/libs/
bindings/python/cntk/cntk_py_wrap.cpp
bindings/python/cntk/cntk_py_wrap.h
bindings/python/dist/

Просмотреть файл

@ -174,7 +174,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "DoublePrecision", "DoublePr
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Kaldi2Reader", "Kaldi2Reader", "{C70E1572-20FF-496C-A0A9-10AA6755A07C}"
ProjectSection(SolutionItems) = preProject
Source\Readers\Kaldi2Reader\basetypes.h = Source\Readers\Kaldi2Reader\basetypes.h
Source\Readers\Kaldi2Reader\biggrowablevectors.h = Source\Readers\Kaldi2Reader\biggrowablevectors.h
Source\Readers\Kaldi2Reader\chunkevalsource.h = Source\Readers\Kaldi2Reader\chunkevalsource.h
Source\Readers\Kaldi2Reader\DataReader.cpp = Source\Readers\Kaldi2Reader\DataReader.cpp

Просмотреть файл

@ -46,22 +46,22 @@ int main(int argc, char* argv[])
path = (pos == std::string::npos) ? "." : app.substr(0, pos);
// This relative path assumes launching from CNTK's binary folder, e.g. x64\Release
const std::string modelWorkingDirectory = path + "/../../Examples/Image/MNIST/Data/";
const std::string modelWorkingDirectory = path + "/../../Examples/Image/GettingStarted";
#else // on Linux
pos = app.rfind("/");
path = (pos == std::string::npos) ? "." : app.substr(0, pos);
// This relative path assumes launching from CNTK's binary folder, e.g. build/cpu/release/bin/
const std::string modelWorkingDirectory = path + "/../../../../Examples/Image/MNIST/Data/";
const std::string modelWorkingDirectory = path + "/../../../../Examples/Image/GettingStarted";
#endif
const std::string modelFilePath = modelWorkingDirectory + "../Output/Models/01_OneHidden";
const std::string modelFilePath = modelWorkingDirectory + "/Output/Models/01_OneHidden";
try
{
struct stat statBuf;
if (stat(modelFilePath.c_str(), &statBuf) != 0)
{
fprintf(stderr, "Error: The model %s does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/MNIST to create the model.\n", modelFilePath.c_str());
fprintf(stderr, "Error: The model %s does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/GettingStarted to create the model.\n", modelFilePath.c_str());
return(1);
}

Просмотреть файл

@ -1,3 +1,9 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// EvalMultithreads.cpp : Sample application shows how to evaluate a model in multiple threading environment.
//
#include <functional>
#include <thread>
#include <iostream>
@ -5,108 +11,23 @@
using namespace CNTK;
FunctionPtr FullyConnectedDNNLayerWithSharedParameters(Variable input,
const Parameter& timesParam,
const Parameter& plusParam,
const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
assert(input.Shape().Rank() == 1);
void OutputFunctionInfo(FunctionPtr);
FunctionPtr FullyConnectedDNNLayerWithSharedParameters(Variable, const Parameter&, const Parameter&, const std::function<FunctionPtr(const FunctionPtr&)>&);
void CreateFunctionAndEvaluateWithSharedParameters(size_t, size_t, size_t, const Parameter&, const Parameter&, const Parameter[], const Parameter[], const Parameter&, const DeviceDescriptor&);
FunctionPtr SetupFullyConnectedLinearLayer(Variable, size_t, const DeviceDescriptor&, const std::wstring&);
FunctionPtr SetupFullyConnectedDNNLayer(Variable, size_t, const DeviceDescriptor& device, const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity);
void RunEvaluationClassifier(FunctionPtr, const DeviceDescriptor&);
void RunEvaluationOneHidden(FunctionPtr, const DeviceDescriptor&);
// Todo: assume that timesParam has matched outputDim and inputDim
auto timesFunction = Times(timesParam, input);
// Todo: assume that timesParam has matched outputDim
auto plusFunction = Plus(plusParam, timesFunction);
return nonLinearity(plusFunction);
}
FunctionPtr FullyConnectedFeedForwardClassifierNetWithSharedParameters(Variable input,
size_t numHiddenLayers,
const Parameter& inputTimesParam,
const Parameter& inputPlusParam,
const Parameter hiddenLayerTimesParam[],
const Parameter hiddenLayerPlusParam[],
const Parameter& outputTimesParam,
const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
assert(numHiddenLayers >= 1);
auto classifierRoot = FullyConnectedDNNLayerWithSharedParameters(input, inputTimesParam, inputPlusParam, nonLinearity);
for (size_t i = 1; i < numHiddenLayers; ++i)
classifierRoot = FullyConnectedDNNLayerWithSharedParameters(classifierRoot, hiddenLayerTimesParam[i - 1], hiddenLayerPlusParam[i - 1], nonLinearity);
// Todo: assume that outputTimesParam has matched output dim and hiddenLayerDim
classifierRoot = Times(outputTimesParam, classifierRoot);
return classifierRoot;
}
void EvaluationNewNetworkWithSharedParameters(size_t inputDim,
size_t numOutputClasses,
size_t numHiddenLayers,
const Parameter& inputTimesParam,
const Parameter& inputPlusParam,
const Parameter hiddenLayerTimesParam[],
const Parameter hiddenLayerPlusParam[],
const Parameter& outputTimesParam,
const DeviceDescriptor& computeDevice)
{
using namespace std::placeholders;
// Create network using shared parameters
auto inputVar = InputVariable({inputDim}, DataType::Float, L"Features");
auto classifierOutputFunction = FullyConnectedFeedForwardClassifierNetWithSharedParameters(inputVar,
numHiddenLayers,
inputTimesParam,
inputPlusParam,
hiddenLayerTimesParam,
hiddenLayerPlusParam,
outputTimesParam,
std::bind(Sigmoid, _1, L""));
auto labelsVar = InputVariable({numOutputClasses}, DataType::Float, L"Labels");
auto trainingLossFunction = CNTK::CrossEntropyWithSoftmax(classifierOutputFunction, labelsVar, L"LossFunction");
auto predictionFunction = CNTK::ClassificationError(classifierOutputFunction, labelsVar, L"ClassificationError");
auto ffNet = CNTK::Combine({trainingLossFunction, predictionFunction, classifierOutputFunction}, L"ClassifierModel");
if (ffNet->Parameters().size() != ((numHiddenLayers * 2) + 1))
throw std::runtime_error("EvaluationNewNetworkWithSharedParameters: Function does not have expected Parameter count");
if (ffNet->Arguments().size() != 2)
throw std::runtime_error("EvaluationNewNetworkWithSharedParameters: Function does not have expected Argument count");
if (ffNet->Outputs().size() != 3)
throw std::runtime_error("EvaluationNewNetworkWithSharedParameters: Function does not have expected Output count");
// Evaluate the network in several runs
size_t iterationCount = 4;
unsigned int randSeed = 2;
srand(randSeed);
size_t numSamples = 3;
for (size_t t = 0; t < iterationCount; ++t)
{
std::vector<float> inputData(inputDim * numSamples);
for (size_t i = 0; i < inputData.size(); ++i)
inputData[i] = ((float)rand()) / RAND_MAX;
NDShape inputShape = {inputDim, 1, numSamples};
ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true));
std::vector<float> labelData(numOutputClasses * numSamples, 0);
for (size_t i = 0; i < numSamples; ++i)
labelData[(i*numOutputClasses) + (rand() % numOutputClasses)] = 1;
NDShape labelShape = {numOutputClasses, 1, numSamples};
ValuePtr labelValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(labelShape, labelData.data(), labelData.size(), DeviceDescriptor::CPUDevice(), true));
ValuePtr outputValue, predictionErrorValue;
std::unordered_map<Variable, ValuePtr> outputs = {{classifierOutputFunction->Output(), outputValue}, {predictionFunction->Output(), predictionErrorValue}};
ffNet->Forward({{inputVar, inputValue}, {labelsVar, labelValue}}, outputs, computeDevice);
}
}
void EvalMultiThreadsWithNewNetwork(const DeviceDescriptor& device, const int threadCount)
/// <summary>
/// Shows how to create Function whose parameters can be shared by multi evaluation threads.
/// </summary>
/// <description>
/// It first creates all parameters needed for the Function, and then spawns multi threads.
/// Although each thread creates a new instance of the function, all threads share the same parameters.
/// After that, each thread runs evaluation independently.
/// </description>
void MultiThreadsEvaluationWithNewFunction(const DeviceDescriptor& device, const int threadCount)
{
const size_t inputDim = 937;
const size_t numOutputClasses = 9304;
@ -136,7 +57,7 @@ void EvalMultiThreadsWithNewNetwork(const DeviceDescriptor& device, const int th
std::vector<std::thread> threadList(threadCount);
for (int th = 0; th < threadCount; ++th)
{
threadList[th] = std::thread(EvaluationNewNetworkWithSharedParameters, inputDim, numOutputClasses, numHiddenLayers, inputTimesParam, inputPlusParam, hiddenLayerTimesParam, hiddenLayerPlusParam, outputTimesParam, device);
threadList[th] = std::thread(CreateFunctionAndEvaluateWithSharedParameters, inputDim, numOutputClasses, numHiddenLayers, inputTimesParam, inputPlusParam, hiddenLayerTimesParam, hiddenLayerPlusParam, outputTimesParam, device);
}
for (int th = 0; th < threadCount; ++th)
@ -146,3 +67,433 @@ void EvalMultiThreadsWithNewNetwork(const DeviceDescriptor& device, const int th
fflush(stderr);
}
}
/// <summary>
/// Demonstrates evaluating one network from several threads, each thread working on its own Clone().
/// </summary>
/// <description>
/// The classifier (a stack of sigmoid layers followed by an output Times node) is built once on the
/// calling thread. One worker thread is then started per requested thread; each worker receives the
/// result of Clone() on that classifier and evaluates it through RunEvaluationClassifier.
/// The intent of the sample is that all clones share the parameters of the original function.
/// </description>
/// <param name="device">Device used to create the parameters and to run the evaluation.</param>
/// <param name="threadCount">Number of evaluation threads to start.</param>
void MultiThreadsEvaluationWithClone(const DeviceDescriptor& device, const int threadCount)
{
    using namespace std::placeholders;

    const size_t inputDim = 937;
    const size_t numOutputClasses = 9304;
    const size_t numHiddenLayers = 6;
    const size_t hiddenLayersDim = 2048;
    assert(numHiddenLayers >= 1);

    // First hidden layer consumes the input variable, the remaining ones consume the previous layer.
    auto inputVar = InputVariable({inputDim}, DataType::Float, L"features");
    auto classifierRoot = SetupFullyConnectedDNNLayer(inputVar, hiddenLayersDim, device, std::bind(Sigmoid, _1, L""));
    size_t layersBuilt = 1;
    while (layersBuilt < numHiddenLayers)
    {
        classifierRoot = SetupFullyConnectedDNNLayer(classifierRoot, hiddenLayersDim, device, std::bind(Sigmoid, _1, L""));
        ++layersBuilt;
    }

    // Output projection on top of the last hidden layer.
    auto outputTimesParam = Parameter(NDArrayView::RandomUniform<float>({numOutputClasses, hiddenLayersDim}, -0.5, 0.5, 1, device));
    auto classifierFunc = Times(outputTimesParam, classifierRoot, 1, L"classifierOutput");

    // Sanity check: two parameters per hidden layer plus the output weight matrix.
    const size_t expectedParameterCount = (numHiddenLayers * 2) + 1;
    if (classifierFunc->Parameters().size() != expectedParameterCount)
    {
        throw std::runtime_error("MultiThreadsEvaluationWithClone: Function does not have expected Parameter count");
    }

    OutputFunctionInfo(classifierFunc);
    fprintf(stderr, "MultiThreadsEvaluationWithClone on device=%d\n", device.Id());

    // Start the workers; the clone is created here, on the calling thread, before the worker starts.
    std::vector<std::thread> workers(threadCount);
    for (auto& worker : workers)
    {
        worker = std::thread(RunEvaluationClassifier, classifierFunc->Clone(), device);
    }

    // Wait for the workers in start order and report each join.
    int joinedIndex = 0;
    for (auto& worker : workers)
    {
        worker.join();
        fprintf(stderr, "thread %d joined.\n", joinedIndex);
        fflush(stderr);
        ++joinedIndex;
    }
}
/// <summary>
/// Demonstrates evaluating a model loaded with LoadLegacyModel() from several threads via Clone().
/// </summary>
/// <description>
/// The model "01_OneHidden" is loaded once on the calling thread. One worker thread is then started
/// per requested thread; each worker receives the result of Clone() on the loaded function and
/// evaluates it through RunEvaluationOneHidden.
/// The intent of the sample is that all clones share the parameters of the loaded function.
/// </description>
/// <param name="device">Device used to load the model and to run the evaluation.</param>
/// <param name="threadCount">Number of evaluation threads to start.</param>
void MultiThreadsEvaluationWithLoadModel(const DeviceDescriptor& device, const int threadCount)
{
    // The model file will be trained and copied to the current runtime directory first.
    auto modelFuncPtr = CNTK::LoadLegacyModel(DataType::Float, L"01_OneHidden", device);

    OutputFunctionInfo(modelFuncPtr);
    fprintf(stderr, "MultiThreadsEvaluationWithLoadModel on device=%d\n", device.Id());

    // Start the workers; each clone is created on the calling thread before its worker starts.
    std::vector<std::thread> workers(threadCount);
    for (auto& worker : workers)
    {
        worker = std::thread(RunEvaluationOneHidden, modelFuncPtr->Clone(), device);
    }

    // Wait for the workers in start order and report each join.
    int joinedIndex = 0;
    for (auto& worker : workers)
    {
        worker.join();
        fprintf(stderr, "thread %d joined.\n", joinedIndex);
        fflush(stderr);
        ++joinedIndex;
    }
}
/// <summary>
/// Builds one fully connected layer from caller-supplied parameters:
/// nonLinearity(Plus(plusParam, Times(timesParam, input))).
/// </summary>
/// <param name="input">Layer input; asserted to have rank 1.</param>
/// <param name="timesParam">Weight parameter applied with Times().</param>
/// <param name="plusParam">Bias parameter applied with Plus().</param>
/// <param name="nonLinearity">Activation applied to the affine result.</param>
/// <returns>The function produced by nonLinearity.</returns>
inline FunctionPtr FullyConnectedDNNLayerWithSharedParameters(Variable input,
                                                              const Parameter& timesParam,
                                                              const Parameter& plusParam,
                                                              const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
    assert(input.Shape().Rank() == 1);

    // Todo: the shapes are not verified here. timesParam is assumed to match the layer's output and
    // input dimensions, and plusParam is assumed to match the output dimension.
    return nonLinearity(Plus(plusParam, Times(timesParam, input)));
}
/// <summary>
/// Builds a feed-forward classifier entirely from caller-supplied (shared) parameters.
/// </summary>
/// <description>
/// The first hidden layer uses inputTimesParam/inputPlusParam. Each further hidden layer k (k >= 1)
/// uses hiddenLayerTimesParam[k - 1] and hiddenLayerPlusParam[k - 1], so both arrays are read at
/// indices 0 .. numHiddenLayers - 2. The result is Times(outputTimesParam, lastHiddenLayer), with no
/// non-linearity applied to the output.
/// </description>
/// <param name="input">Network input.</param>
/// <param name="numHiddenLayers">Number of hidden layers; asserted to be at least 1.</param>
/// <param name="nonLinearity">Activation applied after every hidden layer.</param>
/// <returns>The root function of the classifier.</returns>
inline FunctionPtr FullyConnectedFeedForwardClassifierNetWithSharedParameters(Variable input,
                                                                              size_t numHiddenLayers,
                                                                              const Parameter& inputTimesParam,
                                                                              const Parameter& inputPlusParam,
                                                                              const Parameter hiddenLayerTimesParam[],
                                                                              const Parameter hiddenLayerPlusParam[],
                                                                              const Parameter& outputTimesParam,
                                                                              const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
    assert(numHiddenLayers >= 1);

    auto hiddenStack = FullyConnectedDNNLayerWithSharedParameters(input, inputTimesParam, inputPlusParam, nonLinearity);

    // One array slot per additional hidden layer beyond the first.
    for (size_t slot = 0; slot + 1 < numHiddenLayers; ++slot)
    {
        hiddenStack = FullyConnectedDNNLayerWithSharedParameters(hiddenStack, hiddenLayerTimesParam[slot], hiddenLayerPlusParam[slot], nonLinearity);
    }

    // Todo: assume that outputTimesParam has matched output dim and hiddenLayerDim
    hiddenStack = Times(outputTimesParam, hiddenStack);
    return hiddenStack;
}
/// <summary>
/// Thread body: builds a new classifier function on top of the given shared parameters and runs
/// several forward passes on randomly generated data.
/// </summary>
/// <description>
/// The network is the shared-parameter classifier combined with a cross-entropy loss and a
/// classification-error node. Its parameter, argument and output counts are verified before use.
/// Each of the 4 iterations feeds 3 samples: uniformly random features in [0, 1] and one-hot labels
/// at a random class index. The computed outputs are not inspected.
/// </description>
/// <param name="inputDim">Dimension of the feature vector.</param>
/// <param name="numOutputClasses">Number of classes, i.e. the dimension of the label vector.</param>
/// <param name="numHiddenLayers">Number of hidden layers of the classifier.</param>
/// <param name="computeDevice">Device on which Forward() is executed.</param>
void CreateFunctionAndEvaluateWithSharedParameters(size_t inputDim,
                                                   size_t numOutputClasses,
                                                   size_t numHiddenLayers,
                                                   const Parameter& inputTimesParam,
                                                   const Parameter& inputPlusParam,
                                                   const Parameter hiddenLayerTimesParam[],
                                                   const Parameter hiddenLayerPlusParam[],
                                                   const Parameter& outputTimesParam,
                                                   const DeviceDescriptor& computeDevice)
{
    using namespace std::placeholders;

    // Create network using shared parameters
    auto inputVar = InputVariable({inputDim}, DataType::Float, L"Features");
    auto classifierOutputFunction = FullyConnectedFeedForwardClassifierNetWithSharedParameters(inputVar,
                                                                                               numHiddenLayers,
                                                                                               inputTimesParam,
                                                                                               inputPlusParam,
                                                                                               hiddenLayerTimesParam,
                                                                                               hiddenLayerPlusParam,
                                                                                               outputTimesParam,
                                                                                               std::bind(Sigmoid, _1, L""));

    auto labelsVar = InputVariable({numOutputClasses}, DataType::Float, L"Labels");
    auto trainingLossFunction = CNTK::CrossEntropyWithSoftmax(classifierOutputFunction, labelsVar, L"LossFunction");
    auto predictionFunction = CNTK::ClassificationError(classifierOutputFunction, labelsVar, L"ClassificationError");
    auto ffNet = CNTK::Combine({trainingLossFunction, predictionFunction, classifierOutputFunction}, L"ClassifierModel");

    // Structural checks, evaluated in order; the first one that fails determines the error.
    const char* structureError = nullptr;
    if (ffNet->Parameters().size() != ((numHiddenLayers * 2) + 1))
    {
        structureError = "CreateFunctionAndEvaluateWithSharedParameters: Function does not have expected Parameter count";
    }
    else if (ffNet->Arguments().size() != 2)
    {
        structureError = "CreateFunctionAndEvaluateWithSharedParameters: Function does not have expected Argument count";
    }
    else if (ffNet->Outputs().size() != 3)
    {
        structureError = "CreateFunctionAndEvaluateWithSharedParameters: Function does not have expected Output count";
    }
    if (structureError != nullptr)
    {
        throw std::runtime_error(structureError);
    }

    // Evaluate the network in several runs
    const size_t iterationCount = 4;
    const size_t numSamples = 3;
    const unsigned int randSeed = 2;
    srand(randSeed);

    for (size_t run = 0; run < iterationCount; ++run)
    {
        // Features: every element drawn from rand(), scaled into [0, 1].
        std::vector<float> inputData(inputDim * numSamples);
        for (float& feature : inputData)
        {
            feature = ((float)rand()) / RAND_MAX;
        }
        NDShape inputShape = {inputDim, 1, numSamples};
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true));

        // Labels: one-hot, the hot index of every sample is drawn from rand().
        std::vector<float> labelData(numOutputClasses * numSamples, 0);
        for (size_t sample = 0; sample < numSamples; ++sample)
        {
            const size_t hotClass = rand() % numOutputClasses;
            labelData[(sample * numOutputClasses) + hotClass] = 1;
        }
        NDShape labelShape = {numOutputClasses, 1, numSamples};
        ValuePtr labelValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(labelShape, labelData.data(), labelData.size(), DeviceDescriptor::CPUDevice(), true));

        // Request the classifier output and the prediction error; the values start out empty.
        ValuePtr outputValue, predictionErrorValue;
        std::unordered_map<Variable, ValuePtr> outputs = {{classifierOutputFunction->Output(), outputValue}, {predictionFunction->Output(), predictionErrorValue}};
        ffNet->Forward({{inputVar, inputValue}, {labelsVar, labelValue}}, outputs, computeDevice);
    }
}
/// <summary>
/// Creates a linear layer Plus(bias, Times(weights, input)) with freshly created parameters.
/// </summary>
/// <description>
/// Weights of shape {outputDim, inputDim} and bias of shape {outputDim} are both initialized with
/// RandomUniform in [-0.05, 0.05] using seed 1 on the given device. inputDim is taken from the first
/// (and, per the assert, only) axis of the input shape.
/// </description>
/// <param name="input">Layer input; asserted to have rank 1.</param>
/// <param name="outputDim">Output dimension of the layer.</param>
/// <param name="device">Device on which the parameters are created.</param>
/// <param name="outputName">Name given to the resulting Plus function.</param>
inline FunctionPtr SetupFullyConnectedLinearLayer(Variable input, size_t outputDim, const DeviceDescriptor& device, const std::wstring& outputName = L"")
{
    assert(input.Shape().Rank() == 1);
    const size_t inputDim = input.Shape()[0];

    // Weighted input.
    auto weights = CNTK::Parameter(CNTK::NDArrayView::RandomUniform<float>({outputDim, inputDim}, -0.05, 0.05, 1, device));
    auto weightedInput = CNTK::Times(weights, input);

    // Bias added on top, carrying the requested output name.
    auto bias = CNTK::Parameter(CNTK::NDArrayView::RandomUniform<float>({outputDim}, -0.05, 0.05, 1, device));
    return CNTK::Plus(bias, weightedInput, outputName);
}
/// <summary>
/// Creates a fully connected layer: a new linear layer (see SetupFullyConnectedLinearLayer, created
/// with its default output name) wrapped in the given non-linearity.
/// </summary>
inline FunctionPtr SetupFullyConnectedDNNLayer(Variable input, size_t outputDim, const DeviceDescriptor& device, const std::function<FunctionPtr(const FunctionPtr&)>& nonLinearity)
{
    auto linearLayer = SetupFullyConnectedLinearLayer(input, outputDim, device);
    return nonLinearity(linearLayer);
}
/// <summary>
/// Writes the names and kinds of a function's arguments and outputs to stderr.
/// </summary>
/// <param name="func">Function to describe; must not be null.</param>
void OutputFunctionInfo(FunctionPtr func)
{
    // size() yields size_t, so it is printed with %zu; %lu is only correct where unsigned long and
    // size_t have the same width (not the case on 64-bit Windows).
    // Kind() is converted to int explicitly so the argument always matches %d.
    auto inputVariables = func->Arguments();
    fprintf(stderr, "Function %S: Input Variables (count=%zu)\n", func->Name().c_str(), inputVariables.size());
    for (const Variable& v : inputVariables)
    {
        fprintf(stderr, "    name=%S, kind=%d\n", v.Name().c_str(), static_cast<int>(v.Kind()));
    }

    auto outputVariables = func->Outputs();
    fprintf(stderr, "Function %S: Output Variables (count=%zu)\n", func->Name().c_str(), outputVariables.size());
    for (const Variable& v : outputVariables)
    {
        fprintf(stderr, "    name=%S, kind=%d\n", v.Name().c_str(), static_cast<int>(v.Kind()));
    }
}
bool GetVariableByName(std::vector<Variable> variableLists, std::wstring varName, Variable& var)
{
for (std::vector<Variable>::iterator it = variableLists.begin(); it != variableLists.end(); ++it)
{
if (it->Name().compare(varName) == 0)
{
var = *it;
return true;
}
}
return false;
}
/// <summary>
/// Searches the function's Arguments() for a variable named varName.
/// </summary>
/// <returns>true and sets var on success; false (var untouched) otherwise.</returns>
inline bool GetInputVariableByName(FunctionPtr evalFunc, std::wstring varName, Variable& var)
{
    const bool found = GetVariableByName(evalFunc->Arguments(), varName, var);
    return found;
}
/// <summary>
/// Searches the function's Outputs() for a variable named varName.
/// </summary>
/// <returns>true and sets var on success; false (var untouched) otherwise.</returns>
inline bool GetOutputVaraiableByName(FunctionPtr evalFunc, std::wstring varName, Variable& var)
{
    const bool found = GetVariableByName(evalFunc->Outputs(), varName, var);
    return found;
}
/// <summary>
/// Thread body: evaluates the given classifier function several times on random input and prints
/// the leading values of every output sample to stderr.
/// </summary>
/// <description>
/// The input variable is looked up by the name "features"; the function's Output() is evaluated.
/// 4 iterations of 3 samples each are run. For every sample at most the first 10 output values are
/// printed, followed by "..." when the output has more than 10 values.
/// </description>
/// <param name="evalFunc">Function to evaluate.</param>
/// <param name="device">Device on which Forward() is executed.</param>
/// <exception cref="std::runtime_error">The function has no argument named "features".</exception>
void RunEvaluationClassifier(FunctionPtr evalFunc, const DeviceDescriptor& device)
{
    const std::wstring inputNodeName = L"features";

    Variable inputVar;
    if (!GetInputVariableByName(evalFunc, inputNodeName, inputVar))
    {
        fprintf(stderr, "Input variable %S is not available.\n", inputNodeName.c_str());
        // Throw a std::exception-derived type (as the other functions in this file do) so that
        // generic catch (const std::exception&) handlers can report the failure.
        throw std::runtime_error("Input variable not found error.");
    }

    // Evaluate the network in several runs
    size_t iterationCount = 4;
    unsigned int randSeed = 2;
    // NOTE(review): srand/rand use process-wide state and this function runs on several threads
    // at once; whether that is safe depends on the C runtime -- confirm or switch to a per-thread generator.
    srand(randSeed);
    size_t numSamples = 3;
    std::vector<float> inputData(inputVar.Shape().TotalSize() * numSamples);
    for (size_t t = 0; t < iterationCount; ++t)
    {
        for (size_t i = 0; i < inputData.size(); ++i)
        {
            inputData[i] = ((float)rand()) / RAND_MAX;
        }

        // Create input data shape. Adding sequence length and numSamples as axes.
        // Todo: remove sequence length when only numSamples is supported.
        // Todo: add convenience APIs to simplify data preparation here.
        NDShape inputShape = inputVar.Shape().AppendShape({1, numSamples});
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData, true));

        // Define output.
        ValuePtr outputValue;
        auto outputVar = evalFunc->Output();
        std::unordered_map<Variable, ValuePtr> outputs = {{outputVar, outputValue}};

        // Evaluate the model
        evalFunc->Forward({{inputVar, inputValue}}, outputs, device);

        // Get output value
        outputValue = outputs[outputVar];

        // Todo: remove sequence length when only numSamples is supported.
        // Todo: add convenience APIs to simplify retrieval of output results.
        NDShape outputShape = outputVar.Shape().AppendShape({1, numSamples});
        std::vector<float> outputData(outputShape.TotalSize());
        NDArrayViewPtr cpuArrayOutput = MakeSharedObject<NDArrayView>(outputShape, outputData, false);
        cpuArrayOutput->CopyFrom(*outputValue->Data());

        assert(outputData.size() == outputVar.Shape()[0] * numSamples);
        fprintf(stderr, "Evaluation result:\n");
        const size_t outputDim = outputVar.Shape()[0];
        const size_t shownCount = std::min((size_t)10, outputDim);
        for (size_t i = 0; i < numSamples; i++)
        {
            // t and i are size_t, hence %zu (%lu mismatches where unsigned long is narrower).
            fprintf(stderr, "Iteration:%zu, Sample %zu:\n", t, i);
            fprintf(stderr, "    ");
            const size_t sampleOffset = i * outputDim;
            for (size_t j = 0; j < shownCount; j++)
            {
                fprintf(stderr, "%f ", outputData[sampleOffset + j]);
            }
            if (outputDim > 10)
            {
                fprintf(stderr, "...");
            }
            fprintf(stderr, "\n");
        }
    }
}
/// <summary>
/// Thread body: evaluates the given function several times on a fixed input pattern and prints
/// every output value of every sample to stderr.
/// </summary>
/// <description>
/// The input variable is looked up by the name "features" and the output variable by the name
/// "out.z_output". 4 iterations of 3 samples each are run; input element i is set to (i % 255).
/// </description>
/// <param name="evalFunc">Function to evaluate.</param>
/// <param name="device">Device on which Forward() is executed.</param>
/// <exception cref="std::runtime_error">The input or the output variable is not found by name.</exception>
void RunEvaluationOneHidden(FunctionPtr evalFunc, const DeviceDescriptor& device)
{
    const std::wstring inputNodeName = L"features";
    const std::wstring outputNodeName = L"out.z_output";

    // Throw std::exception-derived types (as the other functions in this file do) so that generic
    // catch (const std::exception&) handlers can report the failure.
    Variable inputVar;
    if (!GetInputVariableByName(evalFunc, inputNodeName, inputVar))
    {
        fprintf(stderr, "Input variable %S is not available.\n", inputNodeName.c_str());
        throw std::runtime_error("Input variable not found error.");
    }

    Variable outputVar;
    if (!GetOutputVaraiableByName(evalFunc, outputNodeName, outputVar))
    {
        fprintf(stderr, "Output variable %S is not available.\n", outputNodeName.c_str());
        throw std::runtime_error("Output variable not found error.");
    }

    // Evaluate the network in several runs
    size_t iterationCount = 4;
    size_t numSamples = 3;
    for (size_t t = 0; t < iterationCount; ++t)
    {
        // Deterministic input: element i holds i % 255.
        std::vector<float> inputData(inputVar.Shape().TotalSize() * numSamples);
        for (size_t i = 0; i < inputData.size(); ++i)
        {
            inputData[i] = static_cast<float>(i % 255);
        }

        // Append sequence-length (1) and sample-count axes to the variable shapes.
        NDShape inputShape = inputVar.Shape().AppendShape({1, numSamples});
        ValuePtr inputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(inputShape, inputData, true));

        ValuePtr outputValue;
        std::unordered_map<Variable, ValuePtr> outputs = {{outputVar, outputValue}};
        evalFunc->Forward({{inputVar, inputValue}}, outputs, device);

        // Copy the computed value into a CPU-side buffer for printing.
        outputValue = outputs[outputVar];
        NDShape outputShape = outputVar.Shape().AppendShape({1, numSamples});
        std::vector<float> outputData(outputShape.TotalSize());
        NDArrayViewPtr cpuArrayOutput = MakeSharedObject<NDArrayView>(outputShape, outputData, false);
        cpuArrayOutput->CopyFrom(*outputValue->Data());

        assert(outputData.size() == outputVar.Shape()[0] * numSamples);
        fprintf(stderr, "Evaluation result:\n");
        // dataIndex runs continuously over all samples because every value of every sample is printed.
        size_t dataIndex = 0;
        auto outputDim = outputVar.Shape()[0];
        for (size_t i = 0; i < numSamples; i++)
        {
            // t and i are size_t, hence %zu (%lu mismatches where unsigned long is narrower).
            fprintf(stderr, "Iteration:%zu, Sample %zu:\n", t, i);
            // NOTE(review): "Ouput:" is a typo; left as-is so existing log output does not change.
            fprintf(stderr, "Ouput:");
            for (size_t j = 0; j < outputDim; j++)
            {
                fprintf(stderr, "%f ", outputData[dataIndex++]);
            }
            fprintf(stderr, "\n");
        }
    }
}
void MultiThreadsEvaluation(bool isGPUAvailable)
{
#ifndef CPUONLY
if (isGPUAvailable)
{
fprintf(stderr, "Run evaluation on GPU device using GPU build.\n");
}
else
{
fprintf(stderr, "Run evaluation on CPU device using GPU build.\n");
}
#else
fprintf(stderr, "Run evaluation using CPU-only build.\n");
#endif
// Test multi-threads evaluation with new function
fprintf(stderr, "Test multi-threaded evaluation with new function on CPU.\n");
MultiThreadsEvaluationWithNewFunction(DeviceDescriptor::CPUDevice(), 2);
if (isGPUAvailable)
{
fprintf(stderr, "Test multi-threaded evaluation with new function on GPU\n");
MultiThreadsEvaluationWithNewFunction(DeviceDescriptor::GPUDevice(0), 2);
}
// Test multi-threads evaluation using clone.
fprintf(stderr, "Test multi-threaded evaluation using clone on CPU.\n");
MultiThreadsEvaluationWithClone(DeviceDescriptor::CPUDevice(), 2);
if (isGPUAvailable)
{
fprintf(stderr, "Test multi-threaded evaluation using clone on GPU.\n");
MultiThreadsEvaluationWithClone(DeviceDescriptor::GPUDevice(0), 2);
}
// test multi-threads evaluation with loading existing models
fprintf(stderr, "Test multi-threaded evaluation with loading existing models on CPU.\n");
MultiThreadsEvaluationWithLoadModel(DeviceDescriptor::CPUDevice(), 2);
if (isGPUAvailable)
{
fprintf(stderr, "Test multi-threaded evaluation with loading existing models on GPU.\n");
MultiThreadsEvaluationWithLoadModel(DeviceDescriptor::GPUDevice(0), 2);
}
fflush(stderr);
}

Просмотреть файл

@ -49,7 +49,7 @@
</PropertyGroup>
<ItemGroup>
<Reference Include="EvalWrapper, Version=0.0.0.0, Culture=neutral, processorArchitecture=AMD64">
<HintPath>..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.1\lib\net45\x64\EvalWrapper.dll</HintPath>
<HintPath>..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.2\lib\net45\x64\EvalWrapper.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="System" />
@ -85,11 +85,11 @@
</BootstrapperPackage>
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<Import Project="..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.1\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets" Condition="Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.1\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" />
<Import Project="..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.2\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets" Condition="Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.2\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" />
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}.</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.1\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.1\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets'))" />
<Error Condition="!Exists('..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.2\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\packages\Microsoft.Research.CNTK.CpuEval-mkl.1.7.2\build\net45\Microsoft.Research.CNTK.CpuEval-mkl.targets'))" />
</Target>
</Project>

Просмотреть файл

@ -30,8 +30,8 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
///
/// EvaluateModelSingleLayer and EvaluateModelMultipleLayers
/// --------------------------------------------------------
/// These two cases require the 01_OneHidden model which is part of the <CNTK>/Examples/Image/MNIST example.
/// Refer to <see cref="https://github.com/Microsoft/CNTK/blob/master/Examples/Image/MNIST/README.md"/> for how to train
/// These two cases require the 01_OneHidden model which is part of the <CNTK>/Examples/Image/GettingStarted example.
/// Refer to <see cref="https://github.com/Microsoft/CNTK/blob/master/Examples/Image/GettingStarted/README.md"/> for how to train
/// the model used in these examples.
///
/// EvaluateNetworkSingleLayer and EvaluateNetworkSingleLayerNoInput
@ -41,8 +41,8 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
///
/// EvaluateMultipleModels
/// ----------------------
/// This case requires the 02_Convolution model and the Test-28x28_cntk_text.txt test file which are part of the <CNTK>/Examples/Image/MNIST example.
/// Refer to <see cref="https://github.com/Microsoft/CNTK/blob/master/Examples/Image/MNIST/README.md"/> for how to train
/// This case requires the 02_Convolution model and the Test-28x28_cntk_text.txt test file which are part of the <CNTK>/Examples/Image/GettingStarted example.
/// Refer to <see cref="https://github.com/Microsoft/CNTK/blob/master/Examples/Image/GettingStarted/README.md"/> for how to train
/// the model used in this example.
///
/// EvaluateImageClassificationModel
@ -142,15 +142,15 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
// The examples assume the executable is running from the data folder
// We switch the current directory to the data folder (assuming the executable is in the <CNTK>/x64/Debug|Release folder
Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\MNIST\Data\");
Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\GettingStarted");
List<float> outputs;
using (var model = new IEvaluateModelManagedF())
{
// Load model
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @"..\Output\Models\01_OneHidden");
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @".\Output\Models\01_OneHidden");
ThrowIfFileNotExist(modelFilePath,
string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/MNIST to create the model.", modelFilePath));
string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/GettingStarted to create the model.", modelFilePath));
model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId: -1);
@ -189,7 +189,7 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
{
// The examples assume the executable is running from the data folder
// We switch the current directory to the data folder (assuming the executable is in the <CNTK>/x64/Debug|Release folder
Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\MNIST\Data\");
Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\GettingStarted");
Dictionary<string, List<float>> outputs;
@ -200,9 +200,9 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
const string outputLayerName = "out.z";
// Load model
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @"..\Output\Models\01_OneHidden");
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @".\Output\Models\01_OneHidden");
ThrowIfFileNotExist(modelFilePath,
string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/MNIST to create the model.", modelFilePath));
string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/GettingStarted to create the model.", modelFilePath));
var desiredOutputLayers = new List<string>() { hiddenLayerName, outputLayerName };
model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId: -1, outputNodeNames: desiredOutputLayers);
@ -395,19 +395,19 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
// The examples assume the executable is running from the data folder
// We switch the current directory to the data folder (assuming the executable is in the <CNTK>/x64/Debug|Release folder
Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\MNIST\Data\");
Environment.CurrentDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\GettingStarted");
// Load model
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @"..\Output\Models\02_Convolution");
string modelFilePath = Path.Combine(Environment.CurrentDirectory, @".\Output\Models\02_OneConv");
ThrowIfFileNotExist(modelFilePath,
string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/MNIST to create the model.", modelFilePath));
string.Format("Error: The model '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/GettingStarted to create the model.", modelFilePath));
// Initializes the model instances
ModelEvaluator.Initialize(numConcurrentModels, modelFilePath);
string testfile = Path.Combine(Environment.CurrentDirectory, @"Test-28x28_cntk_text.txt");
string testfile = Path.Combine(Environment.CurrentDirectory, @"..\DataSets\MNIST\Test-28x28_cntk_text.txt");
ThrowIfFileNotExist(testfile,
string.Format("Error: The test file '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/MNIST to download the data.", testfile));
string.Format("Error: The test file '{0}' does not exist. Please follow instructions in README.md in <CNTK>/Examples/Image/GettingStarted to download the data.", testfile));
Stopwatch sw = new Stopwatch();
sw.Start();
@ -475,9 +475,9 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
{
// This example requires the ResNet_18 model.
// The model can be downloaded from <see cref="https://www.cntk.ai/resnet/ResNet_18.model"/>
// The model is assumed to be located at: <CNTK>\Examples\Image\Miscellaneous\ImageNet\ResNet
// The model is assumed to be located at: <CNTK>\Examples\Image\Classification\ResNet
// along with a sample image file named "zebra.jpg".
string workingDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\Miscellaneous\ImageNet\ResNet");
string workingDirectory = Path.Combine(initialDirectory, @"..\..\Examples\Image\Classification\ResNet");
Environment.CurrentDirectory = initialDirectory;
List<float> outputs;
@ -486,7 +486,7 @@ namespace Microsoft.MSR.CNTK.Extensibility.Managed.CSEvalClient
{
string modelFilePath = Path.Combine(workingDirectory, "ResNet_18.model");
ThrowIfFileNotExist(modelFilePath,
string.Format("Error: The model '{0}' does not exist. Please download the model from https://www.cntk.ai/resnet/ResNet_18.model and save it under ..\\..\\Examples\\Image\\Miscellaneous\\ImageNet\\ResNet.", modelFilePath));
string.Format("Error: The model '{0}' does not exist. Please download the model from https://www.cntk.ai/resnet/ResNet_18.model and save it under ..\\..\\Examples\\Image\\Classification\\ResNet.", modelFilePath));
model.CreateNetwork(string.Format("modelPath=\"{0}\"", modelFilePath), deviceId: -1);

Просмотреть файл

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="Microsoft.Research.CNTK.CpuEval-mkl" version="1.7.1" targetFramework="net45" />
<package id="Microsoft.Research.CNTK.CpuEval-mkl" version="1.7.2" targetFramework="net45" />
</packages>

Просмотреть файл

@ -1,14 +1,15 @@
# Simple CIFAR-10 convnet, without and with BatchNormalization.
# ConvNet applied on CIFAR-10 dataset, with no data augmentation.
command = TrainConvNet:Eval
makeMode = false ; traceLevel = 1 ; deviceId = 0
precision = "float"; traceLevel = 1 ; deviceId = "auto"
RootDir = "." ; DataDir = "$RootDir$" ; ModelDir = "$RootDir$/Output/Models"
rootDir = "../.." ; dataDir = "$rootDir$/DataSets/CIFAR-10" ;
outputDir = "./Output" ;
modelPath = "$ModelDir$/ConvNet"
modelPath = "$outputDir$/Models/ConvNet_CIFAR10"
#stderr = "$outputDir$/ConvNet_CIFAR10_bs_out"
# Training without BN
TrainConvNet = {
action = "train"
@ -16,19 +17,21 @@ TrainConvNet = {
imageShape = 32:32:3
labelDim = 10
Subtract128 (x) = x - Constant (128)
featMean = 128
featScale = 1/256
Normalize{m,f} = x => f .* (x - m)
model = Sequential (
Subtract128 :
ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU, init = 'glorotUniform', initValueScale=0.00390625} :
Normalize {featMean, featScale} :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} :
ConvolutionalLayer {32, (5:5), pad = true, activation = ReLU, init = 'glorotUniform'} :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} :
ConvolutionalLayer {64, (5:5), pad = true, activation = ReLU, init = 'glorotUniform'} :
MaxPoolingLayer {(3:3), stride = (2:2)} :
DenseLayer {64, activation = ReLU, init = 'glorotUniform', initValueScale=0.1} :
Dropout :
LinearLayer {labelDim, init = 'glorotUniform', initValueScale=0.1}
DenseLayer {256} : Dropout : ReLU :
DenseLayer {128} : Dropout : ReLU :
LinearLayer {labelDim}
)
# inputs
@ -51,20 +54,23 @@ TrainConvNet = {
}
SGD = {
epochSize = 49984 ; minibatchSize = 64
epochSize = 0
minibatchSize = 64
learningRatesPerSample = 0.00015625*10:0.000046875*10:0.000015625
momentumAsTimeConstant = 600*20:6400
learningRatesPerSample = 0.0015625*10:0.00046875*10:0.00015625
momentumAsTimeConstant = 0*20:6400
maxEpochs = 30
L2RegWeight = 0.03
L2RegWeight = 0.002
dropoutRate = 0*5:0.5
firstMBsToShowResult = 10 ; numMBsToShowResult = 500
numMBsToShowResult = 100
}
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Train_cntk_text.txt"
randomize = true
keepDataInMemory = true # cache all data in memory
input = {
features = { dim = 3072 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }

Просмотреть файл

@ -0,0 +1,109 @@
# ConvNet applied on CIFAR-10 dataset, with data augmentation (translation and flipping).
command = TrainConvNet:Eval
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = "../.." ; dataDir = "$rootDir$/DataSets/CIFAR-10" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/ConvNet_CIFAR10_DataAug"
#stderr = "$outputDir$/ConvNet_CIFAR10_DataAug_bs_out"
TrainConvNet = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 32:32:3
labelDim = 10
featMean = 128
featScale = 1/256
Normalize{m,f} = x => Constant(f) .* (x - Constant(m))
model = Sequential (
Normalize {featMean, featScale} :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
ConvolutionalLayer {64, (3:3), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} :
DenseLayer {256} : Dropout : ReLU :
DenseLayer {128} : Dropout : ReLU :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
z = model (features)
# connect to system
ce = CrossEntropyWithSoftmax (labels, z)
errs = ClassificationError (labels, z)
top5Errs = ClassificationError (labels, z, topN=5) # only used in Eval action
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs) # top5Errs only used in Eval
outputNodes = (z)
}
SGD = {
epochSize = 0
minibatchSize = 64
learningRatesPerSample = 0.0015625*20:0.00046875*20:0.00015625*20:0.000046875*10:0.000015625
momentumAsTimeConstant = 0*20:600*20:6400
maxEpochs = 80
L2RegWeight = 0.002
dropoutRate = 0*5:0.5
numMBsToShowResult = 100
}
reader = {
verbosity = 0 ; randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Crop" ; cropType = "random" ; cropRatio = 0.8 ; jitterType = "uniRatio" } :
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean" ; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }
)}
labels = { labelDim = 10 }
}
})
}
}
# Eval action
Eval = {
action = "eval"
evalNodeNames = errs:top5Errs # also test top-5 error rate
# Set minibatch size for testing.
minibatchSize = 512
reader = {
verbosity = 0 ; randomize = false
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/test_map.txt"
input = {
features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
{ type = "Mean"; meanFile = "$dataDir$/CIFAR-10_mean.xml" } :
{ type = "Transpose" }
)}
labels = { labelDim = 10 }
}
})
}
}

Просмотреть файл

@ -0,0 +1,90 @@
# ConvNet on MNIST dataset.
command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = "../.." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/ConvNet_MNIST"
#stderr = "$outputDir$/ConvNet_MNIST_bs_out"
# TRAINING CONFIG
trainNetwork = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 28:28:1 # image dimensions, 1 channel only
labelDim = 10 # number of distinct labels
featScale = 1/256
Scale{f} = x => Constant(f) .* x
model = Sequential (
Scale {featScale} :
ConvolutionalLayer {32, (5:5), pad = true} : ReLU :
MaxPoolingLayer {(3:3), stride=(2:2)} :
ConvolutionalLayer {48, (3:3), pad = false} : ReLU :
MaxPoolingLayer {(3:3), stride=(2:2)} :
ConvolutionalLayer {64, (3:3), pad = false} : ReLU :
DenseLayer {96} : Dropout : ReLU :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
ol = model (features)
# loss and error computation
ce = CrossEntropyWithSoftmax (labels, ol)
errs = ClassificationError (labels, ol)
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (ol)
}
SGD = {
epochSize = 60000
minibatchSize = 64
maxEpochs = 40
learningRatesPerSample = 0.001*10:0.0005*10:0.0001
dropoutRate = 0.5
momentumAsTimeConstant = 0*5:1024
numMBsToShowResult = 500
}
reader = {
readerType = "CNTKTextFormatReader"
# See ../README.md for details on getting the data (Train-28x28_cntk_text.txt).
file = "$DataDir$/Train-28x28_cntk_text.txt"
randomize = true
keepDataInMemory = true
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}
# TEST CONFIG
testNetwork = {
action = test
minibatchSize = 1024 # reduce this if you run out of memory
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Test-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}

Просмотреть файл

@ -0,0 +1,51 @@
# CNTK Examples: Image/Classification/ConvNet
## Overview
|Data: |The MNIST dataset (http://yann.lecun.com/exdb/mnist/) of handwritten digits and the CIFAR-10 dataset (http://www.cs.toronto.edu/~kriz/cifar.html) for image classification.
|:---------|:---
|Purpose |This folder contains a number of examples that demonstrate the usage of BrainScript to define convolutional neural networks for image classification.
|Network |convolutional neural networks.
|Training |Stochastic gradient descent with momentum.
|Comments |See below.
## Running the example
### Getting the data
We use the MNIST and CIFAR-10 datasets to demonstrate how to train a `convolutional neural network (CNN)`. CNN has been one of the most popular neural networks for image-related tasks. A very well-known early work on CNN is the [LeNet](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf). In 2012 Alex Krizhevsky, Ilya Sutskever, and Geoffrey Hinton won the ILSVRC-2012 competition using a [CNN architecture](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf). Most state-of-the-art neural networks for image classification tasks today adopt a modified CNN architecture, such as [VGG](../VGG), [GoogLeNet](../GoogLeNet), [ResNet](../ResNet), etc.
The MNIST and CIFAR-10 datasets are not included in the CNTK distribution but can be easily downloaded and converted by following the instructions in [DataSets/MNIST](../../DataSets/MNIST) and [DataSets/CIFAR-10](../../DataSets/CIFAR-10). We recommend that you keep the downloaded data in the respective folders, as the configuration files in this folder assume that location by default.
## Details
### ConvNet_MNIST.cntk
Our first example applies CNN on the MNIST dataset. The network we use contains three convolution layers and two dense layers. Dropout is applied after the first dense layer. No data augmentation is used in this example. We start the training with no momentum, and add momentum after training for 5 epochs. Please refer to the cntk configuration file [ConvNet_MNIST.cntk](./ConvNet_MNIST.cntk) for more details.
Run the example from the current folder using:
`cntk configFile=ConvNet_MNIST.cntk`
The network achieves an error rate of `0.5%`, which is very good considering no data augmentation is used. This accuracy is comparable to, if not better than, many other vanilla CNN implementations (http://yann.lecun.com/exdb/mnist/).
### ConvNet_CIFAR10.cntk
The second example applies CNN on the CIFAR-10 dataset. The network contains four convolution layers and three dense layers. Max pooling is conducted for every two convolution layers. Dropout is applied after the first two dense layers. No data augmentation is used. Please refer to the cntk configuration file [ConvNet_CIFAR10.cntk](./ConvNet_CIFAR10.cntk) for more details.
Run the example from the current folder using:
`cntk configFile=ConvNet_CIFAR10.cntk`
The network achieves an error rate of `18.51%` after 30 epochs. This is comparable to the network published by [cuda-convnet](https://code.google.com/p/cuda-convnet/), which has 18% error with no data augmentation. One difference is that we do not use a `local response normalization layer`. This layer type is now rarely used in most state-of-the-art deep learning networks.
### ConvNet_CIFAR10_DataAug.cntk
The third example uses the same CNN as the previous example, but it improves by adding data augmentation to training. For this purpose, we use the `ImageReader` instead of the `CNTKTextFormatReader` to load the data. The ImageReader currently supports crop, flip, scale, color jittering, and mean subtraction.
For a reference on image reader and transforms, please check [here](https://github.com/Microsoft/CNTK/wiki/Image-reader).
Run the example from the current folder using:
`cntk configFile=ConvNet_CIFAR10_DataAug.cntk`
As seen in the cntk configuration file [ConvNet_CIFAR10_DataAug.cntk](./ConvNet_CIFAR10_DataAug.cntk), we use a fixed crop ratio of `0.8` and scale the image to `32x32` pixels for training. Since all training images are pre-padded to `40x40` pixels, effectively we only perform a translation transform without scaling. The error rate of the network on test data is `14.21%`, which is a lot better than the previous model.

Просмотреть файл

@ -0,0 +1,85 @@
# Multi-layer perceptron (MLP) on MNIST dataset.
command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = "../.." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/MLP_MNIST"
#stderr = "$outputDir$/MLP_MNIST_bs_out"
# TRAINING CONFIG
trainNetwork = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 28:28:1 # image dimensions, 1 channel only
labelDim = 10 # number of distinct labels
featScale = 1/256
Scale{f} = x => Constant(f) .* x
model = Sequential (
Scale {featScale} :
DenseLayer {768, init="gaussian", initValueScale=1.5} : Dropout: ReLU :
DenseLayer {512, init="gaussian", initValueScale=1.5} : Dropout: ReLU :
DenseLayer {256, init="gaussian", initValueScale=1.5} : Dropout: ReLU :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
z = model (features)
# loss and error computation
ce = CrossEntropyWithSoftmax (labels, z)
errs = ClassificationError (labels, z)
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (z)
}
SGD = {
epochSize = 60000
minibatchSize = 64
maxEpochs = 40
learningRatesPerSample = 0.001*10:0.0005*10:0.0001
dropoutRate = 0.5
momentumAsTimeConstant = 600*10:4096
numMBsToShowResult = 500
}
reader = {
readerType = "CNTKTextFormatReader"
# See ../README.md for details on getting the data (Train-28x28_cntk_text.txt).
file = "$DataDir$/Train-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}
# TEST CONFIG
testNetwork = {
action = test
minibatchSize = 1024 # reduce this if you run out of memory
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Test-28x28_cntk_text.txt"
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}

Просмотреть файл

@ -0,0 +1,30 @@
# CNTK Examples: Image/Classification/MLP
## Overview
|Data: |The MNIST dataset (http://yann.lecun.com/exdb/mnist/) of handwritten digits.
|:---------|:---
|Purpose |This folder contains a number of examples that demonstrate the usage of BrainScript to define multi-layer perceptron (MLP) networks for image classification.
|Network |Multi-layer perceptron.
|Training |Stochastic gradient descent with momentum.
|Comments |See below.
## Running the example
### Getting the data
We use the MNIST dataset to demonstrate how to train a `multi-layer perceptron (MLP)` network. MLP is a feed-forward neural network that consists of multiple layers of nodes in a directed graph, where each layer is fully connected to the next one. This is arguably one of the simplest neural networks.
The MNIST dataset is not included in the CNTK distribution but can be easily downloaded and converted by following the instructions in [DataSets/MNIST](../../DataSets/MNIST). We recommend that you keep the downloaded data in the respective folder, as the configuration files in this folder assume that location by default.
## Details
### MLP_MNIST.cntk
Similar to the `01_OneHidden.cntk` network in [GettingStarted](../../GettingStarted), MLP is "permutation invariant". In this particular example, we use 3 hidden layers containing `768`, `512` and `256` nodes, respectively. Dropout is applied after each hidden layer, with `dropoutRate=0.5`. The learning rate is gradually adjusted from `0.001` per sample to `0.0001`, and momentum as time constant is adjusted from `600` (effective momentum = `0.898824`) to `4096` (effective momentum = `0.984495`).
Run the example from the current folder using:
`cntk configFile=MLP_MNIST.cntk`
The network achieves an error rate of `1.45%`, which is about as good as one can have with MLP and no data augmentation (http://yann.lecun.com/exdb/mnist/).

Просмотреть файл

До

Ширина:  |  Высота:  |  Размер: 92 KiB

После

Ширина:  |  Высота:  |  Размер: 92 KiB

Просмотреть файл

@ -0,0 +1,21 @@
# CIFAR-10 Dataset
The CIFAR-10 dataset (http://www.cs.toronto.edu/~kriz/cifar.html) is a popular dataset for image classification, collected by Alex Krizhevsky, Vinod Nair, and Geoffrey Hinton. It is a labeled subset of the [80 million tiny images](http://people.csail.mit.edu/torralba/tinyimages/) dataset.
The CIFAR-10 dataset consists of 60,000 32x32 color images in 10 classes, with 6,000 images per class. There are 50,000 training images and 10,000 test images. The 10 classes are: airplane, automobile, bird, cat, deer, dog, frog, horse, ship, and truck.
The CIFAR-10 dataset is not included in the CNTK distribution but can be easily downloaded and converted to CNTK-supported format by running the following Python command:
```
python install_cifar10.py
```
After running `install_cifar10.py`, you will see the original CIFAR-10 data are copied in a folder named `cifar-10-batches-py`. Meanwhile, two text files `Train_cntk_text.txt` and `Test_cntk_text.txt` are created in the current folder. These text files can be read directly by CNTK.
In addition, the script will create a `train` and a `test` folder that store train and test images in png format. It will also create appropriate mapping files (`train_map.txt` and `test_map.txt`) for the CNTK `ImageReader` as well as mean file `CIFAR-10_mean.xml`.
The total amount of disk space required for both the text version and the png version for CIFAR-10 is around `950`MB.
We provide multiple examples in the [Classification](../../Classification) folder to train classifiers for CIFAR-10 with CNTK. Please refer there for more details.
If you are curious about how well computers can perform on CIFAR-10 today, Rodrigo Benenson maintains a [blog](http://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#43494641522d3130) on the state-of-the-art performance of various algorithms.

Просмотреть файл

@ -0,0 +1,132 @@
from __future__ import print_function
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve
import sys
import tarfile
import shutil
import os
import struct
import numpy as np
import pickle as cp
from PIL import Image
try:
    import xml.etree.cElementTree as et
except ImportError:
    # xml.etree.cElementTree was removed in Python 3.9; ElementTree exposes the same API.
    import xml.etree.ElementTree as et
import xml.dom.minidom
import getopt
ImgSize = 32
NumFeat = ImgSize * ImgSize * 3
def readBatch(src):
    """Load one pickled CIFAR-10 batch file as a 2-D integer array.

    Args:
        src: path of the pickled batch file; the unpickled object must provide
            a 'data' matrix (one row per sample) and a 'labels' sequence.

    Returns:
        An integer ndarray holding the rows of 'data' with the matching label
        appended as the last column.
    """
    # NOTE(review): unpickling executes arbitrary code; only use on trusted batch files.
    with open(src, 'rb') as f:
        if sys.version_info[0] < 3:
            d = cp.load(f)
        else:
            # Batches were pickled by Python 2; latin1 decodes their byte strings.
            d = cp.load(f, encoding='latin1')
    feat = d['data']
    labels = np.reshape(d['labels'], (len(d['labels']), 1))
    # The `np.int` alias was removed in NumPy 1.24; the builtin `int` is equivalent.
    return np.hstack((feat, labels)).astype(int)
def loadData(src):
    """Download the CIFAR-10 python archive and return it as two integer arrays.

    The archive is saved as './delete.me', extracted into the current working
    directory, and the temporary download is removed again even when
    extraction or parsing fails.

    Args:
        src: URL of the CIFAR-10 python tarball.

    Returns:
        A tuple (trn, tst): the five training batches stacked row-wise and the
        test batch, each row holding NumFeat feature values followed by the label.
    """
    print ('Downloading ' + src)
    fname, h = urlretrieve(src, './delete.me')
    print ('Done.')
    try:
        print ('Extracting files...')
        with tarfile.open(fname) as tar:
            # NOTE(review): extractall() trusts member paths inside the archive;
            # only point this at a trusted source.
            tar.extractall()
        print ('Done.')
        print ('Preparing train set...')
        # The `np.int` alias was removed in NumPy 1.24; the builtin `int` is equivalent.
        trn = np.empty((0, NumFeat + 1), dtype=int)
        for i in range(5):
            batchName = './cifar-10-batches-py/data_batch_{0}'.format(i + 1)
            trn = np.vstack((trn, readBatch(batchName)))
        print ('Done.')
        print ('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch')
        print ('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
def saveTxt(filename, ndarray):
    """Write samples to `filename` in CNTK text format.

    Each row of `ndarray` holds the feature values followed by an integer class
    label (0-9) in the last column. One line is written per row:
    '|labels <one-hot over 10 classes> |features <space-separated features>'.
    """
    # Pre-render the ten possible one-hot label strings once.
    oneHotRows = np.eye(10, dtype=np.uint).astype(str)
    labelText = [' '.join(cells) for cells in oneHotRows]
    with open(filename, 'w') as out:
        for sample in ndarray:
            asText = sample.astype(str)
            features = ' '.join(asText[:-1])
            out.write('|labels {} |features {}\n'.format(labelText[sample[-1]], features))
def saveImage(fname, data, label, mapFile, regrFile, pad, **key_parms):
    """Save one CIFAR-10 sample as an image file and record it in the map files.

    Args:
        fname: output image path; also written to `mapFile`.
        data: flat pixel vector, reshaped here to (3, ImgSize, ImgSize).
        label: integer class label written next to `fname` in `mapFile`.
        mapFile: open text file receiving '<fname>\\t<label>' lines.
        regrFile: open text file receiving the per-channel means scaled by 1/255.
        pad: border width in pixels; when > 0 the image is padded with value 128.
        **key_parms: optional 'mean' array that the unpadded pixels are added to
            in place, so the caller can accumulate a running sum.
    """
    # data in CIFAR-10 dataset is in CHW format.
    chw = data.reshape((3, ImgSize, ImgSize))
    if 'mean' in key_parms:
        key_parms['mean'] += chw

    if pad > 0:
        # can also use mode='edge'
        chw = np.pad(chw, ((0, 0), (pad, pad), (pad, pad)), mode='constant', constant_values=128)

    side = ImgSize + 2 * pad
    img = Image.new('RGB', (side, side))
    canvas = img.load()
    width, height = img.size
    for col in range(width):
        for row in range(height):
            canvas[col, row] = (chw[0][row][col], chw[1][row][col], chw[2][row][col])
    img.save(fname)
    mapFile.write("%s\t%d\n" % (fname, label))

    # compute per channel mean and store for regression example
    # (taken over the padded pixels when pad > 0)
    perChannel = np.mean(chw, axis=(1,2))
    regrFile.write("|regrLabels\t%f\t%f\t%f\n" % (perChannel[0]/255.0, perChannel[1]/255.0, perChannel[2]/255.0))
def saveMean(fname, data):
    """Write the mean image `data` to `fname` as an 'opencv_storage' XML file.

    The file is first serialized with ElementTree, then re-parsed and
    rewritten in pretty-printed form.
    """
    flatLen = ImgSize * ImgSize * 3

    root = et.Element('opencv_storage')
    for tag, text in (('Channel', '3'), ('Row', str(ImgSize)), ('Col', str(ImgSize))):
        et.SubElement(root, tag).text = text

    meanImg = et.SubElement(root, 'MeanImg', type_id='opencv-matrix')
    for tag, text in (('rows', '1'), ('cols', str(flatLen)), ('dt', 'f')):
        et.SubElement(meanImg, tag).text = text
    flatValues = np.reshape(data, (flatLen))
    et.SubElement(meanImg, 'data').text = ' '.join(['%e' % v for v in flatValues])

    et.ElementTree(root).write(fname)

    # Re-read the compact file and overwrite it with an indented version.
    dom = xml.dom.minidom.parse(fname)
    with open(fname, 'w') as f:
        f.write(dom.toprettyxml(indent = ' '))
def saveTrainImages(filename, foldername):
    """Convert the five extracted training batches to padded image files.

    Writes '<foldername>/NNNNN.png' for every training sample (padded by 4
    pixels per side), plus 'train_map.txt', 'train_regrLabels.txt' and the mean
    image 'CIFAR-10_mean.xml' in the current directory.

    Args:
        filename: not used by this function.
        foldername: directory for the images; created if missing.
    """
    if not os.path.exists(foldername):
        os.makedirs(foldername)
    outDir = os.path.abspath(foldername)
    isPy2 = sys.version_info[0] < 3
    pixelSum = np.zeros((3, ImgSize, ImgSize)) # mean is in CHW format.

    with open('train_map.txt', 'w') as mapFile, open('train_regrLabels.txt', 'w') as regrFile:
        for batchNo in range(1, 6):
            batchPath = os.path.join('./cifar-10-batches-py', 'data_batch_' + str(batchNo))
            with open(batchPath, 'rb') as f:
                batch = cp.load(f) if isPy2 else cp.load(f, encoding='latin1')
            firstIndex = (batchNo - 1) * 10000
            for i in range(10000):
                pngPath = os.path.join(outDir, ('%05d.png' % (i + firstIndex)))
                # saveImage adds each sample's pixels to pixelSum in place.
                saveImage(pngPath, batch['data'][i, :], batch['labels'][i], mapFile, regrFile, 4, mean=pixelSum)

    saveMean('CIFAR-10_mean.xml', pixelSum / (50 * 1000))
def saveTestImages(filename, foldername):
    """Convert the extracted test batch to unpadded image files.

    Writes '<foldername>/NNNNN.png' for every test sample, plus 'test_map.txt'
    and 'test_regrLabels.txt' in the current directory.

    Args:
        filename: not used by this function.
        foldername: directory for the images; created if missing.
    """
    if not os.path.exists(foldername):
        os.makedirs(foldername)
    outDir = os.path.abspath(foldername)
    batchPath = os.path.join('./cifar-10-batches-py', 'test_batch')

    with open('test_map.txt', 'w') as mapFile, open('test_regrLabels.txt', 'w') as regrFile:
        with open(batchPath, 'rb') as f:
            if sys.version_info[0] >= 3:
                batch = cp.load(f, encoding='latin1')
            else:
                batch = cp.load(f)
        for i in range(10000):
            pngPath = os.path.join(outDir, ('%05d.png' % i))
            saveImage(pngPath, batch['data'][i, :], batch['labels'][i], mapFile, regrFile, 0)

Просмотреть файл

@ -0,0 +1,18 @@
from __future__ import print_function
import cifar_utils as ut
if __name__ == "__main__":
    # Download and unpack CIFAR-10, then emit the text-format and image-format
    # variants of the train and test sets into the current directory.
    trn, tst = ut.loadData('http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz')

    steps = (
        ('Writing train text file...', lambda: ut.saveTxt(r'./Train_cntk_text.txt', trn)),
        ('Writing test text file...', lambda: ut.saveTxt(r'./Test_cntk_text.txt', tst)),
        ('Converting train data to png images...', lambda: ut.saveTrainImages(r'./Train_cntk_text.txt', 'train')),
        ('Converting test data to png images...', lambda: ut.saveTestImages(r'./Test_cntk_text.txt', 'test')),
    )
    for banner, run in steps:
        print (banner)
        run()
        print ('Done.')

Просмотреть файл

@ -0,0 +1,14 @@
# MNIST Dataset
The MNIST dataset (http://yann.lecun.com/exdb/mnist/) for handwritten digits recognition is one of the most widely used image datasets for experimenting with different classification algorithms. MNIST has a training set of 60,000 examples, and a test set of 10,000 examples. Each example contains one digit that has been size-normalized and centered in a grayscale image at 28x28 pixel resolution.
The MNIST dataset is not included in the CNTK distribution but can be easily
downloaded and converted to CNTK-supported format by running the following Python command:
`python install_mnist.py`
After running the script, you will see two output files in the current folder: Train-28x28_cntk_text.txt and Test-28x28_cntk_text.txt. The total amount of disk space required is around `124`MB. You may now proceed to the [`GettingStarted`](../../GettingStarted) folder to play with this dataset.
Further, we provide two advanced examples with MNIST. The first one is a [`Multi-Layer Perceptron network (MLP)`](../../Classification/MLP), which achieves about 1.5% error rate. The second one is a [`Convolutional Neural Network (ConvNet)`](../../Classification/ConvNet), which achieves about 0.5% error rate. These results are comparable to the best published results using these types of networks.
If you are curious about how well computers can perform on MNIST today, Rodrigo Benenson maintains a [blog](http://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#4d4e495354) on the state-of-the-art performance of various algorithms.

Просмотреть файл

@ -0,0 +1,14 @@
from __future__ import print_function
import mnist_utils as ut
if __name__ == "__main__":
    # For each split: download images + labels, then write the CNTK text file.
    jobs = (
        ('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
         'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 60000,
         'Writing train text file...', r'./Train-28x28_cntk_text.txt'),
        ('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
         'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', 10000,
         'Writing test text file...', r'./Test-28x28_cntk_text.txt'),
    )
    for imagesUrl, labelsUrl, count, banner, outPath in jobs:
        samples = ut.load(imagesUrl, labelsUrl, count)
        print (banner)
        ut.savetxt(outPath, samples)
        print ('Done.')

Просмотреть файл

@ -1,13 +1,18 @@
from __future__ import print_function
try:
from urllib.request import urlretrieve
except ImportError:
from urllib import urlretrieve
import sys
import urllib.request
import gzip
import shutil
import os
import struct
import numpy as np
def loadData(src, cimg):
print ('Downloading ' + src)
gzfname, h = urllib.request.urlretrieve(src, './delete.me')
gzfname, h = urlretrieve(src, './delete.me')
print ('Done.')
try:
with gzip.open(gzfname) as gz:
@ -31,7 +36,7 @@ def loadData(src, cimg):
def loadLabels(src, cimg):
print ('Downloading ' + src)
gzfname, h = urllib.request.urlretrieve(src, './delete.me')
gzfname, h = urlretrieve(src, './delete.me')
print ('Done.')
try:
with gzip.open(gzfname) as gz:
@ -49,29 +54,16 @@ def loadLabels(src, cimg):
os.remove(gzfname)
return res.reshape((cimg, 1))
def load(dataSrc, labelsSrc, cimg):
    """Fetch images and labels and join them column-wise.

    Calls loadData(dataSrc, cimg) and then loadLabels(labelsSrc, cimg), and
    returns their horizontal stack, so the label is the last column of each row.
    """
    columns = (loadData(dataSrc, cimg), loadLabels(labelsSrc, cimg))
    return np.hstack(columns)
def savetxt(filename, ndarray):
with open(filename, 'w', encoding="ascii") as f:
with open(filename, 'w') as f:
labels = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))
for row in ndarray:
row_str = row.astype(str)
label_str = labels[row[-1]]
feature_str = ' '.join(row_str[:-1])
f.write('|labels {} |features {}\n'.format(label_str, feature_str))
if __name__ == "__main__":
train = load('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 60000)
print ('Writing train text file...')
savetxt(r'./../Data/Train-28x28_cntk_text.txt', train)
print ('Done.')
test = load('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', 10000)
print ('Writing test text file...')
savetxt(r'./../Data/Test-28x28_cntk_text.txt', test)
print ('Done.')

Просмотреть файл

@ -0,0 +1,188 @@
# Fast-RCNN configuration for CNTK
# For algorithm and details see http://arxiv.org/abs/1504.08083
# Overview:
# The Fast-RCNN algorithm uses a DNN that takes as inputs a set of images
# and for each image a set of ROIs (Regions of interest). It first computes
# a convolutional feature map for the entire image using a series
# of convolutional layers (usually from a pretrained network). Then it
# employs ROI pooling to crop out the part of the conv feature map
# that corresponds to an ROI and resizes it to the input size expected
# by the following layer (usually a set of pretrained fully connected layers).
# Classification error and evaluation criterion are computed for each ROI.
command = Train:Test
#command = Write
deviceId = "Auto"
precision = "float"
parallelTrain = "false"
traceLevel = 1
rootDir = "."
dataDir = "$rootDir$/data/"
outputDir = "$rootDir$/Output"
modelPath = "$outputDir$/Fast-RCNN"
stderr = "$outputDir$/Fast-RCNN.log"
ImageH = 1000
ImageW = 1000
ImageC = 3
NumLabels = 21
NumTrainROIs = 64
TrainROIDim = 256 # $NumTrainROIs$ * 4
TrainROILabelDim = 1344 # $NumTrainROIs$ * $NumLabels$
NumTestROIs = 200
TestROIDim = 800
TestROILabelDim = 4200
# For training we load a pretrained AlexNet model (AlexNet.89) and clone three parts of it.
# For the first part (up to pool1) we keep the weights fixed. The middle part contains the
# remaining convolutional and pooling layers and the last part are the FC layers.
# In the model we apply the first two cloned parts, then an ROI pooling layer and
# finally the pretrained FC layers followed by a new FC layer that maps to the new
# label dimensionality of 21 classes.
# The inputs are images (1000 x 1000 x 3), ROIs (64 ROIs x 4 coordinates (x, y, w, h))
# and ground truth labels per ROI (64 ROIs x 21 classes).
Train = {
    action = "train"

    BrainScriptNetworkBuilder = {
        imageShape = $ImageH$:$ImageW$:$ImageC$         # 1000:1000:3
        labelShape = $NumLabels$:$NumTrainROIs$         # 21:64
        ROIShape   = 4:$NumTrainROIs$                   # 4:64

        # Load the pretrained network and clone the three parts that are reused.
        network    = BS.Network.Load ("AlexNet.89")
        # First part (up to pool1): cloned with constant parameters.
        pool1      = BS.Network.CloneFunction(network.features, network.pool1, parameters = "constant")
        # Middle part: remaining layers from pool1 up to conv5_y.
        convLayers = BS.Network.CloneFunction(network.pool1, network.conv5_y)
        # Last part: layers from pool3 up to h2_d.
        fcLayers   = BS.Network.CloneFunction(network.pool3, network.h2_d)

        model (features, rois) = {
            featNorm = features - 114
            pool1Out = pool1 (featNorm)
            conv5Out = convLayers (pool1Out)
            roiOut   = ROIPooling (conv5Out, rois, (6:6))
            fcOut    = fcLayers (roiOut)
            # New output layer; sized from NumLabels so it stays in sync with labelShape.
            W = ParameterTensor{($NumLabels$:4096)}
            b = ParameterTensor{$NumLabels$, init = 'zero'}
            z = W * fcOut + b
        }.z

        # inputs
        features  = Input {imageShape}
        roiLabels = Input {labelShape}
        rois      = Input {ROIShape}

        # apply model to the inputs
        z = model (features, rois)

        # loss and error computation
        ce   = CrossEntropyWithSoftmax(roiLabels, z, axis = 1)
        errs = ClassificationError(roiLabels, z, axis = 1)

        # declare special nodes
        featureNodes    = (features:rois)
        labelNodes      = (roiLabels)
        criterionNodes  = (ce)
        evaluationNodes = (errs)
        outputNodes     = (z)
    }

    SGD = {
        epochSize = 0
        minibatchSize = 2
        maxEpochs = 15
        learningRatesPerSample = 0.00005
        momentumAsTimeConstant = 0*5:1024 # was: 0.9 per MB
        L2RegWeight = 0.0001
        dropoutRate = 0.5
        numMBsToShowResult = 50
    }

    reader = {
        randomize = false
        verbosity = 2
        deserializers = ({
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = "$dataDir$/tv2012pad.rois.txt"
            input = { rois = { dim = $TrainROIDim$ ; format = "dense" } }
        }:{
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = "$dataDir$/tv2012pad.roilabels.txt"
            input = { roiLabels = { dim = $TrainROILabelDim$ ; format = "dense" } }
        }:{
            type = "ImageDeserializer" ; module = "ImageReader"
            file = "$dataDir$/tv2012pad.txt"
            input = {
                features = { transforms = (
                    # height uses ImageH (was ImageW) so it matches imageShape if the two ever differ
                    { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
                    { type = "Transpose" }
                )}
                ignored = {labelDim = 1000}
            }
        })
    }
}
# For testing we load the trained Fast-RCNN model and modify the input size,
# such that the network accepts 200 ROIs per image. To this end we load and
# clone the entire network and define new inputs with the desired size
# corresponding to 200 ROIs.
Test = {
    action = "test"
    minibatchSize = 1

    # use this for write action
    # action = "write"
    # outputPath = "$OutputDir$/fastrcnnNetOutput"

    BrainScriptNetworkBuilder = {
        imageShape = $ImageH$:$ImageW$:$ImageC$ # 1000:1000:3
        labelShape = $NumLabels$:$NumTestROIs$ # 21:200
        ROIShape = 4:$NumTestROIs$ # 4:200

        # load network
        network = BS.Network.Load ("$modelPath$")
        # Clone the whole trained network as a function of (features, rois) so that it
        # can be applied to the differently sized test inputs declared below.
        clonedNet = BS.Network.CloneFunction ((network.features:network.rois), { z = network.z }, parameters = "constant")

        # inputs
        features = Input {imageShape}
        roiLabels = Input {labelShape}
        rois = Input {ROIShape}

        z = clonedNet(features, rois).z

        # loss and error computation; labels come first in both calls, matching
        # the Train section (the error node previously had its arguments swapped).
        ce = CrossEntropyWithSoftmax (roiLabels, z, axis = 1)
        errs = ClassificationError(roiLabels, z, axis = 1)

        # declare special nodes
        featureNodes = (features:rois)
        labelNodes = (roiLabels)
        criterionNodes = (ce)
        evaluationNodes = (errs)
        outputNodes = (z)
    }

    # Three deserializers supply the three streams: rois, roiLabels and features.
    reader = {
        randomize = false
        verbosity = 2
        deserializers = ({
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = "$dataDir$/test2007pad_all.rois.txt"
            input = { rois = { dim = $TestROIDim$ ; format = "dense" } }
        }:{
            type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
            file = "$dataDir$/test2007pad_all.roilabels.txt"
            input = { roiLabels = { dim = $TestROILabelDim$ ; format = "dense" } }
        }:{
            type = "ImageDeserializer" ; module = "ImageReader"
            file = "$dataDir$/test2007pad_all.txt"
            input = {
                features = { transforms = (
                    # height must follow $ImageH$ (it previously reused $ImageW$, which
                    # only worked because the configured image is square).
                    { type = "Scale" ; width = $ImageW$ ; height = $ImageH$ ; channels = $ImageC$ ; scaleMode = "pad" ; padValue = 114 }:
                    { type = "Transpose" }
                )}
                # Label stream of the image map file; no network input uses it.
                ignored = {labelDim = 1000}
            }
        })
    }
}

Просмотреть файл

@ -0,0 +1,115 @@
# Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>
command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/01_OneHidden"
#stderr = "$outputDir$/01_OneHidden_bs_out"
# TRAINING CONFIG
# Trains a network with one dense hidden layer on the MNIST training file.
trainNetwork = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 28:28:1 # image dimensions, 1 channel only
labelDim = 10 # number of distinct labels
# factor applied to the raw input features before the hidden layer
featScale = 1/256
# This model returns multiple nodes as a record, which
# can be accessed using .x syntax.
model(x) = {
s1 = x * featScale
h1 = DenseLayer {200, activation=ReLU} (s1)
z = LinearLayer {labelDim} (h1)
}
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
out = model (features)
# loss and error computation
ce = CrossEntropyWithSoftmax (labels, out.z)
errs = ClassificationError (labels, out.z)
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (out.z)
# Alternatively, you can use the Sequential keyword and write the model
# as follows. We keep the previous format because EvalClientTest needs
# to access the internal nodes, which is not doable yet with Sequential
#
# Scale{f} = x => Constant(f) .* x
# model = Sequential (
# Scale {featScale} :
# DenseLayer {200} : ReLU :
# LinearLayer {labelDim}
# )
# # inputs
# features = Input {imageShape}
# labels = Input (labelDim)
# # apply model to features
# ol = model (features)
# # loss and error computation
# ce = CrossEntropyWithSoftmax (labels, ol)
# errs = ClassificationError (labels, ol)
# # declare special nodes
# featureNodes = (features)
# labelNodes = (labels)
# criterionNodes = (ce)
# evaluationNodes = (errs)
# outputNodes = (ol)
}
SGD = {
epochSize = 60000
minibatchSize = 64
maxEpochs = 10
# 0.01 per sample for the first 5 epochs, then 0.005 for the remaining ones
learningRatesPerSample = 0.01*5:0.005
momentumAsTimeConstant = 0
numMBsToShowResult = 500
}
reader = {
readerType = "CNTKTextFormatReader"
# See ../README.md for details on getting the data (Train-28x28_cntk_text.txt).
file = "$DataDir$/Train-28x28_cntk_text.txt"
# stream names match the network inputs declared above; 784 = 28 * 28 * 1
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}
# TEST CONFIG
# Runs the "test" action on the MNIST test file. No network is defined in this
# section; presumably the model written to modelPath by trainNetwork is
# evaluated -- TODO confirm.
testNetwork = {
action = "test"
minibatchSize = 1024 # reduce this if you run out of memory
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Test-28x28_cntk_text.txt"
# same stream layout as the training reader in this file
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}

Просмотреть файл

@ -1,5 +1,5 @@
# Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile rootDir=../..
# for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>
@ -7,11 +7,11 @@ command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = ".." ; configDir = "$rootDir$/Config" ; dataDir = "$rootDir$/Data" ;
outputDir = "$rootDir$/Output" ;
rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/02_Convolution"
stderr = "$outputDir$/02_Convolution_bs_out"
modelPath = "$outputDir$/Models/02_OneConv"
#stderr = "$outputDir$/02_OneConv_bs_out"
# TRAINING CONFIG
trainNetwork = {
@ -27,15 +27,13 @@ trainNetwork = {
Scale {featScale} :
ConvolutionalLayer {16, (5:5), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
ConvolutionalLayer {32, (5:5), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
DenseLayer {128, activation=Sigmoid} :
DenseLayer {64} : ReLU :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
labels = Input (labelDim)
# apply model to features
ol = model (features)
@ -57,8 +55,7 @@ trainNetwork = {
minibatchSize = 64
maxEpochs = 15
learningRatesPerSample = 0.001*5:0.0005
momentumAsTimeConstant = 0*5:1024
momentumAsTimeConstant = 0
numMBsToShowResult = 500
}
@ -75,7 +72,7 @@ trainNetwork = {
# TEST CONFIG
testNetwork = {
action = test
action = "test"
minibatchSize = 1024 # reduce this if you run out of memory
reader = {

Просмотреть файл

@ -7,11 +7,11 @@ command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = ".." ; configDir = "$rootDir$/Config" ; dataDir = "$rootDir$/Data" ;
outputDir = "$rootDir$/Output" ;
rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/01_OneHidden"
stderr = "$outputDir$/01_OneHidden_bs_out"
modelPath = "$outputDir$/Models/03_OneConvDropout"
#stderr = "$outputDir$/03_OneConvDropout_bs_out"
# TRAINING CONFIG
trainNetwork = {
@ -21,41 +21,42 @@ trainNetwork = {
imageShape = 28:28:1 # image dimensions, 1 channel only
labelDim = 10 # number of distinct labels
featScale = 1/256
Scale{f} = x => Constant(f) .* x
# This model returns multiple nodes as a record, which
# can be accessed using .x syntax.
model(x) = {
s1 = x * featScale
h1 = DenseLayer {200, activation=Sigmoid} (s1)
z = LinearLayer {labelDim} (h1)
}
model = Sequential (
Scale {featScale} :
ConvolutionalLayer {16, (5:5), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} : Dropout :
DenseLayer {64} : ReLU :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input (labelDim)
# apply model to features
out = model (features)
ol = model (features)
# loss and error computation
ce = CrossEntropyWithSoftmax (labels, out.z)
errs = ClassificationError (labels, out.z)
ce = CrossEntropyWithSoftmax (labels, ol)
errs = ClassificationError (labels, ol)
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (out.z)
outputNodes = (ol)
}
SGD = {
epochSize = 60000
minibatchSize = 64
maxEpochs = 30
learningRatesPerSample = 0.01*5:0.005
maxEpochs = 15
learningRatesPerSample = 0.001*5:0.0005
momentumAsTimeConstant = 0
dropoutRate = 0.5
numMBsToShowResult = 500
}

Просмотреть файл

@ -1,5 +1,5 @@
# Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile rootDir=../..
# for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>
@ -7,11 +7,11 @@ command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = ".." ; configDir = "$rootDir$/Config" ; dataDir = "$rootDir$/Data" ;
outputDir = "$rootDir$/Output" ;
rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/03_ConvBatchNorm"
stderr = "$outputDir$/03_ConvBatchNorm_bs_out"
modelPath = "$outputDir$/Models/04_OneConvBN"
#stderr = "$outputDir$/04_OneConvBN_bs_out"
# TRAINING CONFIG
trainNetwork = {
@ -24,20 +24,22 @@ trainNetwork = {
Scale{f} = x => Constant(f) .* x
# define a custom layer with 5x5 convolution, batch norm, relu and 2x2 max pooling
ConvBnReluPoolLayer {outChannels} = Sequential (
ConvolutionalLayer {outChannels, (5:5), pad=true, bias=false} :
ConvBnReluPoolLayer {outChannels, filterShape} = Sequential (
ConvolutionalLayer {outChannels, filterShape, pad=true, bias=false} :
BatchNormalizationLayer {spatialRank = 2} :
ReLU :
MaxPoolingLayer {(2:2), stride = (2:2)}
)
DenseBnReluLayer {outDim} = Sequential (
LinearLayer {outDim} :
BatchNormalizationLayer {spatialRank = 1} : ReLU
)
model = Sequential (
Scale {featScale} :
ConvBnReluPoolLayer {16} :
ConvBnReluPoolLayer {32} :
LinearLayer {128} :
BatchNormalizationLayer {} :
ReLU :
ConvBnReluPoolLayer {16, (5:5)} :
DenseBnReluLayer {64} :
LinearLayer {labelDim}
)
@ -63,10 +65,9 @@ trainNetwork = {
SGD = {
epochSize = 60000
minibatchSize = 64
maxEpochs = 3
learningRatesPerSample = 0.02:0.005
maxEpochs = 10
learningRatesPerSample = 0.01*5:0.001
momentumAsTimeConstant = 0
numMBsToShowResult = 500
}
@ -83,7 +84,7 @@ trainNetwork = {
# TEST CONFIG
testNetwork = {
action = test
action = "test"
minibatchSize = 1024 # reduce this if you run out of memory
reader = {

Просмотреть файл

@ -0,0 +1,86 @@
# Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>
command = trainNetwork:testNetwork
precision = "float"; traceLevel = 1 ; deviceId = "auto"
rootDir = ".." ; dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;
modelPath = "$outputDir$/Models/05_OneConvRegr"
#stderr = "$outputDir$/05_OneConvRegr_bs_out"
# TRAINING CONFIG
# Trains a one-convolution-layer network on MNIST with a squared-error based
# criterion instead of cross entropy.
trainNetwork = {
action = "train"
BrainScriptNetworkBuilder = {
imageShape = 28:28:1 # image dimensions, 1 channel only
labelDim = 10 # number of distinct labels
featScale = 1/256
# element-wise multiplication of the input by the constant f
Scale{f} = x => Constant(f) .* x
model = Sequential (
Scale {featScale} :
ConvolutionalLayer {16, (5:5), pad = true} : ReLU :
MaxPoolingLayer {(2:2), stride=(2:2)} :
DenseLayer {64} : ReLU :
LinearLayer {labelDim}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
z = model (features)
# loss and error computation
sqErr = SquareError (labels, z)
# square root of the squared error divided by the number of label dimensions
rmse = Sqrt (sqErr / labelDim)
# declare special nodes
featureNodes = (features)
labelNodes = (labels)
# rmse serves as both the training criterion and the evaluation metric
criterionNodes = (rmse)
evaluationNodes = (rmse)
outputNodes = (z)
}
SGD = {
# NOTE(review): 0 presumably means "one full pass over the data per epoch" -- confirm against the SGD block docs
epochSize = 0
minibatchSize = 64
maxEpochs = 15
learningRatesPerSample = 0.001*5:0.0005
momentumAsTimeConstant = 1024
numMBsToShowResult = 500
}
reader = {
readerType = "CNTKTextFormatReader"
# See ../README.md for details on getting the data (Train-28x28_cntk_text.txt).
file = "$DataDir$/Train-28x28_cntk_text.txt"
# stream names match the network inputs declared above; 784 = 28 * 28 * 1
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}
# TEST CONFIG
# Runs the "test" action on the MNIST test file. No network is defined in this
# section; presumably the model written to modelPath by trainNetwork is
# evaluated -- TODO confirm.
testNetwork = {
action = "test"
minibatchSize = 1024 # reduce this if you run out of memory
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Test-28x28_cntk_text.txt"
# same stream layout as the training reader in this file
input = {
features = { dim = 784 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}

Просмотреть файл

@ -0,0 +1,103 @@
# CNTK Examples: Image/Getting Started
## Overview
|Data: |The MNIST dataset (http://yann.lecun.com/exdb/mnist/) of handwritten digits.
|:---------|:---
|Purpose |This folder contains a number of examples that demonstrate the usage of BrainScript to define basic networks for deep learning on image tasks.
|Network |Simple feed-forward networks including dense layers, convolution layers, drop out and batch normalization for classification and regression tasks.
|Training |Stochastic gradient descent both with and without momentum.
|Comments |There are five configuration files, details are provided below.
## Running the example
### Getting the data
These examples use the MNIST dataset to demonstrate various network configurations. The MNIST dataset is not included in the CNTK distribution but can be easily downloaded and converted by following the instructions in [DataSets/MNIST](../DataSets/MNIST). We recommend keeping the downloaded data in that folder, as the configuration files in this folder assume that location by default.
### Setup
Compile the sources to generate the cntk executable (not required if you downloaded the binaries).
__Windows:__ Add the folder of the cntk executable to your path
(e.g. `set PATH=%PATH%;c:/src/cntk/x64/Release/;`)
or prefix the call to the cntk executable with the corresponding folder.
__Linux:__ Add the folder of the cntk executable to your path
(e.g. `export PATH=$PATH:$HOME/src/cntk/build/Release/bin/`)
or prefix the call to the cntk executable with the corresponding folder.
### Run
Run the example from the current folder (recommended) using:
`cntk configFile=01_OneHidden.cntk`
or run from any folder and specify the `GettingStarted` folder as the `currentDirectory`,
e.g. running from the `Image` folder using:
`cntk configFile=GettingStarted/01_OneHidden.cntk currentDirectory=GettingStarted`
An Output folder will be created in the `Image/GettingStarted` folder, which is used to store intermediate results and trained models.
## Details
There are five cntk configuration files in the current folder. These cntk configuration files use BrainScript, a custom script language for CNTK. To learn more about BrainScript, please follow the introduction of [BrainScript Basic Concepts](https://github.com/Microsoft/CNTK/wiki/BS-Basic-concepts).
### 01_OneHidden.cntk
This is a simple network with one hidden layer that achieves an error rate of `1.76%`. Since this model does not assume any spatial relationships between the pixels, it is often referred to as "permutation invariant".
To run this example, use the following command:
`cntk configFile=01_OneHidden.cntk`
In this example, the MNIST images are first normalized to the range `[0,1)`, followed by a single dense hidden layer with 200 nodes. A [rectified linear unit (ReLU)](http://machinelearning.wustl.edu/mlpapers/paper_files/icml2010_NairH10.pdf) activation function is added for nonlinearity. Afterwards, another dense linear layer is added to generate the output label. The training adopts cross entropy as the cost function after softmax.
In the `SGD` block, `learningRatesPerSample = 0.01*5:0.005` indicates using 0.01 as learning rate per sample for 5 epochs and then 0.005 for the rest. More details about the SGD block are explained [here](https://github.com/Microsoft/CNTK/wiki/SGD-Block).
The MNIST data is loaded with a simple CNTK text format reader. The train and test datasets are converted by running the Python script in [DataSets/MNIST](../DataSets/MNIST). For more information on the reader block, please refer [here](https://github.com/Microsoft/CNTK/wiki/Reader-block).
### 02_OneConv.cntk
In the second example, we add a convolution layer to the network. Convolution layers were inspired by biological processes and have been extremely popular in image-related tasks, where neighboring pixels are highly correlated. One of the earliest papers on convolutional neural networks can be found [here](http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf).
To run this example, use the following command:
`cntk configFile=02_OneConv.cntk`
After normalization, a convolution layer with `16` kernels at size `(5,5)` is added, followed by a ReLU nonlinearity. Then, we perform max pooling on the output feature map, with size `(2,2)` and stride `(2,2)`. A dense layer of 64 hidden nodes is then added, followed by another ReLU, and another dense layer to generate the output. This network achieves `1.22%` error rate, which is better than the previous network.
In practice, one would be stacking multiple convolution layers to improve classification accuracy. State-of-the-art convolution neural networks can achieve lower than 0.5% error rate on MNIST. Interested readers can find more examples in [Classification/ConvNet](../Classification/ConvNet).
### 03_OneConvDropout.cntk
In the third example, we demonstrate the use of dropout layers. Dropout is a network regularization technique that helps combat overfitting, in particular when the network contains many parameters. Dropout, together with ReLU activation, is one of the two key techniques that enabled Alex Krizhevsky, Ilya Sutskever, and Geoffrey Hinton to win the ILSVRC-2012 competition, which has arguably changed the course of computer vision research. Their paper can be found [here](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf).
To run this example, use the following command:
`cntk configFile=03_OneConvDropout.cntk`
Compared with the previous example, we added a dropout layer after max pooling. Dropout can also be added after dense layer if needed. The dropout rate is specified in the SGD block, as `dropoutRate = 0.5`.
With dropout, the accuracy of the network improves slightly to `1.10%` error rate.
### 04_OneConvBN.cntk
In the fourth example, we add [batch normalization](https://arxiv.org/abs/1502.03167) to the network. Batch normalization was designed to address the internal covariate shift problem caused by input and parameter changes during training. The technique has been proven to be very useful in training very deep and complicated networks.
In this example, we simply added a batch normalization layer to the `02_OneConv.cntk` network. To run this example, use the following command:
`cntk configFile=04_OneConvBN.cntk`
The network achieves around `0.96%` error rate, which is better than the previous examples. Due to the small training dataset and the extremely simple network, we have to stop the training early (10 epochs) in order to avoid overfitting.
This cntk configuration file also demonstrates the use of custom layer definitions in BrainScript. Note that `ConvBnReluPoolLayer` and `DenseBnReluLayer` are both custom layers that combine several basic layer types.
### 05_OneConvRegr.cntk
In the fifth example, we show how CNTK can be used to perform a regression task. To simplify our task and not introduce any new datasets, we treat the digit labels of MNIST as a regression target rather than a classification target. We then reuse the same network architecture as in `02_OneConv`, only replacing the cost function with squared error. To run this example, use the following command:
`cntk configFile=05_OneConvRegr.cntk`
The trained network achieves root-mean-square error (RMSE) of 0.0039. To see more sophisticated examples on regression tasks, please refer to [Regression](../Regression).

Просмотреть файл

@ -1,77 +0,0 @@
import sys
import urllib
import gzip
import shutil
import os
import struct
import numpy as np
def loadData(src, cimg):
    """Download a gzip-compressed MNIST image file and return its pixels.

    Args:
        src: URL of the gzip-compressed image file.
        cimg: Number of images the file is expected to contain.

    Returns:
        A uint8 array of shape (cimg, 784): one flattened 28x28 image per row.

    Raises:
        Exception: If the magic number, the entry count or the image size in
            the file header is not the expected one.
    """
    print ('Downloading ' + src)
    gzfname, h = urllib.urlretrieve(src, './delete.me')
    print ('Done.')
    try:
        with gzip.open(gzfname) as gz:
            # Read magic number. All header fields are big-endian, so read the
            # magic number that way too; a native-order read only matches on
            # little-endian hosts.
            magic = struct.unpack('>I', gz.read(4))[0]
            if magic != 0x00000803:
                raise Exception('Invalid file: unexpected magic number.')
            # Read number of entries.
            n = struct.unpack('>I', gz.read(4))[0]
            if n != cimg:
                raise Exception('Invalid file: expected {0} entries.'.format(cimg))
            crow = struct.unpack('>I', gz.read(4))[0]
            ccol = struct.unpack('>I', gz.read(4))[0]
            if crow != 28 or ccol != 28:
                raise Exception('Invalid file: expected 28 rows/cols per image.')
            # Read data. frombuffer replaces the deprecated binary mode of
            # np.fromstring; it yields a read-only view, which is sufficient
            # because load() stacks the result into a new array.
            res = np.frombuffer(gz.read(cimg * crow * ccol), dtype = np.uint8)
    finally:
        # The temporary download is removed whether or not parsing succeeded.
        os.remove(gzfname)
    return res.reshape((cimg, crow * ccol))
def loadLabels(src, cimg):
    """Download a gzip-compressed MNIST label file and return its labels.

    Args:
        src: URL of the gzip-compressed label file.
        cimg: Number of labels the file is expected to contain.

    Returns:
        A uint8 array of shape (cimg, 1) holding one label per row.

    Raises:
        Exception: If the magic number or the entry count in the file header
            is not the expected one.
    """
    # print() calls (as in loadData) rather than Python-2-only print statements.
    print ('Downloading ' + src)
    gzfname, h = urllib.urlretrieve(src, './delete.me')
    print ('Done.')
    try:
        with gzip.open(gzfname) as gz:
            # Read magic number. Header fields are big-endian; a native-order
            # read only matches on little-endian hosts.
            magic = struct.unpack('>I', gz.read(4))[0]
            if magic != 0x00000801:
                raise Exception('Invalid file: unexpected magic number.')
            # Read number of entries.
            n = struct.unpack('>I', gz.read(4))[0]
            if n != cimg:
                raise Exception('Invalid file: expected {0} rows.'.format(cimg))
            # Read labels. frombuffer replaces the deprecated binary mode of
            # np.fromstring; the read-only view is sufficient because load()
            # stacks the result into a new array.
            res = np.frombuffer(gz.read(cimg), dtype = np.uint8)
    finally:
        # The temporary download is removed whether or not parsing succeeded.
        os.remove(gzfname)
    return res.reshape((cimg, 1))
def load(dataSrc, labelsSrc, cimg):
    """Fetch images and labels and join them column-wise.

    Returns an array with one row per sample: the pixel columns produced by
    loadData followed by the single label column produced by loadLabels.
    """
    # Images are fetched first, then labels, as the two downloads share one temp file name.
    columns = (loadData(dataSrc, cimg), loadLabels(labelsSrc, cimg))
    return np.hstack(columns)
def savetxt(filename, ndarray):
    """Write samples as lines of '|labels <one-hot> |features <pixels>'.

    Args:
        filename: Path of the text file to create (overwritten if present).
        ndarray: 2-D integer array; the last column of each row is the label
            (0..9), all preceding columns are the feature values.
    """
    # One space-separated one-hot string per digit. Built as a list because
    # the rows are indexed below, and map() is not indexable on Python 3.
    labels = [' '.join(onehot) for onehot in np.eye(10, dtype=np.uint).astype(str)]
    with open(filename, 'w') as f:
        for row in ndarray:
            row_str = row.astype(str)
            label_str = labels[row[-1]]
            feature_str = ' '.join(row_str[:-1])
            f.write('|labels {} |features {}\n'.format(label_str, feature_str))
if __name__ == "__main__":
    # Download and convert the training set (60000 samples), then the test
    # set (10000 samples). print() calls are used, as in loadData, instead of
    # Python-2-only print statements.
    train = load('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
                 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', 60000)
    print ('Writing train text file...')
    savetxt(r'./../Data/Train-28x28_cntk_text.txt', train)
    print ('Done.')
    test = load('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
                'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', 10000)
    print ('Writing test text file...')
    savetxt(r'./../Data/Test-28x28_cntk_text.txt', test)
    print ('Done.')

Просмотреть файл

@ -1,10 +0,0 @@
0
1
2
3
4
5
6
7
8
9

Просмотреть файл

@ -1,85 +0,0 @@
# CNTK example: MNIST
## Overview
|Data: |The MNIST database (http://yann.lecun.com/exdb/mnist/) of handwritten digits.
|:---------|:---
|Purpose |This example demonstrates usage of the NDL (Network Description Language) to define networks.
|Network |NDLNetworkBuilder, simple feed forward and convolutional networks, cross entropy with softmax.
|Training |Stochastic gradient descent both with and without momentum.
|Comments |There are four config files, details are provided below.
## Running the example
### Getting the data
The MNIST dataset is not included in the CNTK distribution but can be easily
downloaded and converted by running the following command from the 'AdditionalFiles' folder:
`python mnist_convert.py`
The script will download all required files and convert them to CNTK-supported format.
The resulting files (Train-28x28_cntk_text.txt and Test-28x28_cntk_text.txt) will be stored in the 'Data' folder.
In case you don't have Python installed, there are 2 options:
1. Download and install latest version of Python 2.7 from: https://www.python.org/downloads/
Then install the numpy package by following instruction from: http://www.scipy.org/install.html#individual-packages
2. Alternatively install the Python Anaconda distribution which contains most of the
popular Python packages including numpy: http://continuum.io/downloads
### Setup
Compile the sources to generate the cntk executable (not required if you downloaded the binaries).
__Windows:__ Add the folder of the cntk executable to your path
(e.g. `set PATH=%PATH%;c:/src/cntk/x64/Debug/;`)
or prefix the call to the cntk executable with the corresponding folder.
__Linux:__ Add the folder of the cntk executable to your path
(e.g. `export PATH=$PATH:$HOME/src/cntk/build/debug/bin/`)
or prefix the call to the cntk executable with the corresponding folder.
### Run
Run the example from the Image/MNIST/Data folder using:
`cntk configFile=../Config/01_OneHidden_ndl_deprecated.cntk`
or run from any folder and specify the Data folder as the `currentDirectory`,
e.g. running from the Image/MNIST folder using:
`cntk configFile=Config/01_OneHidden_ndl_deprecated.cntk currentDirectory=Data`
The output folder will be created inside Image/MNIST/.
## Details
### Config files
There are four config files and the corresponding network description files in the 'Config' folder:
1. 01_OneHidden.ndl is a simple, one hidden layer network that produces 2.3% of error.
To run the sample, navigate to the Data folder and run the following command:
`cntk configFile=../Config/01_OneHidden_ndl_deprecated.cntk`
2. 02_Convolution.ndl is a more interesting convolutional network, which has 2 convolutional and 2 max pooling layers.
The network produces 0.87% of error after training for about 2 minutes on GPU.
To run the sample, navigate to the Data folder and run the following command:
`cntk configFile=../Config/02_Convolution_ndl_deprecated.cntk`
3. 03_ConvBatchNorm.ndl is almost identical to 02_Convolution.ndl
except that it uses batch normalization for the convolutional and fully connected layers.
As a result, it achieves around 0.8% of error after training for just 2 epochs (and less than 30 seconds).
To run the sample, navigate to the Data folder and run the following command:
`cntk configFile=../Config/03_ConvBatchNorm_ndl_deprecated.cntk`
4. 04_DeConv.ndl illustrates the usage of Deconvolution and Unpooling. It is a network with one Convolution, one Pooling, one Unpooling and one Deconvolution layer. In fact it is an auto-encoder network where Rectified Linear Unit (ReLU) or Sigmoid layer is now replaced with Convolutional ReLU (for encoding) and Deconvolutional ReLU (for decoding) layers. The network goal is to reconstruct the original signal, with Mean Squared Error (MSE) used to minimize the reconstruction error. Generally such networks are used in semantic segmentation.
To run the sample, navigate to the Data folder and run the following command:
`cntk configFile=../Config/04_DeConv_ndl_deprecated.cntk`
For more details, refer to .ndl and the corresponding .cntk files.
### Additional files
The 'AdditionalFiles' folder contains the python script to download and convert the data.

Просмотреть файл

@ -1,80 +0,0 @@
import os
import sys
import struct
import cPickle as cp
from PIL import Image
import numpy as np
import xml.etree.cElementTree as et
import xml.dom.minidom
imgSize = 32
def saveImage(fname, data, label, mapFile, regrFile, pad, **key_parms):
    """Save one CIFAR-10 sample as an image and record it in both output files.

    Args:
        fname: Path of the image file to write.
        data: Flat pixel array of one sample; reshaped to (3, imgSize, imgSize).
        label: Integer class label, written to the map file.
        mapFile: Open text file receiving "<fname>\t<label>" lines.
        regrFile: Open text file receiving the per-channel means as regression labels.
        pad: Border width added on every side; the border is filled with 128.
        **key_parms: If 'mean' is given, the unpadded pixels are added to it in place.
    """
    # CIFAR-10 stores each sample channel-first (CHW).
    chw = data.reshape((3, imgSize, imgSize))
    if 'mean' in key_parms:
        key_parms['mean'] += chw
    if pad > 0:
        border = ((0, 0), (pad, pad), (pad, pad))
        chw = np.pad(chw, border, mode='constant', constant_values=128) # can also use mode='edge'
    side = imgSize + 2 * pad
    img = Image.new('RGB', (side, side))
    px = img.load()
    red, green, blue = chw[0], chw[1], chw[2]
    for col in range(side):
        for row in range(side):
            px[col, row] = (red[row][col], green[row][col], blue[row][col])
    img.save(fname)
    mapFile.write("%s\t%d\n" % (fname, label))
    # Per-channel mean for the regression example; taken after padding, so any
    # border pixels contribute to it.
    means = np.mean(chw, axis=(1,2))
    regrFile.write("|regrLabels\t%f\t%f\t%f\n" % (means[0]/255.0, means[1]/255.0, means[2]/255.0))
def saveMean(fname, data):
    """Write the mean image as a pretty-printed 'opencv_storage' XML file.

    Args:
        fname: Path of the XML file to write.
        data: Mean image holding imgSize * imgSize * 3 values; it is flattened
            and written as one space-separated row in '%e' format.
    """
    numValues = imgSize * imgSize * 3
    root = et.Element('opencv_storage')
    for tag, text in (('Channel', '3'), ('Row', str(imgSize)), ('Col', str(imgSize))):
        et.SubElement(root, tag).text = text
    meanImg = et.SubElement(root, 'MeanImg', type_id='opencv-matrix')
    et.SubElement(meanImg, 'rows').text = '1'
    et.SubElement(meanImg, 'cols').text = str(numValues)
    et.SubElement(meanImg, 'dt').text = 'f'
    flat = np.reshape(data, (numValues))
    et.SubElement(meanImg, 'data').text = ' '.join(['%e' % value for value in flat])
    et.ElementTree(root).write(fname)
    # Read the file back and overwrite it with an indented rendering.
    dom = xml.dom.minidom.parse(fname)
    with open(fname, 'w') as out:
        out.write(dom.toprettyxml(indent = ' '))
if __name__ == "__main__":
    # Converts the pickled CIFAR-10 batches under the given directory into
    # image files, map files, regression-label files and a mean-image XML.
    if len(sys.argv) != 2:
        print("Usage: CifarConverter.py <path to CIFAR-10 dataset directory>\nCIFAR-10 dataset (Python version) can be downloaded from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")
        sys.exit(1)
    rootDir = sys.argv[1]
    trainDir = os.path.join(rootDir, 'data', 'train')
    testDir = os.path.join(rootDir, 'data', 'test')
    for outDir in (trainDir, testDir):
        if not os.path.exists(outDir):
            os.makedirs(outDir)
    data = {}
    # Running sum of the training images, in CHW layout; saveImage adds to it in place.
    dataMean = np.zeros((3, imgSize, imgSize))
    with open(os.path.join(rootDir, 'train_map.txt'), 'w') as mapFile, \
         open(os.path.join(rootDir, 'train_regrLabels.txt'), 'w') as regrFile:
        for ifile in range(1, 6):
            with open(os.path.join(rootDir, 'data_batch_' + str(ifile)), 'rb') as f:
                data = cp.load(f)
            firstIndex = (ifile - 1) * 10000
            for i in range(10000):
                fname = os.path.join(trainDir, ('%05d.png' % (i + firstIndex)))
                # Training images are padded by 4 pixels and feed the mean image.
                saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, regrFile, 4, mean=dataMean)
    # Divide the accumulated sum by the 5 * 10000 training images.
    dataMean = dataMean / (50 * 1000)
    saveMean(os.path.join(rootDir, 'CIFAR-10_mean.xml'), dataMean)
    with open(os.path.join(rootDir, 'test_map.txt'), 'w') as mapFile, \
         open(os.path.join(rootDir, 'test_regrLabels.txt'), 'w') as regrFile:
        with open(os.path.join(rootDir, 'test_batch'), 'rb') as f:
            data = cp.load(f)
        for i in range(10000):
            fname = os.path.join(testDir, ('%05d.png' % i))
            # Test images are written unpadded and do not touch the mean.
            saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, regrFile, 0)

Просмотреть файл

@ -1,73 +0,0 @@
import os
import sys
import struct
import pickle as cp
from PIL import Image
import numpy as np
import xml.etree.cElementTree as et
import xml.dom.minidom
imgSize = 32
def saveImage(fname, data, label, mapFile, pad, **key_parms):
    """Save one CIFAR-10 sample as an image and record it in the map file.

    Args:
        fname: Path of the image file to write.
        data: Flat pixel array of one sample; reshaped to (3, imgSize, imgSize).
        label: Integer class label, written to the map file.
        mapFile: Open text file receiving "<fname>\t<label>" lines.
        pad: Border width added on every side; the border is filled with 128.
        **key_parms: If 'mean' is given, the unpadded pixels are added to it in place.
    """
    # CIFAR-10 stores each sample channel-first (CHW).
    chw = data.reshape((3, imgSize, imgSize))
    if 'mean' in key_parms:
        key_parms['mean'] += chw
    if pad > 0:
        border = ((0, 0), (pad, pad), (pad, pad))
        chw = np.pad(chw, border, mode='constant', constant_values=128) # can also use mode='edge'
    side = imgSize + 2 * pad
    img = Image.new('RGB', (side, side))
    px = img.load()
    red, green, blue = chw[0], chw[1], chw[2]
    for col in range(side):
        for row in range(side):
            px[col, row] = (red[row][col], green[row][col], blue[row][col])
    img.save(fname)
    mapFile.write("%s\t%d\n" % (fname, label))
def saveMean(fname, data):
    """Write the mean image as a pretty-printed 'opencv_storage' XML file.

    Args:
        fname: Path of the XML file to write.
        data: Mean image holding imgSize * imgSize * 3 values; it is flattened
            and written as one space-separated row in '%e' format.
    """
    numValues = imgSize * imgSize * 3
    root = et.Element('opencv_storage')
    for tag, text in (('Channel', '3'), ('Row', str(imgSize)), ('Col', str(imgSize))):
        et.SubElement(root, tag).text = text
    meanImg = et.SubElement(root, 'MeanImg', type_id='opencv-matrix')
    et.SubElement(meanImg, 'rows').text = '1'
    et.SubElement(meanImg, 'cols').text = str(numValues)
    et.SubElement(meanImg, 'dt').text = 'f'
    flat = np.reshape(data, (numValues))
    et.SubElement(meanImg, 'data').text = ' '.join(['%e' % value for value in flat])
    et.ElementTree(root).write(fname)
    # Read the file back and overwrite it with an indented rendering.
    dom = xml.dom.minidom.parse(fname)
    with open(fname, 'w') as out:
        out.write(dom.toprettyxml(indent = ' '))
if __name__ == "__main__":
    # Converts the pickled CIFAR-10 batches under the given directory into
    # image files, map files and a mean-image XML.
    if len(sys.argv) != 2:
        print ("Usage: CifarConverter.py <path to CIFAR-10 dataset directory>\nCIFAR-10 dataset (Python version) can be downloaded from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")
        sys.exit(1)
    rootDir = sys.argv[1]
    trainDir = os.path.join(rootDir, 'data', 'train')
    testDir = os.path.join(rootDir, 'data', 'test')
    for outDir in (trainDir, testDir):
        if not os.path.exists(outDir):
            os.makedirs(outDir)
    data = {}
    # Running sum of the training images, in CHW layout; saveImage adds to it in place.
    dataMean = np.zeros((3, imgSize, imgSize))
    with open(os.path.join(rootDir, 'train_map.txt'), 'w') as mapFile:
        for ifile in range(1, 6):
            with open(os.path.join(rootDir, 'data_batch_' + str(ifile)), 'rb') as f:
                data = cp.load(f, encoding='latin1')
            firstIndex = (ifile - 1) * 10000
            for i in range(10000):
                fname = os.path.join(trainDir, ('%05d.png' % (i + firstIndex)))
                # Training images are padded by 4 pixels and feed the mean image.
                saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, 4, mean=dataMean)
    # Divide the accumulated sum by the 5 * 10000 training images.
    dataMean = dataMean / (50 * 1000)
    saveMean(os.path.join(rootDir, 'CIFAR-10_mean.xml'), dataMean)
    with open(os.path.join(rootDir, 'test_map.txt'), 'w') as mapFile:
        with open(os.path.join(rootDir, 'test_batch'), 'rb') as f:
            data = cp.load(f, encoding='latin1')
        for i in range(10000):
            fname = os.path.join(testDir, ('%05d.png' % i))
            # Test images are written unpadded and do not touch the mean.
            saveImage(fname, data['data'][i, :], data['labels'][i], mapFile, 0)

Просмотреть файл

@ -1,105 +0,0 @@
import sys
import urllib
import tarfile
import shutil
import os
import struct
import numpy as np
import cPickle as cp
import getopt
# Side length, in pixels, of the square images handled by this script.
ImgSize = 32
# Number of feature values per image: ImgSize x ImgSize pixels times 3 colour planes (R, G, B).
NumFeat = ImgSize * ImgSize * 3
def readBatch(src, outFmt):
    """Load one pickled batch file and return it as a single integer matrix.

    src    -- path of the pickled batch; the unpickled object must provide
              'data' (one row per image) and 'labels' (one entry per image).
    outFmt -- 'cudnn' keeps each row as stored (R plane, G plane, B plane);
              'legacy' interleaves the planes as R0,G0,B0,R1,G1,B1,...
              Any other value prints a message plus the usage text and exits
              with status 1.

    Returns an integer array whose rows are the image features followed by
    the label in the last column.
    """
    with open(src, 'rb') as f:
        batch = cp.load(f)

    # Note: most of the frameworks use spatial-major (aka NCHW) input format:
    # R0..RN,G0..GN,B0..BN
    # There are 2 possible options in CNTK:
    # 1. If CNTK is built with cuDNN then 'cudnn' (i.e. NCHW format) should be used.
    # 2. Otherwise, legacy CNTK 'NHWC' format should be used. As CIFAR-10 dataset comes in
    #    NCHW format, it has to be converted to CNTK legacy format first.
    pixels = batch['data']
    if outFmt == 'cudnn':
        feat = pixels
    elif outFmt == 'legacy':
        plane = ImgSize * ImgSize
        feat = np.empty_like(pixels)
        feat[:, ::3] = pixels[:, : plane]
        feat[:, 1::3] = pixels[:, plane : 2 * plane]
        feat[:, 2::3] = pixels[:, 2 * plane : 3 * plane]
    else:
        print ('Format not supported: ' + outFmt)
        usage()
        sys.exit(1)

    # Append the labels as one extra column.
    labelColumn = np.reshape(batch['labels'], (len(batch['labels']), 1))
    return np.hstack((feat, labelColumn)).astype(int)
def loadData(src, outFmt):
    """Download the dataset archive, unpack it and build the train/test matrices.

    src    -- URL of the tar archive to download.
    outFmt -- output layout, passed through to readBatch.

    The archive is saved as './delete.me', extracted into the current
    directory and removed again even if extraction or conversion fails.

    Returns a (train, test) tuple of integer arrays as produced by readBatch;
    the train array is the five data_batch_N files stacked in order.
    """
    print ('Downloading ' + src)
    fname, _ = urllib.urlretrieve(src, './delete.me')
    print ('Done.')
    try:
        print ('Extracting files...')
        with tarfile.open(fname) as tar:
            # NOTE(review): extractall() trusts the member paths of a freshly
            # downloaded archive; a malicious archive could write outside the
            # current directory. Consider validating members before extracting.
            tar.extractall()
        print ('Done.')
        print ('Preparing train set...')
        # Read all batches first and stack once, instead of re-copying the
        # growing array on every iteration.
        batches = [readBatch('./cifar-10-batches-py/data_batch_{0}'.format(i + 1), outFmt)
                   for i in range(5)]
        trn = np.vstack(batches)
        print ('Done.')
        print ('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch', outFmt)
        print ('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
def usage():
    """Print the command-line help text for this script."""
    helpText = 'Usage: CifarDownload.py [-f <format>] \n where format can be either cudnn or legacy. Default is cudnn.'
    print (helpText)
def parseCmdOpt(argv):
    """Parse the command-line arguments and return the requested output format.

    argv -- argument list without the program name.

    Recognised options:
      -h / --help             print the usage text and exit.
      -f / --outFormat <fmt>  'cudnn' or 'legacy'; anything else prints an
                              error plus the usage text and exits with status 1.

    Returns 'cudnn' or 'legacy'. When no format option is present (empty
    argv, or only arguments that are not options) the default 'cudnn' is
    returned; previously that case left the result unset and the script
    failed later.
    """
    if len(argv) == 0:
        print ("Using cudnn output format.")
        return "cudnn"
    try:
        opts, args = getopt.getopt(argv, 'hf:', ['help', 'outFormat='])
    except getopt.GetoptError:
        usage()
        sys.exit(1)

    fmt = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit()
        elif opt in ('-f', '--outFormat'):
            fmt = arg
            if fmt != 'cudnn' and fmt != 'legacy':
                print ('Invalid output format option.')
                usage()
                sys.exit(1)

    if fmt is None:
        # No -f/--outFormat was supplied: fall back to the documented default.
        print ("Using cudnn output format.")
        fmt = "cudnn"
    return fmt
def savetxt(filename, ndarray):
    """Write `ndarray` to `filename`, one '|labels ... |features ...' line per row.

    The last column of each row is the class index (0-9) and is written as a
    10-element one-hot vector; the remaining columns are written as
    space-separated feature values.
    """
    # Pre-build the ten possible one-hot label strings ("1 0 0 ...", "0 1 0 ...", ...).
    oneHot = [' '.join(bits) for bits in np.eye(10, dtype=np.uint).astype(str)]
    with open(filename, 'w') as out:
        for row in ndarray:
            features = ' '.join(row[:-1].astype(str))
            out.write('|labels {} |features {}\n'.format(oneHot[row[-1]], features))
if __name__ == "__main__":
    # Script entry point: pick the output format, download and convert the
    # dataset, then write the train and test sets as text files.
    fmt = parseCmdOpt(sys.argv[1:])
    trn, tst = loadData('http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz', fmt)
    outputs = (
        ('Writing train text file...', r'./Train_cntk_text.txt', trn),
        ('Writing test text file...', r'./Test_cntk_text.txt', tst),
    )
    for message, path, matrix in outputs:
        print (message)
        savetxt(path, matrix)
        print ('Done.')

Просмотреть файл

@ -1,105 +0,0 @@
import sys
import urllib.request as ul
import pickle as cp
import tarfile
import shutil
import os
import struct
import numpy as np
import getopt
# Side length, in pixels, of the square images handled by this script.
ImgSize = 32
# Number of feature values per image: ImgSize x ImgSize pixels times 3 colour planes (R, G, B).
NumFeat = ImgSize * ImgSize * 3
def readBatch(src, outFmt):
    """Load one pickled batch file and return it as a single integer matrix.

    src    -- path of the pickled batch; the unpickled object must provide
              'data' (one row per image) and 'labels' (one entry per image).
    outFmt -- 'cudnn' keeps each row as stored (R plane, G plane, B plane);
              'legacy' interleaves the planes as R0,G0,B0,R1,G1,B1,...
              Any other value prints a message plus the usage text and exits
              with status 1.

    Returns an integer array whose rows are the image features followed by
    the label in the last column.
    """
    with open(src, 'rb') as f:
        # NOTE(review): unpickling can execute arbitrary code; only use this on
        # batch files obtained from a trusted source.
        d = cp.load(f, encoding="latin1")

    # Note: most of the frameworks use spatial-major (aka NCHW) input format:
    # R0..RN,G0..GN,B0..BN
    # There are 2 possible options in CNTK:
    # 1. If CNTK is built with cuDNN then 'cudnn' (i.e. NCHW format) should be used.
    # 2. Otherwise, legacy CNTK 'NHWC' format should be used. As CIFAR-10 dataset comes in
    #    NCHW format, it has to be converted to CNTK legacy format first.
    data = d['data']
    if outFmt == 'cudnn':
        feat = data
    elif outFmt == 'legacy':
        plane = ImgSize * ImgSize
        feat = np.empty_like(data)
        feat[:, ::3] = data[:, : plane]
        feat[:, 1::3] = data[:, plane : 2 * plane]
        feat[:, 2::3] = data[:, 2 * plane : 3 * plane]
    else:
        print ('Format not supported: ' + outFmt)
        usage()
        sys.exit(1)

    # Append the labels as one extra column.
    labels = np.reshape(d['labels'], (len(d['labels']), 1))
    # np.int was only an alias of the builtin int and no longer exists in
    # current NumPy releases; the builtin gives the same dtype.
    return np.hstack((feat, labels)).astype(int)
def loadData(src, outFmt):
    """Download the dataset archive, unpack it and build the train/test matrices.

    src    -- URL of the tar archive to download.
    outFmt -- output layout, passed through to readBatch.

    The archive is saved as './delete.me', extracted into the current
    directory and removed again even if extraction or conversion fails.

    Returns a (train, test) tuple of integer arrays as produced by readBatch;
    the train array is the five data_batch_N files stacked in order.
    """
    print ('Downloading ' + src)
    fname, _ = ul.urlretrieve(src, './delete.me')
    print ('Done.')
    try:
        print ('Extracting files...')
        with tarfile.open(fname) as tar:
            # NOTE(review): extractall() trusts the member paths of a freshly
            # downloaded archive; a malicious archive could write outside the
            # current directory. Consider validating members (or using an
            # extraction filter where available) before extracting.
            tar.extractall()
        print ('Done.')
        print ('Preparing train set...')
        # Read all batches first and stack once. This avoids re-copying the
        # growing array on every iteration and no longer needs the removed
        # np.int alias for an empty seed array.
        batches = [readBatch('./cifar-10-batches-py/data_batch_{0}'.format(i + 1), outFmt)
                   for i in range(5)]
        trn = np.vstack(batches)
        print ('Done.')
        print ('Preparing test set...')
        tst = readBatch('./cifar-10-batches-py/test_batch', outFmt)
        print ('Done.')
    finally:
        os.remove(fname)
    return (trn, tst)
def usage():
    """Print the command-line help text for this script."""
    helpText = 'Usage: CifarDownload_py3.py [-f <format>] \n where format can be either cudnn or legacy. Default is cudnn.'
    print (helpText)
def parseCmdOpt(argv):
    """Parse the command-line arguments and return the requested output format.

    argv -- argument list without the program name.

    Recognised options:
      -h / --help             print the usage text and exit.
      -f / --outFormat <fmt>  'cudnn' or 'legacy'; anything else prints an
                              error plus the usage text and exits with status 1.

    Returns 'cudnn' or 'legacy'. When no format option is present (empty
    argv, or only arguments that are not options) the default 'cudnn' is
    returned; previously that case left the result unset and the script
    failed later.
    """
    if len(argv) == 0:
        print ("Using cudnn output format.")
        return "cudnn"
    try:
        opts, args = getopt.getopt(argv, 'hf:', ['help', 'outFormat='])
    except getopt.GetoptError:
        usage()
        sys.exit(1)

    fmt = None
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage()
            sys.exit()
        elif opt in ('-f', '--outFormat'):
            fmt = arg
            if fmt != 'cudnn' and fmt != 'legacy':
                print ('Invalid output format option.')
                usage()
                sys.exit(1)

    if fmt is None:
        # No -f/--outFormat was supplied: fall back to the documented default.
        print ("Using cudnn output format.")
        fmt = "cudnn"
    return fmt
def savetxt(filename, ndarray):
    """Write `ndarray` to `filename`, one '|labels ... |features ...' line per row.

    The last column of each row is the class index (0-9) and is written as a
    10-element one-hot vector; the remaining columns are written as
    space-separated feature values.
    """
    # Pre-build the ten possible one-hot label strings ("1 0 0 ...", "0 1 0 ...", ...).
    oneHot = [' '.join(bits) for bits in np.eye(10, dtype=np.uint).astype(str)]
    with open(filename, 'w') as out:
        for row in ndarray:
            features = ' '.join(row[:-1].astype(str))
            out.write('|labels {} |features {}\n'.format(oneHot[row[-1]], features))
if __name__ == "__main__":
    # Script entry point: pick the output format, download and convert the
    # dataset, then write the train and test sets as text files.
    fmt = parseCmdOpt(sys.argv[1:])
    trn, tst = loadData('http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz', fmt)
    outputs = (
        ('Writing train text file...', r'./Train_cntk_text.txt', trn),
        ('Writing test text file...', r'./Test_cntk_text.txt', tst),
    )
    for message, path, matrix in outputs:
        print (message)
        savetxt(path, matrix)
        print ('Done.')

Просмотреть файл

@ -1,91 +0,0 @@
# Simple CIFAR-10 convnet, without and with BatchNormalization.
command = TrainConvNetWithBN:Eval
makeMode = false ; traceLevel = 1 ; deviceId = 0
RootDir = "." ; DataDir = "$RootDir$" ; ModelDir = "$RootDir$/Output/Models"
modelPath = "$ModelDir$/ConvNetBN"
# Training with BN
# Training action: defines the network, the SGD settings and the training-data reader.
TrainConvNetWithBN = {
action = "train"
BrainScriptNetworkBuilder = {
# Input is a 32 x 32 image with 3 channels; there are 10 output classes.
imageShape = 32:32:3
labelDim = 10
# Shifts the input values by subtracting the constant 128.
Subtract128 (x) = x - Constant (128)
# Network: three stages of 5x5 convolution -> batch normalization -> ReLU -> 3x3 max pooling
# with stride 2 (32, 32 and 64 feature maps), followed by a 64-unit linear layer with
# batch normalization and ReLU, and a final linear layer with labelDim outputs.
# The first convolution uses initValueScale = 0.00390625 (= 1/256).
model = Sequential (
Subtract128 :
ConvolutionalLayer {32, (5:5), pad = true, bias = false, init = "heNormal", initValueScale=0.00390625} :
BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} :
ConvolutionalLayer {32, (5:5), pad = true, bias = false, init = "heNormal"} :
BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} :
ConvolutionalLayer {64, (5:5), pad = true, bias = false, init = "heNormal"} :
BatchNormalizationLayer {spatialRank = 2, normalizationTimeConstant = 4096} : ReLU :
MaxPoolingLayer {(3:3), stride = (2:2)} :
LinearLayer {64, bias = false, init = "heNormal", initValueScale=0.1} :
BatchNormalizationLayer {normalizationTimeConstant = 4096} : ReLU :
LinearLayer {labelDim, init = "heNormal", initValueScale=0.1}
)
# inputs
features = Input {imageShape}
labels = Input {labelDim}
# apply model to features
z = model (features)
# connect to system
# ce is the training criterion; errs is evaluated during training. top5Errs is defined
# here but not listed in evaluationNodes; the Eval action requests it by name.
ce = CrossEntropyWithSoftmax (labels, z)
errs = ClassificationError (labels, z)
top5Errs = ClassificationError (labels, z, topN=5)
featureNodes = (features)
labelNodes = (labels)
criterionNodes = (ce)
evaluationNodes = (errs)
outputNodes = (z)
}
# Optimizer settings.
# NOTE(review): epochSize 49984 = 781 * 64, presumably chosen as a whole number of
# minibatches - confirm. The learning-rate schedule presumably means the first rate
# for 5 epochs and the second rate afterwards - confirm against the CNTK SGD docs.
SGD = {
epochSize = 49984 ; minibatchSize = 64
learningRatesPerSample = 0.00046875*5:0.00015625
momentumAsTimeConstant = 0
maxEpochs = 10
L2RegWeight = 0.003
dropoutRate = 0
firstMBsToShowResult = 10 ; numMBsToShowResult = 500
}
# Training data in CNTK text format: 3072 (= 32 * 32 * 3) dense feature values and a
# 10-element dense label vector per sample.
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Train_cntk_text.txt"
input = {
features = { dim = 3072 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}
# Eval action
# Evaluates the model on the test set, reporting the errs and top5Errs nodes.
Eval = {
action = "eval"
minibatchSize = 16
evalNodeNames = errs:top5Errs # also test top-5 error rate
# Test data in CNTK text format, with the same stream layout as the training reader:
# 3072 dense feature values and a 10-element dense label vector per sample.
reader = {
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Test_cntk_text.txt"
input = {
features = { dim = 3072 ; format = "dense" }
labels = { dim = 10 ; format = "dense" }
}
}
}

Просмотреть файл

@ -1,57 +0,0 @@
# CNTK example: CIFAR-10
## Overview
|Data: |The CIFAR-10 dataset (http://www.cs.toronto.edu/~kriz/cifar.html) of small images.
|:---------|:---
|Purpose |This example demonstrates usage of the NDL (Network Description Language) to define networks.
|Network |NDLNetworkBuilder, convolutional networks with batch normalization (including ResNet), cross entropy with softmax.
|Training |Stochastic gradient descent with momentum.
|Comments |See below.
## Running the example
### Getting the data
CIFAR-10 dataset is not included in CNTK distribution but can be easily downloaded and converted by running the following commands from this folder:
```
python CifarDownload.py [-f <format: cudnn|legacy>]
python CifarConverter.py <path to CIFAR-10 dataset>
```
The scripts will download all required files and convert them to CNTK-supported format.
If you don't have Python installed (Python 2.7 and numpy are required), we recommend installing the Anaconda Python distribution, which contains most of the popular Python packages, including numpy:
http://continuum.io/downloads
The download script has an optional `-f` parameter which specifies output format of the datasets. `cudnn` option (default) saves dataset in a spatial-major format used by cuDNN, while `legacy` - in CNTK legacy format. Use `cudnn` if CNTK is compiled with cuDNN and `legacy` otherwise.
The converter script takes a full path to the original CIFAR-10 dataset (in Python pickle format). The script will create `data` folder inside of provided path where it will store both train and test images (in `train` and `test` folders). It will also create appropriate mapping files for the CNTK ImageReader as well as mean file.
## Details
### Config files
1. 01_Convolution.ndl is a convolutional network which has 3 convolutional and 3 max pooling layers and resembles the network described here:
https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-80sec.cfg
(main differences are usage of max pooling layers everywhere rather than mix of max and average pooling, as well as dropout in fully-connected layer).
The network achieves an error rate of about 20.5% after training for about 3 minutes on a GPU.
To run the sample, navigate to the sample folder and run the following command:
```
cntk configFile=01_Conv_ndl_deprecated.cntk
```
2. 02_BatchNormConv.ndl is a convolutional network which uses batch normalization technique (http://arxiv.org/abs/1502.03167).
To run the sample, navigate to the sample folder and run the following command:
```
cntk configFile=02_BatchNormConv_ndl_deprecated.cntk
```
3. 03_ResNet.ndl and 04_ResNet_56.ndl are very deep convolutional networks that use ResNet architecture and have 20 and 56 layers respectively (http://arxiv.org/abs/1512.03385).
With 03_ResNet_ndl_deprecated.cntk you should get around 8.2% of error after training for about 50 minutes. 04_ResNet_56_ndl_deprecated.cntk should produce around 6.4% of error after training for about 3 hours (see log files in the Output directory).
4. 05_ConvLocal_ndl_deprecated.cntk uses locally-connected convolution layers (see `conv_local3` and `conv_local4` in `05_ConvLocal_ndl_deprecated.cntk`) and resembles a network described here: https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-conv-local-11pct.cfg
5. 06_RegressionSimple.cntk shows how to train a regression model on image data. It uses a very simple network and a composite reader using both the ImageReader and CNTKTextFormatReader, and defines the RMSE (root mean square error) as the loss function. The values that the network learns to predict are simply the average RGB values of an image, normalized to [0, 1]. To generate the ground truth labels for regression you need to run the CifarConverter.py script (since this example was added later, you might need to rerun it to generate the regression files). See also: https://github.com/Microsoft/CNTK/wiki/Train-a-regression-model-on-images
For more details, refer to .ndl and corresponding .cntk files.

Просмотреть файл

@ -1,12 +0,0 @@
0
1
2
3
4
5
6
7
8
9

Просмотреть файл

@ -0,0 +1,30 @@
# CNTK Examples: Image/Regression
## Overview
|Data: |The CIFAR-10 dataset (http://www.cs.toronto.edu/~kriz/cifar.html) of small images.
|:---------|:---
|Purpose |This folder contains a number of examples that demonstrate the usage of BrainScript to define deep learning networks for image regression tasks.
|Network |Convolution neural networks.
|Training |Stochastic gradient descent with momentum.
|Comments |See below.
## Running the example
### Getting the data
We use the CIFAR-10 dataset to demonstrate how to perform regression on images. The CIFAR-10 dataset is not included in the CNTK distribution but can be easily downloaded and converted by following the instructions in [DataSets/CIFAR-10](../DataSets/CIFAR-10). We recommend that you keep the downloaded data in that folder, as the configuration files in this folder assume that location by default.
## Details
### RegrSimple_CIFAR10.cntk
In this example, we set up a very simple task to have a neural network predict the average RGB values of images normalized to [0,1). To generate the ground truth labels for this regression task, the CIFAR-10 installation script in [DataSets/CIFAR-10](../DataSets/CIFAR-10) will generate two additional files, `train_regrLabels.txt` and `test_regrLabels.txt`, for train and test respectively.
Run the example from the current folder using:
`cntk configFile=RegrSimple_CIFAR10.cntk`
The network produces root-mean-square error (rmse) of around 0.00098, which indicates that the regression accuracy is very high for this simple task.
You may examine the cntk configuration file [RegrSimple_CIFAR10.cntk](./RegrSimple_CIFAR10.cntk) for more details. Note the network is a linear one without nonlinearity. This is intended as we know that computing the average RGB values of images is a linear operation. The reader is a composite reader that uses the `ImageReader` to read images and the `CNTKTextFormatReader` to read the regression ground truth labels. The configuration file also demonstrates how to write the network prediction for the test data into an output file.

Просмотреть файл

@ -1,21 +1,17 @@
# 06_RegressionSimple.cntk shows how to train a regression model on image data.
# RegrSimple_CIFAR10.cntk shows how to train a regression model on CIFAR-10 image data.
# It uses a very simple network and a composite reader using both the ImageReader
# and CNTKTextFormatReader and defines the RMSE (root mean square error) as the
# loss function. The values that the network learns to predict are simply the
# average rgb values of an image normalized to [0, 1].
# The network consists simply of two linear layers, i.e. two fully connected layers
# with no non-linear activation function, simply LinearLayer{...} (v) = W * v + b
# See https://github.com/Microsoft/CNTK/wiki/Layers-Reference for details on CNTK layers.
# See README.md for instructions on how to generate data and regression labels for this example.
command = TrainConvNet:Write
command = TrainConvNet:Write:Test
makeMode = false ; traceLevel = 1 ; deviceId = "auto"
rootDir = "." ; configDir = "$rootDir$" ; dataDir = "$rootDir$" ;
outputDir = "$rootDir$/Output" ; modelDir = "$outputDir$/Models"
rootDir = ".." ; dataDir = "$rootDir$/DataSets/CIFAR10" ;
outputDir = "Output" ; modelDir = "$outputDir$/Models"
modelPath = "$ModelDir$/06_RegressionSimple.cmf"
modelPath = "$ModelDir$/RegrSimple_CIFAR10.cmf"
# Training action for a convolutional network
TrainConvNet = {
@ -70,7 +66,7 @@ TrainConvNet = {
verbosity = 0 ; randomize = true
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/cifar-10-batches-py/train_map.txt"
file = "$dataDir$/train_map.txt"
input = {
features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
@ -80,7 +76,7 @@ TrainConvNet = {
}
} : {
type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
file = "$dataDir$/cifar-10-batches-py/train_regrLabels.txt"
file = "$dataDir$/train_regrLabels.txt"
input = {
regrLabels = { dim = 3 ; format = "dense" }
}
@ -93,13 +89,13 @@ Write = {
action = "write"
minibatchSize = 1
outputNodeNames = (ol, regrLabels, rmse)
outputPath = "$OutputDir$/06_RegressionSimple"
outputPath = "$OutputDir$/RegrSimple_CIFAR10"
reader = {
verbosity = 0 ; randomize = false
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/cifar-10-batches-py/test_map.txt"
file = "$dataDir$/test_map.txt"
input = {
features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
@ -109,7 +105,7 @@ Write = {
}
} : {
type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
file = "$dataDir$/cifar-10-batches-py/test_regrLabels.txt"
file = "$dataDir$/test_regrLabels.txt"
input = {
regrLabels = { dim = 3 ; format = "dense" }
}
@ -122,13 +118,13 @@ Test = {
action = "test"
minibatchSize = 512
outputNodeNames = (ol, regrLabels, rmse)
outputPath = "$OutputDir$/06_RegressionSimple"
outputPath = "$OutputDir$/RegrSimple_CIFAR10"
reader = {
verbosity = 0 ; randomize = false
deserializers = ({
type = "ImageDeserializer" ; module = "ImageReader"
file = "$dataDir$/cifar-10-batches-py/test_map.txt"
file = "$dataDir$/test_map.txt"
input = {
features = { transforms = (
{ type = "Scale" ; width = 32 ; height = 32 ; channels = 3 ; interpolations = "linear" } :
@ -138,7 +134,7 @@ Test = {
}
} : {
type = "CNTKTextFormatDeserializer" ; module = "CNTKTextFormatReader"
file = "$dataDir$/cifar-10-batches-py/test_regrLabels.txt"
file = "$dataDir$/test_regrLabels.txt"
input = {
regrLabels = { dim = 3 ; format = "dense" }
}

Просмотреть файл

@ -16,5 +16,5 @@ Please refer to the Readme file in the corresponding folder for further details.
|:------------------------|:-------------------------------------------------|:----------------|
|Other/Simple2d | Synthetic 2d data | FF (CPU and GPU)
|Speech/AN4 | Speech data (CMU AN4) | FF and LSTM
|Image/MNIST | Image data (MNIST handwritten digit recognition) | CNN
|Image/GettingStarted | Image data (MNIST handwritten digit recognition) | CNN
|Text/PennTreebank | Text data (penn treebank) | RNN

Просмотреть файл

@ -412,6 +412,7 @@ $(CNTKLIBRARY_LIB): $(CNTKLIBRARY_OBJ) | $(CNTKMATH_LIB)
CNTKLIBRARY_TESTS_SRC =\
Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp \
Tests/UnitTests/V2LibraryTests/Main.cpp \
Tests/UnitTests/V2LibraryTests/Common.cpp \
Tests/UnitTests/V2LibraryTests/NDArrayViewTests.cpp \
Tests/UnitTests/V2LibraryTests/RecurrentFunctionTests.cpp \
Tests/UnitTests/V2LibraryTests/TensorTests.cpp \
@ -422,6 +423,8 @@ CNTKLIBRARY_TESTS_SRC =\
Tests/UnitTests/V2LibraryTests/FunctionTests.cpp \
Tests/UnitTests/V2LibraryTests/SequenceClassification.cpp \
Tests/UnitTests/V2LibraryTests/Seq2Seq.cpp \
Tests/UnitTests/V2LibraryTests/TruncatedLSTMAcousticModel.cpp \
Tests/UnitTests/V2LibraryTests/DeviceSelectionTests.cpp \
Examples/Evaluation/CPPEvalV2Client/EvalMultithreads.cpp \
CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests
@ -474,7 +477,7 @@ $(EVAL_LIB): $(EVAL_OBJ) | $(CNTKMATH_LIB) $(MULTIVERSO_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo Building $(EVAL_LIB) for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) -l$(MULTIVERSO)
$(CXX) $(LDFLAGS) -shared $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) $(lMULTIVERSO)
########################################
# Eval Sample client
@ -493,7 +496,7 @@ $(EVAL_SAMPLE_CLIENT): $(EVAL_SAMPLE_CLIENT_OBJ) | $(EVAL_LIB) $(MULTIVERSO_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $(EVAL_SAMPLE_CLIENT) for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(EVAL) -l$(CNTKMATH) -l$(MULTIVERSO)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(EVAL) -l$(CNTKMATH) $(lMULTIVERSO)
########################################
# BinaryReader plugin
@ -733,8 +736,11 @@ IMAGE_READER_LIBS += -lopencv_core -lopencv_imgproc -lopencv_imgcodecs
ifdef LIBZIP_PATH
CPPFLAGS += -DUSE_ZIP
#both directories are needed for building libzip
INCLUDEPATH += $(LIBZIP_PATH)/include
INCLUDEPATH += $(LIBZIP_PATH)/lib/libzip/include
IMAGE_READER_LIBS += -lzip
LIBPATH += $(LIBZIP_PATH)/lib
endif
IMAGEREADER_SRC =\
@ -788,7 +794,7 @@ ifeq (,$(wildcard Source/Multiverso/include/multiverso/*.h))
$(error Build with Multiverso was requested but cannot find the code. Please check https://github.com/Microsoft/DMTK to learn more.)
endif
MULTIVERSO:=multiverso
lMULTIVERSO:=-lmultiverso
INCLUDEPATH += $(SOURCEDIR)/Multiverso/include
COMMON_FLAGS += -DMULTIVERSO_SUPPORT
@ -813,6 +819,26 @@ $(MULTIVERSO_LIB):
-B./Source/Multiverso/build -H./Source/Multiverso
@make VERBOSE=1 -C ./Source/Multiverso/build/ -j multiverso
UNITTEST_MULTIVERSO_SRC = \
$(SOURCEDIR)/Multiverso/Test/unittests/test_array.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_blob.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_kv.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_message.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_multiverso.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_node.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_sync.cpp \
UNITTEST_MULTIVERSO_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_MULTIVERSO_SRC))
UNITTEST_MULTIVERSO := $(BINDIR)/multiversotests
ALL += $(UNITTEST_MULTIVERSO)
$(UNITTEST_MULTIVERSO): $(UNITTEST_MULTIVERSO_OBJ) | $(MULTIVERSO_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(lMULTIVERSO) -ldl
endif
########################################
@ -849,7 +875,7 @@ $(CNTK): $(CNTK_OBJ) | $(CNTKMATH_LIB) $(MULTIVERSO_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building output for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) -l$(MULTIVERSO) -fopenmp
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH)) $(patsubst %,$(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH)) -o $@ $^ $(LIBS) -l$(CNTKMATH) $(lMULTIVERSO) -fopenmp
# deployable resources: standard library of BS
CNTK_CORE_BS:=$(BINDIR)/cntk.core.bs
@ -886,7 +912,7 @@ $(UNITTEST_EVAL) : $(UNITTEST_EVAL_OBJ) | $(EVAL_LIB) $(CNTKMATH_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -l$(EVAL) -l$(CNTKMATH) -l$(MULTIVERSO)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -l$(EVAL) -l$(CNTKMATH) $(lMULTIVERSO)
#TODO: create project specific makefile or rules to avoid adding project specific path to the global path
INCLUDEPATH += $(SOURCEDIR)/Readers/CNTKTextFormatReader
@ -944,7 +970,7 @@ $(UNITTEST_NETWORK): $(UNITTEST_NETWORK_OBJ) | $(CNTKMATH_LIB) $(CNTKTEXTFORMATR
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -l$(MULTIVERSO) -l$(CNTKMATH) -fopenmp
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) $(lMULTIVERSO) -l$(CNTKMATH) -fopenmp
UNITTEST_MATH_SRC = \
$(SOURCEDIR)/../Tests/UnitTests/MathTests/BatchNormalizationEngineTests.cpp \
@ -1003,27 +1029,6 @@ $(UNITTEST_BRAINSCRIPT): $(UNITTEST_BRAINSCRIPT_OBJ) | $(CNTKMATH_LIB)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -ldl -l$(CNTKMATH)
UNITTEST_MULTIVERSO_SRC = \
$(SOURCEDIR)/Multiverso/Test/unittests/test_array.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_blob.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_kv.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_message.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_multiverso.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_node.cpp \
$(SOURCEDIR)/Multiverso/Test/unittests/test_sync.cpp \
UNITTEST_MULTIVERSO_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_MULTIVERSO_SRC))
UNITTEST_MULTIVERSO := $(BINDIR)/multiversotests
ALL += $(UNITTEST_MULTIVERSO)
$(UNITTEST_MULTIVERSO): $(UNITTEST_MULTIVERSO_OBJ) | $(MULTIVERSO_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) -l$(MULTIVERSO) -ldl
unittests: $(UNITTEST_EVAL) $(UNITTEST_READER) $(UNITTEST_NETWORK) $(UNITTEST_MATH) $(UNITTEST_BRAINSCRIPT)
endif

Просмотреть файл

@ -1,6 +1,14 @@
# CNTK
## Latest news
*2016-10-03.* V 1.7.2 Binary release
**This is a Hot Fix Release. It affects all users of Model Evaluation Library**
If you are NOT using Model Evaluation Library you may skip this release.
If you ARE using Model Evaluation Library we **strongly recommend** installing version 1.7.2 instead of **any** previous version you might be using.
See [Release Notes](https://github.com/Microsoft/CNTk/wiki/CNTK_1_7_2_Release_Notes) for details.
*2016-09-28.* V 1.7.1 Binary release
Highlights of this Release:
* Two Breaking Changes related to Layers library default initialization and ```fsAdagrad``` gradient-normalization scheme
@ -31,9 +39,6 @@ Get the Release from the [CNTK Releases page](https://github.com/Microsoft/CNTK/
*2016-08-10.* We have significantly simplified handling of **Gated Recurrent Units (GRU)**. Read more in the [corresponding article](https://github.com/Microsoft/CNTK/wiki/GRUs-on-CNTK-with-BrainScript).
*2016-07-15.* V 1.6 Binary release
CNTK v.1.6 binaries are on the [CNTK Releases page](https://github.com/Microsoft/CNTK/releases)
See [all news](https://github.com/Microsoft/CNTK/wiki/News).
## What is CNTK

Просмотреть файл

@ -90,22 +90,24 @@ if __name__ == "__main__":
#####################################################################################################
try:
import StringIO
stringio = StringIO.StringIO
except ImportError:
from io import StringIO
stringio = StringIO
try:
import pytest
except ImportError:
pass
def test_simpleSanityCheck():
dictionary1 = StringIO.StringIO("hello\nmy\nworld\nof\nnothing\n")
dictionary2 = StringIO.StringIO("let\nme\nbe\nclear\nabout\nit\n")
input = StringIO.StringIO("hello my\tclear about\nworld of\tit let clear\n")
output = StringIO.StringIO()
dictionary1 = stringio("hello\nmy\nworld\nof\nnothing\n")
dictionary2 = stringio("let\nme\nbe\nclear\nabout\nit\n")
input = stringio("hello my\tclear about\nworld of\tit let clear\n")
output = stringio()
convert([dictionary1, dictionary2], [input], output, None, False)
expectedOutput = StringIO.StringIO()
expectedOutput = stringio()
expectedOutput.write("0\t|S0 0:1\t|S1 3:1\n")
expectedOutput.write("0\t|S0 1:1\t|S1 4:1\n")
expectedOutput.write("1\t|S0 2:1\t|S1 5:1\n")
@ -115,10 +117,10 @@ def test_simpleSanityCheck():
assert expectedOutput.getvalue() == output.getvalue()
def test_nonExistingWord():
dictionary1 = StringIO.StringIO("hello\nmy\nworld\nof\nnothing\n")
input = StringIO.StringIO("hello my\nworld of nonexistent\n")
output = StringIO.StringIO()
dictionary1 = stringio("hello\nmy\nworld\nof\nnothing\n")
input = stringio("hello my\nworld of nonexistent\n")
output = stringio()
with pytest.raises(Exception) as info:
convert([dictionary1], [input], output, None, False)
assert info.value.message == "Token 'nonexistent' cannot be found in the dictionary for stream 0"
assert str(info.value) == "Token 'nonexistent' cannot be found in the dictionary for stream 0"

Просмотреть файл

@ -39,6 +39,12 @@ using namespace std;
using namespace Microsoft::MSR;
using namespace Microsoft::MSR::CNTK;
// Computes the default for the "distributedMBReading" option when the
// configuration does not set it explicitly.
// Returns true only when an MPI wrapper instance exists and the reader is not
// a legacy reader; the `config` argument is not consulted.
bool GetDistributedMBReadingDefaultValue(const ConfigParameters& config, const IDataReader& reader)
{
    // Without an MPI instance there is nothing to distribute across.
    if (MPIWrapper::GetInstance() == nullptr)
        return false;

    // With an MPI instance, the default is on for every non-legacy reader.
    return !reader.IsLegacyReader();
}
// ===========================================================================
// DoEvalBase() - implements CNTK "eval" command
// ===========================================================================
@ -62,7 +68,7 @@ static void DoEvalBase(const ConfigParameters& config, IDataReader& reader)
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
bool enableDistributedMBReading = config(L"distributedMBReading", false);
bool enableDistributedMBReading = config(L"distributedMBReading", GetDistributedMBReadingDefaultValue(config, reader));
vector<wstring> evalNodeNamesVector;
@ -104,7 +110,7 @@ static void DoEvalBNBase(const ConfigParameters& config, IDataReader& reader)
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
bool enableDistributedMBReading = config(L"distributedMBReading", false);
bool enableDistributedMBReading = config(L"distributedMBReading", GetDistributedMBReadingDefaultValue(config, reader));
vector<wstring> evalNodeNamesVector;
@ -189,8 +195,6 @@ void DoCrossValidate(const ConfigParameters& config)
size_t maxSamplesInRAM = config(L"maxSamplesInRAM", (size_t)SIZE_MAX);
size_t numSubminiBatches = config(L"numSubminibatches", (size_t)1);
bool enableDistributedMBReading = config(L"distributedMBReading", false);
ConfigArray evalNodeNames = config(L"evalNodeNames", "");
vector<wstring> evalNodeNamesVector;
for (int i = 0; i < evalNodeNames.size(); ++i)
@ -203,6 +207,8 @@ void DoCrossValidate(const ConfigParameters& config)
DataReader cvDataReader(readerConfig);
bool enableDistributedMBReading = config(L"distributedMBReading", GetDistributedMBReadingDefaultValue(config, cvDataReader));
bool finalModelEvaluated = false;
for (size_t i = cvInterval[0]; i <= cvInterval[2]; i += cvInterval[1])
{

Просмотреть файл

@ -269,6 +269,16 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
nodePtr = builder.LegacyReshape(NULL, num_rows, ImageDimensions::AsTensorShape(img_width, img_height, img_channels, imageLayoutKind), name);
}
}
else if (cnNodeType == OperationNameOf(ReconcileDynamicAxisNode))
{
nodeParamCount = 2;
nodeParamStart = 0;
if (pass == ndlPassInitial)
{
nodePtr = builder.ReconcileDynamicAxis(NULL, NULL, name);
}
}
else if (cnNodeType == OperationNameOf(PastValueNode) ||
cnNodeType == OperationNameOf(FutureValueNode))
{

Просмотреть файл

@ -208,8 +208,10 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
else if (EqualInsensitive(nodeType, OperationNameOf(PerDimMeanVarNormalizationNode), L"PerDimMVNorm")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(PlusNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ReciprocalNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ReconcileDynamicAxisNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(RectifiedLinearNode), L"ReLU")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ReshapeNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(ROIPoolingNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(RowRepeatNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(RowStackNode))) ret = true;
#ifdef COMING_SOON

Просмотреть файл

@ -455,8 +455,22 @@ CNTK2 = [
// 11. Criterion nodes
// No changes here - we said the default input would be the label sequence here, against which the
// empirical sequence is compared to. Keeping this for now.
CrossEntropyWithSoftmax(_, outProbVectorSequence, tag='') = new ComputationNode [ operation = 'CrossEntropyWithSoftmax' ; inputs = _AsNodes (_ : outProbVectorSequence) /*plus the function args*/ ]
ClassificationError(_, outVectorSequence, topN=1, tag='') = new ComputationNode [ operation = 'ClassificationError' ; inputs = _AsNodes (if topN == 1 then (_ : outVectorSequence) else (_ : outVectorSequence : Constant (topN))) /*plus the function args*/ ]
# CrossEntropyWithSoftmax: with axis=0 (the default) this creates the built-in 'CrossEntropyWithSoftmax' node
# with inputs (labelSequence : outProbVectorSequence). For any other axis the criterion is composed from
# reductions along that axis: ReduceLogSum (out) - ReduceSum (labels .* out); 'tag' is applied to the Minus node.
CrossEntropyWithSoftmax(labelSequence, outProbVectorSequence, axis=0, tag='') =
if axis==0 then new ComputationNode [ operation = 'CrossEntropyWithSoftmax' ; inputs = _AsNodes (labelSequence : outProbVectorSequence) /*plus the function args*/ ]
else [ tag1 = tag; out = Minus (ReduceLogSum (outProbVectorSequence, axis=axis), ReduceSum (labelSequence .* outProbVectorSequence, axis=axis), tag=tag1) ].out
# Classification error along a specific axis: account only for missed labels, i.e.
# strictly check whether at the one “1” location in labels we find a value equal to the max
ClassificationError(labelSequence, outVectorSequence, topN=1, axis=0, tag='') =
if axis==0 then new ComputationNode [ operation = 'ClassificationError' ; inputs = _AsNodes (if topN == 1 then (labelSequence : outVectorSequence) else (labelSequence : outVectorSequence : Constant (topN))) /*plus the function args*/ ]
else if topN != 1 then Fail ("ClassificationError() along a specific axis does not support topN.")
else {
axMax = ReduceMax (outVectorSequence, axis=axis) # max value along competition axis
pred = outVectorSequence == axMax # 1 for all values that are max
wrongPred = labelSequence != pred # look up all wrong predictions {label index}
axErr = ReduceSum (wrongPred, axis=axis) # sum up wrong predictions along competition axis
capErr = axErr >= 1 # only count maximally one error per prediction
err = ReduceMean (capErr, tag=tag) # average into a single number per sample
}.err
ErrorPrediction = ClassificationError # legacy
# TODO: replace with this (need to deal with topN thing):
# (_new will be removed once the change is made)
@ -547,6 +561,7 @@ MaxUnpooling(unpoolInput, poolInput, kernelDims, stride=1, autoPadding = true, l
# 2D pooling
MaxPooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = _AsNodes (input) /*plus the function args*/ ]
AveragePooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'AveragePooling' ; inputs = _AsNodes (input) /*plus the function args*/ ]
ROIPooling (input, ROIs, shape) = new ComputationNode { operation = 'ROIPooling' ; inputs = _AsNodes (input : ROIs) ; outputShape = new TensorShape [ dims = shape ] ; tag='' /*plus the function args*/ }
ColumnwiseCrossProduct = KhatriRaoProduct // deprecated
ErrorPrediction = ClassificationError # legacy name
Delay = PastValue
@ -560,8 +575,6 @@ CosDistance(aVectorSequence, anotherVectorSequence, tag='') = new ComputationNod
CosDistanceWithNegativeSamples(aVectorSequence, anotherVectorSequence, numShifts, numNegSamples, tag='') = new ComputationNode [ operation = 'CosDistanceWithNegativeSamples' ; inputs = _AsNodes (aVectorSequence : anotherVectorSequence : numShifts : numNegSamples) /*plus the function args*/ ]
Cosine(x, tag='') = new ComputationNode [ operation = 'Cosine' ; inputs = _AsNodes (x) /*plus the function args*/ ]
CrossEntropy(refProbVectorSequence, outProbVectorSequence, tag='') = new ComputationNode [ operation = 'CrossEntropy' ; inputs = _AsNodes (refProbVectorSequence : outProbVectorSequence) /*plus the function args*/ ]
# once ReduceLogSum becomes proper C++, CrossEntropyWithSoftmax() will become this:
NewCrossEntropyWithSoftmax (labelSequence, z, tag='') = [ tag1 = tag; out = Minus (ReduceLogSum (z), ReduceSum (labelSequence .* z), tag=tag1) ].out
DiagTimes(diagonalMatrixAsColumnVector, matrix, tag='') = new ComputationNode [ operation = 'DiagTimes' ; inputs = _AsNodes (diagonalMatrixAsColumnVector : matrix) /*plus the function args*/ ]
// TODO: DiagTimes = ElementTimes
GatherPacked(indexSequence, sourceData, tag='') = new ComputationNode [ operation = 'GatherPacked' ; inputs = _AsNodes (indexSequence : sourceData) /*plus the function args*/ ]

Просмотреть файл

@ -553,9 +553,14 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
// parallel training
shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> mpi;
auto ensureMPIWrapperCleanup = MakeScopeExit(&MPIWrapper::DeleteInstance);
bool paralleltrain = config(L"parallelTrain", false);
// when running under MPI with more than one node, use 'true' as the default value for parallelTrain,
// 'false' otherwise.
bool paralleltrain = config(L"parallelTrain", (MPIWrapper::GetTotalNumberOfMPINodes() > 1));
if (paralleltrain)
{
mpi = MPIWrapper::GetInstance(true /*create*/);
}
g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false);
@ -638,7 +643,7 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
static void PrintBanner(int argc, wchar_t* argv[], const string& timestamp)
{
fprintf(stderr, "CNTK 1.7.1+ (");
fprintf(stderr, "CNTK 1.7.2+ (");
#ifdef _GIT_EXIST
fprintf(stderr, "%s %.6s, ", _BUILDBRANCH_, _BUILDSHA1_);
#endif
@ -687,9 +692,15 @@ int wmainOldCNTKConfig(int argc, wchar_t* argv[])
// The top-level 'parallelTrain' is a bool, not to be confused with the parallelTrain block inside SGD.
shared_ptr<Microsoft::MSR::CNTK::MPIWrapper> mpi;
auto ensureMPIWrapperCleanup = MakeScopeExit(&MPIWrapper::DeleteInstance);
bool paralleltrain = config(L"parallelTrain", "false");
// when running under MPI with more than one node, use 'true' as the default value for parallelTrain,
// 'false' otherwise.
bool paralleltrain = config(L"parallelTrain", (MPIWrapper::GetTotalNumberOfMPINodes() > 1));
if (paralleltrain)
{
mpi = MPIWrapper::GetInstance(true /*create*/);
}
g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false);

Просмотреть файл

@ -208,21 +208,21 @@ namespace CNTK
NDShape() {}
///
/// Contruct a NDShape instance with the specified number of axes and dimensionality in each axis.
/// Construct a NDShape instance with the specified rank and dimensionality in each axis.
///
explicit NDShape(size_t numAxes, size_t dimension = InferredDimension)
: m_shapeDims(numAxes, dimension)
{}
///
/// Contruct a NDShape instance with specified dimensions.
/// Construct a NDShape instance with specified dimensions.
///
NDShape(const std::vector<size_t>& dimensions)
: m_shapeDims(dimensions)
{}
///
/// Contruct a NDShape instance with specified dimensions.
/// Construct a NDShape instance with specified dimensions.
///
NDShape(const std::initializer_list<size_t>& dimensions)
: m_shapeDims(dimensions)
@ -234,7 +234,7 @@ namespace CNTK
const std::vector<size_t>& Dimensions() const { return m_shapeDims; }
///
/// Returns the number of axes of 'this' shape.
/// Returns the rank of 'this' shape.
///
size_t Rank() const { return m_shapeDims.size(); }
@ -255,7 +255,7 @@ namespace CNTK
{
endAxisId = (endAxisId == SIZE_MAX) ? Rank() : endAxisId;
if ((endAxisId < beginAxisId) || (endAxisId > Rank()))
InvalidArgument("NDShape::SubShape : The specified endAxisId (%d) cannot exceed the number of axes (%d) of 'this' NDShape and must be >= than the specified beginAxisId (%d)", (int)endAxisId, (int)Rank(), (int)beginAxisId);
InvalidArgument("NDShape::SubShape : The specified endAxisId (%d) cannot exceed the rank (%d) of 'this' NDShape and must be >= than the specified beginAxisId (%d)", (int)endAxisId, (int)Rank(), (int)beginAxisId);
std::vector<size_t> subShapeDims(m_shapeDims.begin() + beginAxisId, m_shapeDims.begin() + endAxisId);
return subShapeDims;
@ -343,6 +343,7 @@ namespace CNTK
friend class CompositeFunction;
friend class LearnerBase;
friend class Variable;
friend class PackedValue;
template <typename T, typename ...CtorArgTypes>
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
@ -593,6 +594,13 @@ namespace CNTK
std::shared_ptr<void> m_tensorView; // Microsoft::MSR::CNTK::TensorView<ElemType>*
};
///
/// Denotes the state recorded for a single entry of an NDMask.
/// NOTE(review): the numeric values presumably match what is stored in the mask's underlying char buffer -- confirm before renumbering.
///
enum class MaskKind : char
{
// Entry is masked out.
Invalid = 0,
// Entry is not masked.
Valid = 1,
// Entry is marked as the beginning of a sequence.
SequenceBegin = 2,
};
///
/// Denotes a multi-dimensional mask used for specifying specific sections of a NDArrayView object as masked/invalid.
/// This type denotes a view and there may be multiple simultaneous views of the data underlying a NDMask instance.
@ -603,6 +611,7 @@ namespace CNTK
template <typename T, typename ...CtorArgTypes>
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
public:
///
/// Construct a new Mask object of specified shape
@ -615,12 +624,32 @@ namespace CNTK
CNTK_API ~NDMask();
///
/// Mask out the specified sub-section of 'this' mask
/// Mask out (i.e. mark Invalid) the specified sub-section of 'this' mask
///
CNTK_API void MaskSection(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape);
void InvalidateSection(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape)
{
MarkSectionAs(sectionOffset, sectionShape, MaskKind::Invalid);
}
///
/// Clear the mask; i.e. unmask all currently masked values
/// Mark the specified position in 'this' mask as sequence begin
///
void MarkSequenceBegin(const std::vector<size_t>& offset)
{
NDShape sectionShape = NDShape(Shape().Rank(), 1);
MarkSectionAs(offset, sectionShape, MaskKind::SequenceBegin);
}
///
/// Mark the specified sub-section of 'this' mask as sequence begin
///
void MarkSequenceBegin(const std::vector<size_t>& offset, const NDShape& sectionShape)
{
MarkSectionAs(offset, sectionShape, MaskKind::SequenceBegin);
}
///
/// Clear the mask; i.e. unmask or mark Valid all currently masked (i.e. Invalid) values
///
CNTK_API void Clear();
@ -642,12 +671,20 @@ namespace CNTK
///
/// Returns a read-only pointer to the data buffer underlying 'this' Mask object
///
CNTK_API const char* DataBuffer() const;
CNTK_API const MaskKind* DataBuffer() const;
///
/// Creates a new NDMask with newly allocated storage on the specified device and copies 'this' mask's contents into the newly allocated mask.
///
CNTK_API NDMaskPtr DeepClone(const DeviceDescriptor& device) const;
///
/// Creates a new NDMask with newly allocated storage on the same device as 'this' mask and copies 'this' mask's contents into the newly allocated mask.
///
CNTK_API NDMaskPtr DeepClone() const;
NDMaskPtr DeepClone() const
{
    // Forward to the device-taking overload, using the device 'this' mask reports.
    return DeepClone(Device());
}
///
/// Creates a new NDMask which is an alias of 'this' mask.
@ -662,6 +699,9 @@ namespace CNTK
private:
NDMask(const NDShape& shape, Microsoft::MSR::CNTK::Matrix<char>* matrix);
CNTK_API void MarkSectionAs(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape, MaskKind maskKind);
Microsoft::MSR::CNTK::Matrix<char>* GetMatrix() const;
// Disallow copy and move construction and assignment
@ -710,41 +750,82 @@ namespace CNTK
///
/// Destruct 'this' Value object.
///
CNTK_API virtual ~Value();
virtual ~Value();
///
/// Returns the descriptor of the device that 'this' Value resides on
///
virtual DeviceDescriptor Device() const { return m_data->Device(); }
///
/// Returns the data type of 'this' Value's contents.
///
virtual DataType GetDataType() const { return m_data->GetDataType(); }
///
/// Returns the storage format of 'this' Value.
///
virtual StorageFormat GetStorageFormat() const { return m_data->GetStorageFormat(); }
///
/// Returns the shape of 'this' Value.
///
virtual const NDShape& Shape() const { return m_data->Shape(); }
///
/// Returns a boolean indicating if 'this' Value contains data in sparse storage format.
///
bool IsSparse() const
{
    // Every storage format other than Dense is reported as sparse.
    const bool isDense = (GetStorageFormat() == StorageFormat::Dense);
    return !isDense;
}
///
/// Returns a boolean indicating if 'this' Value is read-only.
///
virtual bool IsReadOnly() const { return m_data->IsReadOnly(); }
///
/// Returns the number of masked/invalid values
///
virtual size_t MaskedCount() const
{
    // A Value that carries no mask has no masked entries.
    if (!m_mask)
        return 0;

    return m_mask->MaskedCount();
}
///
/// Returns the NDArrayView object corresponding to the data contents of 'this' Value object.
///
CNTK_API virtual NDArrayViewPtr Data() const;
virtual NDArrayViewPtr Data() const;
///
/// Returns the NDMask object corresponding to the mask associated with 'this' Value object.
///
CNTK_API virtual NDMaskPtr Mask() const;
virtual NDMaskPtr Mask() const;
///
/// Creates a new Value with newly allocated storage on the same device as 'this' Value and copies 'this' Value's contents into the newly allocated Value.
///
CNTK_API virtual ValuePtr DeepClone(bool readOnly = false) const;
virtual ValuePtr DeepClone(bool readOnly = false) const;
///
/// Creates a new Value which is an alias of 'this' Value.
///
CNTK_API virtual ValuePtr Alias(bool readOnly = false) const;
virtual ValuePtr Alias(bool readOnly = false) const;
///
/// Copies the contents of the 'source' Value to 'this' Value.
/// The shapes of the 'source' Value's data and mask must be identical to 'this' Value's data and mask.
///
CNTK_API virtual void CopyFrom(const Value& source);
virtual void CopyFrom(const Value& source);
private:
// Disallow copy and move construction and assignment
Value(const Value&) = delete; Value& operator=(const Value&) = delete; Value(Value&&) = delete; Value& operator=(Value&&) = delete;
private:
NDArrayViewPtr m_data;
NDMaskPtr m_mask;
protected:
mutable NDArrayViewPtr m_data;
mutable NDMaskPtr m_mask;
};
///
@ -758,6 +839,7 @@ namespace CNTK
{
CNTK_API static const std::wstring StaticAxisNamePrefix;
static const size_t SentinelStaticAxisIndexValueForDynamicAxes = SIZE_MAX;
static const size_t SentinelStaticAxisIndexValueForAllStaticAxes = SIZE_MAX - 1;
class UniqueDynamicAxesNames
{
@ -839,15 +921,20 @@ namespace CNTK
}
///
/// Static Axis object representing the default dynamic axis.
/// Axis object representing the default dynamic axis.
///
CNTK_API static const Axis& DefaultDynamicAxis();
///
/// Static Axis object representing the batch axis.
/// Axis object representing the batch axis.
///
CNTK_API static const Axis& DefaultBatchAxis();
///
/// Axis object representing all the static axes of an operand
///
CNTK_API static const Axis& AllStaticAxes();
///
/// Returns a new unique Dynamic axis
///
@ -1282,6 +1369,8 @@ namespace CNTK
return Contains(key.c_str());
}
CNTK_API void Add(const Dictionary& other);
CNTK_API bool operator==(const Dictionary& other) const;
CNTK_API bool operator!=(const Dictionary& other) const;
@ -1335,7 +1424,7 @@ namespace CNTK
typedef Dictionary ParameterInitializer;
// Forward declarations
inline Variable PlaceholderVariable(const NDShape& shape, const std::vector<Axis>& dynamicAxes = Axis::DefaultInputVariableDynamicAxes);
inline Variable PlaceholderVariable(const NDShape& shape, const std::wstring& name, const std::vector<Axis>& dynamicAxes = Axis::DefaultInputVariableDynamicAxes);
inline Variable InputVariable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes = Axis::DefaultInputVariableDynamicAxes);
inline Variable OutputVariable(const NDShape& shape, CNTK::DataType dataType, Function* ownerFunction, const std::vector<Axis>& dynamicAxes, const std::wstring& name = L"");
@ -1362,7 +1451,7 @@ namespace CNTK
#ifndef SWIG
private:
friend inline Variable PlaceholderVariable(const NDShape& shape, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/);
friend inline Variable PlaceholderVariable(const NDShape& shape, const std::wstring& name, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/);
friend inline Variable InputVariable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/);
friend inline Variable OutputVariable(const NDShape& shape, CNTK::DataType dataType, Function* ownerFunction, const std::vector<Axis>& dynamicAxes, const std::wstring& name /*= L""*/);
#endif
@ -1481,6 +1570,7 @@ namespace CNTK
: m_dataFields(MakeSharedObject<VariableFields>(shape, varType, dataType, ownerFunction, value, needsGradient, dynamicAxes, isSparse, name, uid))
{}
private:
Variable Clone() const
{
Variable clonedVariable;
@ -1544,17 +1634,7 @@ namespace CNTK
Internal::GenerateUid(m_varKind));
}
void SetValueInitialization(const ParameterInitializer& initializationConfig, const DeviceDescriptor& device)
{
if (m_value != nullptr)
LogicError("Value initialization config cannot be set if a value already exists");
assert(!m_valueInitializer);
assert(!m_valueInitializationDevice);
m_valueInitializer.reset(new ParameterInitializer(initializationConfig));
m_valueInitializationDevice.reset(new DeviceDescriptor(device));
}
CNTK_API void SetValueInitialization(const ParameterInitializer& initializationConfig, const DeviceDescriptor& device);
private:
// Disallow copy and move construction and assignment
@ -1580,10 +1660,19 @@ namespace CNTK
/// Create a Placeholder variable to be used as a temporary/placeholder input to a Function.
/// All placeholder inputs of a Function must be replaced with non-placeholder Variables before Forward evaluation of the Function.
///
inline Variable PlaceholderVariable(const NDShape& shape, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/)
inline Variable PlaceholderVariable(const NDShape& shape, const std::wstring& name, const std::vector<Axis>& dynamicAxes /*= Axis::DefaultInputVariableDynamicAxes*/)
{
auto varKind = VariableKind::Placeholder;
return Variable(shape, varKind, DataType::Unknown, nullptr, false, dynamicAxes, L"", Internal::GenerateUid(varKind));
return Variable(shape, varKind, DataType::Unknown, nullptr, false, dynamicAxes, name, Internal::GenerateUid(varKind));
}
///
/// Create an unnamed Placeholder variable to be used as a temporary/placeholder input to a Function.
/// Equivalent to calling the name-taking overload with an empty name.
/// All placeholder inputs of a Function must be replaced with non-placeholder Variables before Forward evaluation of the Function.
///
inline Variable PlaceholderVariable(const NDShape& shape, const std::vector<Axis>& dynamicAxes = Axis::DefaultInputVariableDynamicAxes)
{
    const std::wstring emptyName;
    return PlaceholderVariable(shape, emptyName, dynamicAxes);
}
///
@ -1765,7 +1854,7 @@ namespace CNTK
public:
///
/// Contruct a Constant whose initial contents are a copy of the specified value
/// Construct a Constant whose initial contents are a copy of the specified value
///
Constant(const NDArrayViewPtr& value, const std::wstring& name = L"")
: Constant(value, name, Internal::GenerateUid(VariableKind::Constant))
@ -1946,7 +2035,7 @@ namespace CNTK
/// and the user is responsible for ensuring that the contents of the inputs and outputs are unchanged until after any uses of the BackPropState instance
/// for backpropagating gradients through this function.
///
CNTK_API virtual BackPropStatePtr Forward(const std::unordered_map<Variable, ValuePtr>& arguments,
virtual BackPropStatePtr Forward(const std::unordered_map<Variable, ValuePtr>& arguments,
std::unordered_map<Variable, ValuePtr>& outputs,
const DeviceDescriptor& computeDevice = DeviceDescriptor::UseDefaultDevice(),
const std::unordered_set<Variable>& outputsToRetainBackwardStateFor = {}) = 0;
@ -1960,10 +2049,15 @@ namespace CNTK
/// The 'state' parameter is a BackPropState instance obtained from a previous call to the Forward method on 'this' Function for the
/// computation that this gradient backpropagation corresponds to.
///
CNTK_API virtual void Backward(const BackPropStatePtr& state,
virtual void Backward(const BackPropStatePtr& state,
const std::unordered_map<Variable, ValuePtr>& rootGradientValues,
std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) = 0;
///
/// Returns the name of the operation that this Function denotes
///
virtual const std::wstring& OpName() = 0;
public:
// Optional overrides
@ -2074,6 +2168,11 @@ namespace CNTK
///
CNTK_API FunctionPtr ReplacePlaceholder(const Variable& placeholderReplacement);
///
/// Restore the model's parameters from a saved model file
///
CNTK_API void RestoreFromLegacyModel(const std::wstring& modelFilePath);
private:
template <typename VariableType, typename FilterFunction>
@ -2144,9 +2243,6 @@ namespace CNTK
}
}
private:
void RestoreFromLegacyModel(const std::wstring& modelFilePath);
private:
std::vector<Variable> m_inputs;
@ -2501,7 +2597,7 @@ namespace CNTK
/// E.g. When creating a classification model, typically the CrossEntropy loss Function and the ClassificationError Function comprise the two roots
/// of the computation graph which can be "Combine"d to create a single Function with 2 outputs; viz. CrossEntropy loss and ClassificationError output.
///
CNTK_API FunctionPtr Combine(const std::vector<FunctionPtr>& operands, const std::wstring& name = L"");
CNTK_API FunctionPtr Combine(const std::vector<Variable>& operands, const std::wstring& name = L"");
namespace Sequence
{
@ -2535,12 +2631,14 @@ namespace CNTK
///
class Learner : public std::enable_shared_from_this<Learner>
{
static const std::wstring LearningRateAttributeName;
public:
//
// Method to update the parameters associated with this learner. By returning false, this method indicates that
// learning has stopped for all of the parameters associated with this learner
//
CNTK_API virtual bool Update(const std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount) = 0;
virtual bool Update(const std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount) = 0;
///
/// Returns the set of parameters associated with this learner.
@ -2552,32 +2650,50 @@ namespace CNTK
///
// TODO: move the following two methods into ISerializable interface, make
// Learner (and all other entities that need checkpointing capability) implement it.
CNTK_API virtual Dictionary GetCheckpointState() const { return Dictionary(); }
virtual Dictionary GetCheckpointState() const
{
    // The base implementation checkpoints only the current learning rate,
    // stored under the LearningRateAttributeName key.
    Dictionary state;
    state[LearningRateAttributeName] = m_learningRate;
    return state;
}
///
/// Optionally overridable method to restore the learner's state from a previous checkpoint.
///
CNTK_API virtual void RestoreFromCheckpoint(const Dictionary& /*checkpoint*/) {}
virtual void RestoreFromCheckpoint(const Dictionary& checkpoint)
{
    // A checkpoint without a learning-rate entry leaves the current rate untouched.
    if (!checkpoint.Contains(LearningRateAttributeName))
        return;

    m_learningRate = checkpoint[LearningRateAttributeName].Value<double>();
}
///
/// Destruct this Learner.
///
virtual ~Learner() {}
///
/// Replaces the learning rate stored in this learner with the specified value.
///
virtual void ResetLearningRate(double learningRate) { m_learningRate = learningRate; }
///
/// Returns the learning rate currently stored in this learner.
///
virtual double LearningRate() const { return m_learningRate; }
protected:
Learner(const std::vector<Parameter>& parameters)
: m_parameters(parameters.begin(), parameters.end())
Learner(const std::vector<Parameter>& parameters, double learningRate)
: m_parameters(parameters.begin(), parameters.end()), m_learningRate(learningRate)
{}
std::unordered_set<Parameter> m_parameters;
double m_learningRate;
};
///
/// A collection of key-value pairs that represents training parameter schedule in
/// terms of the number of processed samples.
/// This class provides a number of convenience constructors to allow easy conversion
/// from a single value, a vector of values and a list of pairs to the training schedule.
/// A collection of key-value pairs that represents a training parameter schedule in
/// terms of the number of processed samples (e.g., learning rate and momentum schedules).
/// This class is designed to simplify Learner's factory methods and provides a number of
/// convenience constructors to allow easy conversion from a single value, a vector of values
/// and a list of pairs to the training schedule. For example, a learning rate schedule
/// { { 10, 0.5 }, { 100, 0.3 }, { 20, 0.2 } } indicates that the rate of 0.5 should be
/// used for the first 10 units (equivalently, samples if the default unit = 1 is used)
/// followed by 0.3 for the next 100 units, and then 0.2 for the remaining 20 units or
/// until the end of training if it takes longer.
///
template <typename T>
class TrainingParameterSchedule
@ -2586,31 +2702,14 @@ namespace CNTK
///
/// Create a schedule with a constant parameter value.
///
TrainingParameterSchedule(T value)
: m_schedule({ std::make_pair(0, value) }), m_unit(1)
{}
CNTK_API TrainingParameterSchedule(T value);
///
/// Create a schedule where the parameter changes its value every 'unit' samples:
/// schedule[0] is used for the first 'unit' samples, schedule[1] -- for the second,
/// and so on. The last value is then used repeatedly until the end of training.
///
TrainingParameterSchedule(const std::vector<T>& schedule, size_t unit = 1)
: m_unit(unit)
{
// TODO: 0 will be used to mean "the entire sweep"
if (unit == 0)
RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
if (schedule.size() == 0)
RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
size_t i = 1;
for (const auto& value : schedule)
{
m_schedule[m_unit * i++] = value;
}
}
CNTK_API TrainingParameterSchedule(const std::vector<T>& schedule, size_t unit = 1);
///
/// Create a schedule using the list of key-value pairs, where the key specifies
@ -2621,74 +2720,104 @@ namespace CNTK
/// '0.1' is used for the second 200 samples, after which the value is switched
/// to '0.005'.
///
TrainingParameterSchedule(const std::initializer_list<std::pair<const size_t, T>>& schedule, size_t unit = 1)
: m_unit(unit)
{
// TODO: 0 will be used to mean "the entire sweep"
if (unit == 0)
RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
if (schedule.size() == 0)
RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
size_t i = 0;
for (const auto& it : schedule)
{
if (it.first == 0)
RuntimeError("TrainingParameterSchedule::constructor : unit count cannot be 0.");
i += it.first;
m_schedule[m_unit * i] = it.second;
}
}
CNTK_API TrainingParameterSchedule(const std::vector<std::pair<size_t, T>>& schedule, size_t unit = 1);
///
/// Returns a value corresponding to the absolute sample count from the beginning of training.
///
CNTK_API const T& operator[](size_t samleCount) const;
CNTK_API virtual const T& operator[](size_t sampleCount) const;
CNTK_API virtual ~TrainingParameterSchedule();
CNTK_API TrainingParameterSchedule(const TrainingParameterSchedule<T>&);
CNTK_API TrainingParameterSchedule(TrainingParameterSchedule<T>&&);
CNTK_API TrainingParameterSchedule<T>& operator=(const TrainingParameterSchedule<T>&);
CNTK_API TrainingParameterSchedule<T>& operator=(TrainingParameterSchedule<T>&&);
private:
CNTK_API void ConstructSchedule(const std::vector<std::pair<size_t, T>>& schedule);
protected:
std::map<size_t, T> m_schedule;
size_t m_unit;
};
typedef TrainingParameterSchedule<double> LearningRatesPerSample;
typedef TrainingParameterSchedule<double> MomentumsPerSample;
typedef TrainingParameterSchedule<double> MomentumValuesPerSample;
///
/// This class allows specifying momentum as a time constant, in place of momentum per sample, in
/// all of the Learner factory methods. The specified values are then automatically converted into
/// per-sample values.
///
class MomentumValuesAsTimeConstants: public MomentumValuesPerSample
{
public:
// Each constructor forwards its arguments unchanged to the matching MomentumValuesPerSample
// (TrainingParameterSchedule<double>) constructor and then calls ConvertToPerSampleValues().

// Schedule with a single constant time-constant value.
MomentumValuesAsTimeConstants(double value)
: MomentumValuesPerSample(value)
{
ConvertToPerSampleValues();
}
// Schedule given as a vector of values together with a 'unit'.
MomentumValuesAsTimeConstants(const std::vector<double>& schedule, size_t unit = 1)
: MomentumValuesPerSample(schedule, unit)
{
ConvertToPerSampleValues();
}
// Schedule given as a list of (count, value) pairs together with a 'unit'.
MomentumValuesAsTimeConstants(const std::vector<std::pair<size_t, double>>& schedule, size_t unit = 1)
: MomentumValuesPerSample(schedule, unit)
{
ConvertToPerSampleValues();
}
private:
// Defined out of line (exported via CNTK_API); the conversion itself is not visible in this header.
CNTK_API void ConvertToPerSampleValues();
};
///
/// Options that affect parameter updates and are applicable to all standard learners.
/// Every field carries a default, so a default-constructed instance can be passed as-is.
///
struct AdditionalLearningOptions
{
    /// L1 regularization weight; defaults to 0.0.
    double l1RegularizationWeight{ 0.0 };

    /// L2 regularization weight; defaults to 0.0.
    double l2RegularizationWeight{ 0.0 };

    /// Standard deviation of the injected Gaussian noise; defaults to 0.0.
    double gaussianNoiseInjectionStdDev{ 0.0 };

    /// Per-sample gradient clipping threshold; defaults to +infinity.
    double gradientClippingThresholdPerSample{ std::numeric_limits<double>::infinity() };

    /// Whether gradient clipping is done with truncation; defaults to true.
    bool gradientClippingWithTruncation{ true };
};
///
/// Create an instance of the CNTK built-in SGD learner.
///
CNTK_API LearnerPtr SGDLearner(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true);
AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
///
/// Create an instance of the CNTK built-in Momentum SGD learner.
///
CNTK_API LearnerPtr MomentumSGDLearner(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true);
const MomentumValuesPerSample& momentumValues,
AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
///
/// Create an instance of the CNTK built-in Nesterov's accelerated SGD learner.
///
CNTK_API LearnerPtr NesterovLearner(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true);
const MomentumValuesPerSample& momentumValues,
AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
///
/// Create an instance of the CNTK built-in FSAdaGrad (improved AdaGrad) learner.
///
CNTK_API LearnerPtr FSAdaGradLearner(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true);
const MomentumValuesPerSample& momentumValues,
const double targetAdagradAvDenom = 0.0025, // 1/400 magic constant
const size_t adagradT = 2 * 3600 * 100,
AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
///
/// Create an instance of the CNTK built-in AdaGrad learner.
@ -2696,8 +2825,7 @@ namespace CNTK
CNTK_API LearnerPtr AdaGradLearner(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier = true,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true);
AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
///
/// Create an instance of the CNTK built-in RMSProp learner.
@ -2710,8 +2838,7 @@ namespace CNTK
double max,
double min,
bool needAveMultiplier = true,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true);
AdditionalLearningOptions additionalOptions = AdditionalLearningOptions());
///
/// Trainer is the top-level abstraction responsible for the orchestration of the training of a model
@ -2805,7 +2932,9 @@ namespace CNTK
FunctionPtr m_combinedTrainingFunction;
FunctionPtr m_model;
FunctionPtr m_lossFunction;
FunctionPtr m_aggregatedLossFunction;
FunctionPtr m_evaluationFunction;
FunctionPtr m_aggregatedEvaluationFunction;
std::unordered_set<LearnerPtr> m_parameterLearners;
@ -2930,11 +3059,14 @@ namespace CNTK
///
/// Instantiate the CNTK built-in text format minibatch source
///
inline MinibatchSourcePtr TextFormatMinibatchSource(const std::wstring& dataFilePath, const std::vector<StreamConfiguration>& streamConfigs, size_t epochSize = SIZE_MAX)
inline MinibatchSourcePtr TextFormatMinibatchSource(const std::wstring& dataFilePath, const std::vector<StreamConfiguration>& streamConfigs, size_t epochSize = SIZE_MAX, bool randomize = true)
{
CNTK::Dictionary minibatchSourceConfiguration;
minibatchSourceConfiguration[L"epochSize"] = epochSize;
if (randomize)
minibatchSourceConfiguration[L"randomize"] = true;
CNTK::Dictionary deserializerConfiguration;
deserializerConfiguration[L"type"] = L"CNTKTextFormatDeserializer";
deserializerConfiguration[L"file"] = dataFilePath;
@ -2968,4 +3100,17 @@ namespace CNTK
CNTK_API void ComputeInputPerDimMeansAndInvStdDevs(const MinibatchSourcePtr& minibatchSource,
std::unordered_map<StreamInformation, std::pair<NDArrayViewPtr, NDArrayViewPtr>>& computedMeanAndVariances,
const DeviceDescriptor& device = DeviceDescriptor::CPUDevice());
///
/// Set the process-wide maximum number of CPU threads to be used by any individual compute operation.
/// Note that this is a per-compute-operation limit: if the user runs multiple compute operations concurrently
/// by launching multiple threads and performing a compute operation inside each of them, every one of those
/// concurrently executing operations may use up to the specified number of CPU threads.
///
CNTK_API void SetMaxNumCPUThreads(size_t numCPUThreads);
///
/// Returns the current process-wide setting for maximum number of CPU threads to be used by any individual compute operation
///
CNTK_API size_t GetMaxNumCPUThreads();
}

Просмотреть файл

@ -186,9 +186,6 @@ namespace CNTK
namespace Internal
{
// Create a new Function instance which just passes through specified list of 'operands'.
CNTK_API FunctionPtr Combine(const std::vector<Variable>& operands, const std::wstring& name = L"");
CNTK_API FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name = L"");
CNTK_API FunctionPtr PackedIndex(const Variable& operand, const Variable& index, const std::wstring& name = L"");
CNTK_API FunctionPtr GatherPacked(const Variable& operand, const Variable& packedIndex, const std::wstring& name = L"");
@ -202,10 +199,15 @@ namespace CNTK
CNTK_API size_t NewUniqueId();
// Internal hooks for testing and higher-level bindings
// These should not be directly called by C++ API users
CNTK_API void EnableReversingTensorShapesInErrorMessages();
bool IsReversingTensorShapesInErrorMessagesEnabled();
CNTK_API void AlwaysAllowSettingDefaultDevice();
bool IsSettingDefaultDeviceAlwaysAllowed();
CNTK_API void DisableAutomaticUnpackingOfPackedValues();
bool IsAutomaticUnpackingOfPackedValuesDisabled();
}
}

Просмотреть файл

@ -36,8 +36,11 @@ namespace CNTK
if (node->IsLeaf())
{
std::wstring varUid, varName;
if (node->Is<InputValueBase<ElementType>>())
{
std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), VariableKind::Input);
bool isSparse = node->Is<SparseInputValue<ElementType>>();
if (node->HasMBLayout())
{
@ -45,12 +48,12 @@ namespace CNTK
auto inputNodeInternalDynamicAxisName = node->GetMBLayout()->GetAxisName();
std::vector<Axis> inputVarDynamicAxes = DynamicAxesFromInternalDynamicAxisName(inputNodeInternalDynamicAxisName);
var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, node->NodeName(), inputVarDynamicAxes, node->NodeName());
var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, varName, inputVarDynamicAxes, varUid);
}
else
{
// TODO: Allow creating inputs without a dynamic axis
LogicError("Found InputNode with no dynamic axis which is currently unsupported");
LogicError("Found InputNode with no dynamic axes which is currently unsupported");
}
}
else if (node->Is<LearnableParameter<ElementType>>())
@ -60,9 +63,15 @@ namespace CNTK
auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorViewShape(node->GetSampleLayout()));
NDArrayViewPtr value = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), varShape, false, tensorView);
if (isConstant)
var = Constant(value, node->NodeName(), node->NodeName());
{
std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), VariableKind::Constant);
var = Constant(value, varName, varUid);
}
else
var = Parameter(value, node->NodeName(), node->NodeName());
{
std::tie(varUid, varName) = UidAndNameFromCNTKInternalNodeName(node->NodeName(), VariableKind::Parameter);
var = Parameter(value, varName, varUid);
}
}
else
LogicError("CNTK::LoadLegacyModel: Unsupported legacy CNTK node named '%S'", node->NodeName().c_str());
@ -299,17 +308,17 @@ namespace CNTK
std::unordered_map<ComputationNodeBasePtr, Variable> nodeToVariableMap;
std::unordered_map<Variable, Variable> placeholderReplacements;
std::unordered_set<FunctionPtr> allPrimitiveFunctions;
std::vector<FunctionPtr> rootFunctions;
std::vector<Variable> rootVariables;
auto& networkRoots = net->RootNodes();
for (auto& rootNode : networkRoots)
{
if (rootNode->IsLeaf())
continue;
rootFunctions.push_back(GetVariable<ElementType>(rootNode, nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions).Owner());
rootVariables.push_back(GetVariable<ElementType>(rootNode, nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions).Owner());
}
auto rootComposite = Combine(rootFunctions);
auto rootComposite = Combine(rootVariables);
rootComposite->ReplacePlaceholders(placeholderReplacements);
return rootComposite;
@ -350,8 +359,5 @@ namespace CNTK
}
computationNetwork->Save(modelFile);
if (!compositeFunction->NetworkMatricesAllocated())
compositeFunction->PurgeComputationNetwork();
}
}

Просмотреть файл

@ -5,9 +5,12 @@
#include "stdafx.h"
#include "CNTKLibrary.h"
#include "Utils.h"
#include "BestGpu.h"
#include <mutex>
#include <algorithm>
#include <CPUMatrix.h> // For CPUMatrix::SetNumThreads
#include <thread>
namespace CNTK
{
@ -40,6 +43,17 @@ namespace CNTK
{
return s_alwaysAllowSettingDefaultDevice.load();
}
// Process-wide flag recording whether DisableAutomaticUnpackingOfPackedValues() has been called.
// It starts out false; nothing in this block ever switches it back off.
std::atomic<bool> s_disableAutomaticUnpackingOfPackedValues{ false };

// Internal hook: turns the flag on. Calling it more than once has no further effect.
void DisableAutomaticUnpackingOfPackedValues()
{
    s_disableAutomaticUnpackingOfPackedValues.store(true, std::memory_order_seq_cst);
}

// Internal hook: reports whether DisableAutomaticUnpackingOfPackedValues() has been called.
bool IsAutomaticUnpackingOfPackedValuesDisabled()
{
    const bool isDisabled = s_disableAutomaticUnpackingOfPackedValues.load(std::memory_order_seq_cst);
    return isDisabled;
}
}
/*static*/ std::atomic<bool> DeviceDescriptor::s_defaultDeviceFrozen(false);
@ -62,7 +76,7 @@ namespace CNTK
auto selectedDevice = DefaultDevice();
if (!alreadyFrozen)
{
Microsoft::MSR::CNTK::OnDeviceSelected(selectedDevice.Id());
Microsoft::MSR::CNTK::OnDeviceSelected(AsCNTKImplDeviceId(selectedDevice));
}
return selectedDevice;
}
@ -74,7 +88,7 @@ namespace CNTK
RuntimeError("Process wide default device cannot be changed since it has been frozen by being implicitly used as the default device in a CNTK API call");
std::call_once(s_initDefaultDeviceFlag, []{
// do nothing. This will set the flag above, in case the DefaultDevice() was never called before.
// do nothing. This will set the flag above, in case when DefaultDevice() was never called before.
});
s_defaultDevice.reset(new DeviceDescriptor(newDefaultDevice));
@ -82,7 +96,9 @@ namespace CNTK
/*static*/ DeviceDescriptor DeviceDescriptor::BestDevice()
{
    // TODO: add unit tests for this.
    // TODO: BestDevice remains locked if UseDefaultDevice is never executed
    // or if BestDevice() is invoked after UseDefaultDevice().
    // Should we do anything about it?

    // Ask the CNTK implementation layer which device it considers best.
    // A negative id is mapped to the CPU device; any other id names a GPU.
    const auto bestDeviceId = Microsoft::MSR::CNTK::GetBestDevice();
    if (bestDeviceId < 0)
        return DeviceDescriptor::CPUDevice();

    return DeviceDescriptor::GPUDevice(bestDeviceId);
}
@ -140,6 +156,12 @@ namespace CNTK
return s_defaultBatchAxis;
}
/*static*/ const Axis& Axis::AllStaticAxes()
{
    // Single shared Axis instance built from the "all static axes" sentinel index.
    // It is constructed on the first call and the same object is returned on every call.
    static const Axis allStaticAxesInstance(SentinelStaticAxisIndexValueForAllStaticAxes);

    return allStaticAxesInstance;
}
/*static*/ Axis Axis::NewUniqueDynamicAxis(const std::wstring& axisNamePrefix, bool isOrderedDynamicAxis /*= true*/)
{
return Axis(s_uniqueDynamicAxisNames.NewUniqueDynamicAxisName(axisNamePrefix), isOrderedDynamicAxis);
@ -149,4 +171,16 @@ namespace CNTK
{
s_uniqueDynamicAxisNames.RegisterAxisName(axisName);
}
std::atomic<size_t> s_maxNumCPUThreads(std::thread::hardware_concurrency());
void SetMaxNumCPUThreads(size_t numCPUThreads)
{
s_maxNumCPUThreads.store(numCPUThreads);
Microsoft::MSR::CNTK::CPUMatrix<float>::SetNumThreads((int)numCPUThreads);
}
size_t GetMaxNumCPUThreads()
{
return s_maxNumCPUThreads.load();
}
}

Просмотреть файл

@ -16,6 +16,7 @@
#include "InputAndParamNodes.h"
#include "NonlinearityNodes.h"
#include "RecurrentNodes.h"
#include "Value.h"
using namespace Microsoft::MSR::CNTK;
@ -81,7 +82,7 @@ namespace CNTK
}
}
auto outputsUsingNewInputs = PrimitiveFunction::GetOutputVariables(primitiveFunction->OpType(), m_inputs, this, primitiveFunction->Attributes());
auto outputsUsingNewInputs = PrimitiveFunction::GetOutputVariables(primitiveFunction->OpType(), m_inputs, this, primitiveFunction->Attributes(), primitiveFunction->Name());
auto currentOutputs = Outputs();
for (size_t i = 0; i < currentOutputs.size(); ++i)
{
@ -197,7 +198,7 @@ namespace CNTK
{
auto placeholders = Placeholders();
if (placeholders.size() != 1)
InvalidArgument("Function::ReplacePlaceholders called with a single replacement variable but this Function has none or more than 1 placeholders");
InvalidArgument("Function::ReplacePlaceholders called with a single replacement variable but this Function has %d placeholders", (int)placeholders.size());
return ReplacePlaceholders({ { *(placeholders.begin()), placeholderReplacement } });
}
@ -413,26 +414,52 @@ namespace CNTK
/*static*/ const std::wstring PrimitiveFunction::AttributeNameEndIndex = L"endIndex";
/*static*/ const std::wstring PrimitiveFunction::AttributeNameReductionOpName = L"reductionOpName";
/*static*/ std::vector<Variable> PrimitiveFunction::GetOutputVariables(PrimitiveOpType op, const std::vector<Variable>& inputs, Function* owner, const Dictionary& functionConfig)
/*static*/ std::vector<Variable> PrimitiveFunction::GetOutputVariables(PrimitiveOpType op, const std::vector<Variable>& inputs, Function* owner, const Dictionary& functionConfig, const std::wstring& functionName)
{
if (op == PrimitiveOpType::Combine)
return inputs;
// TODO: We are just using the primary operand's DataType as output node's DataType. Is this always correct?
// We use the first non-constant input operand's DataType as the output DataType
// In case there are no non-constant known DataTypes, we just pick the first known operand DataType
// Also, all the known DataTypes of operands should match except for constants where coercion is allowed
DataType firstKnownInputDataType = DataType::Unknown;
DataType outputDataType = DataType::Unknown;
NDShape outputShape;
size_t i = 0;
while ((outputDataType == DataType::Unknown) && (i < inputs.size()))
outputDataType = inputs[i++].GetDataType();
while (i < inputs.size())
{
auto input = inputs[i++];
auto inputDataType = input.GetDataType();
if (inputDataType != DataType::Unknown)
{
if (firstKnownInputDataType == DataType::Unknown)
firstKnownInputDataType = inputDataType;
if (outputDataType == DataType::Unknown)
InvalidArgument("The DataType of all the input operands of primitive function with op type %s are unknown", PrimitiveOpTypeName(op));
{
if (!input.IsConstant())
outputDataType = inputDataType;
}
else
{
// The DataType of all operands should match except for Constants where we allow coercion
if ((inputDataType != DataType::Unknown) && (inputDataType != outputDataType) && !input.IsConstant())
InvalidArgument("Primitive function with op type %S has operands with different DataTypes %s and %s", PrimitiveOpTypeName(op).c_str(), DataTypeName(outputDataType), DataTypeName(inputDataType));
}
}
}
if (outputDataType == DataType::Unknown)
outputDataType = firstKnownInputDataType;
if (outputDataType == DataType::Unknown)
InvalidArgument("The DataType of all the input operands of primitive function with op type %S are unknown", PrimitiveOpTypeName(op).c_str());
// We currently require that the inputs' dynamic axes if any match
std::vector<Axis> outputDynamicAxes;
if ((op == PrimitiveOpType::SumAll) || (op == PrimitiveOpType::SquaredError) || (op == PrimitiveOpType::CrossEntropyWithSoftmax) || (op == PrimitiveOpType::ClassificationError))
outputDynamicAxes = std::vector<Axis>({});
if (op == PrimitiveOpType::Where)
else if (op == PrimitiveOpType::Where)
outputDynamicAxes = AsVector<Axis>(functionConfig[PrimitiveFunction::AttributeNameNewDynamicAxes].Value<std::vector<DictionaryValue>>());
else if (op == PrimitiveOpType::ScatterPacked)
outputDynamicAxes = inputs[2].DynamicAxes();
@ -598,18 +625,18 @@ namespace CNTK
assert(inputs.size() == 2);
if ((inputs[0].Shape().Rank() > 2) || ((inputs[0].Shape().Rank() > 1) && (inputs[0].Shape()[1] != 1)))
InvalidArgument("The shape of input operands for the %s operation should have at most one axis", PrimitiveOpTypeName(op));
InvalidArgument("The shape of input operands for the %S operation should have at most one axis", PrimitiveOpTypeName(op).c_str());
auto predictionShape = inputs[0].Shape();
auto labelsShape = inputs[1].Shape();
if (predictionShape != labelsShape)
RuntimeError("Prediction output operand's shape %S is incompatible with label operand's shape %S for the %s operation", AsStringForErrorReporting(predictionShape).c_str(), AsStringForErrorReporting(labelsShape).c_str(), PrimitiveOpTypeName(op));
RuntimeError("Prediction output operand's shape %S is incompatible with label operand's shape %S for the %S operation", AsStringForErrorReporting(predictionShape).c_str(), AsStringForErrorReporting(labelsShape).c_str(), PrimitiveOpTypeName(op).c_str());
std::vector<size_t> reductionAxes;
for (size_t i = 0; i < inputs[0].Shape().Rank(); ++i)
reductionAxes.push_back(i);
outputShape = ReductionOpOutputShape(op, predictionShape, reductionAxes);
outputShape = ReductionOpOutputShape(op, predictionShape, reductionAxes, /*preserveReductionAxes =*/ false);
break;
}
case PrimitiveOpType::PastValue:
@ -630,9 +657,13 @@ namespace CNTK
{
assert(inputs.size() == 1);
auto reductionAxis = functionConfig[PrimitiveFunction::AttributeNameAxis].Value<Axis>();
if (reductionAxis == Axis::AllStaticAxes())
outputShape = {};
else
{
std::vector<size_t> reductionAxes = { reductionAxis.StaticAxisIndex() };
outputShape = ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes);
outputShape = ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes, /*preserveReductionAxes =*/ true);
}
break;
}
case PrimitiveOpType::BatchNormalization:
@ -664,9 +695,6 @@ namespace CNTK
if (inputs[0].DynamicAxes().empty() || inputs[1].DynamicAxes().empty() || inputs[2].DynamicAxes().empty())
InvalidArgument("ScatterPacked requires all its operands to have dynamic axes");
if (inputs[1].Shape().Rank() != 1)
InvalidArgument("ScatterPacked requires the packedIndex operand to be a scalar sequence");
outputShape = inputs[0].Shape();
break;
}
@ -686,13 +714,14 @@ namespace CNTK
break;
}
default:
LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op));
LogicError("Specified op %S not yet supported", PrimitiveOpTypeName(op).c_str());
break;
}
return{ OutputVariable(outputShape, outputDataType, owner, outputDynamicAxes) };
return{ OutputVariable(outputShape, outputDataType, owner, outputDynamicAxes, functionName.empty() ? L"" : functionName + L"_output") };
}
/*static*/ const std::wstring CompositeFunction::CompositeFunctionOpName = L"CompositeFunctionOpName";
/*static*/ std::atomic<unsigned int> CompositeFunction::s_nextAutoGeneratedDynamicAxis(0);
// Names of the dynamic axes in the CNTK engine for some special sets of dynamic axes values
@ -746,9 +775,10 @@ namespace CNTK
variableToNodeMap[variable] = nullptr;
std::shared_ptr<ComputationNode<ElementType>> computationNodePtr;
auto internalNodeName = CNTKInternalNodeNameFromUidAndName(variable.Uid(), variable.Name());
if (variable.IsParameter() || variable.IsConstant())
{
computationNodePtr = builder.CreateLearnableParameter(variable.Uid(), AsTensorShape(variable.Shape()));
computationNodePtr = builder.CreateLearnableParameter(internalNodeName, AsTensorShape(variable.Shape()));
network->InitLearnableParameters(computationNodePtr, L"fixedValue", 0); // must call this to follow protocol; can overwrite later
if (!variable.NeedsGradient())
computationNodePtr->SetLearningRateMultiplier(0.0);
@ -786,9 +816,9 @@ namespace CNTK
network->AddNodeToNetAndAttachInputs(New<DynamicAxisNode<ElementType>>(network->GetDeviceId(), internalDynamicAxisName), {});
if (IsSparseInput(variable))
computationNodePtr = builder.CreateSparseInputNode(variable.Uid(), AsTensorShape(variable.Shape()), internalDynamicAxisName);
computationNodePtr = builder.CreateSparseInputNode(internalNodeName, AsTensorShape(variable.Shape()), internalDynamicAxisName);
else
computationNodePtr = builder.CreateInputNode(variable.Uid(), AsTensorShape(variable.Shape()), internalDynamicAxisName);
computationNodePtr = builder.CreateInputNode(internalNodeName, AsTensorShape(variable.Shape()), internalDynamicAxisName);
if (variable.NeedsGradient())
{
@ -1033,7 +1063,7 @@ namespace CNTK
break;
}
default:
LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op));
LogicError("Specified op %S not yet supported", PrimitiveOpTypeName(op).c_str());
break;
}
@ -1047,8 +1077,8 @@ namespace CNTK
{
auto computationNodeExpectedInputCount = computationNodePtr->As<INumInputs>()->GetExpectedNumInputs();
if (computationNodeExpectedInputCount != inputNodesBasePtrs.size())
LogicError("Input count mismatch: The Primitive function for op %s has %d inputs while the corresponding ComputationNode has %d inputs",
PrimitiveOpTypeName(op),
LogicError("Input count mismatch: The Primitive function for op %S has %d inputs while the corresponding ComputationNode has %d inputs",
PrimitiveOpTypeName(op).c_str(),
(int)inputNodesBasePtrs.size(),
(int)computationNodeExpectedInputCount);
}
@ -1128,8 +1158,8 @@ namespace CNTK
// TODO: Support changing the device across different invocations of the forward method on a Function instance
if (AsDeviceDescriptor(m_computationNetwork->GetDeviceId()) != device)
LogicError("Changing device across different Forward calls on a CNTK composite Function is currently unsupported");
}
}
else
{
m_computationNetwork = std::make_shared<ComputationNetwork>(AsCNTKImplDeviceId(device));
@ -1140,20 +1170,11 @@ namespace CNTK
if (backpropRoots.size() > 1)
LogicError("More than one backprop roots is currently unsupported");
ComputationNodeBasePtr backpropRootNode;
// Now recursively create the network in a top-down fashion
auto rootFunction = RootFunction();
auto rootFunctionOutputs = rootFunction->Outputs();
std::vector<ComputationNodeBasePtr> forwardRootNodes;
for (auto rootOutput : rootFunctionOutputs)
{
auto currentRootNode = GetNode(rootOutput, m_computationNetwork, builder, m_variableToNodeMap, m_isVariableRootMap);
forwardRootNodes.push_back(currentRootNode);
if (backpropRoots.find(rootOutput) != backpropRoots.end())
backpropRootNode = m_variableToNodeMap[rootOutput];
}
GetNode(rootOutput, m_computationNetwork, builder, m_variableToNodeMap, m_isVariableRootMap);
// If any of the function outputs is not a root node, we need to explicitly add it to the 'output' group of the ComputationNetwork
for (auto rootOutput : rootFunctionOutputs)
@ -1212,8 +1233,26 @@ namespace CNTK
}
}
}
}
if (!m_networkMatricesAllocated && allocateNetworkMatrices)
{
ComputationNodeBasePtr backpropRootNode;
// Now recursively create the network in a top-down fashion
auto rootFunction = RootFunction();
auto rootFunctionOutputs = rootFunction->Outputs();
std::vector<ComputationNodeBasePtr> forwardRootNodes;
for (auto rootOutput : rootFunctionOutputs)
{
auto currentRootNode = m_variableToNodeMap[rootOutput];
forwardRootNodes.push_back(currentRootNode);
if (m_currentBackpropRoots.find(rootOutput) != m_currentBackpropRoots.end())
backpropRootNode = currentRootNode;
}
if (allocateNetworkMatrices)
m_computationNetwork->AllocateAllMatrices(forwardRootNodes, {}, backpropRootNode);
m_networkMatricesAllocated = allocateNetworkMatrices;
}
@ -1224,107 +1263,180 @@ namespace CNTK
template <typename ElementType>
/*static*/ std::pair<std::shared_ptr<const Matrix<ElementType>>, MBLayoutPtr> CompositeFunction::GetCNTKImplMatrixAndMBLayoutFromValueObject(Variable var, const ValuePtr& value)
{
if (var.GetDataType() != value->Data()->GetDataType())
LogicError("The Variable's DataType %s does not match the corresponding Value's DataType %s", DataTypeName(var.GetDataType()), DataTypeName(value->Data()->GetDataType()));
if (var.GetDataType() != value->GetDataType())
LogicError("The Variable's DataType %s does not match the corresponding Value's DataType %s", DataTypeName(var.GetDataType()), DataTypeName(value->GetDataType()));
if (AsDataType<ElementType>() != value->Data()->GetDataType())
LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(value->Data()->GetDataType()));
if (AsDataType<ElementType>() != value->GetDataType())
LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(value->GetDataType()));
// TODO: Is supplying dense data for an Input variable tagged as sparse, a fatal error?
if (IsSparseInput(var) && !value->Data()->IsSparse())
if (IsSparseInput(var) && !value->IsSparse())
InvalidArgument("Dense input data supplied for a sparse input Variable");
if (IsSparseInput(var) && (value->Data()->GetStorageFormat() != StorageFormat::SparseCSC))
if (IsSparseInput(var) && (value->GetStorageFormat() != StorageFormat::SparseCSC))
InvalidArgument("Sparse Input data must be in SparseCSC format");
if (value->Data()->Shape().Rank() == var.Shape().Rank())
return{ value->Data()->GetMatrix<ElementType>(), nullptr };
auto varShape = var.Shape();
auto valueShape = value->Shape();
if (valueShape.Rank() < varShape.Rank())
InvalidArgument("Value's rank should be >= the Variable's rank");
if (value->Data()->Shape().Rank() < (var.Shape().Rank() + var.DynamicAxes().size()))
InvalidArgument("Value's number of axes should be larger than the Variable's number of axes by number of dynamic axes");
size_t maxAddionalValueAxes = std::max<size_t>(2, var.DynamicAxes().size());
if (valueShape.Rank() > (varShape.Rank() + maxAddionalValueAxes))
InvalidArgument("Value rank should be larger than the Variable%S rank at most by number of dynamic axes", ParanthesizedName(var.Name()).c_str());
if (valueShape.SubShape(0, varShape.Rank()) != varShape)
{
InvalidArgument("The %s dimensions of the Value shape %S do not match the shape of the variable %S that it corresponds to!",
Internal::IsReversingTensorShapesInErrorMessagesEnabled() ? "trailing" : "leading",
AsStringForErrorReporting(valueShape).c_str(),
AsStringForErrorReporting(varShape).c_str());
}
if (var.DynamicAxes().empty())
return{ value->Data()->GetMatrix<ElementType>(), nullptr };
if (var.DynamicAxes().size() > 2)
LogicError("More than 2 dynamic axis for a variable is currently unsupported");
if (value->Data()->Shape().SubShape(0, var.Shape().Rank()) != var.Shape())
{
InvalidArgument("The %s dimensions of the Value shape %S do not match the shape of the variable %S that it corresponds to!",
Internal::IsReversingTensorShapesInErrorMessagesEnabled() ? "trailing" : "leading",
AsStringForErrorReporting(value->Data()->Shape()).c_str(),
AsStringForErrorReporting(var.Shape()).c_str());
}
size_t maxNumTimeSteps = value->Data()->Shape()[var.Shape().Rank()];
size_t numSequences = value->Data()->Shape()[var.Shape().Rank() + 1];
auto mask = value->Mask();
if ((mask != nullptr) && ((var.Shape().Rank() + mask->Shape().Rank()) != value->Data()->Shape().Rank()))
if ((mask != nullptr) && ((varShape.Rank() + mask->Shape().Rank()) != valueShape.Rank()))
InvalidArgument("Invalid Value object; the sum of the rank of the mask and data does not equal the Variable's rank + number of dynamic axes");
if ((numSequences == 1) || (maxNumTimeSteps == 1))
{
// The data need not be shuffled
std::shared_ptr<const Matrix<ElementType>> matrixData = value->Data()->GetMatrix<ElementType>(var.Shape().Rank());
auto layout = std::make_shared<MBLayout>();
if (maxNumTimeSteps == 1)
layout->InitAsFrameMode(numSequences);
else
{
layout->Init(1, maxNumTimeSteps);
layout->AddSequence(0, 0, 0, maxNumTimeSteps);
}
auto getNumTimeStepsAndSequencesFunc = [](const NDShape& maskShape) {
size_t maxNumTimeSteps = 1;
size_t numSequences = 1;
if (maskShape.Rank() > 0)
maxNumTimeSteps = maskShape[0];
if (maskShape.Rank() > 1)
numSequences = maskShape[1];
return std::pair<size_t, size_t>(maxNumTimeSteps, numSequences);
};
size_t maxNumTimeSteps, numSequences;
std::tie(maxNumTimeSteps, numSequences) = getNumTimeStepsAndSequencesFunc(valueShape.SubShape(varShape.Rank()));
auto getSequenceStartsAndLengthsFunc = [&getNumTimeStepsAndSequencesFunc](const NDMaskPtr& mask, std::vector<ptrdiff_t>& sequenceBeginIndices, std::vector<size_t>& sequenceLengths) {
auto cpuMask = mask;
if (mask->Device() != DeviceDescriptor::CPUDevice())
cpuMask = mask->DeepClone(DeviceDescriptor::CPUDevice());
const MaskKind* maskBuffer = cpuMask->DataBuffer();
size_t maxNumTimeSteps, numSequences;
std::tie(maxNumTimeSteps, numSequences) = getNumTimeStepsAndSequencesFunc(mask->Shape());
return{ matrixData , layout};
}
else
{
std::vector<size_t> sequenceLengths(numSequences, maxNumTimeSteps);
if (mask != nullptr)
{
// Determine the sequence lengths from the mask
std::unique_ptr<char[]> maskData(mask->GetMatrix()->CopyToArray());
for (size_t i = 0; i < numSequences; ++i)
{
size_t currentSequenceLength = 0;
MaskKind firstMaskEntry = maskBuffer[i * maxNumTimeSteps];
if (firstMaskEntry == MaskKind::SequenceBegin)
sequenceBeginIndices[i] = 0;
else if (firstMaskEntry == MaskKind::Valid)
sequenceBeginIndices[i] = Microsoft::MSR::CNTK::SentinelValueIndicatingUnspecifedSequenceBeginIdx;
else
LogicError("The first entry of a mask should be Valid or SequenceBegin");
size_t currentSequenceLength = 1;
bool currentSequenceEndAlreadyFound = false;
for (size_t j = 0; j < maxNumTimeSteps; ++j)
for (size_t j = 1; j < maxNumTimeSteps; ++j)
{
if (maskData[(i * maxNumTimeSteps) + j] == 1)
if (maskBuffer[(i * maxNumTimeSteps) + j] == MaskKind::Invalid)
currentSequenceEndAlreadyFound = true;
else
{
if (currentSequenceEndAlreadyFound)
InvalidArgument("Invalid Value object; only trailing steps of a sequence can be masked");
currentSequenceLength++;
}
else
currentSequenceEndAlreadyFound = true;
}
sequenceLengths[i] = currentSequenceLength;
}
};
if ((numSequences == 1) || (maxNumTimeSteps == 1))
{
// The data need not be shuffled
std::shared_ptr<const Matrix<ElementType>> matrixData = value->Data()->GetMatrix<ElementType>(varShape.Rank());
auto layout = std::make_shared<MBLayout>();
if (!mask)
{
if (maxNumTimeSteps == 1)
layout->InitAsFrameMode(numSequences);
else
{
layout->Init(numSequences, maxNumTimeSteps);
layout->AddSequence(0, 0, 0, maxNumTimeSteps);
}
}
else
{
layout->Init(numSequences, maxNumTimeSteps);
std::vector<ptrdiff_t> sequenceBeginIndices(numSequences, 0);
std::vector<size_t> sequenceLengths(numSequences, maxNumTimeSteps);
getSequenceStartsAndLengthsFunc(mask, sequenceBeginIndices, sequenceLengths);
for (size_t i = 0; i < numSequences; ++i)
layout->AddSequence(i, i, sequenceBeginIndices[i], sequenceLengths[i]);
}
// The data needs to be rearranged since CNTK requires sequences to be interleaved across timesteps
std::vector<MBLayout::SequenceInfo> sequences;
for (size_t i = 0; i < numSequences; ++i)
sequences.push_back({ i, SIZE_MAX, 0, sequenceLengths[i]});
return{ matrixData , layout};
}
else
{
std::vector<ptrdiff_t> sequenceBeginIndices(numSequences, 0);
std::vector<size_t> sequenceLengths(numSequences, maxNumTimeSteps);
if (mask != nullptr)
getSequenceStartsAndLengthsFunc(mask, sequenceBeginIndices, sequenceLengths);
bool hasTruncatedSequences = std::find_if(sequenceBeginIndices.begin(), sequenceBeginIndices.end(), [](const int& val) { return (val < 0); }) != sequenceBeginIndices.end();
auto layout = std::make_shared<MBLayout>();
std::vector<std::pair<size_t, size_t>> placement;
if (!hasTruncatedSequences)
{
std::vector<MBLayout::SequenceInfo> sequences;
for (size_t i = 0; i < numSequences; ++i)
sequences.push_back({ i, SIZE_MAX, sequenceBeginIndices[i], sequenceLengths[i] });
std::vector<size_t> rowAllocations;
layout->InitAsPackedSequences(sequences, placement, rowAllocations);
}
else
{
layout->Init(numSequences, maxNumTimeSteps);
// We cannot pack as some of the sequences are truncated and thus all sequences have to be
// kept in their original parallel streams
placement.resize(numSequences);
for (size_t i = 0; i < numSequences; ++i)
{
layout->AddSequence(i, i, sequenceBeginIndices[i], sequenceLengths[i]);
// Add the gap if there is one
if (sequenceLengths[i] < maxNumTimeSteps)
layout->AddSequence(GAP_SEQUENCE_ID, i, sequenceLengths[i], maxNumTimeSteps);
placement[i] = std::make_pair(i, 0);
}
}
if (maxNumTimeSteps != layout->GetNumTimeSteps())
LogicError("The number of time steps in the packed MBLayout does not match the longest sequence's length in the Value object");
if (numSequences != layout->GetNumSequences())
LogicError("The number of sequences in the packed MBLayout does not match the sequence count in the Value object");
// The data needs to be rearranged since CNTK requires sequences to be interleaved across timesteps
// Now generate the gather indices
auto matrixData = std::make_shared<Matrix<ElementType>>(var.Shape().TotalSize(),
auto matrixData = std::make_shared<Matrix<ElementType>>(varShape.TotalSize(),
layout->GetNumCols(),
AsCNTKImplDeviceId(value->Data()->Device()),
value->Data()->IsSparse() ? MatrixType::SPARSE : MatrixType::DENSE,
AsCNTKImplMatrixFormat(value->Data()->GetStorageFormat()));
AsCNTKImplDeviceId(value->Device()),
value->IsSparse() ? MatrixType::SPARSE : MatrixType::DENSE,
AsCNTKImplMatrixFormat(value->GetStorageFormat()));
std::vector<size_t> sequencesShorterThanLongestSequence;
for (size_t i = 0; i < numSequences; ++i)
@ -1342,8 +1454,8 @@ namespace CNTK
gatherIndicesVector[((targetStartIdxInParallelStream + j) * layout->GetNumParallelSequences()) + targetParallelStreamIdx] = (ElementType)((i * maxNumTimeSteps) + j);
}
auto gatherIdxMatrix = std::make_shared<Matrix<ElementType>>(1, layout->GetNumCols(), gatherIndicesVector.data(), AsCNTKImplDeviceId(value->Data()->Device()));
matrixData->DoGatherColumnsOf(0, *gatherIdxMatrix, *(value->Data()->GetMatrix<ElementType>(var.Shape().Rank())), 1);
auto gatherIdxMatrix = std::make_shared<Matrix<ElementType>>(1, layout->GetNumCols(), gatherIndicesVector.data(), AsCNTKImplDeviceId(value->Device()));
matrixData->DoGatherColumnsOf(0, *gatherIdxMatrix, *(value->Data()->GetMatrix<ElementType>(varShape.Rank())), 1);
return{ matrixData, layout };
}
}
@ -1352,53 +1464,111 @@ namespace CNTK
/*static*/ ValuePtr CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(const NDShape& sampleShape, const Matrix<ElementType>& matrix, const MBLayoutPtr& layout, bool readOnly /*= true*/)
{
NDShape valueDataShape = sampleShape;
size_t maxNumTimeSteps = 1;
size_t numSequences = 1;
if (layout != nullptr)
valueDataShape = valueDataShape.AppendShape({ layout->GetNumTimeSteps(), layout->GetNumSequences() });
{
maxNumTimeSteps = layout->GetNumTimeSteps();
numSequences = layout->GetNumSequences();
valueDataShape = valueDataShape.AppendShape({ maxNumTimeSteps, numSequences });
}
auto createMaskFunc = [](const MBLayoutPtr& layout, const DeviceDescriptor& device, std::vector<size_t>& sequencesShorterThanLongestSequence) {
std::vector<bool> sequenceBeginFlags;
std::vector<size_t> sequenceLengths;
sequencesShorterThanLongestSequence.clear();
size_t maxNumTimeSteps = layout->GetNumTimeSteps();
size_t numSequences = layout->GetNumSequences();
auto& layoutSequences = layout->GetAllSequences();
size_t sequenceIdx = 0;
bool allSequencesStartInThisMB = true;
bool allSequencesSameLength = true;
for (auto sequenceInfo : layoutSequences)
{
if (sequenceInfo.seqId != GAP_SEQUENCE_ID)
{
auto currentSequenceBeginIdx = std::max<ptrdiff_t>(0, sequenceInfo.tBegin);
auto currentSequenceEndIdx = std::min(maxNumTimeSteps, sequenceInfo.tEnd);
auto currentSequenceLength = (currentSequenceEndIdx - currentSequenceBeginIdx);
auto isCurrentSequenceBeginningInsideThisMB = sequenceInfo.tBegin >= 0;
allSequencesStartInThisMB = allSequencesStartInThisMB && isCurrentSequenceBeginningInsideThisMB;
allSequencesSameLength = allSequencesSameLength && (currentSequenceLength == maxNumTimeSteps);
sequenceBeginFlags.push_back(isCurrentSequenceBeginningInsideThisMB);
sequenceLengths.push_back(currentSequenceLength);
if (currentSequenceLength != maxNumTimeSteps)
sequencesShorterThanLongestSequence.push_back(sequenceIdx);
sequenceIdx++;
}
}
if (!allSequencesStartInThisMB && (numSequences != layout->GetNumParallelSequences()))
LogicError("Cannot create an unpacked Value object from packed data where one or more sequences are truncated");
bool maskNeeded = !allSequencesSameLength || !allSequencesStartInThisMB;
NDMaskPtr mask;
if (maskNeeded)
{
mask = MakeSharedObject<NDMask>(NDShape({ maxNumTimeSteps, numSequences }), device);
for (size_t i = 0; i < numSequences; ++i)
if (sequenceBeginFlags[i])
mask->MarkSequenceBegin({0, i});
for (auto shortSequenceIdx : sequencesShorterThanLongestSequence)
mask->InvalidateSection({ sequenceLengths[shortSequenceIdx], shortSequenceIdx }, { NDShape::InferredDimension, 1 });
}
return mask;
};
// No data shuffling needed if no layout or the layout has just one time-step or just one sequence
if ((layout == nullptr) || (layout->GetNumTimeSteps() == 1) || (layout->GetNumSequences() == 1))
std::vector<size_t> sequencesShorterThanLongestSequence;
if ((maxNumTimeSteps == 1) || (numSequences == 1))
{
// Just create a view over the existing matrix itself
auto tensorView = new TensorView<ElementType>(std::make_shared<Matrix<ElementType>>(matrix.AsReference()), AsTensorViewShape(valueDataShape));
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), valueDataShape, readOnly, tensorView);
if (layout == nullptr)
return MakeSharedObject<Value>(data);
else
{
auto mask = createMaskFunc(layout, AsDeviceDescriptor(matrix.GetDeviceId()), sequencesShorterThanLongestSequence);
return MakeSharedObject<Value>(data, mask);
}
}
if (layout->GetNumCols() != matrix.GetNumCols())
LogicError("Bad MBLayout: The number of columns in the MBLayout does not match the number of columns in the data matrix!");
size_t maxNumTimeSteps = layout->GetNumTimeSteps();
size_t numSequences = layout->GetNumSequences();
std::vector<size_t> sequenceLengths;
auto& layoutSequences = layout->GetAllSequences();
for (auto sequenceInfo : layoutSequences)
{
if (sequenceInfo.seqId != GAP_SEQUENCE_ID)
sequenceLengths.push_back(sequenceInfo.GetNumTimeSteps());
}
// Reshuffle the data to unpack and un-interleave the CNTK-format packed data
// Now generate the scatter indices
auto shuffledMatrixData = std::make_shared<Matrix<ElementType>>(matrix.GetNumRows(), maxNumTimeSteps * numSequences, matrix.GetDeviceId(), matrix.GetMatrixType(), matrix.GetFormat());
std::vector<size_t> sequencesShorterThanLongestSequence;
for (size_t i = 0; i < numSequences; ++i)
if (sequenceLengths[i] != maxNumTimeSteps)
sequencesShorterThanLongestSequence.push_back(i);
auto mask = createMaskFunc(layout, AsDeviceDescriptor(matrix.GetDeviceId()), sequencesShorterThanLongestSequence);
// Set the target location of all gaps to be the last step of the first sequence that is shorter than the longest sequence in the batch
size_t targetColIdxForInvalidColumns = sequencesShorterThanLongestSequence.empty() ? 0 : (((sequencesShorterThanLongestSequence[0] + 1) * maxNumTimeSteps) - 1);
std::vector<ElementType> scatterIndicesVector(layout->GetNumCols(), (ElementType)targetColIdxForInvalidColumns);
size_t i = 0;
auto& layoutSequences = layout->GetAllSequences();
for (auto sequenceInfo : layoutSequences)
{
if (sequenceInfo.seqId != GAP_SEQUENCE_ID)
{
size_t targetParallelStreamIdx = sequenceInfo.s;
size_t targetStartIdxInParallelStream = sequenceInfo.tBegin;
for (size_t j = 0; j < sequenceInfo.GetNumTimeSteps(); ++j)
scatterIndicesVector[((targetStartIdxInParallelStream + j) * layout->GetNumParallelSequences()) + targetParallelStreamIdx] = (ElementType)((i * maxNumTimeSteps) + j);
auto currentSequenceBeginIdx = std::max<ptrdiff_t>(0, sequenceInfo.tBegin);
auto currentSequenceEndIdx = std::min(maxNumTimeSteps, sequenceInfo.tEnd);
size_t currentSequenceLength = (currentSequenceEndIdx - currentSequenceBeginIdx);
for (size_t j = 0; j < currentSequenceLength; ++j)
scatterIndicesVector[((currentSequenceBeginIdx + j) * layout->GetNumParallelSequences()) + targetParallelStreamIdx] = (ElementType)((i * maxNumTimeSteps) + j);
i++;
}
@ -1407,17 +1577,6 @@ namespace CNTK
auto scatterIdxMatrix = std::make_shared<Matrix<ElementType>>(1, layout->GetNumCols(), scatterIndicesVector.data(), matrix.GetDeviceId());
shuffledMatrixData->DoScatterColumnsOf(0, *scatterIdxMatrix, matrix, 1);
// Create the mask if needed
NDMaskPtr mask;
if (!sequencesShorterThanLongestSequence.empty())
{
mask = MakeSharedObject<NDMask>(NDShape({ maxNumTimeSteps, numSequences }), AsDeviceDescriptor(matrix.GetDeviceId()));
for (auto shortSequenceIdx : sequencesShorterThanLongestSequence)
{
mask->MaskSection({ sequenceLengths[shortSequenceIdx], shortSequenceIdx }, { NDShape::InferredDimension, 1 });
}
}
auto tensorView = new TensorView<ElementType>(shuffledMatrixData, AsTensorViewShape(valueDataShape));
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(shuffledMatrixData->GetFormat()), valueDataShape, readOnly, tensorView);
return MakeSharedObject<Value>(data, mask);
@ -1441,7 +1600,13 @@ namespace CNTK
template <typename ElementType>
/*static*/ void CompositeFunction::PopulateComputationNodeValue(const std::pair<Variable, ValuePtr>& variableValue, ComputationNodeBasePtr& computationNode)
{
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableValue.first, variableValue.second);
std::pair<std::shared_ptr<const Matrix<ElementType>>, MBLayoutPtr> CNTKMatrixAndMBLayout;
auto packedValue = dynamic_cast<PackedValue*>(variableValue.second.get());
if (packedValue)
CNTKMatrixAndMBLayout = packedValue->PackedData<ElementType>();
else
CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableValue.first, variableValue.second);
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
auto& nodeData = computationNode->As<ComputationNode<ElementType>>()->Value();
@ -1464,7 +1629,7 @@ namespace CNTK
ValuePtr argumentValue = arguments.at(argument);
MBLayoutPtr layout;
switch (argumentValue->Data()->GetDataType())
switch (argumentValue->GetDataType())
{
case DataType::Float:
PopulateComputationNodeValue<float>({ argument, argumentValue }, argumentComputationNode);
@ -1473,7 +1638,7 @@ namespace CNTK
PopulateComputationNodeValue<double>({ argument, argumentValue }, argumentComputationNode);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(argumentValue->Data()->GetDataType()));
LogicError("Unsupported DataType %s", DataTypeName(argumentValue->GetDataType()));
break;
}
}
@ -1484,7 +1649,13 @@ namespace CNTK
template <typename ElementType>
/*static*/ void CompositeFunction::PopulateComputationNodeGradient(const std::pair<Variable, ValuePtr>& variableGradient, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode)
{
auto CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableGradient.first, variableGradient.second);
std::pair<std::shared_ptr<const Matrix<ElementType>>, MBLayoutPtr> CNTKMatrixAndMBLayout;
auto packedValue = dynamic_cast<PackedValue*>(variableGradient.second.get());
if (packedValue)
CNTKMatrixAndMBLayout = packedValue->PackedData<ElementType>();
else
CNTKMatrixAndMBLayout = GetCNTKImplMatrixAndMBLayoutFromValueObject<ElementType>(variableGradient.first, variableGradient.second);
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
auto nodeLayout = computationNode->GetMBLayout();
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
@ -1505,7 +1676,7 @@ namespace CNTK
auto outputComputationNode = m_variableToNodeMap[gradientVarValuePair.first];
ValuePtr gradientValue = gradientVarValuePair.second;
switch (gradientValue->Data()->GetDataType())
switch (gradientValue->GetDataType())
{
case DataType::Float:
PopulateComputationNodeGradient<float>(gradientVarValuePair, outputComputationNode);
@ -1514,7 +1685,7 @@ namespace CNTK
PopulateComputationNodeGradient<double>(gradientVarValuePair, outputComputationNode);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(gradientValue->Data()->GetDataType()));
LogicError("Unsupported DataType %s", DataTypeName(gradientValue->GetDataType()));
break;
}
}
@ -1547,23 +1718,32 @@ namespace CNTK
if (varValue != nullptr)
{
// TODO: The shape of the specified output Value object must match the actual output shape
if (varValue->Data()->Shape() != valueShape)
InvalidArgument("The shape %S of the specified Value object for %s does not match the actual shape %S", AsStringForErrorReporting(varValue->Data()->Shape()).c_str(), getGradient ? "gradient" : "output", AsStringForErrorReporting(valueShape).c_str());
if (varValue->Shape() != valueShape)
InvalidArgument("The shape %S of the specified Value object for %s does not match the actual shape %S", AsStringForErrorReporting(varValue->Shape()).c_str(), getGradient ? "gradient" : "output", AsStringForErrorReporting(valueShape).c_str());
}
ValuePtr nodeValue;
auto layout = computationNode->GetMBLayout();
switch (var.GetDataType())
{
case DataType::Float:
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(var,
getGradient ? computationNode->As<ComputationNode<float>>()->Gradient() : computationNode->As<ComputationNode<float>>()->Value(),
computationNode->GetMBLayout());
{
auto& matrix = getGradient ? computationNode->As<ComputationNode<float>>()->Gradient() : computationNode->As<ComputationNode<float>>()->Value();
if (varValue == nullptr)
nodeValue = MakeSharedObject<PackedValue>(var.Shape(), std::make_shared<Matrix<float>>(matrix.AsReference()), layout, /*readOnly =*/ false);
else
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(var, matrix, layout);
break;
}
case DataType::Double:
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(var,
getGradient ? computationNode->As<ComputationNode<double>>()->Gradient() : computationNode->As<ComputationNode<double>>()->Value(),
computationNode->GetMBLayout());
{
auto& matrix = getGradient ? computationNode->As<ComputationNode<double>>()->Gradient() : computationNode->As<ComputationNode<double>>()->Value();
if (varValue == nullptr)
nodeValue = MakeSharedObject<PackedValue>(var.Shape(), std::make_shared<Matrix<double>>(matrix.AsReference()), layout, /*readOnly =*/ false);
else
nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout<double>(var, matrix, layout);
break;
}
default:
LogicError("Unsupported DataType %s", DataTypeName(var.GetDataType()));
break;
@ -1605,6 +1785,20 @@ namespace CNTK
}
}
// Returns the argument Variables that the given output of this composite depends on.
// Results are memoized per output in m_perOutputVarArgumentDependencies. On a miss, the
// output's owner Function is wrapped in a new composite and that composite's Arguments()
// are stored and returned.
const std::vector<Variable>& CompositeFunction::GetArgumentDependencies(const Variable& output)
{
    assert(output.IsOutput());

    auto cachedEntry = m_perOutputVarArgumentDependencies.find(output);
    if (cachedEntry == m_perOutputVarArgumentDependencies.end())
    {
        // First request for this output: compute the dependencies once and cache them.
        auto ownerComposite = CompositeFunction::Create(output.Owner());
        cachedEntry = m_perOutputVarArgumentDependencies.emplace(output, ownerComposite->Arguments()).first;
    }

    return cachedEntry->second;
}
/*virtual*/ BackPropStatePtr CompositeFunction::Forward(const std::unordered_map<Variable, ValuePtr>& arguments,
std::unordered_map<Variable, ValuePtr>& outputs,
const DeviceDescriptor& computeDevice,
@ -1641,8 +1835,31 @@ namespace CNTK
else
InvalidArgument("Unsupported DataType %s", DataTypeName(dataType));
std::unordered_set<Variable> functionOutputs(this->Outputs().begin(), this->Outputs().end());
std::vector<ComputationNodeBasePtr> outputsToEvaluate;
std::unordered_set<Variable> requiredArguments;
for (auto outputVarValuePair : outputs)
{
// Ensure that only a subset of this function's outputs are being asked to be evaluated
if (functionOutputs.find(outputVarValuePair.first) == functionOutputs.end())
InvalidArgument("Requested output is not an Ouptut of the Function");
auto& requiredArgumentsForCurrentOutput = GetArgumentDependencies(outputVarValuePair.first);
requiredArguments.insert(requiredArgumentsForCurrentOutput.begin(), requiredArgumentsForCurrentOutput.end());
auto outputComputationNode = m_variableToNodeMap[outputVarValuePair.first];
outputsToEvaluate.push_back(outputComputationNode);
}
// TODO: Avoid copying the data when possible
// We should have argument values supplied for all required argument dependencies for the requested outputs
for (auto requiredArgument : requiredArguments)
{
if (arguments.find(requiredArgument) == arguments.end())
InvalidArgument("Function::Forward: Required argument's (%S) value that the requested output(s) depend on has not been provided", requiredArgument.Name().c_str());
}
// Feed data into the arguments of the network
PopulateNetworkInputs(arguments);
@ -1653,19 +1870,6 @@ namespace CNTK
for (auto& nodeIter : dropoutNodes)
nodeIter->SetEvalTimeStampOutdatedWrtAll();
std::unordered_set<Variable> functionOutputs(this->Outputs().begin(), this->Outputs().end());
std::vector<ComputationNodeBasePtr> outputsToEvaluate;
for (auto outputVarValuePair : outputs)
{
// Ensure that only a subset of this function's outputs are being asked to be evaluated
if (functionOutputs.find(outputVarValuePair.first) == functionOutputs.end())
InvalidArgument("Requested output is not an Ouptut of the Function");
auto outputComputationNode = m_variableToNodeMap[outputVarValuePair.first];
outputsToEvaluate.push_back(outputComputationNode);
}
// The 'outputsToRetainBackwardStateFor' nodes also need to be evaluated if not already specified in 'outputs'
for (auto rootVarForBackprop : outputsToRetainBackwardStateFor)
{
@ -1879,7 +2083,7 @@ namespace CNTK
newDynamicAxes.push_back(operandAxis);
}
return Internal::Gather(operand, flags, newDynamicAxes);
return Internal::Gather(operand, flags, newDynamicAxes, name);
}
FunctionPtr Dropout(const Variable& operand, double dropoutRate, const std::wstring& name /*= L""*/)
@ -1968,23 +2172,25 @@ namespace CNTK
// Builds the squared-error criterion Function between `prediction` and `targets`.
// `name` is forwarded to the Function that is returned.
// NOTE(review): this span holds two alternative bodies back to back; it reads like diff
// residue whose +/- markers were stripped. As literally written, the first `return` makes
// everything after it unreachable -- confirm which variant is the intended one.
FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name/* = L""*/)
{
// Variant A: a single primitive binary op of type PrimitiveOpType::SquaredError.
return BinaryOp(PrimitiveOpType::SquaredError, prediction, targets, Dictionary(), name);
// Variant B: composed from primitives -- (prediction - targets), multiplied element-wise by
// itself, then reduced with the op named by PrimitiveFunction::InternalSumReductionOpName
// over Axis::AllStaticAxes().
auto difference = Minus(prediction, targets);
auto squaredDifference = ElementTimes(difference, difference);
return Internal::ReduceElements(squaredDifference, PrimitiveFunction::InternalSumReductionOpName, Axis::AllStaticAxes(), name);
}
// Builds the cross-entropy-with-softmax criterion as
// ReduceLogSum(prediction, Axis(0)) - TransposeTimes(labels, prediction).
// NOTE(review): two alternative `return` statements appear back to back (looks like diff
// residue with the +/- markers stripped); as written only the first one is reachable.
// The first wraps the difference in ReduceSum and gives `name` to that reduction; the
// second returns the Minus node itself and gives `name` to it. Confirm which is intended.
FunctionPtr CrossEntropyWithSoftmax(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{
return ReduceSum(Minus(ReduceLogSum(prediction, Axis(0)), TransposeTimes(labels, prediction)), name);
return Minus(ReduceLogSum(prediction, Axis(0)), TransposeTimes(labels, prediction), name);
}
// Builds the classification-error criterion as
// 1 - TransposeTimes(labels, Hardmax(prediction)), where the constant 1 is a scalar of
// prediction's data type.
// NOTE(review): two alternative `return` statements appear back to back (looks like diff
// residue with the +/- markers stripped); as written only the first one is reachable.
// The first wraps the difference in ReduceSum and names the reduction; the second returns
// the Minus node directly and names it. Confirm which is intended.
FunctionPtr ClassificationError(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{
return ReduceSum(Minus(Constant::Scalar(prediction.GetDataType(), 1.0), TransposeTimes(labels, Hardmax(prediction))), name);
return Minus(Constant::Scalar(prediction.GetDataType(), 1.0), TransposeTimes(labels, Hardmax(prediction)), name);
}
FunctionPtr PastValue(const Variable& operand, const Variable& initialState, size_t offset, const std::wstring& name)
{
if (operand.DynamicAxes().size() != 2)
InvalidArgument("PastValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic sequence-axis");
InvalidArgument("PastValue can only be used for operands with exactly one dynamic sequence-axis and one dynamic batch axis");
auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameOffset] = DictionaryValue(offset);
@ -1994,7 +2200,7 @@ namespace CNTK
FunctionPtr FutureValue(const Variable& operand, const Variable& initialState, size_t offset, const std::wstring& name)
{
if (operand.DynamicAxes().size() != 2)
InvalidArgument("FutureValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic sequence-axis");
InvalidArgument("FutureValue can only be used for operands with exactly one dynamic sequence-axis and one dynamic batch axis");
auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameOffset] = DictionaryValue(offset);
@ -2035,7 +2241,7 @@ namespace CNTK
Constant meanVar(mean);
Constant invStdDevVar(invStdDev);
return ElementTimes(Minus(operand, meanVar), invStdDevVar);
return ElementTimes(Minus(operand, meanVar), invStdDevVar, name);
}
FunctionPtr Convolution(const Variable& convolutionMap,
@ -2049,6 +2255,12 @@ namespace CNTK
size_t maxTempMemSizeInSamples,
const std::wstring& name)
{
// Currently we require that the Convolution function's operand have a dynamic axis since otherwise
// the internal implementation incorrectly infers the batch axis dimension by picking up the first axis as
// the sample shape and considering the rest to be part of the batch axis
if (operand.DynamicAxes().empty())
LogicError("Convolution currently requires the main operand to have dynamic axes");
auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameStrides] = strides;
additionalProperties[PrimitiveFunction::AttributeNameSharing] = AsDictionaryValueVector(sharing);
@ -2129,16 +2341,18 @@ namespace CNTK
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Splice, operands, std::move(additionalProperties), name), name);
}
// Combine: groups several operands into one Function.
// NOTE(review): this span interleaves two versions of the function (it reads like diff
// residue with the +/- markers stripped), so it is not compilable as written:
//  - a FunctionPtr-based version that flattens every operand's Outputs() into `inputs`
//    and delegates to Internal::Combine(inputs, name);
//  - a Variable-based version that rejects duplicate operands with a LogicError and
//    creates a composite around a PrimitiveOpType::Combine primitive directly.
// Confirm which version is the live one before editing.
FunctionPtr Combine(const std::vector<FunctionPtr>& operands, const std::wstring& name/* = L""*/)
FunctionPtr Combine(const std::vector<Variable>& operands, const std::wstring& name /*= L""*/)
{
std::vector<Variable> inputs;
std::unordered_set<Variable> uniqueOperands;
for (auto operand : operands)
{
// FunctionPtr version: collect all outputs of the current operand Function.
auto currentFunctionOutputs = operand->Outputs();
std::copy(currentFunctionOutputs.begin(), currentFunctionOutputs.end(), std::back_inserter(inputs));
// Variable version: every operand may appear only once.
if (uniqueOperands.find(operand) != uniqueOperands.end())
LogicError("All operands specified to Combine must be unique");
uniqueOperands.insert(operand);
}
return Internal::Combine(inputs, name);
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Combine, operands, Dictionary(), name), name);
}
namespace Sequence
@ -2153,25 +2367,25 @@ namespace CNTK
// Verifies that `operand` is a sequence (VerifyIsSequence), then returns
// Internal::IsWithin(operand, 1).
// Presumably this flags the first step of each sequence -- confirm against Internal::IsWithin.
// NOTE(review): two alternative `return` statements appear back to back (looks like diff
// residue with the +/- markers stripped); only the second one forwards `name`, and as
// written only the first one is reachable.
FunctionPtr IsFirst(const Variable& operand, const std::wstring& name /*= L""*/)
{
VerifyIsSequence(operand);
return Internal::IsWithin(operand, 1);
return Internal::IsWithin(operand, 1, name);
}
// Verifies that `operand` is a sequence (VerifyIsSequence), then returns
// Internal::IsWithin(operand, -1).
// Presumably this flags the last step of each sequence -- confirm against Internal::IsWithin.
// NOTE(review): two alternative `return` statements appear back to back (looks like diff
// residue with the +/- markers stripped); only the second one forwards `name`, and as
// written only the first one is reachable.
FunctionPtr IsLast(const Variable& operand, const std::wstring& name /*= L""*/)
{
VerifyIsSequence(operand);
return Internal::IsWithin(operand, -1);
return Internal::IsWithin(operand, -1, name);
}
// Verifies that `operand` is a sequence, then slices it along its first dynamic axis
// (operand.DynamicAxes()[0]) with begin index 0 and end index 1.
// NOTE(review): two alternative `return` statements appear back to back (looks like diff
// residue with the +/- markers stripped); only the second one forwards `name`, and as
// written only the first one is reachable.
FunctionPtr First(const Variable& operand, const std::wstring& name /*= L""*/)
{
VerifyIsSequence(operand);
return Slice(operand, operand.DynamicAxes()[0], 0, 1);
return Slice(operand, operand.DynamicAxes()[0], 0, 1, name);
}
// Verifies that `operand` is a sequence, then slices it along its first dynamic axis
// (operand.DynamicAxes()[0]) with begin index -1 and end index 0.
// Presumably (-1, 0) selects the final step -- confirm against Slice's index convention.
// NOTE(review): two alternative `return` statements appear back to back (looks like diff
// residue with the +/- markers stripped); only the second one forwards `name`, and as
// written only the first one is reachable.
FunctionPtr Last(const Variable& operand, const std::wstring& name /*= L""*/)
{
VerifyIsSequence(operand);
return Slice(operand, operand.DynamicAxes()[0], -1, 0);
return Slice(operand, operand.DynamicAxes()[0], -1, 0, name);
}
std::vector<Axis> WhereOpDynamicAxes(const Variable& operand)
@ -2211,20 +2425,6 @@ namespace CNTK
namespace Internal
{
// Wraps the given operands in a composite Function whose root is a PrimitiveOpType::Combine
// primitive. Every operand must be distinct; a repeated operand is reported via LogicError.
FunctionPtr Combine(const std::vector<Variable>& operands, const std::wstring& name /*= L""*/)
{
    std::unordered_set<Variable> seenOperands;
    for (const Variable& operand : operands)
    {
        // insert() reports through .second whether the operand was newly added.
        const bool isNewOperand = seenOperands.insert(operand).second;
        if (!isNewOperand)
            LogicError("All operands specified to Combine must be unique");
    }

    auto combinePrimitive = MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Combine, operands, Dictionary(), name);
    return CompositeFunction::Create(combinePrimitive, name);
}
FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name /*= L""*/)
{
Sequence::VerifyIsSequence(operand);
@ -2266,14 +2466,8 @@ namespace CNTK
}
else
{
auto rowSliceFunc = Internal::Slice(operand, Axis(0), 0, 1);
auto result = Minus(rowSliceFunc, rowSliceFunc);
// Reduce away all but the static axis 0
for (size_t i = 1; i < result->Output().Shape().Rank(); ++i)
result = ReduceSum(result, Axis(i));
return result;
auto reduceAllStaticAxesFunc = Internal::ReduceElements(operand, PrimitiveFunction::InternalSumReductionOpName, Axis::AllStaticAxes());
return Minus(reduceAllStaticAxesFunc, reduceAllStaticAxesFunc);
}
}
@ -2286,12 +2480,12 @@ namespace CNTK
// Gathers from `operand` according to `condition`: Where(condition, newDynamicAxes) is turned
// into a packed index relative to `operand` (Internal::PackedIndex) and then passed to
// Internal::GatherPacked.
// NOTE(review): two alternative `return` statements appear back to back (looks like diff
// residue with the +/- markers stripped); only the second one forwards `name`, and as
// written only the first one is reachable.
FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/)
{
return Internal::GatherPacked(operand, Internal::PackedIndex(operand, Where(condition, newDynamicAxes)));
return Internal::GatherPacked(operand, Internal::PackedIndex(/*layout of*/ operand, Where(condition, newDynamicAxes)), name);
}
// Scatters `operand` according to `condition`: Where(condition, newDynamicAxes) is turned
// into a packed index (Internal::PackedIndex) and passed to Internal::ScatterPacked together
// with `condition`.
// NOTE(review): two alternative `return` statements appear back to back (looks like diff
// residue with the +/- markers stripped); as written only the first one is reachable.
// They are NOT equivalent: the first computes the packed index relative to `operand` and
// drops `name`, while the second computes it relative to `condition` and forwards `name`.
// Confirm which is intended.
FunctionPtr Scatter(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/)
{
return Internal::ScatterPacked(operand, Internal::PackedIndex(operand, Where(condition, newDynamicAxes)), condition);
return Internal::ScatterPacked(operand, Internal::PackedIndex(/*layout of*/ condition, Where(condition, newDynamicAxes)), /*layout of*/ condition, name);
}
FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name /*= L""*/)
@ -2308,7 +2502,7 @@ namespace CNTK
{
using namespace std::placeholders;
if (axis.IsStaticAxis())
if (axis.IsStaticAxis() || (axis == Axis::AllStaticAxes()))
{
auto additionalProperties = Dictionary();
additionalProperties[PrimitiveFunction::AttributeNameAxis] = axis;
@ -2332,7 +2526,7 @@ namespace CNTK
auto cumulativeSumFunction = reductionFunctor(prevAccumulatedValuesFunction, operand);
cumulativeSumFunction->ReplacePlaceholders({ { cumulativeSumFunctionPlaceholder, cumulativeSumFunction } });
return CNTK::Slice(cumulativeSumFunction, axis, -1, 0);
return CNTK::Slice(cumulativeSumFunction, axis, -1, 0, name);
}
}
}

Просмотреть файл

@ -77,54 +77,54 @@ namespace std
namespace CNTK
{
inline const char* PrimitiveOpTypeName(PrimitiveOpType opType)
inline const std::wstring& PrimitiveOpTypeName(PrimitiveOpType opType)
{
static const std::unordered_map<PrimitiveOpType, const char*> primitiveOpNames = {
{ PrimitiveOpType::Negate, "Negate" },
{ PrimitiveOpType::Sigmoid, "Sigmoid" },
{ PrimitiveOpType::Tanh, "Tanh" },
{ PrimitiveOpType::ReLU, "ReLU" },
{ PrimitiveOpType::Exp, "Exp" },
{ PrimitiveOpType::Log, "Log" },
{ PrimitiveOpType::Sqrt, "Sqrt" },
{ PrimitiveOpType::Floor, "Floor" },
{ PrimitiveOpType::Abs, "Abs" },
{ PrimitiveOpType::Reciprocal, "Reciprocal" },
{ PrimitiveOpType::Softmax, "Softmax" },
{ PrimitiveOpType::Hardmax, "Hardmax" },
{ PrimitiveOpType::TransposeAxes, "TransposeAxes" },
{ PrimitiveOpType::Where, "Where" },
{ PrimitiveOpType::Slice, "Slice" },
{ PrimitiveOpType::Dropout, "Dropout" },
{ PrimitiveOpType::Reshape, "Reshape" },
{ PrimitiveOpType::Pooling, "Pooling" },
{ PrimitiveOpType::SumAll, "SumAll" },
{ PrimitiveOpType::Plus, "Plus" },
{ PrimitiveOpType::Minus, "Minus" },
{ PrimitiveOpType::ElementTimes, "ElementTimes" },
{ PrimitiveOpType::Equal, "Equal" },
{ PrimitiveOpType::NotEqual, "NotEqual" },
{ PrimitiveOpType::Less, "Less" },
{ PrimitiveOpType::LessEqual, "LessEqual" },
{ PrimitiveOpType::Greater, "Greater" },
{ PrimitiveOpType::GreaterEqual, "GreaterEqual" },
{ PrimitiveOpType::PackedIndex, "PackedIndex" },
{ PrimitiveOpType::GatherPacked, "GatherPacked" },
{ PrimitiveOpType::ScatterPacked, "ScatterPacked" },
{ PrimitiveOpType::Times, "Times" },
{ PrimitiveOpType::TransposeTimes, "TransposeTimes" },
{ PrimitiveOpType::Convolution, "Convolution" },
{ PrimitiveOpType::SquaredError, "SquaredError" },
{ PrimitiveOpType::CrossEntropyWithSoftmax, "CrossEntropyWithSoftmax" },
{ PrimitiveOpType::ClassificationError, "ClassificationError" },
{ PrimitiveOpType::PastValue, "PastValue" },
{ PrimitiveOpType::FutureValue, "FutureValue" },
{ PrimitiveOpType::ReduceElements, "ReduceElements" },
{ PrimitiveOpType::BatchNormalization, "BatchNormalization" },
{ PrimitiveOpType::Clip, "Clip" },
{ PrimitiveOpType::Select, "Select" },
{ PrimitiveOpType::Splice, "Splice" },
{ PrimitiveOpType::Combine, "Combine" }
static const std::unordered_map<PrimitiveOpType, std::wstring> primitiveOpNames = {
{ PrimitiveOpType::Negate, L"Negate" },
{ PrimitiveOpType::Sigmoid, L"Sigmoid" },
{ PrimitiveOpType::Tanh, L"Tanh" },
{ PrimitiveOpType::ReLU, L"ReLU" },
{ PrimitiveOpType::Exp, L"Exp" },
{ PrimitiveOpType::Log, L"Log" },
{ PrimitiveOpType::Sqrt, L"Sqrt" },
{ PrimitiveOpType::Floor, L"Floor" },
{ PrimitiveOpType::Abs, L"Abs" },
{ PrimitiveOpType::Reciprocal, L"Reciprocal" },
{ PrimitiveOpType::Softmax, L"Softmax" },
{ PrimitiveOpType::Hardmax, L"Hardmax" },
{ PrimitiveOpType::TransposeAxes, L"TransposeAxes" },
{ PrimitiveOpType::Where, L"Where" },
{ PrimitiveOpType::Slice, L"Slice" },
{ PrimitiveOpType::Dropout, L"Dropout" },
{ PrimitiveOpType::Reshape, L"Reshape" },
{ PrimitiveOpType::Pooling, L"Pooling" },
{ PrimitiveOpType::SumAll, L"SumAll" },
{ PrimitiveOpType::Plus, L"Plus" },
{ PrimitiveOpType::Minus, L"Minus" },
{ PrimitiveOpType::ElementTimes, L"ElementTimes" },
{ PrimitiveOpType::Equal, L"Equal" },
{ PrimitiveOpType::NotEqual, L"NotEqual" },
{ PrimitiveOpType::Less, L"Less" },
{ PrimitiveOpType::LessEqual, L"LessEqual" },
{ PrimitiveOpType::Greater, L"Greater" },
{ PrimitiveOpType::GreaterEqual, L"GreaterEqual" },
{ PrimitiveOpType::PackedIndex, L"PackedIndex" },
{ PrimitiveOpType::GatherPacked, L"GatherPacked" },
{ PrimitiveOpType::ScatterPacked, L"ScatterPacked" },
{ PrimitiveOpType::Times, L"Times" },
{ PrimitiveOpType::TransposeTimes, L"TransposeTimes" },
{ PrimitiveOpType::Convolution, L"Convolution" },
{ PrimitiveOpType::SquaredError, L"SquaredError" },
{ PrimitiveOpType::CrossEntropyWithSoftmax, L"CrossEntropyWithSoftmax" },
{ PrimitiveOpType::ClassificationError, L"ClassificationError" },
{ PrimitiveOpType::PastValue, L"PastValue" },
{ PrimitiveOpType::FutureValue, L"FutureValue" },
{ PrimitiveOpType::ReduceElements, L"ReduceElements" },
{ PrimitiveOpType::BatchNormalization, L"BatchNormalization" },
{ PrimitiveOpType::Clip, L"Clip" },
{ PrimitiveOpType::Select, L"Select" },
{ PrimitiveOpType::Splice, L"Splice" },
{ PrimitiveOpType::Combine, L"Combine" },
};
if (primitiveOpNames.find(opType) == primitiveOpNames.end())
@ -220,7 +220,7 @@ namespace CNTK
public:
// Constructs a primitive Function of type `op` over `inputs`. The output Variables are
// computed by GetOutputVariables, `functionConfig` is handed to the Function base, and the
// op type is remembered in m_op.
// NOTE(review): two alternative initializer lists appear back to back (looks like diff
// residue with the +/- markers stripped), so this is not compilable as written. They differ
// only in whether `functionName` is also passed to GetOutputVariables.
// NOTE(review): `functionConfig` is read by GetOutputVariables and std::move'd in the same
// argument list. This is safe only if the Function base takes it by reference, so that the
// actual move happens inside the base constructor -- confirm.
PrimitiveFunction(PrimitiveOpType op, const std::vector<Variable>& inputs, Dictionary&& functionConfig, const std::wstring& functionName = L"")
: Function(inputs, GetOutputVariables(op, inputs, this, functionConfig), std::move(functionConfig), nullptr, functionName), m_op(op)
: Function(inputs, GetOutputVariables(op, inputs, this, functionConfig, functionName), std::move(functionConfig), nullptr, functionName), m_op(op)
{
}
@ -239,6 +239,11 @@ namespace CNTK
NOT_IMPLEMENTED;
}
// Returns the wide-string name of this primitive's operation by looking up OpType() via
// PrimitiveOpTypeName.
virtual const std::wstring& OpName() override
{
return PrimitiveOpTypeName(OpType());
}
public:
PrimitiveOpType OpType() const
{
@ -343,7 +348,10 @@ namespace CNTK
else
{
if (leftOperandShape[i] != rightOperandShape[i])
RuntimeError("Left operand's shape %S is not compatible with right operand's shape %S for the binary elementwise operation %s", AsStringForErrorReporting(leftOperandShape).c_str(), AsStringForErrorReporting(rightOperandShape).c_str(), PrimitiveOpTypeName(op));
RuntimeError("Left operand's shape %S is not compatible with right operand's shape %S for the binary elementwise operation %S",
AsStringForErrorReporting(leftOperandShape).c_str(),
AsStringForErrorReporting(rightOperandShape).c_str(),
PrimitiveOpTypeName(op).c_str());
outputDims[i] = leftOperandShape[i];
}
@ -399,19 +407,25 @@ namespace CNTK
return leftOperandShape.SubShape(0, outputRank).AppendShape(rightOperandShape.SubShape(numReductionAxes));
}
static NDShape ReductionOpOutputShape(PrimitiveOpType op, const NDShape& operandShape, const std::vector<size_t>& reductionAxes)
static NDShape ReductionOpOutputShape(PrimitiveOpType op, const NDShape& operandShape, const std::vector<size_t>& reductionAxes, bool preserveReductionAxes)
{
if (reductionAxes.size() > operandShape.Rank())
RuntimeError("The number of reduction axes %d exceeds the number of axes in the operand shape %S of the reduction operation %s", (int)reductionAxes.size(), AsStringForErrorReporting(operandShape).c_str(), PrimitiveOpTypeName(op));
RuntimeError("The number of reduction axes %d exceeds the rank in the operand shape %S of the reduction operation %S",
(int)reductionAxes.size(),
AsStringForErrorReporting(operandShape).c_str(),
PrimitiveOpTypeName(op).c_str());
size_t numOutputAxes = operandShape.Rank() - reductionAxes.size();
size_t numOutputAxes = operandShape.Rank() - (preserveReductionAxes ? 0 : reductionAxes.size());
std::vector<size_t> outputDims(numOutputAxes);
for (size_t i = 0, j = 0; i < operandShape.Rank(); ++i)
{
// Skip axes being reduced over
if (std::find(reductionAxes.begin(), reductionAxes.end(), i) != reductionAxes.end())
continue;
{
if (preserveReductionAxes)
outputDims[j++] = 1;
}
else
outputDims[j++] = operandShape[i];
}
@ -433,7 +447,7 @@ namespace CNTK
}
// TODO: Reconcile this with the ComputationNode::Validate functionality in core CNTK to avoid duplication of inference logic
static std::vector<Variable> GetOutputVariables(PrimitiveOpType op, const std::vector<Variable>& inputs, Function* owner, const Dictionary& functionConfig);
static std::vector<Variable> GetOutputVariables(PrimitiveOpType op, const std::vector<Variable>& inputs, Function* owner, const Dictionary& functionConfig, const std::wstring& functionName);
private:
PrimitiveOpType m_op;
@ -464,6 +478,7 @@ namespace CNTK
friend class Function;
friend class Trainer;
friend class CompositeMinibatchSource;
friend class PackedValue;
template <typename T, typename ...CtorArgTypes>
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
@ -476,6 +491,8 @@ namespace CNTK
static std::atomic<unsigned int> s_nextAutoGeneratedDynamicAxis;
static const std::wstring CompositeFunctionOpName;
public:
static const std::wstring InternalDefaultDynamicAxisName;
static const std::wstring InternalNoSequenceAxisName;
@ -506,15 +523,9 @@ namespace CNTK
const std::unordered_map<Variable, ValuePtr>& rootGradientValues,
std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) override;
public:
bool NetworkMatricesAllocated() const
virtual const std::wstring& OpName() override
{
return (m_computationNetwork != nullptr) && m_networkMatricesAllocated;
}
void PurgeComputationNetwork()
{
m_computationNetwork = nullptr;
return CompositeFunctionOpName;
}
private:
@ -523,7 +534,7 @@ namespace CNTK
std::unordered_set<Variable>& replacedPlaceholders) override;
CompositeFunction(const FunctionPtr& rootFunction, std::unordered_set<FunctionPtr>&& allPrimitiveFunctions, const std::wstring& name)
: Function({}, rootFunction->Outputs(), Dictionary(), rootFunction, name), m_allPrimitiveFunctions(std::move(allPrimitiveFunctions))
: Function({}, rootFunction->Outputs(), Dictionary(), rootFunction, name), m_allPrimitiveFunctions(std::move(allPrimitiveFunctions)), m_networkMatricesAllocated(false)
{}
std::vector<Variable> DetermineInputs() const
@ -597,6 +608,8 @@ namespace CNTK
template <typename ElementType>
static ValuePtr GetValueObjectFromCNTKImplMatrixAndMBLayout(Variable var, const Microsoft::MSR::CNTK::Matrix<ElementType>& matrix, const Microsoft::MSR::CNTK::MBLayoutPtr& layout, bool readOnly = true);
const std::vector<Variable>& GetArgumentDependencies(const Variable& output);
private:
// Set of all primitive functions in the graph underlying 'this' Function. Also keeps the primitive Function objects alive
@ -617,6 +630,8 @@ namespace CNTK
// the next 'Backward' call.
std::unordered_set<Variable> m_currentBackpropRoots;
std::unordered_map<Variable, std::vector<Variable>> m_perOutputVarArgumentDependencies;
bool m_networkMatricesAllocated;
};

Просмотреть файл

@ -26,6 +26,9 @@ using namespace std;
namespace CNTK
{
/*static*/ const std::wstring Learner::LearningRateAttributeName = L"learningRate";
/*static*/ const std::wstring LearnerBase::WasLearningRateResetAttributeName = L"wasLearningRateReset";
template <typename ElementType>
/*static*/ shared_ptr<const Matrix<ElementType>> LearnerBase::GetMatrix(const NDArrayViewPtr& arrayView)
{
@ -141,7 +144,7 @@ namespace CNTK
// L1 regularizer with proximal gradient descent method
if (m_additionalOptions.l1RegularizationWeight > 0)
{
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto learningRate = ElementType(LearningRate());
// multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize);
parameterValue->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight);
@ -156,17 +159,15 @@ namespace CNTK
LearnerBase::LearnerBase(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients /* = true */,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
: Learner(parameters),
m_learningRates(learningRates),
AdditionalLearningOptions additionalOptions,
bool allocateSmoothGradients /* = true */)
: Learner(parameters, learningRates[0]),
m_wasLearningRateReset(false),
m_learningRateSchedule(learningRates),
m_sampleCount(0),
m_minibatchCount(0)
m_minibatchCount(0),
m_additionalOptions(additionalOptions)
{
m_additionalOptions.gradientClippingThresholdPerSample = clippingThresholdPerSample;
m_additionalOptions.gradientClippingWithTruncation = gradientClippingWithTruncation;
for (const auto& parameter : parameters)
{
if (!allocateSmoothGradients)
@ -225,8 +226,8 @@ namespace CNTK
#endif
#if DUMPOUTPUT
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
auto learningRate = ElementType(LearningRate());
auto momentum = ElementType(MomentumValueForMB(m_momentumValues[m_sampleCount], trainingSampleCount));
LOGPRINTF(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n",
learningRate, momentum, trainingSampleCount);
LOGPRINTF(stderr, "GradUpdateType()=%s, GradientUpdateNoiseStd()=%0.8f\n",
@ -280,6 +281,9 @@ namespace CNTK
checkpoint[L"sampleCount"] = m_sampleCount;
checkpoint[L"minibatchCount"] = m_minibatchCount;
if (m_wasLearningRateReset)
checkpoint[WasLearningRateResetAttributeName] = m_wasLearningRateReset;
// TODO: should we also save learning rate schedule into the checkpoint?
// If that is the case, need to be able to override this method in subclasses
// and save momentum schedule as well.
@ -294,11 +298,19 @@ namespace CNTK
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
checkpoint[parameter.Uid()] = *smoothedGradientValue;
}
// Add the base Learner's checkpoint state
auto baseCheckpointState = Learner::GetCheckpointState();
checkpoint.Add(baseCheckpointState);
return checkpoint;
}
/*virtual*/ void LearnerBase::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/
{
// Restore the base learner's checkpoint state
Learner::RestoreFromCheckpoint(checkpoint);
m_sampleCount = checkpoint[L"sampleCount"].Value<size_t>();
m_minibatchCount = checkpoint[L"minibatchCount"].Value<size_t>();
@ -309,6 +321,9 @@ namespace CNTK
LogicError("Unsupported checkpoint version.");
}
if (checkpoint.Contains(WasLearningRateResetAttributeName))
m_wasLearningRateReset = checkpoint[WasLearningRateResetAttributeName].Value<bool>();
for (const auto& parameter : Parameters())
{
if (!checkpoint.Contains(parameter.Uid()))
@ -348,25 +363,16 @@ namespace CNTK
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
auto learningRate = ElementType(LearningRate());
auto momentum = ElementType(MomentumValueForMB(m_momentumValues[m_sampleCount], trainingSampleCount));
// TODO: break up the NormalGrad into 3 different functions, each with its own set of parameters
// Also, come up with a better name for NormalGrad (Default? Regular? Plain?).
// (one for vanilla SGD, the other for momentum SGD, and the third one for NAG).
smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix,
learningRate, momentum, m_useNesterovAcceleration);
}
LearnerAdaGrad::LearnerAdaGrad(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
: LearnerBase(parameters, learningRates, true, clippingThresholdPerSample, gradientClippingWithTruncation),
m_needAveMultiplier(needAveMultiplier)
{
}
/*virtual*/ void LearnerAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
{
UPDATE_FUNCTION;
@ -382,7 +388,7 @@ namespace CNTK
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto learningRate = ElementType(LearningRate());
auto aveMultiplier = smoothedGradientMatrix->Adagrad(*gradientMatrix, m_needAveMultiplier);
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
@ -390,16 +396,20 @@ namespace CNTK
LearnerFSAdaGrad::LearnerFSAdaGrad(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
: LearnerMomentumSGD(parameters, learningRates, momentums, /*allocateSmoothGradients*/ false, clippingThresholdPerSample, gradientClippingWithTruncation)
const MomentumValuesPerSample& momentumValues,
const double targetAdagradAvDenom,
const size_t adagradT,
AdditionalLearningOptions additionalOptions)
: LearnerMomentumSGD(parameters, learningRates, momentumValues, additionalOptions, /*allocateSmoothGradients*/ false),
m_targetAdagradAvDenom(targetAdagradAvDenom),
m_adagradT(adagradT)
{
for (const auto& parameter : parameters)
{
auto shape = GetMatrixShape(parameter);
NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], 2 * shape[1]});
m_smoothedGradientValues.insert(make_pair(parameter, view));
m_smoothedCounts.insert(make_pair(parameter, 0.0));
}
}
@ -411,36 +421,31 @@ namespace CNTK
template <typename ElementType>
void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
UNUSED(trainingSampleCount);
const auto& parameterValue = parameter.Value();
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = m_learningRates[m_sampleCount];
auto momentum = MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount);
auto learningRate = LearningRate();
auto momentum = MomentumValueForMB(m_momentumValues[m_sampleCount], trainingSampleCount);
const double targetAdagradAvDenom = 0.0025; // 1/400 magic constant
const size_t adagradT = 2 * 3600 * 100;
const double varMomentum = (exp(-1.0 * trainingSampleCount / m_adagradT));
double& smoothedCount = m_smoothedCounts.at(parameter);
const double varMomentum = (exp(-1.0 * trainingSampleCount / adagradT));
static double smoothedCount = 0; // BUGBUG!!! Carried over from Alexey's original implementation, needs to be fixed.
smoothedGradientMatrix->FSAdagradUpdate(trainingSampleCount, *gradientMatrix, *parameterMatrix, smoothedCount, learningRate, targetAdagradAvDenom, momentum, varMomentum);
smoothedGradientMatrix->FSAdagradUpdate(trainingSampleCount, *gradientMatrix, *parameterMatrix, smoothedCount, learningRate, m_targetAdagradAvDenom, momentum, varMomentum);
}
LearnerRMSProp::LearnerRMSProp(const vector<Parameter>& parameters, const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
: LearnerBase(parameters, learningRates, /*allocateSmoothGradients*/ false, clippingThresholdPerSample, gradientClippingWithTruncation),
LearnerRMSProp::LearnerRMSProp(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min,
bool needAveMultiplier,
AdditionalLearningOptions additionalOptions)
: LearnerBase(parameters, learningRates, additionalOptions, /*allocateSmoothGradients*/ false),
m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min), m_needAveMultiplier(needAveMultiplier)
{
for (const auto& parameter : parameters)
{
// When needAveMultiplier == true, CPU and GPU implementations of RMSProp require different number of columns.
// TODO: verify that this is correct.
size_t factor = 3;
if (needAveMultiplier && parameter.Value()->Device().Type() == DeviceKind::GPU)
{
@ -469,12 +474,15 @@ namespace CNTK
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto learningRate = ElementType(LearningRate());
auto aveMultiplier = smoothedGradientMatrix->RmsProp(*gradientMatrix,
ElementType(m_gamma), ElementType(m_inc),
ElementType(m_max), ElementType(m_dec),
ElementType(m_min), m_needAveMultiplier);
ElementType(m_gamma),
ElementType(m_inc),
ElementType(m_max),
ElementType(m_dec),
ElementType(m_min),
m_needAveMultiplier);
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
}
@ -484,54 +492,51 @@ namespace CNTK
LearnerPtr SGDLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
{
return MakeSharedObject<LearnerSGD>(parameters, learningRates, true, clippingThresholdPerSample, gradientClippingWithTruncation);
return MakeSharedObject<LearnerSGD>(parameters, learningRates, additionalOptions);
}
LearnerPtr MomentumSGDLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
const MomentumValuesPerSample& momentumValues,
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
{
return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRates, momentums, true, clippingThresholdPerSample, gradientClippingWithTruncation);
return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRates, momentumValues, additionalOptions);
}
LearnerPtr NesterovLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
const MomentumValuesPerSample& momentumValues,
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
{
return MakeSharedObject<LearnerNesterov>(parameters, learningRates, momentums, clippingThresholdPerSample, gradientClippingWithTruncation);
return MakeSharedObject<LearnerNesterov>(parameters, learningRates, momentumValues, additionalOptions);
}
LearnerPtr FSAdaGradLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
const MomentumValuesPerSample& momentumValues,
const double targetAdagradAvDenom /*= 0.0025*/,
const size_t adagradT /*= 2 * 3600 * 100*/,
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
{
return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRates, momentums, clippingThresholdPerSample, gradientClippingWithTruncation);
return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRates, momentumValues, targetAdagradAvDenom, adagradT, additionalOptions);
}
LearnerPtr AdaGradLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier /*= true*/,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
{
return MakeSharedObject<LearnerAdaGrad>(parameters, learningRates, needAveMultiplier, clippingThresholdPerSample, gradientClippingWithTruncation);
return MakeSharedObject<LearnerAdaGrad>(parameters, learningRates, needAveMultiplier, additionalOptions);
}
LearnerPtr RMSPropLearner(const vector<Parameter>& parameters, const LearningRatesPerSample& learningRates,
LearnerPtr RMSPropLearner(const vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min,
bool needAveMultiplier /*= true*/,
double clippingThresholdPerSample /*= std::numeric_limits<double>::infinity()*/,
bool gradientClippingWithTruncation /*= true*/)
AdditionalLearningOptions additionalOptions /*= AdditionalLearningOptions()*/)
{
return MakeSharedObject<LearnerRMSProp>(parameters, learningRates, gamma, inc, dec, max, min, needAveMultiplier, clippingThresholdPerSample, gradientClippingWithTruncation);
return MakeSharedObject<LearnerRMSProp>(parameters, learningRates, gamma, inc, dec, max, min, needAveMultiplier, additionalOptions);
}
}

Просмотреть файл

@ -3,29 +3,21 @@
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#pragma once
#include "stdafx.h"
#include "CNTKLibrary.h"
#include <numeric>
namespace CNTK
{
// TODO: Move this to Trainer along with Pre-, PostProcess and ClipGradient.
// Extra knobs shared by every standard learner. Once configured, the values are
// kept unchanged for the whole lifetime of the learner that received them.
struct AdditionalLearningOptions
{
    // L1 regularization weight (default 0.0); the learner's L1 step is applied only when this is > 0.
    double l1RegularizationWeight{ 0.0 };

    // L2 regularization weight (default 0.0).
    double l2RegularizationWeight{ 0.0 };

    // Standard deviation of the Gaussian noise injected by the learner (default 0.0).
    double gaussianNoiseInjectionStdDev{ 0.0 };

    // Selects the gradient clipping mode (default: with truncation).
    bool gradientClippingWithTruncation{ true };

    // Per-sample gradient clipping threshold; defaults to +infinity.
    // NOTE(review): an infinite threshold presumably means "no clipping" - confirm against ClipGradient.
    double gradientClippingThresholdPerSample{ std::numeric_limits<double>::infinity() };
};
// An abstract base class at the root of the standard learners hierarchy
// It implements most of the learner functionality, except for the actual update function,
// and adds a few pre-/postprocessing methods (which are invoked before and after the update).
class LearnerBase : public Learner
{
static const std::wstring WasLearningRateResetAttributeName;
public:
virtual bool Update(const std::unordered_map<Parameter, NDArrayViewPtr>& gradientValues, size_t trainingSampleCount) override final;
@ -33,18 +25,36 @@ namespace CNTK
virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override final;
// Replaces the learning rate with an explicitly supplied value.
// Raises m_wasLearningRateReset so that LearningRate() reports the value held by the
// base Learner instead of looking it up in m_learningRateSchedule, then forwards the
// new rate to Learner::ResetLearningRate.
virtual void ResetLearningRate(double learningRate) override final
{
// Mark the override first; LearningRate() branches on this flag.
m_wasLearningRateReset = true;
Learner::ResetLearningRate(learningRate);
}
// Returns the learning rate currently in effect.
// Until ResetLearningRate has been called, the rate is read from the schedule at the
// current sample count; afterwards the value stored in the base Learner is returned.
virtual double LearningRate() const override final
{
    // No explicit reset yet: follow the per-sample schedule.
    if (!m_wasLearningRateReset)
        return m_learningRateSchedule[m_sampleCount];

    return Learner::LearningRate();
}
protected:
// allocateSmoothGradients flag specifies whether NDArrayViews for smoothed gradients can be allocated
// in the base class constructor (in which case they are allocated with the shapes identical to the shapes of
// the corresponding parameters) or if the allocation should be deferred to the subclass constructor (which
// performs allocation that is specific to the particular learner, see FSAdaGrad and RMSProp).
LearnerBase(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients = true,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true);
AdditionalLearningOptions additionalOptions,
bool allocateSmoothGradients = true);
virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const = 0;
std::string LearnerType() const;
LearningRatesPerSample m_learningRates;
bool m_wasLearningRateReset;
LearningRatesPerSample m_learningRateSchedule;
AdditionalLearningOptions m_additionalOptions;
@ -84,6 +94,7 @@ namespace CNTK
// Retrieves the shape of the matrix corresponding to the parameter value.
static NDShape GetMatrixShape(const Parameter& parameter);
size_t m_sampleCount;
size_t m_minibatchCount;
@ -106,11 +117,10 @@ namespace CNTK
public:
LearnerSGD(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients = true,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true)
: LearnerBase(parameters, learningRates, allocateSmoothGradients, clippingThresholdPerSample, gradientClippingWithTruncation),
m_momentums(0.0),
AdditionalLearningOptions additionalOptions,
bool allocateSmoothGradients = true)
: LearnerBase(parameters, learningRates, additionalOptions, allocateSmoothGradients),
m_momentumValues(0.0),
m_useNesterovAcceleration(false)
{}
@ -121,8 +131,8 @@ namespace CNTK
template <typename ElementType>
void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
// TODO: Move m_momentums to LearnerMomentumSGD as soon as NormalGrad is refactored.
MomentumsPerSample m_momentums;
// TODO: Move m_momentumValues to LearnerMomentumSGD as soon as NormalGrad is refactored.
MomentumValuesPerSample m_momentumValues;
bool m_useNesterovAcceleration;
};
@ -132,13 +142,12 @@ namespace CNTK
public:
LearnerMomentumSGD(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
bool allocateSmoothGradients = true,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true)
: LearnerSGD(parameters, learningRates, allocateSmoothGradients, clippingThresholdPerSample, gradientClippingWithTruncation)
const MomentumValuesPerSample& momentumValues,
AdditionalLearningOptions additionalOptions,
bool allocateSmoothGradients = true)
: LearnerSGD(parameters, learningRates, additionalOptions, allocateSmoothGradients)
{
m_momentums = momentums;
m_momentumValues = momentumValues;
}
};
@ -149,10 +158,9 @@ namespace CNTK
LearnerNesterov(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true)
: LearnerMomentumSGD(parameters, learningRates, momentums, true, clippingThresholdPerSample, gradientClippingWithTruncation)
const MomentumValuesPerSample& momentumValues,
AdditionalLearningOptions additionalOptions)
: LearnerMomentumSGD(parameters, learningRates, momentumValues, additionalOptions, /*allocateSmoothGradients*/ true)
{
m_useNesterovAcceleration = true;
}
@ -165,8 +173,11 @@ namespace CNTK
LearnerAdaGrad(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true);
AdditionalLearningOptions additionalOptions)
: LearnerBase(parameters, learningRates, additionalOptions, /*allocateSmoothGradients*/ true),
m_needAveMultiplier(needAveMultiplier)
{
}
protected:
bool m_needAveMultiplier;
@ -183,9 +194,10 @@ namespace CNTK
LearnerFSAdaGrad(const std::vector<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true);
const MomentumValuesPerSample& momentumValues,
const double targetAdagradAvDenom,
const size_t adagradT,
AdditionalLearningOptions additionalOptions);
protected:
@ -193,6 +205,11 @@ namespace CNTK
template <typename ElementType>
void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
private:
mutable std::unordered_map<Parameter, double> m_smoothedCounts;
double m_targetAdagradAvDenom;
size_t m_adagradT;
};
class LearnerRMSProp : public LearnerBase
@ -203,8 +220,7 @@ namespace CNTK
const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min,
bool needAveMultiplier,
double clippingThresholdPerSample = std::numeric_limits<double>::infinity(),
bool gradientClippingWithTruncation = true);
AdditionalLearningOptions additionalOptions);
protected:

Просмотреть файл

@ -12,6 +12,7 @@
#include "ReaderShim.h"
#include "Function.h"
#include <tuple>
#include "Value.h"
using namespace Microsoft::MSR::CNTK;
@ -78,6 +79,8 @@ namespace CNTK
static const std::unordered_map<std::wstring, std::wstring> deserializerTypeNameToModuleNameMap = {
{ L"CNTKTextFormatDeserializer", L"CNTKTextFormatReader" },
{ L"ImageDeserializer", L"ImageReader" },
{ L"HTKFeatureDeserializer", L"HTKDeserializers" },
{ L"HTKMLFDeserializer", L"HTKDeserializers" },
};
auto& deserializerConfigDict = deserializerConfig.Value<Dictionary>();
@ -103,6 +106,10 @@ namespace CNTK
}
}
if (deserializerTypeNameToModuleNameMap.find(deserializerTypeName) == deserializerTypeNameToModuleNameMap.end())
InvalidArgument("Unknown deserializer type (%S)", deserializerTypeName.c_str());
deserializerConfigDict[L"module"] = deserializerTypeNameToModuleNameMap.at(deserializerTypeName);
}
@ -197,7 +204,7 @@ namespace CNTK
// TODO: Eliminate the unnecessary CPU to CPU copy
ReaderShim<float>::FillMatrixFromStream(currentStreamDesc->m_storageType, dataMatrix.get(), sampleSize, currentStreamMinibatchData, nullptr);
minibatchValuePtr = CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout<float>(sampleShape, *dataMatrix, currentStreamMinibatchData->m_layout, false);
minibatchValuePtr = MakeSharedObject<PackedValue>(sampleShape, dataMatrix, currentStreamMinibatchData->m_layout, /*readOnly =*/ false);
size_t numSamples = currentStreamMinibatchData->m_layout->GetActualNumSamples();
size_t numSequences = currentStreamMinibatchData->m_layout->GetNumSequences();

Просмотреть файл

@ -36,19 +36,18 @@ namespace CNTK
}
NDMask::~NDMask()
{
}
{}
void NDMask::MaskSection(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape)
void NDMask::MarkSectionAs(const std::vector<size_t>& sectionOffset, const NDShape& sectionShape, MaskKind maskKind)
{
// TODO: Implement batching of masking operation for masks residing on GPUs to avoid making
// GPU invocations for each MarkSectionAs call.
if (sectionOffset.size() > m_maskShape.Rank())
LogicError("NDMask::MaskSection: The sectionOffset cannot have dimensionality higher than the number of axes of 'this' mask");
LogicError("NDMask::MaskSection: The sectionOffset cannot have dimensionality higher than the rank of 'this' mask");
if (sectionShape.Rank() > m_maskShape.Rank())
LogicError("NDMask::MaskSection: The section shape cannot have an axes count higher than the number of axes of 'this' mask");
LogicError("NDMask::MaskSection: The section shape cannot have an axes count higher than the rank of 'this' mask");
std::vector<size_t> offset(m_maskShape.Rank(), 0);
for (size_t i = 0; i < sectionOffset.size(); ++i)
@ -62,7 +61,7 @@ namespace CNTK
size_t sliceRowLength = (shape[0] != NDShape::InferredDimension) ? shape[0] : (maskMatrix->GetNumRows() - rowOffset);
size_t sliceColLength = (shape[1] != NDShape::InferredDimension) ? shape[1] : (maskMatrix->GetNumCols() - colOffset);
if ((rowOffset == 0) && (sliceRowLength == maskMatrix->GetNumRows()))
maskMatrix->ColumnSlice(colOffset, sliceColLength).SetValue(0);
maskMatrix->ColumnSlice(colOffset, sliceColLength).SetValue((char)maskKind);
else
{
// Since Matrix does not support strides in the row dimension, we will need to create separate slices for each column
@ -70,15 +69,15 @@ namespace CNTK
{
auto column = maskMatrix->ColumnSlice(i, 1);
column.Reshape(1, maskMatrix->GetNumRows());
column.ColumnSlice(rowOffset, sliceRowLength).SetValue(0);
column.ColumnSlice(rowOffset, sliceRowLength).SetValue((char)maskKind);
}
}
}
void NDMask::Clear()
{
// Clear the mask by marking all samples as Valid; i.e. a value of 1
GetMatrix()->SetValue(1);
// Clear the mask by marking all samples as Valid
GetMatrix()->SetValue((char)MaskKind::Valid);
}
size_t NDMask::MaskedCount() const
@ -86,17 +85,17 @@ namespace CNTK
auto maskMatrix = GetMatrix();
std::unique_ptr<char[]> maskData(maskMatrix->CopyToArray());
return std::count_if(maskData.get(), maskData.get() + maskMatrix->GetNumElements(), [](const char& val) {
return val == 0;
return val == (char)MaskKind::Invalid;
});
}
// TODO: This could actually be strided?
const char* NDMask::DataBuffer() const
const MaskKind* NDMask::DataBuffer() const
{
// First make sure that the underlying matrix is on the right device
auto matrix = GetMatrix();
matrix->TransferToDeviceIfNotThere(AsCNTKImplDeviceId(m_device), true);
return matrix->Data();
return (const MaskKind*)(matrix->Data());
}
Matrix<char>* NDMask::GetMatrix() const
@ -112,9 +111,9 @@ namespace CNTK
GetMatrix()->AssignValuesOf(*source.GetMatrix());
}
NDMaskPtr NDMask::DeepClone() const
NDMaskPtr NDMask::DeepClone(const DeviceDescriptor& device) const
{
NDMaskPtr newMask = MakeSharedObject<NDMask>(this->Shape(), this->Device());
NDMaskPtr newMask = MakeSharedObject<NDMask>(this->Shape(), device);
newMask->CopyFrom(*this);
return newMask;

Просмотреть файл

@ -13,7 +13,24 @@ namespace CNTK
Trainer::Trainer(const FunctionPtr& model, const FunctionPtr& lossFunction, const FunctionPtr& evaluationFunction, const std::unordered_set<LearnerPtr>& parameterLearners)
: m_model(model), m_lossFunction(lossFunction), m_evaluationFunction(evaluationFunction), m_parameterLearners(parameterLearners), m_prevMinibatchNumSamples(1)
{
m_combinedTrainingFunction = Combine({ model, lossFunction, evaluationFunction });
if (m_lossFunction->Output().DynamicAxes().empty())
InvalidArgument("The loss function specified in the Trainer constructor must correspond to minibatch data and have dynamic axes");
if (m_evaluationFunction && m_evaluationFunction->Output().DynamicAxes().empty())
InvalidArgument("The evaluation function specified in the Trainer constructor must correspond to minibatch data and have dynamic axes");
m_aggregatedLossFunction = ReduceSum(lossFunction);
if (m_evaluationFunction)
m_aggregatedEvaluationFunction = ReduceSum(m_evaluationFunction);
std::vector<Variable> combinedFunctionArgs = { m_model, m_aggregatedLossFunction, m_lossFunction };
if (m_evaluationFunction)
{
combinedFunctionArgs.push_back(m_aggregatedEvaluationFunction);
combinedFunctionArgs.push_back(m_evaluationFunction);
}
m_combinedTrainingFunction = Combine(combinedFunctionArgs);
auto modelParameters = m_combinedTrainingFunction->Parameters();
std::unordered_set<Parameter> learnerParameters;
@ -66,20 +83,11 @@ namespace CNTK
return scalar;
}
static size_t GetSampleCountFromArguments(const Variable& evalOrLossArgument, const std::unordered_map<Variable, ValuePtr>& arguments)
static size_t GetSampleCount(const Variable& var, const ValuePtr& value)
{
// Find the argument whose dynamic axes match the criterion operation's dynamic axes (i.e. label dynamic axes)
// Then we determine the actual number of samples contributing to the training loss from the argument's Value object
auto argumentIter = std::find_if(arguments.begin(), arguments.end(), [evalOrLossArgument](const std::pair<Variable, ValuePtr>& currentPair) {
return (currentPair.first.DynamicAxes() == evalOrLossArgument.DynamicAxes());
});
auto argumentValue = argumentIter->second;
auto argumentVar = argumentIter->first;
auto argumentDataShape = argumentValue->Data()->Shape();
auto mask = argumentValue->Mask();
size_t numMaskedSamples = (mask != nullptr) ? mask->MaskedCount() : 0;
size_t numSamplesInDataArrayView = argumentDataShape.SubShape(argumentVar.Shape().Rank()).TotalSize();
auto valueDataShape = value->Shape();
size_t numMaskedSamples = value->MaskedCount();
size_t numSamplesInDataArrayView = valueDataShape.SubShape(var.Shape().Rank()).TotalSize();
if (numMaskedSamples > numSamplesInDataArrayView)
LogicError("Number of masked values cannot exceed the number of samples that the Value object's Data NDArrayView can hold");
@ -88,15 +96,15 @@ namespace CNTK
double Trainer::TestMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{
if (!m_evaluationFunction)
if (!m_aggregatedEvaluationFunction)
InvalidArgument("Trainer::TestMinibatch: Cannot test when no evaluation function was specified during 'this' trainer's construction");
// TODO: Should we refactor this code that is somewhat similar to the prologue of the TrainMinibatch function
std::unordered_map<Variable, ValuePtr> outputs = { { m_evaluationFunction, nullptr } };
std::unordered_map<Variable, ValuePtr> outputs = { { m_aggregatedEvaluationFunction, nullptr }, {m_evaluationFunction, nullptr} };
m_combinedTrainingFunction->Forward(arguments, outputs, computeDevice);
auto sampleCount = GetSampleCountFromArguments(*(m_evaluationFunction->Arguments().begin()), arguments);
return (GetScalarValue(outputs[m_evaluationFunction]) / sampleCount);
auto sampleCount = GetSampleCount(m_evaluationFunction, outputs[m_evaluationFunction]);
return (GetScalarValue(outputs[m_aggregatedEvaluationFunction]) / sampleCount);
}
bool Trainer::TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
@ -107,16 +115,16 @@ namespace CNTK
bool Trainer::TrainMinibatch(const std::unordered_map<Variable, ValuePtr>& arguments, std::unordered_map<Variable, ValuePtr>& outputsToFetch, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::UseDefaultDevice()*/)
{
std::unordered_map<Variable, ValuePtr> outputs = { { m_lossFunction, nullptr } };
if (m_evaluationFunction)
outputs.insert({ m_evaluationFunction, nullptr });
std::unordered_map<Variable, ValuePtr> outputs = { { m_aggregatedLossFunction, nullptr }, { m_lossFunction, nullptr } };
if (m_aggregatedEvaluationFunction)
outputs.insert({ m_aggregatedEvaluationFunction, nullptr });
outputs.insert(outputsToFetch.begin(), outputsToFetch.end());
auto backPropSate = m_combinedTrainingFunction->Forward(arguments, outputs, computeDevice, { m_lossFunction });
m_prevMinibatchAggregateTrainingLossValue = outputs[m_lossFunction];
if (m_evaluationFunction)
m_prevMinibatchAggregateEvalCriterionValue = outputs[m_evaluationFunction];
auto backPropSate = m_combinedTrainingFunction->Forward(arguments, outputs, computeDevice, { m_aggregatedLossFunction });
m_prevMinibatchAggregateTrainingLossValue = outputs[m_aggregatedLossFunction];
if (m_aggregatedEvaluationFunction)
m_prevMinibatchAggregateEvalCriterionValue = outputs[m_aggregatedEvaluationFunction];
for (auto outputToFetch : outputsToFetch)
{
@ -124,8 +132,8 @@ namespace CNTK
outputsToFetch[outputToFetch.first] = outputs[outputToFetch.first];
}
ValuePtr rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(m_lossFunction->Output().GetDataType(), m_prevMinibatchAggregateTrainingLossValue->Data()->Shape(), computeDevice), outputs.at(m_lossFunction)->Mask());
if (m_lossFunction->Output().GetDataType() == DataType::Float)
ValuePtr rootGradientValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(m_aggregatedLossFunction->Output().GetDataType(), m_prevMinibatchAggregateTrainingLossValue->Shape(), computeDevice), outputs.at(m_aggregatedLossFunction)->Mask());
if (m_aggregatedLossFunction->Output().GetDataType() == DataType::Float)
rootGradientValue->Data()->SetValue(1.0f);
else
rootGradientValue->Data()->SetValue(1.0);
@ -135,9 +143,9 @@ namespace CNTK
for (const auto& parameter : modelParameters)
parameterGradients[parameter] = nullptr;
m_combinedTrainingFunction->Backward(backPropSate, { { m_lossFunction, rootGradientValue } }, parameterGradients);
m_combinedTrainingFunction->Backward(backPropSate, { { m_aggregatedLossFunction, rootGradientValue } }, parameterGradients);
m_prevMinibatchNumSamples = GetSampleCountFromArguments(*(m_lossFunction->Arguments().begin()), arguments);
m_prevMinibatchNumSamples = GetSampleCount(m_lossFunction, outputs[m_lossFunction]);
bool anyUpdatesPerformed = false;
for (auto learner : m_parameterLearners)

Просмотреть файл

@ -13,6 +13,7 @@ using namespace std;
namespace CNTK
{
// This wrapper redefines operator<< in terms of unformatted (binary) write operation.
struct BinaryOStreamWrapper
{
@ -527,6 +528,17 @@ namespace CNTK
return (m_dictionaryData->find(key) != m_dictionaryData->end());
}
// Adds every key/value entry of 'other' to this dictionary.
// For each entry, InvalidArgument is invoked if this dictionary already contains the key;
// otherwise the value is stored under that key via operator[].
void Dictionary::Add(const Dictionary& other)
{
    // Bind by const reference: iterating by value would copy each key/value pair.
    for (const auto& kv : *(other.m_dictionaryData))
    {
        if (Contains(kv.first))
            InvalidArgument("Dictionary::Add: This dictionary already contains an entry with key %S that is being attempted to add from the 'other' dinctionary", kv.first.c_str());

        (*this)[kv.first] = kv.second;
    }
}
bool Dictionary::operator==(const Dictionary& other) const
{
if (this == &other)
@ -539,7 +551,7 @@ namespace CNTK
return false;
}
for (auto& kv : *m_dictionaryData)
for (const auto& kv : *m_dictionaryData)
{
auto result = other.m_dictionaryData->find(kv.first);
if (result == other.m_dictionaryData->end() || kv.second != result->second)
@ -561,7 +573,7 @@ namespace CNTK
BinaryOStreamWrapper stream(stdStream);
stream << us.version;
stream << us.m_dictionaryData->size();
for (auto& kv : *(us.m_dictionaryData))
for (const auto& kv : *(us.m_dictionaryData))
{
stream << kv.first;
stream << kv.second;
@ -586,10 +598,62 @@ namespace CNTK
return stream;
}
// Constructs a schedule holding the single given value (stored under key 0); the unit is set to 1.
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(T value)
: m_schedule({ make_pair(0, value) }), m_unit(1)
{
}
// Constructs a schedule from a plain list of values.
// Each value is paired with a unit count of 1 and the resulting list is handed to ConstructSchedule.
//   schedule - the values, in the order in which they apply
//   unit     - stored in m_unit; ConstructSchedule rejects 0
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(const vector<T>& schedule, size_t unit)
    : m_unit(unit)
{
    std::vector<std::pair<size_t, T>> s(schedule.size());
    // Use size_t for the index: 'auto i = 0' deduces int, which mixes signed and unsigned in the comparison.
    for (size_t i = 0; i < schedule.size(); ++i)
    {
        s[i].first = 1;
        s[i].second = schedule[i];
    }
    ConstructSchedule(s);
}
// Constructs a schedule from (unit count, value) pairs; the pairs are passed unchanged to ConstructSchedule.
//   schedule - list of (unit count, value) pairs
//   unit     - stored in m_unit before ConstructSchedule runs
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(const vector<std::pair<size_t, T>>& schedule, size_t unit)
: m_unit(unit)
{
ConstructSchedule(schedule);
}
// Fills m_schedule from a list of (unit count, value) pairs.
// The key of each entry is m_unit multiplied by the running total of unit counts up to and including that entry.
// RuntimeError is invoked when m_unit is 0, when the list is empty, or when any unit count is 0.
template <typename T>
void TrainingParameterSchedule<T>::ConstructSchedule(const std::vector<std::pair<size_t, T>>& schedule)
{
    // TODO: 0 will be used to mean "the entire sweep"
    if (m_unit == 0)
        RuntimeError("TrainingParameterSchedule::ConstructSchedule : 'unit' cannot be 0.");

    if (schedule.size() == 0)
        RuntimeError("TrainingParameterSchedule::ConstructSchedule : schedule is empty.");

    size_t unitsSoFar = 0;
    for (auto entry = schedule.begin(); entry != schedule.end(); ++entry)
    {
        const size_t unitCount = entry->first;
        if (unitCount == 0)
            RuntimeError("TrainingParameterSchedule::ConstructSchedule : unit count cannot be 0.");

        unitsSoFar += unitCount;
        m_schedule[m_unit * unitsSoFar] = entry->second;
    }
}
// Destructor; performs no explicit work of its own.
template <typename T>
/*virtual*/ TrainingParameterSchedule<T>::~TrainingParameterSchedule()
{
}
// Returns the element whose key is greater than the required sample count
// or the last element if no such key exists.
template <typename T>
const T& TrainingParameterSchedule<T>::operator[](size_t sampleCount) const
/*virtual*/ const T& TrainingParameterSchedule<T>::operator[](size_t sampleCount) const
{
assert(m_schedule.size() > 0);
auto it = m_schedule.upper_bound(sampleCount);
@ -600,6 +664,38 @@ namespace CNTK
return it->second;
}
// Copy constructor: compiler-generated.
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(const TrainingParameterSchedule<T>&) = default;
// Move constructor: moves the schedule container and copies the unit.
// cannot be defaulted due to a bug in VS2013 (https://connect.microsoft.com/VisualStudio/feedback/details/1255564)
template <typename T>
TrainingParameterSchedule<T>::TrainingParameterSchedule(TrainingParameterSchedule<T>&& that)
:m_schedule(move(that.m_schedule)), m_unit(that.m_unit)
{
}
// Copy assignment: compiler-generated.
template <typename T>
TrainingParameterSchedule<T>& TrainingParameterSchedule<T>::operator=(const TrainingParameterSchedule<T>&) = default;
// Move assignment: moves the schedule container, copies the unit and returns *this.
// cannot be defaulted due to a bug in VS2013 (https://connect.microsoft.com/VisualStudio/feedback/details/1255564)
template <typename T>
TrainingParameterSchedule<T>& TrainingParameterSchedule<T>::operator=(TrainingParameterSchedule<T>&& that)
{
m_schedule = move(that.m_schedule);
m_unit = that.m_unit;
return *this;
}
// Rewrites every value stored in m_schedule in place: a value tc becomes exp(-1 / tc),
// except that a value of exactly 0 stays 0 (which also avoids the division by zero).
void MomentumValuesAsTimeConstants::ConvertToPerSampleValues()
{
    for (auto entry = m_schedule.begin(); entry != m_schedule.end(); ++entry)
    {
        const double timeConstant = entry->second;

        double perSampleValue = 0;
        if (timeConstant != 0.0)
            perSampleValue = exp(-1.0 / timeConstant);

        entry->second = perSampleValue;
    }
}
template void DictionaryValue::AllocateDataPtr<NDShape>(const NDShape& value);
template void DictionaryValue::AllocateDataPtr<Axis>(const Axis& value);
template void DictionaryValue::AllocateDataPtr<vector<DictionaryValue>>(const vector<DictionaryValue>& value);
@ -614,5 +710,5 @@ namespace CNTK
template void DictionaryValue::FreePtrAsType<Dictionary>();
template void DictionaryValue::FreePtrAsType<NDArrayView>();
template const double& TrainingParameterSchedule<double>::operator[](size_t key) const;
template class TrainingParameterSchedule<double>;
}

Просмотреть файл

@ -32,7 +32,7 @@ namespace CNTK
inline DEVICEID_TYPE AsCNTKImplDeviceId(const DeviceDescriptor& device)
{
if (device.Type() == DeviceKind::CPU)
return -1;
return CPUDEVICE;
else if (device.Type() == DeviceKind::GPU)
return device.Id();
else
@ -304,16 +304,20 @@ namespace CNTK
}
}
static size_t const CNTKInternalIdxValueForAllStaticAxes = 0;
inline Axis AsAxis(size_t CNTKInternalAxisIdx)
{
if (CNTKInternalAxisIdx == 0)
LogicError("CNTK internal axis indices must be > 0");
if (CNTKInternalAxisIdx == CNTKInternalIdxValueForAllStaticAxes)
return Axis::AllStaticAxes();
return Axis(CNTKInternalAxisIdx - 1);
}
inline int AsCNTKInternalAxisIdx(const Axis& axis)
{
if (axis == Axis::AllStaticAxes())
return CNTKInternalIdxValueForAllStaticAxes;
if (!axis.IsStaticAxis())
LogicError("Only Axis that represent static indices can be converted to a CNTK internal axis index");
@ -322,19 +326,16 @@ namespace CNTK
inline std::pair<NDShape, NDShape> GetConvolutionOutputMapCountAndKernelShape(const NDShape& convolutionMapShape, const NDShape& operandShape)
{
auto outputMapCount = convolutionMapShape.SubShape(0, convolutionMapShape.Rank() - operandShape.Rank());
NDShape kernelShape = convolutionMapShape.SubShape(0, operandShape.Rank());
auto outputMapCount = convolutionMapShape.SubShape(kernelShape.Rank());
NDShape paddedOutputMapCount(operandShape.Rank(), 1);
for (size_t i = 0; i < outputMapCount.Rank(); ++i)
paddedOutputMapCount[paddedOutputMapCount.Rank() - 1 - i] = outputMapCount[outputMapCount.Rank() - 1 - i];
//for (size_t i = 0; i < outputMapCount.Rank(); ++i)
// paddedOutputMapCount[i] = outputMapCount[i];
NDShape kernelShape = convolutionMapShape.SubShape(outputMapCount.Rank());
return{ paddedOutputMapCount, kernelShape };
}
inline double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
inline double MomentumValueForMB(double momentumPerSample, size_t minibatchSize)
{
return std::pow(momentumPerSample, minibatchSize);
}
@ -369,4 +370,45 @@ namespace CNTK
double* castValue = Copy<float, double>(source->DataBuffer<float>(), sourceSize);
return MakeSharedObject<NDArrayView>(sourceShape, castValue, sourceSize, DeviceDescriptor::CPUDevice(), readOnly);
}
// Returns 'name' wrapped in parentheses, or the empty string unchanged when 'name' is empty.
inline std::wstring ParanthesizedName(const std::wstring& name)
{
    return name.empty() ? name : (L"(" + name + L")");
}
// Markers that delimit the uid and the name inside a CNTK internal node name.
static const std::wstring UidPrefix = L"__v2libuid__";
static const std::wstring NamePrefix = L"__v2libname__";

// Builds a CNTK internal node name as: UidPrefix + uid + NamePrefix + name.
inline std::wstring CNTKInternalNodeNameFromUidAndName(const std::wstring& uid, const std::wstring& name)
{
    std::wstring internalNodeName(UidPrefix);
    internalNodeName += uid;
    internalNodeName += NamePrefix;
    internalNodeName += name;
    return internalNodeName;
}
// Splits a CNTK internal node name into a (uid, name) pair.
// If the string contains UidPrefix, the uid is the text between UidPrefix and the following
// NamePrefix, and the name is everything after NamePrefix; LogicError is invoked when
// NamePrefix is missing after the uid marker.
// If the string does not contain UidPrefix, the whole string is the name and the uid is
// obtained from Internal::GenerateUid(varKind).
inline std::pair<std::wstring, std::wstring> UidAndNameFromCNTKInternalNodeName(const std::wstring& CNTKInternalNodeName, VariableKind varKind)
{
    const auto uidMarkerPos = CNTKInternalNodeName.find(UidPrefix);
    if (uidMarkerPos == std::wstring::npos)
    {
        std::wstring generatedUid = Internal::GenerateUid(varKind);
        return{ generatedUid, CNTKInternalNodeName };
    }

    const auto uidStart = uidMarkerPos + UidPrefix.length();
    const auto nameMarkerPos = CNTKInternalNodeName.find(NamePrefix, uidStart);
    if (nameMarkerPos == std::wstring::npos)
        LogicError("CNTK internal node name found to contain uid but not name!");

    std::wstring uid = CNTKInternalNodeName.substr(uidStart, nameMarkerPos - uidStart);
    std::wstring name = CNTKInternalNodeName.substr(nameMarkerPos + NamePrefix.length());
    return{ uid, name };
}
}

Просмотреть файл

@ -11,6 +11,8 @@
#include "CNTKLibrary.h"
#include "Utils.h"
#include "Value.h"
#include "Function.h"
namespace CNTK
{
@ -28,7 +30,7 @@ namespace CNTK
auto maskShape = mask->Shape();
if (maskShape.Rank() > dataShape.Rank())
InvalidArgument("The number of axes (%d) of the mask of a Value object cannot exceed the number of axes (%d) of the data NDArrayView object", (int)maskShape.Rank(), (int)dataShape.Rank());
InvalidArgument("The rank (%d) of the mask of a Value object cannot exceed the rank (%d) of the data NDArrayView object", (int)maskShape.Rank(), (int)dataShape.Rank());
if (dataShape.SubShape(dataShape.Rank() - maskShape.Rank()) != maskShape)
InvalidArgument("Invalid Value object; the data and mask are incompatible. The trailing dimensions of the data with shape %S do not match the dimensions of the mask with shape %S", AsStringForErrorReporting(dataShape).c_str(), AsStringForErrorReporting(maskShape).c_str());
@ -60,7 +62,10 @@ namespace CNTK
NDShape valueMaskShape = { maxSequenceLength, numSequences };
deviceValueMask = MakeSharedObject<NDMask>(valueMaskShape, device);
for (size_t i = 0; i < numSequences; ++i)
deviceValueMask->MaskSection({ sequenceLengths[i], i }, { NDShape::InferredDimension, 1 });
{
deviceValueMask->MarkSequenceBegin({0, i});
deviceValueMask->InvalidateSection({ sequenceLengths[i], i }, { NDShape::InferredDimension, 1 });
}
}
return deviceValueMask;
@ -179,6 +184,39 @@ namespace CNTK
}
}
void PackedValue::Unpack() const
{
if (m_packedDataLayout && (m_packedDataLayout->GetNumTimeSteps() != 1) && (m_packedDataLayout->GetNumSequences() != 1) && Internal::IsAutomaticUnpackingOfPackedValuesDisabled())
LogicError("PackedValue::Unpack: Automatic unpacking of PackedValue objects is disabled");
if (m_isPacked)
{
ValuePtr valueObject;
auto dataType = m_packedData->GetDataType();
switch (dataType)
{
case DataType::Float:
valueObject = CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(m_sampleShape, *(m_packedData->GetMatrix<float>()), m_packedDataLayout, m_isReadOnly);
break;
case DataType::Double:
valueObject = CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(m_sampleShape, *(m_packedData->GetMatrix<double>()), m_packedDataLayout, m_isReadOnly);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(dataType));
}
m_data = valueObject->Data();
m_mask = valueObject->Mask();
m_packedData = nullptr;
m_packedDataLayout = nullptr;
m_isPacked = false;
if (m_unpackedShape != m_data->Shape())
LogicError("The computed unpacked shape of the PackedValue object does not match the actual Data NDArrayView's shape after unpacking");
}
}
// Explicit template instantiations
template /*static*/ CNTK_API ValuePtr Value::Create<float>(const NDShape& sampleShape, const std::vector<std::vector<float>>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/);
template /*static*/ CNTK_API ValuePtr Value::Create<double>(const NDShape& sampleShape, const std::vector<std::vector<double>>& sequences, const DeviceDescriptor& device, bool readOnly/* = false*/);

Просмотреть файл

@ -7,14 +7,112 @@
#include "stdafx.h"
#include "CNTKLibrary.h"
#include "Sequences.h"
#include "Utils.h"
namespace CNTK
{
class CNTKValue final : public Value
// A Value that initially holds its data in packed form (a matrix plus an optional MBLayout) and
// converts to the unpacked data/mask form on the first call to Data() or Mask() (see Unpack()).
// While packed, the shape, device, data type, storage format, read-only flag and masked-sample
// count are answered from the packed representation without unpacking.
class PackedValue final : public Value
{
// MakeSharedObject is used by DeepClone() below with the argument list of the private constructor.
template <typename T, typename ...CtorArgTypes>
friend inline std::shared_ptr<T> MakeSharedObject(CtorArgTypes&& ...ctorArgs);
public:
// Constructs a packed value from a matrix and its (possibly null) layout.
//   sampleShape       - shape of a single sample
//   packedDataMatrix  - the packed data; wrapped into an NDArrayView of shape { rows, cols }
//   packedDataLayout  - layout supplying the time-step and sequence counts; may be null
//   isReadOnly        - forwarded to the NDArrayView that wraps the packed data
template <typename ElementType>
PackedValue(const NDShape& sampleShape, const std::shared_ptr<Microsoft::MSR::CNTK::Matrix<ElementType>>& packedDataMatrix, const std::shared_ptr<Microsoft::MSR::CNTK::MBLayout>& packedDataLayout, bool isReadOnly)
: Value(nullptr), m_isPacked(true), m_sampleShape(sampleShape), m_packedData(nullptr), m_packedDataLayout(packedDataLayout), m_isReadOnly(isReadOnly)
{
NDShape packedMatrixShape({ packedDataMatrix->GetNumRows(), packedDataMatrix->GetNumCols() });
// NOTE(review): the raw TensorView pointer is handed to the NDArrayView created below; presumably it takes ownership - confirm.
auto tensorView = new Microsoft::MSR::CNTK::TensorView<ElementType>(packedDataMatrix, AsTensorViewShape(packedMatrixShape));
m_packedData = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(packedDataMatrix->GetDeviceId()), AsStorageFormat(packedDataMatrix->GetFormat()), packedMatrixShape, m_isReadOnly, tensorView);
// Determine unpacked shape: the sample shape, extended by { time steps, sequences } when a layout exists
m_unpackedShape = sampleShape;
if (packedDataLayout)
m_unpackedShape = m_unpackedShape.AppendShape({ packedDataLayout->GetNumTimeSteps(), packedDataLayout->GetNumSequences() });
}
// Unpacks the data if it is still packed; defined out of line.
void Unpack() const;
// Always reports the unpacked shape, whether or not unpacking has happened yet.
const NDShape& Shape() const override { return m_unpackedShape; }
// The following accessors answer from the packed data while packed and defer to Value otherwise.
DeviceDescriptor Device() const override { return m_isPacked ? m_packedData->Device() : Value::Device(); }
DataType GetDataType() const override { return m_isPacked ? m_packedData->GetDataType() : Value::GetDataType(); }
StorageFormat GetStorageFormat() const override { return m_isPacked? m_packedData->GetStorageFormat() : Value::GetStorageFormat(); }
bool IsReadOnly() const override { return m_isPacked ? m_packedData->IsReadOnly() : Value::IsReadOnly(); }
// Number of masked samples. While packed this is derived from the layout (0 when there is no layout).
size_t MaskedCount() const override
{
if (m_isPacked)
// Compute the number of masked samples after the data will be unpacked
return m_packedDataLayout ? ((m_packedDataLayout->GetNumTimeSteps() * m_packedDataLayout->GetNumSequences()) - m_packedDataLayout->GetActualNumSamples()) : 0;
else
return Value::MaskedCount();
}
// Returns the data view; triggers unpacking first.
NDArrayViewPtr Data() const override
{
Unpack();
return Value::Data();
}
// Returns the mask; triggers unpacking first.
NDMaskPtr Mask() const override
{
Unpack();
return Value::Mask();
}
// Deep copy. While packed, clones the packed data and copies the layout (if any) into a new
// PackedValue; otherwise defers to Value::DeepClone(). The readOnly argument is not used here.
ValuePtr DeepClone(bool /*readOnly = false*/) const override
{
if (m_isPacked)
{
std::shared_ptr<Microsoft::MSR::CNTK::MBLayout> packedLayoutCopy;
if (m_packedDataLayout)
{
packedLayoutCopy = std::make_shared<Microsoft::MSR::CNTK::MBLayout>();
packedLayoutCopy->CopyFrom(m_packedDataLayout);
}
return MakeSharedObject<PackedValue>(m_sampleShape, m_packedData->DeepClone(), packedLayoutCopy, m_isReadOnly);
}
else
return Value::DeepClone();
}
// Not supported: always invokes LogicError.
ValuePtr Alias(bool /*readOnly = false*/) const override
{
LogicError("Alias is currently unsupported for PackedValue objects");
}
// Not supported: always invokes LogicError.
void CopyFrom(const Value& /*source*/) override
{
LogicError("CopyFrom is currently unsupported for PackedValue objects");
}
// Returns the packed matrix together with its layout.
// InvalidArgument is invoked if the value has already been unpacked.
template <typename ElementType>
std::pair<std::shared_ptr<const Microsoft::MSR::CNTK::Matrix<ElementType>>, std::shared_ptr<Microsoft::MSR::CNTK::MBLayout>> PackedData()
{
if (!m_isPacked)
InvalidArgument("PackedValue::PackedData called on a Value object that has already been unpacked");
return { m_packedData->GetMatrix<ElementType>(), m_packedDataLayout };
}
private:
// Constructs a packed value directly from an existing packed NDArrayView; used by DeepClone().
PackedValue(const NDShape& sampleShape, const NDArrayViewPtr& packedData, const std::shared_ptr<Microsoft::MSR::CNTK::MBLayout>& packedDataLayout, bool isReadOnly)
: Value(nullptr), m_isPacked(true), m_sampleShape(sampleShape), m_packedData(packedData), m_packedDataLayout(packedDataLayout), m_isReadOnly(isReadOnly)
{
// Determine unpacked shape: the sample shape, extended by { time steps, sequences } when a layout exists
m_unpackedShape = sampleShape;
if (packedDataLayout)
m_unpackedShape = m_unpackedShape.AppendShape({ packedDataLayout->GetNumTimeSteps(), packedDataLayout->GetNumSequences() });
}
private:
// Read-only flag passed on when wrapping, cloning and unpacking the data.
bool m_isReadOnly;
// Shape of a single sample.
NDShape m_sampleShape;
// Shape the data has once unpacked; computed at construction time.
NDShape m_unpackedShape;
// The members below are mutable because Unpack() is const and resets them after unpacking.
mutable bool m_isPacked;
mutable NDArrayViewPtr m_packedData;
mutable std::shared_ptr<Microsoft::MSR::CNTK::MBLayout> m_packedDataLayout;
};
}

Просмотреть файл

@ -30,7 +30,7 @@ namespace CNTK
if (varOwner)
return CompositeFunction::Create(varOwner, varOwner->Name());
else
return Internal::Combine({ *this });
return Combine({ *this });
}
NDArrayViewPtr Variable::Value() const
@ -70,14 +70,24 @@ namespace CNTK
static const std::wstring KernelWidthAttributeName = L"kernelWidth";
static const std::wstring KernelHeightAttributeName = L"kernelHeight";
ParameterInitializer UniformInitializer(double scale, unsigned long seed)
void Variable::VariableFields::SetValueInitialization(const ParameterInitializer& initializationConfig, const DeviceDescriptor& device)
{
Dictionary initConfig;
initConfig[InitializerTypeAttributeName] = Microsoft::MSR::CNTK::UniformInitializerTypeName;
initConfig[ScaleAttributeName] = scale;
initConfig[RandomSeedAttributeName] = (size_t)seed;
if (m_value != nullptr)
LogicError("Value initialization config cannot be set if a value already exists");
return initConfig;
assert(!m_valueInitializer);
assert(!m_valueInitializationDevice);
if (initializationConfig.Contains(FilterRankAttributeName))
{
auto filterRank = (int)initializationConfig[FilterRankAttributeName].Value<size_t>();
auto outputRank = (int)initializationConfig[OutputRankAttributeName].Value<size_t>();
if ((filterRank + outputRank) > m_shape.Rank())
InvalidArgument("Sum of filter rank (%d) and output rank (%d) of the parameter initializer cannot exceed the Parameter's rank(%d)", filterRank, outputRank, (int)m_shape.Rank());
}
m_valueInitializer.reset(new ParameterInitializer(initializationConfig));
m_valueInitializationDevice.reset(new DeviceDescriptor(device));
}
static ParameterInitializer CreateInitializer(const std::wstring& initializerTypeName, int outputRank, int filterRank, double scale, unsigned long seed)
@ -92,6 +102,16 @@ namespace CNTK
return initConfig;
}
// Builds the initializer configuration for uniform initialization.
// The returned dictionary carries the initializer type name, the scale and the random seed.
ParameterInitializer UniformInitializer(double scale, unsigned long seed)
{
    Dictionary uniformConfig;
    uniformConfig[InitializerTypeAttributeName] = Microsoft::MSR::CNTK::UniformInitializerTypeName;
    uniformConfig[ScaleAttributeName] = scale;
    // The seed is stored as size_t.
    uniformConfig[RandomSeedAttributeName] = static_cast<size_t>(seed);

    return uniformConfig;
}
ParameterInitializer GaussianInitializer(int outputRank, int filterRank, double scale, unsigned long seed)
{
return CreateInitializer(Microsoft::MSR::CNTK::GaussianInitializerTypeName, outputRank, filterRank, scale, seed);

Просмотреть файл

@ -124,7 +124,7 @@ private:
};
static DEVICEID_TYPE s_bestDeviceId = DEVICEID_NOTYETDETERMINED;
static BestGpu* s_bestGpu = nullptr;
static std::unique_ptr<BestGpu> s_bestGpu = nullptr;
// DeviceFromConfig - Parse 'deviceId' config parameter to determine what type of behavior is desired
//Symbol - Meaning
@ -149,7 +149,7 @@ static DEVICEID_TYPE SelectDevice(DEVICEID_TYPE deviceId, bool bLockGPU, const i
// GPU device to be auto-selected, so init our class
if (s_bestGpu == nullptr)
{
s_bestGpu = new BestGpu();
s_bestGpu = make_unique<BestGpu>();
for (int i = 0; i < excludedDevices.size(); ++i)
{
s_bestGpu->DisallowDevice(excludedDevices[i]);
@ -270,6 +270,8 @@ void BestGpu::GetCudaProperties()
if (m_cudaData)
return;
int currentDevice, rc;
rc = cudaGetDevice(&currentDevice);
int dev = 0;
for (ProcessorData* pd : m_procData)
@ -284,9 +286,16 @@ void BestGpu::GetCudaProperties()
pd->cudaFreeMem = free;
pd->cudaTotalMem = total;
dev++;
cudaDeviceReset();
// cudaDeviceReset() explicitly destroys and cleans up all resources associated with the
// current device in the current process.
// Will result in a segmentation fault if called, for instance, after cudnnCreate, but before cudnnDestroy.
// cudaDeviceReset();
}
m_cudaData = m_procData.size() > 0;
if (rc == CUDA_SUCCESS)
{
cudaSetDevice(currentDevice);
}
}
void BestGpu::Init()
@ -325,8 +334,11 @@ BestGpu::~BestGpu()
if (m_nvmlData)
{
// TODO: Check for error code and throw if !std::uncaught_exception()
nvmlShutdown();
nvmlReturn_t r = nvmlShutdown();
if ((r != NVML_SUCCESS) && !std::uncaught_exception())
{
RuntimeError("BestGPU Destructor: failed to shut down NVML. \n");
}
}
}

Просмотреть файл

@ -7,6 +7,10 @@
#include <cassert>
#include <string>
#define CLOSEHANDLE_ERROR 0
#define RELEASEMUTEX_ERROR 0
#define FCNTL_ERROR -1
#ifdef WIN32 // --- Windows version
#define NOMINMAX
@ -46,7 +50,11 @@ public:
if (::WaitForSingleObject(m_handle, wait ? INFINITE : 0) != WAIT_OBJECT_0)
{
// failed to acquire
::CloseHandle(m_handle);
int rc = ::CloseHandle(m_handle);
if ((rc == CLOSEHANDLE_ERROR) && !std::uncaught_exception())
{
RuntimeError("Acquire: Handler close failure with error code %d", ::GetLastError());
}
m_handle = NULL;
return false;
}
@ -58,9 +66,17 @@ public:
void Release()
{
assert(m_handle != NULL);
// TODO: Check for error code and throw if !std::uncaught_exception()
::ReleaseMutex(m_handle);
::CloseHandle(m_handle);
int rc = 0;
rc = ::ReleaseMutex(m_handle);
if ((rc == RELEASEMUTEX_ERROR) && !std::uncaught_exception())
{
RuntimeError("Mutex Release: Failed to release mutex %s: %d", m_name.c_str(), ::GetLastError());
}
rc = ::CloseHandle(m_handle);
if ((rc == CLOSEHANDLE_ERROR) && !std::uncaught_exception())
{
RuntimeError("Mutex Release: Failed to close handler %s: %d", m_name.c_str(), ::GetLastError());
}
m_handle = NULL;
}
@ -121,6 +137,8 @@ public:
// Returns false if !wait and lock cannot be acquired, or in case of a system error that prevents us from acquiring the lock.
bool Acquire(bool wait)
{
mode_t mask = umask(0);
assert(m_fd == -1);
for (;;)
{
@ -146,6 +164,7 @@ public:
{
// acquire failed
close(fd);
umask(mask);
return false;
}
// we own the exclusive lock on file descriptor, but we need to double-check
@ -165,6 +184,7 @@ public:
{
// lock acquired successfully
m_fd = fd;
umask(mask);
return true;
}
}
@ -181,8 +201,11 @@ public:
m_lock.l_type = F_UNLCK;
// Now removing the lock and closing the file descriptor
// waiting processes will be notified
// TODO: Check for error code and throw if !std::uncaught_exception()
fcntl(m_fd, F_SETLKW, &m_lock);
int rc = fcntl(m_fd, F_SETLKW, &m_lock);
if (rc == FCNTL_ERROR)
{
RuntimeError("Mutex Release: Failed to release mutex %s", m_fileName.c_str());
}
close(m_fd);
m_fd = -1;
}

Просмотреть файл

@ -184,6 +184,23 @@ bool DataReader::SupportsDistributedMBRead() const
return supportsDistributedMBRead;
}
//IsLegacyReader - Returns true if one of the readers is a legacy reader, false otherwise.
bool DataReader::IsLegacyReader() const
{
for (size_t i = 0; i < m_ioNames.size(); i++)
{
auto currReaderIter = m_dataReaders.find(m_ioNames[i]);
assert(currReaderIter != m_dataReaders.end());
if (currReaderIter->second->IsLegacyReader())
{
return true;
}
}
return false;
}
//StartDistributedMinibatchLoop - Startup a distributed minibatch loop for parallel training
// mbSize - [in] size of the minibatch (number of frames, etc.)
// epoch - [in] epoch number for this loop
@ -207,6 +224,13 @@ void DataReader::StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size
}
}
// Returns the current sample position as reported by the reader registered under the last io name.
size_t DataReader::GetCurrentSamplePosition()
{
    // BUGBUG: composition of old readers is not supported.
    // Returning just for the last reader.
    assert(!m_ioNames.empty());

    // Look the reader up with find() rather than operator[], which would silently insert
    // a default entry into m_dataReaders if the key were missing.
    auto lastReaderIter = m_dataReaders.find(m_ioNames.back());
    assert(lastReaderIter != m_dataReaders.end());

    return lastReaderIter->second->GetCurrentSamplePosition();
}
// GetMinibatch - Get the next minibatch (features and labels)
// matrices - [in] a map with named matrix types (i.e. 'features', 'labels') mapped to the corresponding matrix,
// [out] each matrix resized if necessary containing data.

Просмотреть файл

@ -26,6 +26,8 @@
#include <linux/limits.h> // for PATH_MAX
#endif
#define PCLOSE_ERROR -1
namespace Microsoft { namespace MSR { namespace CNTK {
// File creation
@ -255,17 +257,23 @@ bool File::IsTextBased()
// Note: this does not check for errors when the File corresponds to pipe stream. In this case, use Flush() before closing a file you are writing.
File::~File(void)
{
int rc = 0;
if (m_pcloseNeeded)
{
// TODO: Check for error code and throw if !std::uncaught_exception()
_pclose(m_file);
rc = _pclose(m_file);
if ((rc == PCLOSE_ERROR) && !std::uncaught_exception())
{
RuntimeError("File: failed to close file at %S", m_filename.c_str());
}
}
else if (m_file != stdin && m_file != stdout && m_file != stderr)
{
int rc = fclose(m_file);
if ((rc != 0) && !std::uncaught_exception())
rc = fclose(m_file);
if ((rc != FCLOSE_SUCCESS) && !std::uncaught_exception())
{
RuntimeError("File: failed to close file at %S", m_filename.c_str());
}
}
}
void File::Flush()

Просмотреть файл

@ -26,6 +26,7 @@
#define EPSILON 1e-5
#define ISCLOSE(a, b, threshold) (abs(a - b) < threshold) ? true : false
#define DLCLOSE_SUCCESS 0
#define UNUSED(x) (void)(x) // for variables that are, e.g., only used in _DEBUG builds
@ -705,9 +706,14 @@ public:
}
~Plugin()
{
// TODO: Check for error code and throw if !std::uncaught_exception()
if (handle != NULL)
dlclose(handle);
{
int rc = dlclose(handle);
if ((rc != DLCLOSE_SUCCESS) && !std::uncaught_exception())
{
RuntimeError("Plugin: Failed to decrements the reference count.");
}
}
}
};
#endif

Просмотреть файл

@ -239,6 +239,18 @@ public:
return false;
};
// Reports whether this reader belongs to the old DataReader architecture.
// This default implementation returns true.
virtual bool IsLegacyReader() const
{
return true;
};
// Gets current sample position on the global timeline.
// This default implementation only expands NOT_IMPLEMENTED (presumably it reports an error
// rather than returning a value - confirm against the macro definition).
virtual size_t GetCurrentSamplePosition()
{
NOT_IMPLEMENTED;
}
virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize)
{
if (SupportsDistributedMBRead() || (numSubsets != 1) || (subsetNum != 0))
@ -410,6 +422,8 @@ public:
}
virtual ~DataReader();
size_t GetCurrentSamplePosition() override;
// StartMinibatchLoop - Startup a minibatch loop
// mbSize - [in] size of the minibatch (number of frames, etc.)
// epoch - [in] epoch number for this loop
@ -417,6 +431,7 @@ public:
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
virtual bool SupportsDistributedMBRead() const override;
virtual bool IsLegacyReader() const override;
virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize) override;
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, const std::unordered_set<InputStreamDescription>&, size_t requestedEpochSamples = requestDataSize) override;

Просмотреть файл

@ -14,12 +14,14 @@
#endif
#pragma comment(lib, "msmpi.lib")
#include <errno.h>
#include <string>
#include <array>
#include <vector>
#include <memory>
#define FFLUSH_SUCCESS 0
namespace Microsoft { namespace MSR { namespace CNTK {
struct MpiFail : public std::string
@ -138,6 +140,14 @@ public:
MPI_Comm_size(MPI_COMM_WORLD, &m_numMPINodes);
m_numNodesInUse = m_numMPINodes;
// Verify that the environment variable used by GetTotalNumberOfMPINodes()
// matches what the MPI API says. There're actually two possible cases:
// 1) when we're running with mpiexec both values have to match;
// 2) when we're running without mpiexec, the former will return 0, and
// the latter will be set to 1.
assert((GetTotalNumberOfMPINodes() == 0 && m_numNodesInUse == 1) ||
(GetTotalNumberOfMPINodes() == m_numNodesInUse));
// Applying MPI workaround
s_myRank = m_myRank;
atexit(&MPIWrapper::MPIWorkaroundAtExit);
@ -160,19 +170,50 @@ public:
::Sleep((DWORD)(500 * CurrentNodeRank()));
}
// Returns the total number of MPI nodes as given by an environment variable
// (PMI_SIZE when WIN32 is defined, OMPI_COMM_WORLD_SIZE otherwise), or 0 when
// that variable is not set.
// Note that specifically, this function is such that it does not require
// MPI initialization. Moreover, it can be used without actually loading any
// MPI libs.
// TODO: Once we move to dynamic loading for MPI libs on Linux, move it to utilities.
static int GetTotalNumberOfMPINodes()
{
#ifdef WIN32
    const char* p = std::getenv("PMI_SIZE");
#else
    const char* p = std::getenv("OMPI_COMM_WORLD_SIZE");
#endif
    // std::stoi throws std::invalid_argument / std::out_of_range if the value is not a valid int.
    // std::string is spelled out so this header does not depend on a using-directive.
    return (p == nullptr) ? 0 : std::stoi(std::string(p));
}
// Note: we don't clear the sub-communication here although we should, because in case of a crash, this prevents the EXE from terminating.
// It's OK since this class is a singleton anyway that gets instantiated exactly once at program startup.
~MPIWrapper()
{
fprintf(stderr, "~MPIWrapper\n");
fflush(stderr);
// TODO: Check for error code and throw if !std::uncaught_exception()
// Do not finalize in event of an exception since calling MPI_Finalize without
// all pending communications being finished results in a hang
int rc = fflush(stderr);
if (!std::uncaught_exception())
{
if (rc != FFLUSH_SUCCESS)
{
#ifdef _WIN32
RuntimeError("MPIWrapper: Failed to flush stderr, %d", ::GetLastError());
#else
RuntimeError("MPIWrapper: Failed to flush stderr, %d", errno);
#endif
}
MPI_Finalize();
}
}
private:
void Ping(const char *msg) const

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше