From 085470840080425ad5655f6eed2ba10f7cfd30a2 Mon Sep 17 00:00:00 2001 From: Amit Agarwal Date: Sat, 30 Jul 2016 16:16:41 -0700 Subject: [PATCH] CNTK v2 library: a) Convolution b) Mean variance normalization c) Added Cifar resnet test --- Makefile | 1 + Source/CNTKv2LibraryDll/API/CNTKLibrary.h | 110 +++++++- Source/CNTKv2LibraryDll/BackCompat.cpp | 89 +++++- Source/CNTKv2LibraryDll/Function.cpp | 244 +++++++++++----- Source/CNTKv2LibraryDll/Function.h | 100 +++++-- Source/CNTKv2LibraryDll/MinibatchSource.cpp | 263 +++++++++++++----- Source/CNTKv2LibraryDll/MinibatchSource.h | 7 +- Source/CNTKv2LibraryDll/NDArrayView.cpp | 15 +- Source/CNTKv2LibraryDll/Utils.h | 76 ++++- .../ComputationNetworkLib/ComputationNode.h | 19 +- .../ConvolutionalNodes.h | 10 + .../LinearAlgebraNodes.h | 2 + Source/ComputationNetworkLib/TrainingNodes.h | 6 + .../CNTKv2Library/UnitTests/run-test | 7 +- .../UnitTests/V2LibraryTests/CifarResNet.cpp | 175 ++++++++++++ Tests/UnitTests/V2LibraryTests/Common.h | 9 + .../V2LibraryTests/FeedForwardTests.cpp | 2 +- Tests/UnitTests/V2LibraryTests/Image.h | 78 ++++++ Tests/UnitTests/V2LibraryTests/Main.cpp | 7 + .../UnitTests/V2LibraryTests/TrainerTests.cpp | 58 ++-- .../V2LibraryTests/V2LibraryTests.vcxproj | 2 + .../V2LibraryTests.vcxproj.filters | 6 + 22 files changed, 1062 insertions(+), 224 deletions(-) create mode 100644 Tests/UnitTests/V2LibraryTests/CifarResNet.cpp create mode 100644 Tests/UnitTests/V2LibraryTests/Image.h diff --git a/Makefile b/Makefile index 46d0c374f..ba36337db 100644 --- a/Makefile +++ b/Makefile @@ -417,6 +417,7 @@ CNTKLIBRARY_TESTS_SRC =\ Tests/UnitTests/V2LibraryTests/RecurrentFunctionTests.cpp \ Tests/UnitTests/V2LibraryTests/TensorTests.cpp \ Tests/UnitTests/V2LibraryTests/TrainerTests.cpp \ + Tests/UnitTests/V2LibraryTests/CifarResNet.cpp \ CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests CNTKLIBRARY_TESTS_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(CNTKLIBRARY_TESTS_SRC))) diff --git a/Source/CNTKv2LibraryDll/API/CNTKLibrary.h b/Source/CNTKv2LibraryDll/API/CNTKLibrary.h index 60dbe28b4..1e2193cff 100644 --- a/Source/CNTKv2LibraryDll/API/CNTKLibrary.h +++ b/Source/CNTKv2LibraryDll/API/CNTKLibrary.h @@ -784,14 +784,21 @@ namespace CNTK /// /// Create an 'Input' Variable. /// - Variable(const NDShape& shape, CNTK::DataType dataType, const wchar_t* name = L"") + Variable(const NDShape& shape, CNTK::DataType dataType) + : Variable(shape, dataType, L"") + {} + + /// + /// Create an 'Input' Variable. + /// + Variable(const NDShape& shape, CNTK::DataType dataType, const wchar_t* name) : Variable(shape, dataType, std::wstring(name)) {} /// /// Create an 'Input' Variable. /// - Variable(const NDShape& shape, CNTK::DataType dataType, const std::wstring& name = L"") + Variable(const NDShape& shape, CNTK::DataType dataType, const std::wstring& name) : Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, false, { Axis::DefaultDynamicAxis() }, false, name) {} @@ -1488,7 +1495,7 @@ namespace CNTK /// Create an instance of the CNTK built-in matrix multiplication operation with the specified input operands. /// TODO: Specify the constraints on the shapes of the operands. 
/// - CNTK_API FunctionPtr Times(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name = L""); + CNTK_API FunctionPtr Times(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes = 1, const std::wstring& name = L""); /// /// Create an instance of the CNTK built-in operation to compute squared-error for specified input operands. @@ -1525,6 +1532,61 @@ namespace CNTK /// CNTK_API FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name = L""); + /// + /// Per dimension mean-variance normalization of the specified input operand. + /// + CNTK_API FunctionPtr PerDimMeanVarianceNormalize(const Variable& operand, const NDArrayViewPtr& mean, const NDArrayViewPtr& invStdDev, const std::wstring& name = L""); + + /// + /// TODO: + /// + CNTK_API FunctionPtr Convolution(const Variable& convolutionMap, + const Variable& operand, + const NDShape& strides = {1}, + const std::vector& sharing = {true}, + const std::vector& autoPadding = {true}, + const NDShape& lowerPad = {0}, + const NDShape& upperPad = {0}, + bool transpose = false, + size_t maxTempMemSizeInSamples = 0, + const std::wstring& name = L""); + + /// + /// TODO: + /// + enum class PoolingType + { + Max, + Average, + }; + + /// + /// TODO: + /// + CNTK_API FunctionPtr Pooling(const Variable& operand, + PoolingType poolingType, + const NDShape& poolingWindowShape, + const NDShape& strides = {1}, + const std::vector& autoPadding = {false}, + const NDShape& lowerPad = {0}, + const NDShape& upperPad = {0}, + const std::wstring& name = L""); + + /// + /// TODO: + /// + CNTK_API FunctionPtr BatchNormalization(const Variable& operand, + const Variable& scale, + const Variable& bias, + const Variable& runningMean, + const Variable& runningInvStd, + bool spacial, + double normalizationTimeConstant = 0, + double blendTimeConstant = 0, + double epsilon = 0.00001, + bool useCuDNNEngine = false, + const std::wstring& name = L""); + /// /// Create a new Function instance which just combines the outputs of the specified list of 'operands' Functions such that the 'Outputs' of the /// new 'Function' are union of the 'Outputs' of each of the specified 'operands' Functions. @@ -1629,9 +1691,9 @@ namespace CNTK DictionaryValue(const T& value) : m_valueType(GetValueType()) { static_assert(std::is_same::value || - std::is_same::value || - std::is_same>::value || - std::is_same::value, + std::is_same::value || + std::is_same>::value || + std::is_same::value, "Unsupported ValueType"); AllocateDataPtr(value); @@ -1730,10 +1792,10 @@ namespace CNTK std::is_same::value || std::is_same::value || std::is_same::value || - std::is_same::value || + std::is_same::value || std::is_same::value || - std::is_same>::value || - std::is_same::value, + std::is_same>::value || + std::is_same::value, "Unsupported ValueType"); if (std::is_same::value) return Type::Bool; @@ -1973,7 +2035,11 @@ namespace CNTK inline bool operator==(const StreamInfo& left, const StreamInfo& right) { - return (left.m_id == right.m_id); + return ((left.m_id == right.m_id) && + (left.m_name == right.m_name) && + (left.m_storageFormat == right.m_storageFormat) && + (left.m_elementType == right.m_elementType) && + (left.m_sampleLayout == right.m_sampleLayout)); } } @@ -1989,6 +2055,13 @@ namespace std { namespace CNTK { + struct MinibatchData + { + size_t m_numSequences; + size_t m_numSamples; + ValuePtr m_data; + }; + /// /// Abstraction for generating minbatches of samples for training/evaluation. 
/// @@ -2002,10 +2075,14 @@ namespace CNTK /// /// Reads a minibatch that contains data across all input streams. - /// The minibatchData argument specifies the desired minibatch size for each stream of the reader and the actual returned size is the min across all streams. - /// The return value of false indciates that the reader will no longer return any further data. + /// The perStreamMBSizeLimits argument specifies the desired minibatch size limit for each stream of the reader, in terms of #sequences and/or + /// #samples; if both are specified, the smaller of the two limits is used. The actual + /// returned size of the minibatch is the min across all streams. The returned map contains, for each requested stream, the actual + /// #sequences and #samples in the returned minibatch along with the minibatch data itself. + /// An empty return value indicates that the MinibatchSource will not return any further data in subsequent calls of this function. /// - virtual bool GetNextMinibatch(std::unordered_map>& minibatchData) = 0; + virtual std::unordered_map GetNextMinibatch(const std::unordered_map>& perStreamMBSizeLimits, + const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) = 0; // TODO: Methods to save and restore from checkpoints @@ -2020,4 +2097,11 @@ namespace CNTK /// Instantiate the CNTK built-in composite minibatch source. /// CNTK_API MinibatchSourcePtr CreateCompositeMinibatchSource(const Dictionary& configuration); + + /// + /// Compute the per-dimension means and inverse standard deviations for each of the specified streams, using data from the specified minibatchSource. + /// + CNTK_API void ComputeInputPerDimMeansAndInvStdDevs(const MinibatchSourcePtr& minibatchSource, + std::unordered_map>& computedMeanAndInvStdDevs, + const DeviceDescriptor& device = DeviceDescriptor::CPUDevice()); } diff --git a/Source/CNTKv2LibraryDll/BackCompat.cpp b/Source/CNTKv2LibraryDll/BackCompat.cpp index 43b0f1798..a785d9c58 100644 --- a/Source/CNTKv2LibraryDll/BackCompat.cpp +++ b/Source/CNTKv2LibraryDll/BackCompat.cpp @@ -71,10 +71,14 @@ namespace CNTK } else if (node->Is>()) { + bool isConstant = (node->GetLearningRateMultiplier() == 0); auto& matrix = node->As>()->Value(); auto tensorView = new TensorView(std::make_shared>(matrix.AsReference()), node->GetSampleLayout()); NDArrayViewPtr parameterValue = MakeSharedObject(AsDataType(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), varShape, false, tensorView); - var = Parameter(parameterValue, node->GetName()); + if (isConstant) + var = Constant(parameterValue, node->GetName()); + else + var = Parameter(parameterValue, node->GetName()); } else LogicError("CNTK::LoadLegacyModel: Unsupported legacy CNTK node named '%S'", node->NodeName().c_str()); @@ -95,16 +99,51 @@ namespace CNTK PrimitiveOpType opType; Dictionary primitiveFunctionConfigParameters; - if (node->OperationName() == OperationNameOf(TanhNode)) - opType = PrimitiveOpType::Tanh; + if (node->OperationName() == OperationNameOf(NegateNode)) + opType = PrimitiveOpType::Negate; else if (node->OperationName() == OperationNameOf(SigmoidNode)) opType = PrimitiveOpType::Sigmoid; + else if (node->OperationName() == OperationNameOf(TanhNode)) + opType = PrimitiveOpType::Tanh; + else if (node->OperationName() == OperationNameOf(RectifiedLinearNode)) + opType = PrimitiveOpType::ReLU; else if (node->OperationName() == OperationNameOf(ExpNode)) opType = PrimitiveOpType::Exp; - else if (node->OperationName() == 
OperationNameOf(TimesNode)) - opType = PrimitiveOpType::Times; + else if (node->OperationName() == OperationNameOf(LogNode)) + opType = PrimitiveOpType::Log; + else if (node->OperationName() == OperationNameOf(SqrtNode)) + opType = PrimitiveOpType::Sqrt; + else if (node->OperationName() == OperationNameOf(FloorNode)) + opType = PrimitiveOpType::Floor; + else if (node->OperationName() == OperationNameOf(AbsNode)) + opType = PrimitiveOpType::Abs; + else if (node->OperationName() == OperationNameOf(ReciprocalNode)) + opType = PrimitiveOpType::Reciprocal; + else if (node->OperationName() == OperationNameOf(SoftmaxNode)) + opType = PrimitiveOpType::Softmax; else if (node->OperationName() == OperationNameOf(PlusNode)) opType = PrimitiveOpType::Plus; + else if (node->OperationName() == OperationNameOf(MinusNode)) + opType = PrimitiveOpType::Minus; + else if (node->OperationName() == OperationNameOf(ElementTimesNode)) + opType = PrimitiveOpType::ElementTimes; + else if (node->OperationName() == OperationNameOf(EqualNode)) + opType = PrimitiveOpType::Equal; + else if (node->OperationName() == OperationNameOf(NotEqualNode)) + opType = PrimitiveOpType::NotEqual; + else if (node->OperationName() == OperationNameOf(LessNode)) + opType = PrimitiveOpType::Less; + else if (node->OperationName() == OperationNameOf(LessEqualNode)) + opType = PrimitiveOpType::LessEqual; + else if (node->OperationName() == OperationNameOf(GreaterNode)) + opType = PrimitiveOpType::Greater; + else if (node->OperationName() == OperationNameOf(GreaterEqualNode)) + opType = PrimitiveOpType::GreaterEqual; + else if (node->OperationName() == OperationNameOf(TimesNode)) + { + primitiveFunctionConfigParameters[L"numOutputAxes"] = DictionaryValue((size_t)node->As>()->OutputRank()); + opType = PrimitiveOpType::Times; + } else if (node->OperationName() == OperationNameOf(PastValueNode)) { if (inputVars.size() == 1) @@ -125,6 +164,8 @@ namespace CNTK primitiveFunctionConfigParameters[L"stepSize"] = DictionaryValue((size_t)node->As>()->TimeStep()); opType = PrimitiveOpType::FutureValue; } + else if (node->OperationName() == OperationNameOf(SquareErrorNode)) + opType = PrimitiveOpType::SquaredError; else if (node->OperationName() == OperationNameOf(CrossEntropyWithSoftmaxNode)) { std::swap(inputVars[0], inputVars[1]); @@ -135,10 +176,44 @@ namespace CNTK std::swap(inputVars[0], inputVars[1]); opType = PrimitiveOpType::ClassificationError; } - else if (node->OperationName() == OperationNameOf(ElementTimesNode)) - opType = PrimitiveOpType::ElementTimes; else if (node->OperationName() == OperationNameOf(SumElementsNode)) opType = PrimitiveOpType::ReduceSum; + else if (node->OperationName() == OperationNameOf(ConvolutionNode)) + { + auto convolutionNode = node->As>(); + primitiveFunctionConfigParameters[L"strides"] = AsNDShape(convolutionNode->Strides()); + primitiveFunctionConfigParameters[L"sharing"] = AsDictionaryValueVector(convolutionNode->Sharing()); + primitiveFunctionConfigParameters[L"autoPadding"] = AsDictionaryValueVector(convolutionNode->AutoPad()); + primitiveFunctionConfigParameters[L"lowerPad"] = AsNDShape(convolutionNode->LowerPad()); + primitiveFunctionConfigParameters[L"upperPad"] = AsNDShape(convolutionNode->UpperPad()); + primitiveFunctionConfigParameters[L"transpose"] = convolutionNode->Transpose(); + primitiveFunctionConfigParameters[L"maxTempMemSizeInSamples"] = convolutionNode->MaxTempMemSizeInSamples(); + + opType = PrimitiveOpType::Convolution; + } + else if (node->OperationName() == OperationNameOf(PoolingNode)) + { + 
auto poolingNode = node->As>(); + primitiveFunctionConfigParameters[L"poolingType"] = (size_t)(AsPoolingType(poolingNode->PoolingKind())); + primitiveFunctionConfigParameters[L"poolingWindowShape"] = AsNDShape(poolingNode->KernelShape()); + primitiveFunctionConfigParameters[L"strides"] = AsNDShape(poolingNode->Strides()); + primitiveFunctionConfigParameters[L"autoPadding"] = AsDictionaryValueVector(poolingNode->AutoPad()); + primitiveFunctionConfigParameters[L"lowerPad"] = AsNDShape(poolingNode->LowerPad()); + primitiveFunctionConfigParameters[L"upperPad"] = AsNDShape(poolingNode->UpperPad()); + + opType = PrimitiveOpType::Pooling; + } + else if (node->OperationName() == OperationNameOf(BatchNormalizationNode)) + { + auto batchNormalizationNode = node->As>(); + primitiveFunctionConfigParameters[L"spacial"] = batchNormalizationNode->Spatial(); + primitiveFunctionConfigParameters[L"normalizationTimeConstant"] = batchNormalizationNode->NormalizationTimeConstant(); + primitiveFunctionConfigParameters[L"blendTimeConstant"] = batchNormalizationNode->BlendTimeConstant(); + primitiveFunctionConfigParameters[L"epsilon"] = batchNormalizationNode->Epsilon(); + primitiveFunctionConfigParameters[L"useCuDNNEngine"] = !batchNormalizationNode->UseCNTKEngine(); + + opType = PrimitiveOpType::BatchNormalization; + } else LogicError("Unsupported ComputationNode with OperationName='%S' found when loading legacy CNTK model", node->OperationName().c_str()); diff --git a/Source/CNTKv2LibraryDll/Function.cpp b/Source/CNTKv2LibraryDll/Function.cpp index 56ce8d78d..b103d8493 100644 --- a/Source/CNTKv2LibraryDll/Function.cpp +++ b/Source/CNTKv2LibraryDll/Function.cpp @@ -170,6 +170,7 @@ namespace CNTK if (dynamic_cast(function)) { PrimitiveFunction* primitiveFunction = dynamic_cast(function); + auto functionConfig = primitiveFunction->FunctionConfig(); // Create the nodes corresponding to the inputs auto functionInputs = primitiveFunction->Inputs(); @@ -222,6 +223,17 @@ namespace CNTK computationNodePtr = builder.Softmax(input0Node, function->Name()); break; + case PrimitiveOpType::Pooling: + { + PoolingType poolingType = (PoolingType)(functionConfig[L"poolingType"].GetValue()); + auto poolingWindowsShape = functionConfig[L"poolingWindowShape"].GetValue(); + auto strides = functionConfig[L"strides"].GetValue(); + auto lowerPad = functionConfig[L"lowerPad"].GetValue(); + auto upperPad = functionConfig[L"upperPad"].GetValue(); + auto autoPadding = AsBasicElementTypeVector(functionConfig[L"autoPadding"].GetValue>()); + computationNodePtr = builder.Pooling(input0Node, AsCNTKPoolKind(poolingType), AsTensorShape(poolingWindowsShape, true), AsTensorShape(strides, true), autoPadding, AsTensorShape(lowerPad, true), AsTensorShape(upperPad, true), ImageLayoutKind::CHW, function->Name()); + break; + } case PrimitiveOpType::Plus: computationNodePtr = builder.Plus(input0Node, input1Node, function->Name()); break; @@ -250,9 +262,25 @@ namespace CNTK computationNodePtr = builder.GreaterEqual(input0Node, input1Node, function->Name()); break; case PrimitiveOpType::Times: - // TODO: The output rank of the times operation is currently hardcoded to 1 - computationNodePtr = builder.Times(input0Node, input1Node, 1, function->Name()); + { + size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue(); + computationNodePtr = builder.Times(input0Node, input1Node, numOutputAxes, function->Name()); break; + } + case PrimitiveOpType::Convolution: + { + NDShape outputMapCount, kernelShape; + std::tie(outputMapCount, kernelShape) = 
GetConvolutionOutputMapCountAndKernelShape(functionInputs[0].Shape(), functionInputs[1].Shape()); + auto strides = functionConfig[L"strides"].GetValue(); + auto lowerPad = functionConfig[L"lowerPad"].GetValue(); + auto upperPad = functionConfig[L"upperPad"].GetValue(); + auto sharing = AsBasicElementTypeVector(functionConfig[L"sharing"].GetValue>()); + auto autoPadding = AsBasicElementTypeVector(functionConfig[L"autoPadding"].GetValue>()); + auto transpose = functionConfig[L"transpose"].GetValue(); + auto maxTempMemSizeInSamples = functionConfig[L"maxTempMemSizeInSamples"].GetValue(); + computationNodePtr = builder.Convolution(input0Node, input1Node, AsTensorShape(kernelShape, true), AsTensorShape(outputMapCount, true), AsTensorShape(strides, true), sharing, autoPadding, AsTensorShape(lowerPad, true), AsTensorShape(upperPad, true), transpose, ImageLayoutKind::CHW, maxTempMemSizeInSamples, function->Name()); + break; + } case PrimitiveOpType::SquaredError: computationNodePtr = builder.SquareError(input0Node, input1Node, function->Name()); break; @@ -298,6 +326,23 @@ namespace CNTK computationNodePtr = builder.Sum(input0Node, function->Name()); break; } + case PrimitiveOpType::BatchNormalization: + { + auto spacial = functionConfig[L"spacial"].GetValue(); + auto normalizationTimeConstant = functionConfig[L"normalizationTimeConstant"].GetValue(); + auto blendTimeConstant = functionConfig[L"blendTimeConstant"].GetValue(); + auto epsilon = functionConfig[L"epsilon"].GetValue(); + auto useCuDNNEngine = functionConfig[L"useCuDNNEngine"].GetValue(); + std::vector>> inputNodes; + for (auto inputVar : functionInputs) + { + auto baseNodePtr = GetNode(inputVar, network, builder, variableToNodeMap, isVariableRootMap); + inputNodes.push_back((baseNodePtr != nullptr) ? baseNodePtr->template As>()->shared_from_this() : nullptr); + } + + computationNodePtr = builder.BatchNormalization(inputNodes[0], inputNodes[1], inputNodes[2], inputNodes[3], inputNodes[4], spacial, normalizationTimeConstant, blendTimeConstant, epsilon, !useCuDNNEngine, ImageLayoutKind::CHW, function->Name()); + break; + } case PrimitiveOpType::Combine: // This operation is just a no-op and is a means to combine multiple functions to create a single Function // whose outputs are a union of tyhe outputs of the Functions being combined. 
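For orientation, here is a minimal usage sketch (not part of the patch itself) of how the newly declared Convolution, Pooling and BatchNormalization operations are composed from the caller's side, using only the CNTK v2 signatures added above; the helper name, kernel sizes, strides, random seeds and initial values are hypothetical.

// Illustrative sketch only: compose the new Convolution/BatchNormalization/Pooling ops.
// All shapes, seeds and initial values below are hypothetical placeholders.
using namespace CNTK;

inline FunctionPtr ConvBNPoolLayer(Variable input, size_t inChannels, size_t outChannels, const DeviceDescriptor& device)
{
    // Convolution map laid out as [kernelWidth x kernelHeight x inChannels x outChannels]
    auto convMap = Parameter(NDArrayView::RandomNormal<float>({ 3, 3, inChannels, outChannels }, 0.0, 0.05, 1, device));
    auto conv = Convolution(convMap, input, /*strides=*/{ 1, 1, inChannels });

    // Per-feature-map scale/bias and running statistics for spatial batch normalization
    // (placeholder initialization; real code would initialize these appropriately)
    auto scale = Parameter(NDArrayView::RandomNormal<float>({ outChannels }, 0.0, 0.05, 1, device));
    auto bias = Parameter(NDArrayView::RandomNormal<float>({ outChannels }, 0.0, 0.05, 1, device));
    auto runningMean = Constant(NDArrayView::RandomNormal<float>({ outChannels }, 0.0, 0.05, 1, device));
    auto runningInvStd = Constant(NDArrayView::RandomNormal<float>({ outChannels }, 0.0, 0.05, 1, device));
    auto bn = BatchNormalization(conv, scale, bias, runningMean, runningInvStd, /*spacial=*/true, /*normalizationTimeConstant=*/5000);

    // 3x3 max pooling with a stride of 2 in each spatial dimension
    return Pooling(bn, PoolingType::Max, /*poolingWindowShape=*/{ 3, 3 }, /*strides=*/{ 2, 2 });
}

A dense output layer on top of such a stack would use the generalized Times overload in the same spirit, e.g. Times(outputWeights, pooledFeatures, /*numOutputAxes=*/1).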
@@ -408,7 +453,7 @@ namespace CNTK auto outputShape = outputVar.Shape(); auto computationNodeSampleLayout = computationNodePtr->GetSampleLayout(); if (((outputShape.NumAxes() == 0) && (computationNodeSampleLayout[0] != 1)) || - ((outputShape.NumAxes() != 0) && (computationNodeSampleLayout != AsTensorShape(outputShape)))) + ((outputShape.NumAxes() != 0) && (computationNodeSampleLayout != AsTensorShape(outputShape)) && (computationNodeSampleLayout != AsTensorShape(outputShape, true)))) { LogicError("The output Variable shape %s does not match the SampleLayout shape %s of the corresponding ComputationNode in the network", AsString(outputShape).c_str(), ((std::string)computationNodeSampleLayout).c_str()); } @@ -739,45 +784,48 @@ namespace CNTK return NDShape(outputShapeDims); } + /*static*/ void CompositeFunction::GetNodeOutputOrGradient(Variable var, ValuePtr& varValue, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode, bool getGradient) + { + auto valueShape = GetValueShape(var, computationNode); + if (varValue != nullptr) + { + // TODO: The shape of the specified output Value object must match the actual output shape + if (varValue->Data()->Shape() != valueShape) + InvalidArgument("The shape %s of the specified Value object for %s does not match the actual shape %s", AsString(varValue->Data()->Shape()).c_str(), getGradient ? "gradient" : "output", AsString(valueShape).c_str()); + } + + ValuePtr nodeValue; + switch (var.GetDataType()) + { + case DataType::Float: + nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout(var, + getGradient ? computationNode->As>()->Gradient() : computationNode->As>()->Value(), + computationNode->GetMBLayout()); + break; + case DataType::Double: + nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout(var, + getGradient ? computationNode->As>()->Gradient() : computationNode->As>()->Value(), + computationNode->GetMBLayout()); + break; + default: + LogicError("Unsupported DataType %s", DataTypeName(var.GetDataType())); + break; + } + + if (varValue == nullptr) + { + auto data = MakeSharedObject(var.GetDataType(), valueShape, AsDeviceDescriptor(computationNode->ValuePtr()->GetDeviceId())); + auto mask = (nodeValue->Mask() != nullptr) ? 
MakeSharedObject(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr; + varValue = MakeSharedObject(data, mask); + } + varValue->CopyFrom(*nodeValue); + } + void CompositeFunction::GetNetworkOutputs(std::unordered_map& outputs) { // Now copy the Forward values of output nodes from the network to outputs' Value objects for (auto outputVarValuePair : outputs) - { - auto computationNodePtr = m_variableToNodeMap[outputVarValuePair.first]; - auto outputValuePtr = outputVarValuePair.second; - - auto outputShape = GetValueShape(outputVarValuePair.first, computationNodePtr); - if (outputValuePtr != nullptr) - { - // TODO: The shape of the specified output Value object must match the actual output shape - if (outputValuePtr->Data()->Shape() != outputShape) - InvalidArgument("The shape %s of the specified Value object for output does not match the actual output shape %s", AsString(outputValuePtr->Data()->Shape()).c_str(), AsString(outputShape).c_str()); - } - - ValuePtr nodeValue; - switch (outputVarValuePair.first.GetDataType()) - { - case DataType::Float: - nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout(outputVarValuePair.first, computationNodePtr->As>()->Value(), computationNodePtr->GetMBLayout()); - break; - case DataType::Double: - nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout(outputVarValuePair.first, computationNodePtr->As>()->Value(), computationNodePtr->GetMBLayout()); - break; - default: - LogicError("Unsupported DataType %s", DataTypeName(outputVarValuePair.first.GetDataType())); - break; - } - - if (outputValuePtr == nullptr) - { - auto data = MakeSharedObject(outputVarValuePair.first.GetDataType(), outputShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())); - auto mask = (nodeValue->Mask() != nullptr) ? 
MakeSharedObject(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr; - outputValuePtr = MakeSharedObject(data, mask); - } - outputValuePtr->CopyFrom(*nodeValue); - outputs[outputVarValuePair.first] = outputValuePtr; - } + GetNodeOutputOrGradient(outputVarValuePair.first, outputs[outputVarValuePair.first], m_variableToNodeMap[outputVarValuePair.first], false /*getGradient*/); } void CompositeFunction::GetNetworkGradients(std::unordered_map& gradients) @@ -795,42 +843,11 @@ namespace CNTK InvalidArgument("Gradient value incorrectly requested for an Output or Constant Variable, or an Input Variable with NeedsGradient setting of false"); auto computationNodePtr = m_variableToNodeMap[gradientVarValuePair.first]; - auto gradientValuePtr = gradientVarValuePair.second; - - auto gradientShape = GetValueShape(gradientVarValuePair.first, computationNodePtr); - if (gradientValuePtr != nullptr) - { - // TODO: The shape of the specified output Value object must match the actual output shape - if (gradientValuePtr->Data()->Shape() != gradientShape) - InvalidArgument("The shape %s of the specified Value object for gradient does not match the actual gradient shape %s", AsString(gradientValuePtr->Data()->Shape()).c_str(), AsString(gradientShape).c_str()); - } if (!computationNodePtr->NeedsGradient()) LogicError("Backpropagated gradient value cannot be read from a ComputationNode that has NeedsGradient set to false"); - ValuePtr nodeValue; - switch (gradientVarValuePair.first.GetDataType()) - { - case DataType::Float: - nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout(gradientVarValuePair.first, computationNodePtr->As>()->Gradient(), computationNodePtr->GetMBLayout()); - break; - case DataType::Double: - nodeValue = GetValueObjectFromCNTKImplMatrixAndMBLayout(gradientVarValuePair.first, computationNodePtr->As>()->Gradient(), computationNodePtr->GetMBLayout()); - break; - default: - LogicError("Unsupported DataType %s", DataTypeName(gradientVarValuePair.first.GetDataType())); - break; - } - - if (gradientValuePtr == nullptr) - { - auto data = MakeSharedObject(gradientVarValuePair.first.GetDataType(), gradientShape, AsDeviceDescriptor(computationNodePtr->ValuePtr()->GetDeviceId())); - auto mask = (nodeValue->Mask() != nullptr) ? MakeSharedObject(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr; - gradientValuePtr = MakeSharedObject(data, mask); - } - - gradientValuePtr->CopyFrom(*nodeValue); - gradients[gradientVarValuePair.first] = gradientValuePtr; + GetNodeOutputOrGradient(gradientVarValuePair.first, gradients[gradientVarValuePair.first], computationNodePtr, true /*getGradient*/); } } @@ -872,6 +889,8 @@ namespace CNTK outputsToEvaluate.push_back(m_variableToNodeMap[rootVarForBackprop]); } + ScopedNetworkOperationMode modeGuard(m_computationNetwork, outputsToRetainBackwardStateFor.empty() ? 
NetworkOperationMode::inferring : NetworkOperationMode::training); + m_computationNetwork->ForwardProp(outputsToEvaluate); GetNetworkOutputs(outputs); @@ -907,6 +926,8 @@ namespace CNTK PopulateNetworkGradients(rootGradientValues); // Backpropagate through the network + ScopedNetworkOperationMode modeGuard(m_computationNetwork, NetworkOperationMode::training); + auto rootComputationNodePtr = m_variableToNodeMap[rootGradientValues.begin()->first]; m_computationNetwork->GetNestedNetwork(rootComputationNodePtr)->Backprop(FrameRange(nullptr), true, true); @@ -1045,9 +1066,11 @@ namespace CNTK return BinaryOp(PrimitiveOpType::GreaterEqual, leftOperand, rightOperand, Dictionary(), name); } - FunctionPtr Times(const Variable& leftOperand, const Variable& rightOperand, const std::wstring& name/* = L""*/) + FunctionPtr Times(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes /*= 1*/, const std::wstring& name/* = L""*/) { - return BinaryOp(PrimitiveOpType::Times, leftOperand, rightOperand, Dictionary(), name); + auto additionalProperties = Dictionary(); + additionalProperties[L"numOutputAxes"] = numOutputAxes; + return BinaryOp(PrimitiveOpType::Times, leftOperand, rightOperand, std::move(additionalProperties), name); } FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name/* = L""*/) @@ -1090,6 +1113,83 @@ namespace CNTK return UnaryOp(PrimitiveOpType::ReduceSum, operand, Dictionary(), name); } + FunctionPtr PerDimMeanVarianceNormalize(const Variable& operand, const NDArrayViewPtr& mean, const NDArrayViewPtr& invStdDev, const std::wstring& name /*= L""*/) + { + Constant meanVar(mean); + Constant invStdDevVar(invStdDev); + + return ElementTimes(Minus(operand, meanVar), invStdDevVar); + } + + FunctionPtr Convolution(const Variable& convolutionMap, + const Variable& operand, + const NDShape& strides, + const std::vector& sharing, + const std::vector& autoPadding, + const NDShape& lowerPad, + const NDShape& upperPad, + bool transpose, + size_t maxTempMemSizeInSamples, + const std::wstring& name) + { + auto additionalProperties = Dictionary(); + additionalProperties[L"strides"] = strides; + additionalProperties[L"sharing"] = AsDictionaryValueVector(sharing); + additionalProperties[L"autoPadding"] = AsDictionaryValueVector(autoPadding); + additionalProperties[L"lowerPad"] = lowerPad; + additionalProperties[L"upperPad"] = upperPad; + additionalProperties[L"transpose"] = transpose; + additionalProperties[L"maxTempMemSizeInSamples"] = maxTempMemSizeInSamples; + + return BinaryOp(PrimitiveOpType::Convolution, convolutionMap, operand, std::move(additionalProperties), name); + } + + FunctionPtr Pooling(const Variable& operand, + PoolingType poolingType, + const NDShape& poolingWindowShape, + const NDShape& strides, + const std::vector& autoPadding, + const NDShape& lowerPad, + const NDShape& upperPad, + const std::wstring& name) + { + auto additionalProperties = Dictionary(); + additionalProperties[L"poolingType"] = (size_t)poolingType; + additionalProperties[L"poolingWindowShape"] = poolingWindowShape; + additionalProperties[L"strides"] = strides; + additionalProperties[L"autoPadding"] = AsDictionaryValueVector(autoPadding); + additionalProperties[L"lowerPad"] = lowerPad; + additionalProperties[L"upperPad"] = upperPad; + + return UnaryOp(PrimitiveOpType::Pooling, operand, std::move(additionalProperties), name); + } + + FunctionPtr BatchNormalization(const Variable& operand, + const Variable& scale, + const Variable& bias, + const 
Variable& runningMean, + const Variable& runningInvStd, + bool spacial, + double normalizationTimeConstant, + double blendTimeConstant, + double epsilon, + bool useCuDNNEngine, + const std::wstring& name) + { + auto additionalProperties = Dictionary(); + additionalProperties[L"spacial"] = spacial; + additionalProperties[L"normalizationTimeConstant"] = normalizationTimeConstant; + additionalProperties[L"blendTimeConstant"] = blendTimeConstant; + additionalProperties[L"epsilon"] = epsilon; + additionalProperties[L"useCuDNNEngine"] = useCuDNNEngine; + + return CompositeFunction::Create(MakeSharedObject(PrimitiveOpType::BatchNormalization, + std::vector({ operand, scale, bias, runningMean, runningInvStd }), + std::move(additionalProperties), + name), + name); + } + FunctionPtr Combine(const std::vector& operands, const std::wstring& name/* = L""*/) { std::unordered_set uniqueOperands; diff --git a/Source/CNTKv2LibraryDll/Function.h b/Source/CNTKv2LibraryDll/Function.h index 9151769fb..7b8f07279 100644 --- a/Source/CNTKv2LibraryDll/Function.h +++ b/Source/CNTKv2LibraryDll/Function.h @@ -10,6 +10,7 @@ #include #include "ComputationNetwork.h" #include "Utils.h" +#include "ConvolveGeometry.h" namespace CNTK { @@ -26,6 +27,7 @@ namespace CNTK Abs, Reciprocal, Softmax, + Pooling, Plus, Minus, ElementTimes, @@ -36,12 +38,14 @@ namespace CNTK Greater, GreaterEqual, Times, + Convolution, SquaredError, CrossEntropyWithSoftmax, ClassificationError, PastValue, FutureValue, ReduceSum, + BatchNormalization, Combine, }; } @@ -73,6 +77,7 @@ namespace CNTK { PrimitiveOpType::Abs, "Abs" }, { PrimitiveOpType::Reciprocal, "Reciprocal" }, { PrimitiveOpType::Softmax, "Softmax" }, + { PrimitiveOpType::Pooling, "Pooling" }, { PrimitiveOpType::Plus, "Plus" }, { PrimitiveOpType::Minus, "Minus" }, { PrimitiveOpType::ElementTimes, "ElementTimes" }, @@ -83,12 +88,14 @@ namespace CNTK { PrimitiveOpType::Greater, "Greater" }, { PrimitiveOpType::GreaterEqual, "GreaterEqual" }, { PrimitiveOpType::Times, "Times" }, + { PrimitiveOpType::Convolution, "Convolution" }, { PrimitiveOpType::SquaredError, "SquaredError" }, { PrimitiveOpType::CrossEntropyWithSoftmax, "CrossEntropyWithSoftmax" }, { PrimitiveOpType::ClassificationError, "ClassificationError" }, { PrimitiveOpType::PastValue, "PastValue" }, { PrimitiveOpType::FutureValue, "FutureValue" }, { PrimitiveOpType::ReduceSum, "ReduceSum" }, + { PrimitiveOpType::BatchNormalization, "BatchNormalization" }, { PrimitiveOpType::Combine, "Combine" } }; @@ -102,7 +109,7 @@ namespace CNTK { public: PrimitiveFunction(PrimitiveOpType op, const std::vector& inputs, Dictionary&& functionConfig, const std::wstring& functionName = L"") - : Function(inputs, GetOutputVariables(op, inputs, this), nullptr, functionName), m_op(op), m_functionConfig(std::move(functionConfig)) + : Function(inputs, GetOutputVariables(op, inputs, this, functionConfig), nullptr, functionName), m_op(op), m_functionConfig(std::move(functionConfig)) { } @@ -169,25 +176,28 @@ namespace CNTK return NDShape(std::move(outputDims)); } - static NDShape TimesOpOutputShape(const NDShape& leftOperandShape, const NDShape& rightOperandShape) + static NDShape TimesOpOutputShape(const NDShape& leftOperandShape, const NDShape& rightOperandShape, size_t numOutputAxes) { - if (rightOperandShape.NumAxes() > 2) - RuntimeError("The right operand of a times operation can have at most 2 axes"); + if (numOutputAxes == 0) + InvalidArgument("Output #axes of times operation should be at least one"); - size_t numOutputAxes = 
rightOperandShape.NumAxes(); + if (numOutputAxes > leftOperandShape.NumAxes()) + InvalidArgument("Output #axes of times operation can at most be the #axes of the left operand"); - if (leftOperandShape.NumAxes() != 2) - RuntimeError("The left operand of a times operation must have 2 axes"); + size_t numReductionAxes = leftOperandShape.NumAxes() - numOutputAxes; - std::vector outputDims(numOutputAxes); - outputDims[0] = leftOperandShape[0]; - if (numOutputAxes > 1) - outputDims[1] = rightOperandShape[1]; + // The 'numReductionAxes' trailing dimensions of the left operand's shape must match the corresponding leading + // dimensions of the right operand - if (leftOperandShape[1] != rightOperandShape[0]) - RuntimeError("Left operand's shape %s is not compatible with right operand's shape %s for the times operation", AsString(leftOperandShape).c_str(), AsString(rightOperandShape).c_str()); + if (rightOperandShape.NumAxes() != numReductionAxes) + RuntimeError("The right operand's #axes in a times operation should equal #axes being reduced over!"); - return NDShape(std::move(outputDims)); + if (leftOperandShape.SubShape(numOutputAxes) != rightOperandShape) + InvalidArgument("The trailing dimensions of the left operand (%s) do not match the right operand's dimensions (%s)", + AsString(leftOperandShape.SubShape(numOutputAxes)).c_str(), + AsString(rightOperandShape).c_str()); + + return leftOperandShape.SubShape(0, numOutputAxes); } static NDShape ReductionOpOutputShape(PrimitiveOpType op, const NDShape& operandShape, const std::vector& reductionAxes) @@ -209,8 +219,22 @@ namespace CNTK return NDShape(std::move(outputDims)); } + static NDShape ConvolutionOpOutputShape(const NDShape& operandShape, const NDShape& kernelShape, const NDShape& outputMapCount, const NDShape& strides, + const std::vector& sharing, + std::vector& autoPad, const NDShape& lowerPad, const NDShape& upperPad, + bool transpose) + { + decltype(&Microsoft::MSR::CNTK::ConvolveGeometry::ComputeOutputShape) computeOutputShapeFunc; + if (!transpose) + computeOutputShapeFunc = &Microsoft::MSR::CNTK::ConvolveGeometry::ComputeOutputShape; + else + computeOutputShapeFunc = &Microsoft::MSR::CNTK::ConvolveGeometry::ComputeInputShape; + + return AsNDShape(computeOutputShapeFunc(AsTensorShape(operandShape, true), AsTensorShape(kernelShape, true), AsTensorShape(outputMapCount, true), AsTensorShape(strides, true), sharing, autoPad, AsTensorShape(lowerPad, true), AsTensorShape(upperPad, true))); + } + // TODO: Reconcile this with the ComputationNode::Validate functionality in core CNTK to avoid duplication of inference logic - static std::vector GetOutputVariables(PrimitiveOpType op, const std::vector& inputs, Function* owner) + static std::vector GetOutputVariables(PrimitiveOpType op, const std::vector& inputs, Function* owner, const Dictionary& functionConfig) { std::vector outputs; @@ -247,6 +271,17 @@ namespace CNTK assert(inputs.size() == 1); outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner, outputDynamicAxes)); break; + case PrimitiveOpType::Pooling: + { + assert(inputs.size() == 1); + auto poolingWindowsShape = functionConfig[L"poolingWindowShape"].GetValue(); + auto strides = functionConfig[L"strides"].GetValue(); + auto lowerPad = functionConfig[L"lowerPad"].GetValue(); + auto upperPad = functionConfig[L"upperPad"].GetValue(); + auto autoPadding = AsBasicElementTypeVector(functionConfig[L"autoPadding"].GetValue>()); + outputs.push_back(Variable(ConvolutionOpOutputShape(inputs[0].Shape(), 
poolingWindowsShape, { 1 }, strides, { true }, autoPadding, lowerPad, upperPad, false), outputDataType, owner, outputDynamicAxes)); + break; + } case PrimitiveOpType::Plus: case PrimitiveOpType::Minus: case PrimitiveOpType::ElementTimes: @@ -260,9 +295,34 @@ namespace CNTK outputs.push_back(Variable(BinaryElementwiseOpOutputShape(op, inputs[0].Shape(), inputs[1].Shape()), outputDataType, owner, outputDynamicAxes)); break; case PrimitiveOpType::Times: + { assert(inputs.size() == 2); - outputs.push_back(Variable(TimesOpOutputShape(inputs[0].Shape(), inputs[1].Shape()), outputDataType, owner, outputDynamicAxes)); + + // TODO: Support dynamic axes on the left operand + if (!inputs[0].DynamicAxes().empty()) + LogicError("Dynamic axes are currently unsupported for left operand of a Times operation"); + + size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue(); + outputs.push_back(Variable(TimesOpOutputShape(inputs[0].Shape(), inputs[1].Shape(), numOutputAxes), outputDataType, owner, outputDynamicAxes)); break; + } + case PrimitiveOpType::Convolution: + { + assert(inputs.size() == 2); + auto strides = functionConfig[L"strides"].GetValue(); + auto lowerPad = functionConfig[L"lowerPad"].GetValue(); + auto upperPad = functionConfig[L"upperPad"].GetValue(); + auto sharing = AsBasicElementTypeVector(functionConfig[L"sharing"].GetValue>()); + auto autoPadding = AsBasicElementTypeVector(functionConfig[L"autoPadding"].GetValue>()); + bool transpose = functionConfig[L"transpose"].GetValue(); + if (inputs[0].Shape().NumAxes() < inputs[1].Shape().NumAxes()) + InvalidArgument("The convolution map should have at least as many axes as the shape of the input it operates on!"); + + NDShape outputMapCount, kernelShape; + std::tie(outputMapCount, kernelShape) = GetConvolutionOutputMapCountAndKernelShape(inputs[0].Shape(), inputs[1].Shape()); + outputs.push_back(Variable(ConvolutionOpOutputShape(inputs[1].Shape(), kernelShape, outputMapCount, strides, sharing, autoPadding, lowerPad, upperPad, transpose), outputDataType, owner, outputDynamicAxes)); + break; + } case PrimitiveOpType::SquaredError: case PrimitiveOpType::CrossEntropyWithSoftmax: case PrimitiveOpType::ClassificationError: @@ -303,6 +363,9 @@ namespace CNTK outputs.push_back(Variable(ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes), outputDataType, owner, reductionOutputDynamicAxes)); break; } + case PrimitiveOpType::BatchNormalization: + outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner, outputDynamicAxes)); + break; case PrimitiveOpType::Combine: outputs = inputs; break; @@ -350,6 +413,10 @@ namespace CNTK template friend void SaveAsLegacyModel(const FunctionPtr& rootFunction, const std::wstring& modelFile); + friend void ComputeInputPerDimMeansAndInvStdDevs(const MinibatchSourcePtr& minibatchSource, + std::unordered_map>& computedMeanAndInvStdDevs, + const DeviceDescriptor& device /*= DeviceDescriptor::CPUDevice()*/); + public: static CompositeFunctionPtr Create(const FunctionPtr& rootFunction, const std::wstring& name = L"") { @@ -425,6 +492,7 @@ namespace CNTK static void PopulateComputationNodeGradient(const std::pair& variableGradient, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode); void PopulateNetworkGradients(const std::unordered_map& gradients); + static void GetNodeOutputOrGradient(Variable var, ValuePtr& varValue, Microsoft::MSR::CNTK::ComputationNodeBasePtr& computationNode, bool getGradient); void GetNetworkOutputs(std::unordered_map& outputs); void 
GetNetworkGradients(std::unordered_map& gradients); diff --git a/Source/CNTKv2LibraryDll/MinibatchSource.cpp b/Source/CNTKv2LibraryDll/MinibatchSource.cpp index 78af9c66d..b90e8e4f3 100644 --- a/Source/CNTKv2LibraryDll/MinibatchSource.cpp +++ b/Source/CNTKv2LibraryDll/MinibatchSource.cpp @@ -11,6 +11,8 @@ #include "HeapMemoryProvider.h" #include "ReaderShim.h" #include "Function.h" +#include +#include "ComputationNetworkBuilder.h" using namespace Microsoft::MSR::CNTK; @@ -22,21 +24,21 @@ namespace CNTK } CompositeMinibatchSource::CompositeMinibatchSource(const Dictionary& configuration) - : m_startNewEpoch(true), m_nextEpochIndex(0), m_prevMinibatchSize(0) + : m_epochEndReached(false), m_prevMinibatchSize(0), m_epochSize(SIZE_MAX) { ConfigParameters config; std::wstringstream s; for (const auto& keyValuePair : *(configuration.m_dictionaryData)) - { AddConfigString(s, keyValuePair.first, keyValuePair.second, 0); - } + config.Parse(msra::strfun::utf8(s.str())); const wchar_t* epochSizeConfigurationKey = L"epochSize"; - if (!configuration.Contains(epochSizeConfigurationKey)) - InvalidArgument("'epochSize' value must be configured when constructing a CNTK built-in composite MinibatchSource!"); + if (configuration.Contains(epochSizeConfigurationKey)) + m_epochSize = configuration[epochSizeConfigurationKey].GetValue(); - m_epochSize = configuration[epochSizeConfigurationKey].GetValue(); + if (m_epochSize == 0) + m_epochSize = Microsoft::MSR::CNTK::requestDataSize; typedef Reader*(*CreateCompositeDataReaderProc)(const ConfigParameters* parameters); CreateCompositeDataReaderProc createReaderProc = (CreateCompositeDataReaderProc)Plugin().Load(L"CompositeDataReader", "CreateCompositeDataReader"); @@ -47,79 +49,198 @@ namespace CNTK m_streamInfos.insert({ streamDesc->m_name, streamDesc->m_id, AsStorageFormat(streamDesc->m_storageType), AsDataType(streamDesc->m_elementType), AsNDShape(*(streamDesc->m_sampleLayout)) }); } - /*virtual*/ bool CompositeMinibatchSource::GetNextMinibatch(std::unordered_map>& minibatchData) /*override*/ + /*virtual*/ std::unordered_map CompositeMinibatchSource::GetNextMinibatch(const std::unordered_map>& perStreamMBSizeLimits, + const DeviceDescriptor& device /*= DeviceDescriptor::DefaultDevice()*/) /*override*/ { - // TODO: Support different minibatch sizes for different streams - size_t requestedMinibatchSize = 0; - for (const auto& val : minibatchData) + std::unordered_map minibatchData; + if (!m_epochEndReached) { - if (requestedMinibatchSize == 0) - requestedMinibatchSize = val.second.first; - else + // TODO: Support different minibatch sizes for different streams + size_t requestedMinibatchSizeInSamples = 0; + for (const auto& val : perStreamMBSizeLimits) { - if (requestedMinibatchSize != val.second.first) - LogicError("Different minibatch sizes across different input streams is currently unsupported!"); - } - } + size_t maxNumSequencesRequested = val.second.first; + size_t maxNumSamplesRequested = val.second.second; - if (requestedMinibatchSize == 0) - InvalidArgument("GetNextMinibatch: Requested minibatch sizes must be > 0"); + // TODO: Specifying minibatch size in #sequences is currently unsupported + if (maxNumSequencesRequested != 0) + LogicError("Specifying minibatch size in #sequences is currently unsupported"); - if (m_startNewEpoch) - { - // TODO: Add support for distributed reading - EpochConfiguration epochConfig = { 1, 0, requestedMinibatchSize, m_epochSize, m_nextEpochIndex, 0 }; - m_compositeDataReader->StartEpoch(epochConfig); - m_prevMinibatchSize = 
requestedMinibatchSize; - } - - if (requestedMinibatchSize != m_prevMinibatchSize) - LogicError("GetNextMinibatch: Changing minibatch sizes across calls is currently unsupported"); - - auto compositeReaderMinibatchData = m_compositeDataReader->ReadMinibatch(); - m_startNewEpoch = compositeReaderMinibatchData.m_endOfEpoch; - if (m_startNewEpoch) - m_nextEpochIndex++; - - auto compositeDataReaderStreamDescs = m_compositeDataReader->GetStreamDescriptions(); - size_t numStreams = compositeDataReaderStreamDescs.size(); - for (size_t i = 0; i < numStreams; ++i) - { - auto currentStreamDesc = compositeDataReaderStreamDescs[i]; - auto sampleShape = AsNDShape(*(currentStreamDesc->m_sampleLayout)); - auto minibatchDataEntryForCurrentStream = std::find_if(minibatchData.begin(), minibatchData.end(), [currentStreamDesc](const std::pair>& entry) { - return entry.first.m_id == currentStreamDesc->m_id; - }); - - auto minibatchValuePtr = minibatchDataEntryForCurrentStream->second.second; - if (compositeReaderMinibatchData.m_data.empty()) - { - minibatchValuePtr = MakeSharedObject(MakeSharedObject(minibatchDataEntryForCurrentStream->first.m_elementType, sampleShape.AppendShape({ 0, 0 }), DeviceDescriptor::CPUDevice())); - continue; - } - - auto currentStreamMinibatchData = compositeReaderMinibatchData.m_data[i]; - - if (currentStreamDesc->m_elementType == ElementType::tfloat) - { - auto dataMatrix = std::make_shared>(CPUDEVICE); - size_t sampleSize = currentStreamDesc->m_sampleLayout->GetNumElements(); - - // TODO: Eliminate the unnecessary CPU to CPU copy - ReaderShim::FillMatrixFromStream(currentStreamDesc->m_storageType, dataMatrix.get(), sampleSize, currentStreamMinibatchData); - auto minibatchValueObject = CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(sampleShape, *dataMatrix, currentStreamMinibatchData->m_layout, false); - - // TODO: Should slice off the supplied Value object instead of reallocating, in cases the actual minibatch - // size is smaller than the supplied storage in the Value object - if ((minibatchValuePtr == nullptr) || (minibatchValuePtr->Data()->Shape() != minibatchValueObject->Data()->Shape())) - minibatchData[minibatchDataEntryForCurrentStream->first].second = minibatchValueObject; + if (requestedMinibatchSizeInSamples == 0) + requestedMinibatchSizeInSamples = maxNumSamplesRequested; else - minibatchValuePtr->CopyFrom(*minibatchValueObject); + { + if (requestedMinibatchSizeInSamples != maxNumSamplesRequested) + LogicError("Different minibatch sizes across different input streams is currently unsupported!"); + } + } + + if (requestedMinibatchSizeInSamples == 0) + InvalidArgument("GetNextMinibatch: Requested minibatch sizes must be > 0"); + + if (m_prevMinibatchSize == 0) + { + // TODO: Add support for distributed reading + EpochConfiguration epochConfig = { 1, 0, requestedMinibatchSizeInSamples, m_epochSize, 0, 0 }; + m_compositeDataReader->StartEpoch(epochConfig); + m_prevMinibatchSize = requestedMinibatchSizeInSamples; + } + + if (requestedMinibatchSizeInSamples != m_prevMinibatchSize) + LogicError("GetNextMinibatch: Changing minibatch sizes across calls is currently unsupported"); + + auto compositeReaderMinibatchData = m_compositeDataReader->ReadMinibatch(); + m_epochEndReached = compositeReaderMinibatchData.m_endOfEpoch; + + auto compositeDataReaderStreamDescs = m_compositeDataReader->GetStreamDescriptions(); + size_t numStreams = compositeDataReaderStreamDescs.size(); + for (size_t i = 0; i < numStreams; ++i) + { + auto currentStreamDesc = 
compositeDataReaderStreamDescs[i]; + auto iter = std::find_if(perStreamMBSizeLimits.begin(), perStreamMBSizeLimits.end(), [currentStreamDesc](const std::pair>& entry) { + return entry.first.m_id == currentStreamDesc->m_id; + }); + + if (iter == perStreamMBSizeLimits.end()) + continue; + + auto& currentStreamInfo = iter->first; + auto sampleShape = AsNDShape(*(currentStreamDesc->m_sampleLayout)); + + ValuePtr minibatchValuePtr; + if (compositeReaderMinibatchData.m_data.empty()) + { + minibatchValuePtr = MakeSharedObject(MakeSharedObject(currentStreamInfo.m_elementType, sampleShape.AppendShape({ 0, 0 }), DeviceDescriptor::CPUDevice())); + continue; + } + + auto currentStreamMinibatchData = compositeReaderMinibatchData.m_data[i]; + if (currentStreamDesc->m_elementType == ElementType::tfloat) + { + auto dataMatrix = std::make_shared>(CPUDEVICE); + size_t sampleSize = currentStreamDesc->m_sampleLayout->GetNumElements(); + + // TODO: Eliminate the unnecessary CPU to CPU copy + ReaderShim::FillMatrixFromStream(currentStreamDesc->m_storageType, dataMatrix.get(), sampleSize, currentStreamMinibatchData); + minibatchValuePtr = CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(sampleShape, *dataMatrix, currentStreamMinibatchData->m_layout, false); + + size_t numSamples = currentStreamMinibatchData->m_layout->GetActualNumSamples(); + size_t numSequences = currentStreamMinibatchData->m_layout->GetNumSequences(); + + minibatchData[currentStreamInfo] = { numSequences, numSamples, minibatchValuePtr }; + } + else + LogicError("Input data of type other than DataType::Float is currently unsupported by the CNTK built-in composite MinibatchSource!"); } - else - LogicError("Double precision input data is currently unsupported by the CNTK built-in composite MinibatchSource!"); } - return true; + return minibatchData; + } + + void ComputeInputPerDimMeansAndInvStdDevs(const MinibatchSourcePtr& minibatchSource, + std::unordered_map>& computedMeanAndInvStdDevs, + const DeviceDescriptor& device /*= DeviceDescriptor::CPUDevice()*/) + { + typedef std::shared_ptr> ComputationNodePtr; + const auto& minibatchSourceStreams = minibatchSource->StreamInfos(); + + auto computationNetwork = std::make_shared(AsCNTKImplDeviceId(device)); + ComputationNetworkBuilder builder(*computationNetwork); + + std::vector allInputNodes; + std::unordered_map streamToInputNodeMap; + std::unordered_map streamToDummyInputVariableMap; + std::unordered_map streamToMeanNodeMap; + std::unordered_map streamToInvStdDevNodeMap; + + size_t totalSizePerSample = 0; + for (auto& currentStreamKV : computedMeanAndInvStdDevs) + { + auto currentStreamInfo = currentStreamKV.first; + if (minibatchSourceStreams.find(currentStreamInfo) == minibatchSourceStreams.end()) + InvalidArgument("ComputeMeanAndVariance: Stream for which mean and variance is to be computed is not supported by the specified minibatchSource"); + + if (currentStreamInfo.m_elementType != DataType::Float) + LogicError("Input data of type other than DataType::Float is currently unsupported by the CNTK built-in composite MinibatchSource!"); + + auto inputVariableShape = currentStreamInfo.m_sampleLayout; + auto inputTensorShape = AsTensorShape(inputVariableShape); + totalSizePerSample += (inputVariableShape.TotalSize() * sizeof(float)); + + ComputationNodePtr inputNode; + Variable inputVariable; + if (currentStreamInfo.m_storageFormat != StorageFormat::Dense) + { + inputNode = builder.CreateSparseInputNode(currentStreamInfo.m_name, inputTensorShape); + inputVariable = 
Variable(inputVariableShape, true, DataType::Float, currentStreamInfo.m_name); + } + else + { + inputNode = builder.CreateInputNode(currentStreamInfo.m_name, inputTensorShape); + inputVariable = Variable(inputVariableShape, DataType::Float, currentStreamInfo.m_name); + } + + allInputNodes.push_back(inputNode); + streamToInputNodeMap[currentStreamInfo] = inputNode; + streamToDummyInputVariableMap[currentStreamInfo] = inputVariable; + streamToMeanNodeMap[currentStreamInfo] = builder.Mean(inputNode); + streamToInvStdDevNodeMap[currentStreamInfo] = builder.InvStdDev(inputNode); + } + + computationNetwork->CompileNetwork(); + computationNetwork->AllocateAllMatrices(computationNetwork->RootNodes(), {}, nullptr); + + ScopedNetworkOperationMode modeGuard(computationNetwork, NetworkOperationMode::preComputing); + + // initialize + auto preComputeNodes = computationNetwork->GetNodesRequiringPreComputation(); + for (auto & preComputeNode : preComputeNodes) + dynamic_pointer_cast(preComputeNode)->MarkComputed(false /*begin accumulating*/); + + const size_t maxMinibatchDataSize = (1 << 27); // 128 MB + const size_t minibatchSize = maxMinibatchDataSize / totalSizePerSample; + std::unordered_map> minibatchSizeLimits; + for (auto& currentStreamKV : computedMeanAndInvStdDevs) + minibatchSizeLimits.insert(std::make_pair(currentStreamKV.first, std::make_pair((size_t)0, minibatchSize))); + + for (;;) + { + auto minibatchData = minibatchSource->GetNextMinibatch(minibatchSizeLimits, device); + if (minibatchData.empty()) + break; + + for (auto& currentStreamKV : computedMeanAndInvStdDevs) + CompositeFunction::PopulateComputationNodeValue({ streamToDummyInputVariableMap[currentStreamKV.first], minibatchData[currentStreamKV.first].m_data }, streamToInputNodeMap[currentStreamKV.first]); + + ComputationNetwork::BumpEvalTimeStamp(allInputNodes); + + computationNetwork->ForwardProp(preComputeNodes); + } + + // finalize + for (auto & preComputeNode : preComputeNodes) + dynamic_pointer_cast(preComputeNode)->MarkComputed(true /*done accumulating*/); + + // Copy out the results + for (auto& currentStreamKV : computedMeanAndInvStdDevs) + { + ValuePtr mean, invStdDev; + if (computedMeanAndInvStdDevs[currentStreamKV.first].first != nullptr) + mean = MakeSharedObject(computedMeanAndInvStdDevs[currentStreamKV.first].first); + + if (computedMeanAndInvStdDevs[currentStreamKV.first].second != nullptr) + invStdDev = MakeSharedObject(computedMeanAndInvStdDevs[currentStreamKV.first].second); + + CompositeFunction::GetNodeOutputOrGradient(streamToDummyInputVariableMap[currentStreamKV.first], mean, streamToMeanNodeMap[currentStreamKV.first], false /*getGradient*/); + CompositeFunction::GetNodeOutputOrGradient(streamToDummyInputVariableMap[currentStreamKV.first], invStdDev, streamToInvStdDevNodeMap[currentStreamKV.first], false /*getGradient*/); + + if (computedMeanAndInvStdDevs[currentStreamKV.first].first == nullptr) + computedMeanAndInvStdDevs[currentStreamKV.first].first = mean->Data(); + + if (computedMeanAndInvStdDevs[currentStreamKV.first].second == nullptr) + computedMeanAndInvStdDevs[currentStreamKV.first].second = invStdDev->Data(); + + } } } + diff --git a/Source/CNTKv2LibraryDll/MinibatchSource.h b/Source/CNTKv2LibraryDll/MinibatchSource.h index 439960703..22478c9af 100644 --- a/Source/CNTKv2LibraryDll/MinibatchSource.h +++ b/Source/CNTKv2LibraryDll/MinibatchSource.h @@ -19,15 +19,14 @@ namespace CNTK virtual const std::unordered_set& StreamInfos() override { return m_streamInfos; } - virtual bool 
GetNextMinibatch(std::unordered_map<StreamInfo, std::pair<size_t, ValuePtr>>& minibatchData) override;
+        virtual std::unordered_map<StreamInfo, MinibatchData> GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
+                                                                               const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) override;
 
     private:
         std::unordered_set<StreamInfo> m_streamInfos;
 
         std::shared_ptr<Microsoft::MSR::CNTK::CompositeDataReader> m_compositeDataReader;
-        bool m_startNewEpoch;
-        size_t m_nextEpochIndex;
+        bool m_epochEndReached;
         size_t m_prevMinibatchSize;
         size_t m_epochSize;
     };
 }
-
diff --git a/Source/CNTKv2LibraryDll/NDArrayView.cpp b/Source/CNTKv2LibraryDll/NDArrayView.cpp
index 1a4ed1ac1..fca4a8e54 100644
--- a/Source/CNTKv2LibraryDll/NDArrayView.cpp
+++ b/Source/CNTKv2LibraryDll/NDArrayView.cpp
@@ -316,7 +316,17 @@ namespace CNTK
     }
 
     template <typename ElementType>
-    NDArrayViewPtr NDArrayView::RandomUniform(const NDShape& shape, double rangeBegin, double rangeEnd, unsigned long seed, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/)
+    /*static*/ NDArrayViewPtr NDArrayView::RandomNormal(const NDShape& shape, double mean, double stdDev, unsigned long seed, const DeviceDescriptor& device /*= DeviceDescriptor::DefaultDevice()*/)
+    {
+        auto matrixDims = GetMatrixDimensions(shape);
+        auto randomNormalMatrix = std::make_shared<Matrix<ElementType>>(Matrix<ElementType>::RandomGaussian(matrixDims.first, matrixDims.second, AsCNTKImplDeviceId(device), (ElementType)mean, (ElementType)stdDev, seed));
+        auto tensorView = new TensorView<ElementType>(randomNormalMatrix, AsTensorShape(shape));
+
+        return MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), device, StorageFormat::Dense, shape, false, tensorView);
+    }
+
+    template <typename ElementType>
+    /*static*/ NDArrayViewPtr NDArrayView::RandomUniform(const NDShape& shape, double rangeBegin, double rangeEnd, unsigned long seed, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/)
     {
         auto matrixDims = GetMatrixDimensions(shape);
         auto randomUniformMatrix = std::make_shared<Matrix<ElementType>>(Matrix<ElementType>::RandomUniform(matrixDims.first, matrixDims.second, AsCNTKImplDeviceId(device), (ElementType)rangeBegin, (ElementType)rangeEnd, seed));
@@ -329,6 +339,9 @@ namespace CNTK
     template CNTK_API NDArrayViewPtr NDArrayView::RandomUniform<float>(const NDShape& shape, double rangeBegin, double rangeEnd, unsigned long seed, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/);
     template CNTK_API NDArrayViewPtr NDArrayView::RandomUniform<double>(const NDShape& shape, double rangeBegin, double rangeEnd, unsigned long seed, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/);
 
+    template CNTK_API NDArrayViewPtr NDArrayView::RandomNormal<float>(const NDShape& shape, double mean, double stdDev, unsigned long seed, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/);
+    template CNTK_API NDArrayViewPtr NDArrayView::RandomNormal<double>(const NDShape& shape, double mean, double stdDev, unsigned long seed, const DeviceDescriptor& device/* = DeviceDescriptor::DefaultDevice()*/);
+
     template CNTK_API const float* NDArrayView::DataBuffer<float>() const;
     template CNTK_API const double* NDArrayView::DataBuffer<double>() const;
 
diff --git a/Source/CNTKv2LibraryDll/Utils.h b/Source/CNTKv2LibraryDll/Utils.h
index d7155c899..69c091a3b 100644
--- a/Source/CNTKv2LibraryDll/Utils.h
+++ b/Source/CNTKv2LibraryDll/Utils.h
@@ -11,6 +11,7 @@
 #include 
 #include "Config.h"
 #include "Reader.h"
+#include "ConvolutionEngine.h"
 
 namespace CNTK
 {
@@ -118,14 +119,15 @@ namespace CNTK
         }
     }
 
-    inline Microsoft::MSR::CNTK::TensorShape AsTensorShape(const NDShape& viewShape)
+    inline Microsoft::MSR::CNTK::TensorShape AsTensorShape(const NDShape& viewShape, bool preserveRank = false)
     {
         const size_t maxNumAxesSupportedByTensorView = 12;
         if (viewShape.NumAxes() > maxNumAxesSupportedByTensorView)
             LogicError("The number of requested axes exceeds the currently supported limit");
 
         // TensorShape is required to be at least 2D
-        Microsoft::MSR::CNTK::SmallVector<size_t> tensorViewShape(std::max(2, viewShape.NumAxes()));
+        size_t minRankSize = preserveRank ? viewShape.NumAxes() : 2;
+        Microsoft::MSR::CNTK::SmallVector<size_t> tensorViewShape(std::max(minRankSize, viewShape.NumAxes()));
         for (size_t i = 0; i < tensorViewShape.size(); ++i)
             tensorViewShape[i] = (i < viewShape.NumAxes()) ? viewShape[i] : 1;
 
@@ -241,4 +243,74 @@ namespace CNTK
         AddConfigString(s, value, numIndentationSpaces);
         s << std::endl;
     }
+
+    template <typename T>
+    inline std::vector<DictionaryValue> AsDictionaryValueVector(const std::vector<T>& basicElementTypeVector)
+    {
+        static_assert(std::is_same<T, bool>::value ||
+                      std::is_same<T, size_t>::value ||
+                      std::is_same<T, float>::value ||
+                      std::is_same<T, double>::value, "Unsupported ValueType");
+
+        std::vector<DictionaryValue> dictionaryValueVector;
+        for (auto value : basicElementTypeVector)
+            dictionaryValueVector.push_back(value);
+
+        return dictionaryValueVector;
+    }
+
+    template <typename T>
+    inline std::vector<T> AsBasicElementTypeVector(const std::vector<DictionaryValue>& dictionaryValueVector)
+    {
+        static_assert(std::is_same<T, bool>::value ||
+                      std::is_same<T, size_t>::value ||
+                      std::is_same<T, float>::value ||
+                      std::is_same<T, double>::value, "Unsupported ValueType");
+
+        std::vector<T> basicElementTypeVector;
+        for (auto value : dictionaryValueVector)
+            basicElementTypeVector.push_back(value.GetValue<T>());
+
+        return basicElementTypeVector;
+    }
+
+    inline PoolingType AsPoolingType(Microsoft::MSR::CNTK::PoolKind cntkPoolingKind)
+    {
+        switch (cntkPoolingKind)
+        {
+        case Microsoft::MSR::CNTK::PoolKind::Average:
+            return PoolingType::Average;
+        case Microsoft::MSR::CNTK::PoolKind::Max:
+            return PoolingType::Max;
+        default:
+            LogicError("Unknown pooling type");
+        }
+    }
+
+    inline Microsoft::MSR::CNTK::PoolKind AsCNTKPoolKind(PoolingType poolingType)
+    {
+        switch (poolingType)
+        {
+        case PoolingType::Average:
+            return Microsoft::MSR::CNTK::PoolKind::Average;
+        case PoolingType::Max:
+            return Microsoft::MSR::CNTK::PoolKind::Max;
+        default:
+            LogicError("Unknown pooling type");
+        }
+    }
+
+    inline std::pair<NDShape, NDShape> GetConvolutionOutputMapCountAndKernelShape(const NDShape& convolutionMapShape, const NDShape& operandShape)
+    {
+        auto outputMapCount = convolutionMapShape.SubShape(0, convolutionMapShape.NumAxes() - operandShape.NumAxes());
+        NDShape paddedOutputMapCount(operandShape.NumAxes(), 1);
+        for (size_t i = 0; i < outputMapCount.NumAxes(); ++i)
+            paddedOutputMapCount[paddedOutputMapCount.NumAxes() - 1 - i] = outputMapCount[outputMapCount.NumAxes() - 1 - i];
+        //for (size_t i = 0; i < outputMapCount.NumAxes(); ++i)
+        //    paddedOutputMapCount[i] = outputMapCount[i];
+
+        NDShape kernelShape = convolutionMapShape.SubShape(outputMapCount.NumAxes());
+
+        return{ paddedOutputMapCount, kernelShape };
+    }
 }
diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h
index 76a8e9c42..076376a79 100644
--- a/Source/ComputationNetworkLib/ComputationNode.h
+++ b/Source/ComputationNetworkLib/ComputationNode.h
@@ -433,7 +433,18 @@ private:
     {
         if (HasMBLayout())
             LogicError("%ls: Minibatch data cannot be interpreted as a single 2D tensor.", NodeDescription().c_str());
-        else if (m_sampleLayout.GetRank() < 1 || m_sampleLayout.GetRank() > 2) // note: scalars are not stored as tensors of rank 0, but rather as 1-dim vectors.
TODO: clean this up some day + + bool notFlattenableTo2D = false; + for (size_t i = 2; i < m_sampleLayout.GetRank(); ++i) + { + if (!m_sampleLayout.CanFlatten(i)) + { + notFlattenableTo2D = true; + break; + } + } + + if (m_sampleLayout.GetRank() < 1 || ((m_sampleLayout.GetRank() > 2) && notFlattenableTo2D)) // note: scalars are not stored as tensors of rank 0, but rather as 1-dim vectors. TODO: clean this up some day LogicError("%ls: Sample [%s] is not a column vector or matrix (1D or 2D tensor).", NodeDescription().c_str(), string(m_sampleLayout).c_str()); } public: @@ -445,7 +456,11 @@ public: size_t GetAsMatrixNumCols() const { CheckTensorIsMatrix(); - return m_sampleLayout.GetRank() > 1 ? m_sampleLayout[1] : 1; // a column vector is also a Matrix + auto flattenedLayout = m_sampleLayout; + if (flattenedLayout.GetRank() > 2) + flattenedLayout.FlattenTo2DInPlace(1, "GetAsMatrixNumCols()"); + + return flattenedLayout.GetRank() > 1 ? flattenedLayout[1] : 1; // a column vector is also a Matrix } // setting/updating the dimensions of the node diff --git a/Source/ComputationNetworkLib/ConvolutionalNodes.h b/Source/ComputationNetworkLib/ConvolutionalNodes.h index 1450d1123..6cfb8c0de 100644 --- a/Source/ComputationNetworkLib/ConvolutionalNodes.h +++ b/Source/ComputationNetworkLib/ConvolutionalNodes.h @@ -139,6 +139,16 @@ public: fstream << "PoolKind: " << (int)m_poolKind << "\n"; } + TensorShape KernelShape() const { return m_kernelShape; } + TensorShape Strides() const { return m_stride; } + std::vector Sharing() const { return m_sharing; } + std::vector AutoPad() const { return m_autoPad; } + TensorShape LowerPad() const { return m_lowerPad; } + TensorShape UpperPad() const { return m_upperPad; } + bool Transpose() const { return m_transpose; } + size_t MaxTempMemSizeInSamples() const { return m_maxTempMemSizeInSamples; } + PoolKind PoolingKind() const { return m_poolKind; } + protected: TensorShape m_kernelShape; TensorShape m_mapCount; diff --git a/Source/ComputationNetworkLib/LinearAlgebraNodes.h b/Source/ComputationNetworkLib/LinearAlgebraNodes.h index ceb7ee880..929e00249 100644 --- a/Source/ComputationNetworkLib/LinearAlgebraNodes.h +++ b/Source/ComputationNetworkLib/LinearAlgebraNodes.h @@ -463,6 +463,8 @@ public: Base::AllocateGradientMatricesForInputs(matrixPool); } + size_t OutputRank() const { return m_outputRank; } + private: size_t m_outputRank; }; diff --git a/Source/ComputationNetworkLib/TrainingNodes.h b/Source/ComputationNetworkLib/TrainingNodes.h index 5ef3db0ac..f4019724c 100644 --- a/Source/ComputationNetworkLib/TrainingNodes.h +++ b/Source/ComputationNetworkLib/TrainingNodes.h @@ -1872,6 +1872,12 @@ public: m_blendTimeConst = std::numeric_limits::infinity(); } + double NormalizationTimeConstant() const { return m_normTimeConst; } + double BlendTimeConstant() const { return m_blendTimeConst; } + bool Spatial() const { return m_spatial; } + double Epsilon() const { return m_epsilon; } + bool UseCNTKEngine() const { return m_useCntkEngine; } + private: // Old versioning - do not use. Do not remove until we're sure there are no old models around. struct VersionInfo diff --git a/Tests/EndToEndTests/CNTKv2Library/UnitTests/run-test b/Tests/EndToEndTests/CNTKv2Library/UnitTests/run-test index a30429fb3..8a18ace17 100755 --- a/Tests/EndToEndTests/CNTKv2Library/UnitTests/run-test +++ b/Tests/EndToEndTests/CNTKv2Library/UnitTests/run-test @@ -10,15 +10,16 @@ if [[ "$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY" == "" || ! 
-d "$CNTK_EXTERNAL_T fi if [ "$OS" == "Windows_NT" ]; then - DataSourceDir=`cygpath -au $CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY`/Image/MNIST/v0 + DataSourceDir=`cygpath -au $CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY`/Image else - DataSourceDir=$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY/Image/MNIST/v0 + DataSourceDir=$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY/Image fi # Copy the test data to the test run directory DataDir=$TEST_RUN_DIR/TestData mkdir $DataDir -cp -R $DataSourceDir/Train-28x28_cntk_text.txt $DataDir || exit $? +cp -R $DataSourceDir/MNIST/v0/Train-28x28_cntk_text.txt $DataDir || exit $? +cp -R $DataSourceDir/CIFAR/v0/cifar-10-batches-py $DataDir || exit $? cp -R $TEST_DIR/../../../../Examples/Other/Simple2d/Data/SimpleDataTrain_cntk_text.txt $DataDir || exit $? pushd $DataDir diff --git a/Tests/UnitTests/V2LibraryTests/CifarResNet.cpp b/Tests/UnitTests/V2LibraryTests/CifarResNet.cpp new file mode 100644 index 000000000..60d81da36 --- /dev/null +++ b/Tests/UnitTests/V2LibraryTests/CifarResNet.cpp @@ -0,0 +1,175 @@ +#include "CNTKLibrary.h" +#include +#include "Common.h" +#include "Image.h" + +using namespace CNTK; + +MinibatchSourcePtr CreateCifarMinibatchSource(size_t epochSize) +{ + size_t imageHeight = 32; + size_t imageWidth = 32; + size_t numChannels = 3; + size_t numClasses = 10; + auto mapFilePath = L"cifar-10-batches-py/train_map.txt"; + auto meanFilePath = L"cifar-10-batches-py/CIFAR-10_mean.xml"; + + Dictionary cropTransformConfig; + cropTransformConfig[L"type"] = L"Crop"; + cropTransformConfig[L"cropType"] = L"Random"; + cropTransformConfig[L"cropRatio"] = L"0.8"; + cropTransformConfig[L"jitterType"] = L"uniRatio"; + + Dictionary scaleTransformConfig; + scaleTransformConfig[L"type"] = L"Scale"; + scaleTransformConfig[L"width"] = imageWidth; + scaleTransformConfig[L"height"] = imageHeight; + scaleTransformConfig[L"channels"] = numChannels; + scaleTransformConfig[L"interpolations"] = L"linear"; + + Dictionary meanTransformConfig; + meanTransformConfig[L"type"] = L"Mean"; + meanTransformConfig[L"meanFile"] = meanFilePath; + + std::vector allTransforms = { cropTransformConfig, scaleTransformConfig, meanTransformConfig }; + + Dictionary featuresStreamConfig; + featuresStreamConfig[L"transforms"] = allTransforms; + + Dictionary labelsStreamConfig; + labelsStreamConfig[L"labelDim"] = numClasses; + + Dictionary inputStreamsConfig; + inputStreamsConfig[L"features"] = featuresStreamConfig; + inputStreamsConfig[L"labels"] = labelsStreamConfig; + + Dictionary deserializerConfiguration; + deserializerConfiguration[L"type"] = L"ImageDeserializer"; + deserializerConfiguration[L"module"] = L"ImageReader"; + deserializerConfiguration[L"file"] = mapFilePath; + deserializerConfiguration[L"input"] = inputStreamsConfig; + + Dictionary minibatchSourceConfiguration; + minibatchSourceConfiguration[L"epochSize"] = epochSize; + minibatchSourceConfiguration[L"deserializers"] = std::vector({ deserializerConfiguration }); + + return CreateCompositeMinibatchSource(minibatchSourceConfiguration); +} + +Constant GetProjectionMap(size_t outputDim, size_t inputDim, const DeviceDescriptor& device) +{ + if (inputDim > outputDim) + throw std::runtime_error("Can only project from lower to higher dimensionality"); + + std::vector projectionMapValues(inputDim * outputDim); + for (size_t i = 0; i < inputDim; ++i) + projectionMapValues[(i * outputDim) + i] = 1.0f; + + auto projectionMap = MakeSharedObject(DataType::Float, NDShape({ outputDim, 1, 1, inputDim }), device); + 
projectionMap->CopyFrom(NDArrayView(NDShape({ outputDim, 1, 1, inputDim }), projectionMapValues)); + + return Constant(projectionMap); +} + +FunctionPtr ResNetClassifier(Variable input, size_t numOutputClasses, const DeviceDescriptor& device, const std::wstring& outputName) +{ + double convWScale = 7.07; + double convBValue = 0; + + double fc1WScale = 0.4; + double fc1BValue = 0; + + double scValue = 1; + size_t bnTimeConst = 4096; + + size_t kernelWidth = 3; + size_t kernelHeight = 3; + + double conv1WScale = 0.26; + size_t cMap1 = 16; + auto conv1 = ConvBNReLULayer(input, cMap1, kernelWidth, kernelHeight, 1, 1, conv1WScale, convBValue, scValue, bnTimeConst, device); + + auto rn1_1 = ResNetNode2(conv1, cMap1, kernelWidth, kernelHeight, convWScale, convBValue, scValue, bnTimeConst, device); + auto rn1_2 = ResNetNode2(rn1_1, cMap1, kernelWidth, kernelHeight, convWScale, convBValue, scValue, bnTimeConst, device); + auto rn1_3 = ResNetNode2(rn1_2, cMap1, kernelWidth, kernelHeight, convWScale, convBValue, scValue, bnTimeConst, device); + + size_t cMap2 = 32; + auto rn2_1_wProj = GetProjectionMap(cMap2, cMap1, device); + auto rn2_1 = ResNetNode2Inc(rn1_3, cMap2, kernelWidth, kernelHeight, convWScale, convBValue, scValue, bnTimeConst, rn2_1_wProj, device); + auto rn2_2 = ResNetNode2(rn2_1, cMap2, kernelWidth, kernelHeight, convWScale, convBValue, scValue, bnTimeConst, device); + auto rn2_3 = ResNetNode2(rn2_2, cMap2, kernelWidth, kernelHeight, convWScale, convBValue, scValue, bnTimeConst, device); + + size_t cMap3 = 64; + auto rn3_1_wProj = GetProjectionMap(cMap3, cMap2, device); + auto rn3_1 = ResNetNode2Inc(rn2_3, cMap3, kernelWidth, kernelHeight, convWScale, convBValue, scValue, bnTimeConst, rn3_1_wProj, device); + auto rn3_2 = ResNetNode2(rn3_1, cMap3, kernelWidth, kernelHeight, convWScale, convBValue, scValue, bnTimeConst, device); + auto rn3_3 = ResNetNode2(rn3_2, cMap3, kernelWidth, kernelHeight, convWScale, convBValue, scValue, bnTimeConst, device); + + // Global average pooling + size_t poolW = 8; + size_t poolH = 8; + size_t poolhStride = 1; + size_t poolvStride = 1; + //size_t numInputChannels = rn3_3->Output().Shape()[rn3_3->Output().Shape().NumAxes() - 1]; + auto pool = Pooling(rn3_3, PoolingType::Average, { poolW, poolH, 1 }, { poolhStride, poolvStride, 1 }); + + // Output DNN layer + auto outTimesParams = Parameter(NDArrayView::RandomNormal({ numOutputClasses, 1, 1, cMap3 }, 0.0, fc1WScale, 1, device)); + auto outBiasParams = Parameter({ numOutputClasses }, (float)fc1BValue, device); + + return Plus(Times(outTimesParams, pool), outBiasParams, outputName); +} + +void TrainResNetCifarClassifer(const DeviceDescriptor& device, bool testSaveAndReLoad) +{ + auto minibatchSource = CreateCifarMinibatchSource(SIZE_MAX); + auto streamInfos = minibatchSource->StreamInfos(); + auto imageStreamInfo = std::find_if(streamInfos.begin(), streamInfos.end(), [](const StreamInfo& streamInfo) { return (streamInfo.m_name == L"features"); }); + auto labelStreamInfo = std::find_if(streamInfos.begin(), streamInfos.end(), [](const StreamInfo& streamInfo) { return (streamInfo.m_name == L"labels"); }); + + auto inputImageShape = imageStreamInfo->m_sampleLayout; + // Change the input shape from HWC to CHW form + inputImageShape = { inputImageShape[1], inputImageShape[2], inputImageShape[0] }; + + const size_t numOutputClasses = labelStreamInfo->m_sampleLayout[0]; + + Variable imageInput(inputImageShape, imageStreamInfo->m_elementType, L"Images"); + auto classifierOutputFunction = ResNetClassifier(imageInput, 
numOutputClasses, device, L"classifierOutput"); + Variable classifierOutput = classifierOutputFunction; + + auto labelsVar = Variable({ numOutputClasses }, labelStreamInfo->m_elementType, L"Labels"); + + auto trainingLossFunction = CrossEntropyWithSoftmax(classifierOutputFunction, labelsVar, L"lossFunction"); + Variable trainingLoss = trainingLossFunction; + auto predictionFunction = ClassificationError(classifierOutputFunction, labelsVar, L"predictionError"); + Variable prediction = predictionFunction; + + auto imageClassifier = Combine({ trainingLossFunction, predictionFunction, classifierOutputFunction }, L"ImageClassifier"); + + if (testSaveAndReLoad) + SaveAndReloadModel(imageClassifier, { &imageInput, &labelsVar, &trainingLoss, &prediction, &classifierOutput }, device); + + double learningRatePerSample = 0.0078125; + + Trainer trainer(imageClassifier, trainingLoss, { SGDLearner(imageClassifier->Parameters(), learningRatePerSample) }); + const size_t minibatchSize = 32; + size_t numMinibatchesToTrain = 100; + std::unordered_map> minibatchSizeLimits = { { *imageStreamInfo, std::make_pair((size_t)0, minibatchSize) }, { *labelStreamInfo, std::make_pair((size_t)0, minibatchSize) } }; + size_t outputFrequencyInMinibatches = 20; + for (size_t i = 0; i < numMinibatchesToTrain; ++i) + { + auto minibatchData = minibatchSource->GetNextMinibatch(minibatchSizeLimits, device); + trainer.TrainMinibatch({ { imageInput, minibatchData[*imageStreamInfo].m_data }, { labelsVar, minibatchData[*labelStreamInfo].m_data } }, device); + + if ((i % outputFrequencyInMinibatches) == 0) + { + float trainLossValue = PrevMinibatchTrainingLossValue(trainer); + printf("Minibatch %d: CrossEntropy loss = %.8g\n", (int)i, trainLossValue); + } + } +} + +void TestCifarResnet() +{ + TrainResNetCifarClassifer(DeviceDescriptor::GPUDevice(0), true /*testSaveAndReLoad*/); +} diff --git a/Tests/UnitTests/V2LibraryTests/Common.h b/Tests/UnitTests/V2LibraryTests/Common.h index 2f9388e93..2f4f48bec 100644 --- a/Tests/UnitTests/V2LibraryTests/Common.h +++ b/Tests/UnitTests/V2LibraryTests/Common.h @@ -101,5 +101,14 @@ inline CNTK::FunctionPtr FullyConnectedDNNLayer(CNTK::Variable input, size_t out return nonLinearity(plusFunction); } +inline float PrevMinibatchTrainingLossValue(const CNTK::Trainer& trainer) +{ + float trainLossValue = 0.0; + auto prevMBTrainingLossValue = trainer.PreviousMinibatchTrainingLossValue()->Data(); + CNTK::NDArrayView cpuTrainLossValue(prevMBTrainingLossValue->Shape(), &trainLossValue, 1, CNTK::DeviceDescriptor::CPUDevice()); + cpuTrainLossValue.CopyFrom(*prevMBTrainingLossValue); + + return trainLossValue; +} #pragma warning(pop) diff --git a/Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp b/Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp index 218e9b038..3396de09f 100644 --- a/Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp +++ b/Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp @@ -18,7 +18,7 @@ FunctionPtr FullyConnectedFeedForwardClassifierNet(Variable input, classifierRoot = FullyConnectedDNNLayer(classifierRoot, hiddenLayerDim, device, nonLinearity); auto outputTimesParam = Parameter(NDArrayView::RandomUniform({ numOutputClasses, hiddenLayerDim }, -0.5, 0.5, 1, device)); - return Times(outputTimesParam, classifierRoot, outputName); + return Times(outputTimesParam, classifierRoot, 1, outputName); } std::wstring s_tempModelPath = L"feedForward.net"; diff --git a/Tests/UnitTests/V2LibraryTests/Image.h b/Tests/UnitTests/V2LibraryTests/Image.h new file mode 100644 index 000000000..26fb29dbd 
--- /dev/null +++ b/Tests/UnitTests/V2LibraryTests/Image.h @@ -0,0 +1,78 @@ +#include "CNTKLibrary.h" + +using namespace CNTK; + +inline FunctionPtr ConvBNLayer(Variable input, size_t outFeatureMapCount, size_t kernelWidth, size_t kernelHeight, size_t hStride, size_t vStride, double wScale, double bValue, double scValue, size_t bnTimeConst, const DeviceDescriptor& device) +{ + size_t numInputChannels = input.Shape()[input.Shape().NumAxes() - 1]; + + auto convParams = Parameter(NDArrayView::RandomNormal({ outFeatureMapCount, kernelWidth, kernelHeight, numInputChannels }, 0.0, wScale, 1, device)); + auto convFunction = Convolution(convParams, input, { hStride, vStride, numInputChannels }); + + auto biasParams = Parameter({ outFeatureMapCount }, (float)bValue, device); + auto scaleParams = Parameter({ outFeatureMapCount }, (float)scValue, device); + auto runningMean = Constant({ outFeatureMapCount }, 0.0f, device); + auto runningInvStd = Constant({ outFeatureMapCount }, 0.0f, device); + return BatchNormalization(convFunction, scaleParams, biasParams, runningMean, runningInvStd, true /*spatial*/, (double)bnTimeConst, 0.0, 0.000000001 /* epsilon */); +} + +inline FunctionPtr ConvBNReLULayer(Variable input, size_t outFeatureMapCount, size_t kernelWidth, size_t kernelHeight, size_t hStride, size_t vStride, double wScale, double bValue, double scValue, size_t bnTimeConst, const DeviceDescriptor& device) +{ + auto convBNFunction = ConvBNLayer(input, outFeatureMapCount, kernelWidth, kernelHeight, hStride, vStride, wScale, bValue, scValue, bnTimeConst, device); + return ReLU(convBNFunction); +} + +inline FunctionPtr ProjLayer(Variable wProj, Variable input, size_t hStride, size_t vStride, double bValue, double scValue, size_t bnTimeConst, const DeviceDescriptor& device) +{ + size_t outFeatureMapCount = wProj.Shape()[0]; + auto b = Parameter({ outFeatureMapCount }, (float)bValue, device); + auto sc = Parameter({ outFeatureMapCount }, (float)scValue, device); + auto m = Constant({ outFeatureMapCount }, 0.0f, device); + auto isd = Constant({ outFeatureMapCount }, 0.0f, device); + + size_t numInputChannels = input.Shape()[input.Shape().NumAxes() - 1]; + + auto c = Convolution(wProj, input, { hStride, vStride, numInputChannels }, { true }, { false }); + return BatchNormalization(c, sc, b, m, isd, true /*spatial*/, (double)bnTimeConst); +} + +inline FunctionPtr ResNetNode2(Variable input, size_t outFeatureMapCount, size_t kernelWidth, size_t kernelHeight, double wScale, double bValue, double scValue, size_t bnTimeConst, const DeviceDescriptor& device) +{ + auto c1 = ConvBNReLULayer(input, outFeatureMapCount, kernelWidth, kernelHeight, 1, 1, wScale, bValue, scValue, bnTimeConst, device); + auto c2 = ConvBNLayer(c1, outFeatureMapCount, kernelWidth, kernelHeight, 1, 1, wScale, bValue, scValue, bnTimeConst, device); + auto p = Plus(c2, input); + return ReLU(p); +} + +inline FunctionPtr ResNetNode2Inc(Variable input, size_t outFeatureMapCount, size_t kernelWidth, size_t kernelHeight, double wScale, double bValue, double scValue, size_t bnTimeConst, Variable wProj, const DeviceDescriptor& device) +{ + auto c1 = ConvBNReLULayer(input, outFeatureMapCount, kernelWidth, kernelHeight, 2, 2, wScale, bValue, scValue, bnTimeConst, device); + auto c2 = ConvBNLayer(c1, outFeatureMapCount, kernelWidth, kernelHeight, 1, 1, wScale, bValue, scValue, bnTimeConst, device); + + auto cProj = ProjLayer(wProj, input, 2, 2, bValue, scValue, bnTimeConst, device); + + auto p = Plus(c2, cProj); + return ReLU(p); +} + +// Standard 
building block for ResNet with identity shortcut(option A). +inline FunctionPtr ResNetNode2A(Variable input, size_t outFeatureMapCount, size_t kernelWidth, size_t kernelHeight, double wScale, double bValue, double scValue, size_t bnTimeConst, const DeviceDescriptor& device) +{ + auto conv1 = ConvBNReLULayer(input, outFeatureMapCount, kernelWidth, kernelHeight, 1, 1, wScale, bValue, scValue, bnTimeConst, device); + auto conv2 = ConvBNLayer(conv1, outFeatureMapCount, kernelWidth, kernelHeight, 1, 1, wScale, bValue, scValue, bnTimeConst, device); + + // Identity shortcut followed by ReLU. + return ReLU(Plus(conv2, input)); +} + +// Standard building block for ResNet with padding(option B). +inline FunctionPtr ResNetNode2BInc(Variable input, size_t outFeatureMapCount, size_t kernelWidth, size_t kernelHeight, double wScale, double bValue, double scValue, size_t bnTimeConst, const DeviceDescriptor& device) +{ + auto conv1 = ConvBNReLULayer(input, outFeatureMapCount, kernelWidth, kernelHeight, 2, 2, wScale, bValue, scValue, bnTimeConst, device); + auto conv2 = ConvBNLayer(conv1, outFeatureMapCount, kernelWidth, kernelHeight, 1, 1, wScale, bValue, scValue, bnTimeConst, device); + + // Projection convolution layer. + auto cProj = ConvBNLayer(input, outFeatureMapCount, 1, 1, 2, 2, wScale, bValue, scValue, bnTimeConst, device); + return ReLU(Plus(conv2, cProj)); +} + diff --git a/Tests/UnitTests/V2LibraryTests/Main.cpp b/Tests/UnitTests/V2LibraryTests/Main.cpp index 9f53f446e..89830f270 100644 --- a/Tests/UnitTests/V2LibraryTests/Main.cpp +++ b/Tests/UnitTests/V2LibraryTests/Main.cpp @@ -1,20 +1,27 @@ #include "CNTKLibrary.h" #include +using namespace CNTK; + void NDArrayViewTests(); void TensorTests(); void FeedForwardTests(); void RecurrentFunctionTests(); void TrainerTests(); +void TestCifarResnet(); int main() { NDArrayViewTests(); TensorTests(); + FeedForwardTests(); RecurrentFunctionTests(); + TrainerTests(); + TestCifarResnet(); + fprintf(stderr, "\nCNTKv2Library tests: Passed\n"); fflush(stderr); } diff --git a/Tests/UnitTests/V2LibraryTests/TrainerTests.cpp b/Tests/UnitTests/V2LibraryTests/TrainerTests.cpp index 7ee284cfa..1ba9338ac 100644 --- a/Tests/UnitTests/V2LibraryTests/TrainerTests.cpp +++ b/Tests/UnitTests/V2LibraryTests/TrainerTests.cpp @@ -33,16 +33,6 @@ MinibatchSourcePtr CreateTextMinibatchSource(const std::wstring& filePath, size_ return CreateCompositeMinibatchSource(minibatchSourceConfiguration); } -float PrevMinibatchTrainingLossValue(const Trainer& trainer) -{ - float trainLossValue = 0.0; - auto prevMBTrainingLossValue = trainer.PreviousMinibatchTrainingLossValue()->Data(); - NDArrayView cpuTrainLossValue(prevMBTrainingLossValue->Shape(), &trainLossValue, 1, DeviceDescriptor::CPUDevice()); - cpuTrainLossValue.CopyFrom(*prevMBTrainingLossValue); - - return trainLossValue; -} - void TrainSimpleFeedForwardClassifer(const DeviceDescriptor& device) { const size_t inputDim = 2; @@ -50,9 +40,23 @@ void TrainSimpleFeedForwardClassifer(const DeviceDescriptor& device) const size_t hiddenLayerDim = 50; const size_t numHiddenLayers = 2; + const size_t minibatchSize = 25; + const size_t numSamplesPerSweep = 10000; + const size_t numSweepsToTrainWith = 2; + const size_t numMinibatchesToTrain = (numSamplesPerSweep * numSweepsToTrainWith) / minibatchSize; + + auto minibatchSource = CreateTextMinibatchSource(L"SimpleDataTrain_cntk_text.txt", (size_t)2, (size_t)2, 0); + auto streamInfos = minibatchSource->StreamInfos(); + auto featureStreamInfo = std::find_if(streamInfos.begin(), 
streamInfos.end(), [](const StreamInfo& streamInfo) { return (streamInfo.m_name == L"features"); }); + auto labelStreamInfo = std::find_if(streamInfos.begin(), streamInfos.end(), [](const StreamInfo& streamInfo) { return (streamInfo.m_name == L"labels"); }); + + std::unordered_map> inputMeansAndInvStdDevs = { { *featureStreamInfo, { nullptr, nullptr } } }; + ComputeInputPerDimMeansAndInvStdDevs(minibatchSource, inputMeansAndInvStdDevs); + auto nonLinearity = std::bind(Sigmoid, _1, L""); Variable input({ inputDim }, DataType::Float, L"features"); - auto classifierOutput = FullyConnectedDNNLayer(input, hiddenLayerDim, device, nonLinearity); + auto normalizedinput = PerDimMeanVarianceNormalize(input, inputMeansAndInvStdDevs[*featureStreamInfo].first, inputMeansAndInvStdDevs[*featureStreamInfo].second); + auto classifierOutput = FullyConnectedDNNLayer(normalizedinput, hiddenLayerDim, device, nonLinearity); for (size_t i = 1; i < numHiddenLayers; ++i) classifierOutput = FullyConnectedDNNLayer(classifierOutput, hiddenLayerDim, device, nonLinearity); @@ -66,33 +70,23 @@ void TrainSimpleFeedForwardClassifer(const DeviceDescriptor& device) auto oneHiddenLayerClassifier = CNTK::Combine({ trainingLoss, prediction, classifierOutput }, L"classifierModel"); - const size_t minibatchSize = 25; - const size_t numSamplesPerSweep = 10000; - const size_t numSweepsToTrainWith = 2; - const size_t numMinibatchesToTrain = (numSamplesPerSweep * numSweepsToTrainWith) / minibatchSize; - - auto minibatchSource = CreateTextMinibatchSource(L"SimpleDataTrain_cntk_text.txt", (size_t)2, (size_t)2, numSamplesPerSweep); - - auto streamInfos = minibatchSource->StreamInfos(); - auto featureStreamInfo = std::find_if(streamInfos.begin(), streamInfos.end(), [](const StreamInfo& streamInfo) { return (streamInfo.m_name == L"features"); }); - auto labelStreamInfo = std::find_if(streamInfos.begin(), streamInfos.end(), [](const StreamInfo& streamInfo) { return (streamInfo.m_name == L"labels"); }); - double learningRatePerSample = 0.02; + minibatchSource = CreateTextMinibatchSource(L"SimpleDataTrain_cntk_text.txt", (size_t)2, (size_t)2, SIZE_MAX); Trainer trainer(oneHiddenLayerClassifier, trainingLoss, { SGDLearner(oneHiddenLayerClassifier->Parameters(), learningRatePerSample) }); - std::unordered_map> minibatchData = { { *featureStreamInfo, { minibatchSize, nullptr } }, { *labelStreamInfo, { minibatchSize, nullptr } } }; + std::unordered_map> minibatchSizeLimits = { { *featureStreamInfo, std::make_pair((size_t)0, minibatchSize) }, { *labelStreamInfo, std::make_pair((size_t)0, minibatchSize) } }; size_t outputFrequencyInMinibatches = 20; for (size_t i = 0; i < numMinibatchesToTrain; ++i) { - minibatchSource->GetNextMinibatch(minibatchData); - trainer.TrainMinibatch({ { input, minibatchData[*featureStreamInfo].second }, { labels, minibatchData[*labelStreamInfo].second } }, device); + auto minibatchData = minibatchSource->GetNextMinibatch(minibatchSizeLimits, device); + trainer.TrainMinibatch({ { input, minibatchData[*featureStreamInfo].m_data }, { labels, minibatchData[*labelStreamInfo].m_data } }, device); if ((i % outputFrequencyInMinibatches) == 0) { float trainLossValue = PrevMinibatchTrainingLossValue(trainer); - printf("Minibatch %d: CrossEntropy loss = %.8g\n", (int)i, trainLossValue); + printf("Minibatch %d: CrossEntropy loss = %.8g\n", (int)i, trainLossValue); + } } } -} void TrainMNISTClassifier(const DeviceDescriptor& device) { @@ -118,7 +112,7 @@ void TrainMNISTClassifier(const DeviceDescriptor& device) const size_t 
numSweepsToTrainWith = 3;
     const size_t numMinibatchesToTrain = (numSamplesPerSweep * numSweepsToTrainWith) / minibatchSize;
 
-    auto minibatchSource = CreateTextMinibatchSource(L"Train-28x28_cntk_text.txt", (size_t)784, (size_t)10, numSamplesPerSweep);
+    auto minibatchSource = CreateTextMinibatchSource(L"Train-28x28_cntk_text.txt", (size_t)784, (size_t)10, SIZE_MAX);
 
     auto streamInfos = minibatchSource->StreamInfos();
     auto featureStreamInfo = std::find_if(streamInfos.begin(), streamInfos.end(), [](const StreamInfo& streamInfo) {
@@ -130,17 +124,17 @@ void TrainMNISTClassifier(const DeviceDescriptor& device)
 
     double learningRatePerSample = 0.003125;
     Trainer trainer(oneHiddenLayerClassifier, trainingLoss, { SGDLearner(oneHiddenLayerClassifier->Parameters(), learningRatePerSample) });
 
-    std::unordered_map<StreamInfo, std::pair<size_t, ValuePtr>> minibatchData = { { *featureStreamInfo, { minibatchSize, nullptr } }, { *labelStreamInfo, { minibatchSize, nullptr } } };
+    std::unordered_map<StreamInfo, std::pair<size_t, size_t>> minibatchSizeLimits = { { *featureStreamInfo, std::make_pair((size_t)0, minibatchSize) }, { *labelStreamInfo, std::make_pair((size_t)0, minibatchSize) } };
     size_t outputFrequencyInMinibatches = 20;
     for (size_t i = 0; i < numMinibatchesToTrain; ++i)
     {
-        minibatchSource->GetNextMinibatch(minibatchData);
-        trainer.TrainMinibatch({ { input, minibatchData[*featureStreamInfo].second }, { labels, minibatchData[*labelStreamInfo].second } }, device);
+        auto minibatchData = minibatchSource->GetNextMinibatch(minibatchSizeLimits, device);
+        trainer.TrainMinibatch({ { input, minibatchData[*featureStreamInfo].m_data }, { labels, minibatchData[*labelStreamInfo].m_data } }, device);
 
         if ((i % outputFrequencyInMinibatches) == 0)
         {
             float trainLossValue = PrevMinibatchTrainingLossValue(trainer);
-            printf("Minibatch %d: CrossEntropy loss = %.8g\n", (int)i, trainLossValue);
+            printf("Minibatch %d: CrossEntropy loss = %.8g\n", (int)i, trainLossValue);
         }
     }
 }
diff --git a/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj b/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj
index 49a4a4dc2..6cd10202f 100644
--- a/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj
+++ b/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj
@@ -109,6 +109,7 @@
+    <ClCompile Include="CifarResNet.cpp" />
@@ -118,6 +119,7 @@
+    <ClInclude Include="Image.h" />
diff --git a/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj.filters b/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj.filters
index ee5cf0934..7a85209b4 100644
--- a/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj.filters
+++ b/Tests/UnitTests/V2LibraryTests/V2LibraryTests.vcxproj.filters
@@ -33,10 +33,16 @@
       <Filter>Source Files</Filter>
+    <ClCompile Include="CifarResNet.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
       <Filter>Header Files</Filter>
+    <ClInclude Include="Image.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
\ No newline at end of file
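
For reference, the sketch below shows how the pieces added in this change are intended to compose: the new Convolution, BatchNormalization and Pooling primitives build a small conv layer, and the reworked GetNextMinibatch(perStreamMBSizeLimits, device) returning per-stream MinibatchData drives the training loop. This is illustrative usage, not code from the patch: it reuses the CreateCifarMinibatchSource helper and the "features"/"labels" stream names from CifarResNet.cpp above, and all shapes and hyperparameters are made-up placeholders.

#include "CNTKLibrary.h"
#include <algorithm>
#include <unordered_map>

using namespace CNTK;

// Defined in CifarResNet.cpp above; forward-declared here for the sketch.
MinibatchSourcePtr CreateCifarMinibatchSource(size_t epochSize);

void SketchTrainingLoop(const DeviceDescriptor& device)
{
    // Inputs follow the (W, H, C) layout used by the test code above.
    Variable input({ 32, 32, 3 }, DataType::Float, L"features");
    Variable labels({ 10 }, DataType::Float, L"labels");

    // 3x3 convolution with 16 output feature maps; kernel shape = { outMaps, W, H, inChannels },
    // strides = { hStride, vStride, inChannels }, mirroring ConvBNLayer in Image.h.
    auto convParams = Parameter(NDArrayView::RandomNormal<float>({ 16, 3, 3, 3 }, 0.0, 0.26, 1, device));
    auto conv = Convolution(convParams, input, { 1, 1, 3 });

    // Spatial batch normalization over the 16 feature maps.
    auto scale = Parameter({ 16 }, 1.0f, device);
    auto bias = Parameter({ 16 }, 0.0f, device);
    auto runMean = Constant({ 16 }, 0.0f, device);
    auto runInvStd = Constant({ 16 }, 0.0f, device);
    auto act = ReLU(BatchNormalization(conv, scale, bias, runMean, runInvStd, true /*spatial*/, 4096.0));

    // Global average pooling followed by a dense output layer.
    auto pooled = Pooling(act, PoolingType::Average, { 32, 32, 1 }, { 1, 1, 1 });
    auto outW = Parameter(NDArrayView::RandomNormal<float>({ 10, 1, 1, 16 }, 0.0, 0.4, 1, device));
    auto z = Plus(Times(outW, pooled), Parameter({ 10 }, 0.0f, device));

    auto lossFunc = CrossEntropyWithSoftmax(z, labels, L"loss");
    Variable trainingLoss = lossFunc;
    auto model = Combine({ lossFunc, z }, L"sketchModel");
    Trainer trainer(model, trainingLoss, { SGDLearner(model->Parameters(), 0.0078125) });

    auto source = CreateCifarMinibatchSource(SIZE_MAX);
    auto infos = source->StreamInfos();
    auto imageInfo = *std::find_if(infos.begin(), infos.end(), [](const StreamInfo& s) { return s.m_name == L"features"; });
    auto labelInfo = *std::find_if(infos.begin(), infos.end(), [](const StreamInfo& s) { return s.m_name == L"labels"; });

    // Per-stream (numSequences, numSamples) limits; 0 leaves the sequence count unconstrained.
    std::unordered_map<StreamInfo, std::pair<size_t, size_t>> limits =
        { { imageInfo, { 0, 32 } }, { labelInfo, { 0, 32 } } };

    for (size_t i = 0; i < 100; ++i)
    {
        // GetNextMinibatch now returns a StreamInfo -> MinibatchData map; empty means end of data.
        auto mb = source->GetNextMinibatch(limits, device);
        if (mb.empty())
            break;
        trainer.TrainMinibatch({ { input, mb[imageInfo].m_data }, { labels, mb[labelInfo].m_data } }, device);
    }
}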