From 112dd21785612a7255ea09cf062e0d121a53442d Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Wed, 4 May 2016 18:46:56 -0700 Subject: [PATCH] Deconvolution support, part 1. --- Examples/Image/MNIST/Config/Macros.ndl | 4 + Source/ActionsLib/NDLNetworkBuilder.cpp | 7 +- .../ComputationNetworkBuilder.cpp | 8 +- .../ComputationNetworkBuilder.h | 4 +- .../ComputationNetworkLib/ComputationNode.h | 3 +- .../ConvolutionalNodes.h | 97 +++++++++++-------- 6 files changed, 71 insertions(+), 52 deletions(-) diff --git a/Examples/Image/MNIST/Config/Macros.ndl b/Examples/Image/MNIST/Config/Macros.ndl index bae1796a4..4f790b01d 100644 --- a/Examples/Image/MNIST/Config/Macros.ndl +++ b/Examples/Image/MNIST/Config/Macros.ndl @@ -48,6 +48,10 @@ ConvND(w, inp, kW, kH, inMap, outMap, hStride, vStride) = [ c = Convolution(w, inp, {kW, kH, inMap}, mapCount=outMap, stride={hStride, vStride, inMap}, sharing={true, true, true}, autoPadding={true, true, false}, lowerPad=0, upperPad=0, imageLayout=$imageLayout$) ] +DeConv(w, inp, kW, kH, inMap, outMap, hStride, vStride) = [ + c = Convolution(w, inp, {kW, kH, inMap}, mapCount=outMap, stride={hStride, vStride, inMap}, sharing={true, true, true}, autoPadding={true, true, false}, lowerPad=0, upperPad=0, transpose=1, imageLayout=$imageLayout$) +] + Conv2DReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) = [ w = ConvW(outMap, inWCount, wScale) b = ConvB(outMap, bValue) diff --git a/Source/ActionsLib/NDLNetworkBuilder.cpp b/Source/ActionsLib/NDLNetworkBuilder.cpp index f2e8c1199..8f18ffe39 100644 --- a/Source/ActionsLib/NDLNetworkBuilder.cpp +++ b/Source/ActionsLib/NDLNetworkBuilder.cpp @@ -299,10 +299,10 @@ void NDLNodeEvaluatorImpl::Evaluate(NDLNode* node, const wst if (cnNodeType == OperationNameOf(ConvolutionNode)) { RuntimeError("%ls: unexpected parameter count. %ls supports 2 modes: \n" - "1.
2D convolution which takes 7 fixed parameters [weightNodeName, inputValueNodeName, kernelWidth, kernelHeight, outputChannels,horizontalSubsample, verticalSubsample] \n" + "1. 2D convolution which takes 7 fixed parameters [weightNodeName, inputValueNodeName, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample] \n" "and two optional parameters [zeroPadding = [false|yourvalue], maxTempMemSizeInSamples = [0|yourvalue], imageLayout = \"HWC\"|\"cudnn\"]. \n" "2. ND convolution which takes 3 fixed parameters [weightNodeName, inputValueNodeName, kernelShape] and \n" - "9 optional parameters [mapCount = [1|yourvalue], stride = [1|yourvalue], sharing = [true|yourvalue], autoPadding = [true|yourvalue], lowerPad = [0|yourvalue], upperPad = [0|yourvalue], maxTempMemSizeInSamples = [0|yourvalue], imageLayout = \"cudnn\"|\"HWC\"]. \n" + "10 optional parameters [mapCount = [1|yourvalue], stride = [1|yourvalue], sharing = [true|yourvalue], autoPadding = [true|yourvalue], lowerPad = [0|yourvalue], upperPad = [0|yourvalue], bool transpose = [false|yourvalue], maxTempMemSizeInSamples = [0|yourvalue], imageLayout = \"cudnn\"|\"HWC\"]. \n" "For ND convolution, parameters kernelShape, mapCount, stride, sharing, autoPadding, lowerPad, upperPad can be arrays, e.g. 
kernelShape={5, 5, 3}", cnNodeType.c_str(), cnNodeType.c_str()); } @@ -387,8 +387,9 @@ void NDLNodeEvaluatorImpl::Evaluate(NDLNode* node, const wst if (pool == PoolKind::None) { + bool transpose = node->GetOptionalParameter("transpose", "false"); nodePtr = builder.Convolution(NULL, NULL, kernelShape, mapCount, stride, sharing, - autoPad, lowerPad, upperPad, imageLayout, maxTempMemSizeInSamples, name); + autoPad, lowerPad, upperPad, transpose, imageLayout, maxTempMemSizeInSamples, name); } else { diff --git a/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp b/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp index 7ed15ec08..e51715913 100644 --- a/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp +++ b/Source/ComputationNetworkLib/ComputationNetworkBuilder.cpp @@ -249,12 +249,12 @@ template shared_ptr> ComputationNetworkBuilder::CreateConvolutionNode(const std::wstring& nodeName, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape, const std::vector& sharing, const std::vector& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad, - ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples) + bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples) { return net.AddNodeToNetWithElemType(New>(net.GetDeviceId(), nodeName, kernelShape, mapCount, strideShape, sharing, autoPadding, lowerPad, upperPad, - imageLayout, maxTempMemSizeInSamples)); + transpose, imageLayout, maxTempMemSizeInSamples)); } template @@ -314,13 +314,13 @@ shared_ptr> ComputationNetworkBuilder::Convo const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape, const std::vector& sharing, const std::vector& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad, - ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples, + bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples, const std::wstring nodeName) { return 
net.AddNodeToNetAndAttachInputs(New>(net.GetDeviceId(), nodeName, kernelShape, mapCount, strideShape, sharing, autoPadding, lowerPad, upperPad, - imageLayout, maxTempMemSizeInSamples), + transpose, imageLayout, maxTempMemSizeInSamples), { weight, inputValues }); } diff --git a/Source/ComputationNetworkLib/ComputationNetworkBuilder.h b/Source/ComputationNetworkLib/ComputationNetworkBuilder.h index a7ec97bd5..2dd8845af 100644 --- a/Source/ComputationNetworkLib/ComputationNetworkBuilder.h +++ b/Source/ComputationNetworkLib/ComputationNetworkBuilder.h @@ -54,7 +54,7 @@ public: ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout, const wstring& dynamicAxisName = L""); ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape, const std::vector& sharing, const std::vector& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad, - ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples); + bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples); ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0); @@ -81,7 +81,7 @@ public: const ComputationNodePtr inputValues, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape, const std::vector& sharing, const std::vector& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad, - ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples, + bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples, const std::wstring nodeName = L""); ComputationNodePtr Pooling(const ComputationNodePtr inputValues, 
PoolKind poolKind, const TensorShape& kernelShape, const TensorShape& strideShape, diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h index 78056391c..71ab9ae37 100644 --- a/Source/ComputationNetworkLib/ComputationNode.h +++ b/Source/ComputationNetworkLib/ComputationNode.h @@ -37,7 +37,8 @@ #define CNTK_MODEL_VERSION_6 6 // Batch norm blending #define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file #define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs -#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_8 +#define CNTK_MODEL_VERSION_9 9 // Deconvolution and unpooling +#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_9 extern bool g_shareNodeValueMatrices; diff --git a/Source/ComputationNetworkLib/ConvolutionalNodes.h b/Source/ComputationNetworkLib/ConvolutionalNodes.h index 3ce940f75..180afa8cc 100644 --- a/Source/ComputationNetworkLib/ConvolutionalNodes.h +++ b/Source/ComputationNetworkLib/ConvolutionalNodes.h @@ -52,15 +52,15 @@ class ConvolutionNodeBase : public ComputationNode public: ConvolutionNodeBase(DEVICEID_TYPE deviceId, const wstring& name) - : Base(deviceId, name), m_poolKind(PoolKind::None), m_maxTempMemSizeInSamples(0) + : Base(deviceId, name), m_poolKind(PoolKind::None), m_transpose(false), m_maxTempMemSizeInSamples(0) { } ConvolutionNodeBase(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape, - const std::vector& sharing, const std::vector& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad, - PoolKind poolKind, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples) - : Base(deviceId, name), m_kernelShape(kernelShape), m_mapCount(mapCount), m_stride(strideShape), m_sharing(sharing), - m_autoPad(autoPadding), m_lowerPad(lowerPad), m_upperPad(upperPad), m_poolKind(poolKind), - m_imageLayout(imageLayout), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples) + const 
std::vector& sharing, const std::vector& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad, + PoolKind poolKind, bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples) + : Base(deviceId, name), m_kernelShape(kernelShape), m_mapCount(mapCount), m_stride(strideShape), m_sharing(sharing), + m_autoPad(autoPadding), m_lowerPad(lowerPad), m_upperPad(upperPad), m_poolKind(poolKind), m_transpose(transpose), + m_imageLayout(imageLayout), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples) { } @@ -79,6 +79,7 @@ public: fstream << (int32_t)m_poolKind; fstream << (int32_t)m_imageLayout; fstream << m_maxTempMemSizeInSamples; + fstream << m_transpose; } void Load(File& fstream, size_t modelVersion) override @@ -102,7 +103,11 @@ public: fstream >> layout; m_imageLayout = (ImageLayoutKind)layout; fstream >> m_maxTempMemSizeInSamples; - } + } + if (modelVersion >= CNTK_MODEL_VERSION_9) + { + fstream >> m_transpose; + } } void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override @@ -119,6 +124,7 @@ public: node->m_lowerPad = m_lowerPad; node->m_upperPad = m_upperPad; node->m_poolKind = m_poolKind; + node->m_transpose = m_transpose; node->m_imageLayout = m_imageLayout; node->m_maxTempMemSizeInSamples = m_maxTempMemSizeInSamples; } @@ -130,19 +136,22 @@ public: if (m_poolKind == PoolKind::None) { - if (inputIndex == 0) // derivative with respect to the weight matrix - { - auto& grad = Input(0)->GradientAsMatrix(); + if (inputIndex == 0) // derivative with respect to the weight matrix + { + auto& grad = Input(0)->GradientAsMatrix(); auto sliceInput1Value = Input(1)->ValueFor(fr); m_convEng->BackwardKernel(sliceOutputGrad, sliceInput1Value, grad, fr.IsAllFrames(), *m_tempMatrix); + } + else if (inputIndex == 1) // derivative with respect to the input feature + { + auto& input0 = Input(0)->ValueAsMatrix(); + auto sliceInput1Grad = Input(1)->GradientFor(fr); + if (!m_transpose) + 
m_convEng->BackwardData(sliceOutputGrad, input0, sliceInput1Grad, *m_tempMatrix); + else + m_convEng->Forward(sliceOutputGrad, input0, sliceInput1Grad, *m_tempMatrix); + } } - else if (inputIndex == 1) // derivative with respect to the input feature - { - auto& input0 = Input(0)->ValueAsMatrix(); - auto sliceInput1Grad = Input(1)->GradientFor(fr); - m_convEng->BackwardData(sliceOutputGrad, input0, sliceInput1Grad, *m_tempMatrix); - } - } else { Matrix sliceInput0Grad = Input(0)->GradientFor(fr); @@ -166,9 +175,12 @@ public: if (m_poolKind == PoolKind::None) { - const Matrix& input0 = Input(0)->ValueAsMatrix(); - Matrix sliceInput1Value = Input(1)->ValueFor(fr); - m_convEng->Forward(sliceInput1Value, input0, sliceOutputValue, *m_tempMatrix); + const Matrix& input0 = Input(0)->ValueAsMatrix(); + Matrix sliceInput1Value = Input(1)->ValueFor(fr); + if (!m_transpose) + m_convEng->Forward(sliceInput1Value, input0, sliceOutputValue, *m_tempMatrix); + else + m_convEng->BackwardData(sliceInput1Value, input0, sliceOutputValue, *m_tempMatrix); } else { @@ -195,6 +207,7 @@ protected: TensorShape m_lowerPad; TensorShape m_upperPad; PoolKind m_poolKind; + bool m_transpose; ImageLayoutKind m_imageLayout; size_t m_maxTempMemSizeInSamples; @@ -241,8 +254,8 @@ public: } ConvolutionNode(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape, const std::vector& sharing, const std::vector& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad, - ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples) - : Base(deviceId, name, kernelShape, mapCount, strideShape, sharing, autoPadding, lowerPad, upperPad, PoolKind::None, imageLayout, maxTempMemSizeInSamples), + bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples) + : Base(deviceId, name, kernelShape, mapCount, strideShape, sharing, autoPadding, lowerPad, upperPad, PoolKind::None, transpose, imageLayout, 
maxTempMemSizeInSamples), m_convolution2D(false) { } @@ -250,16 +263,16 @@ public: const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayout, bool zeroPadding, size_t maxTempMemSizeInSamples) : ConvolutionNode(deviceId, name, TensorShape(kernelWidth, kernelHeight, 1), TensorShape(1, 1, outputChannels), - TensorShape(horizontalSubsample, verticalSubsample, 1), vector{true}, + TensorShape(horizontalSubsample, verticalSubsample, 1), vector{true}, vector{zeroPadding}, TensorShape(0), TensorShape(0), - imageLayout, maxTempMemSizeInSamples) + false, imageLayout, maxTempMemSizeInSamples) { m_convolution2D = true; } ConvolutionNode(const ScriptableObjects::IConfigRecordPtr configp) : ConvolutionNode(configp->Get(L"deviceId"), L"", configp->Get(L"kernelShape"), configp->Get(L"mapCount"), configp->Get(L"strideShape"), configp->Get(L"dimSharing"), configp->Get(L"dimPadding"), configp->Get(L"dimPadLower"), configp->Get(L"dimPadUpper"), - ImageLayoutKindFrom(configp->Get(L"imageLayout")), configp->Get(L"maxTempMemSizeInSamples")) + configp->Get(L"transpose"), ImageLayoutKindFrom(configp->Get(L"imageLayout")), configp->Get(L"maxTempMemSizeInSamples")) { AttachInputsFromConfig(configp, GetExpectedNumInputs()); } @@ -346,12 +359,12 @@ public: if (isFinalValidationPass && (Input(0)->GetAsMatrixNumCols() != weightCols || Input(0)->GetAsMatrixNumRows() != mapCount)) { - LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]", + LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]", Input(0)->NodeName().c_str(), (int)mapCount, (int)weightCols); } auto outDims = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride, - m_sharing, m_autoPad, m_lowerPad, m_upperPad); + m_sharing, m_autoPad, m_lowerPad, m_upperPad); // ConvolveGeometry always uses 
CHW. SetDims(ImageDimensions(outDims, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout()); } @@ -432,9 +445,9 @@ public: { } PoolingNode(DEVICEID_TYPE deviceId, const wstring& name, PoolKind pool, const TensorShape& kernelShape, const TensorShape& strideShape, - const std::vector& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad, - ImageLayoutKind imageLayout) - : Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector{true}, autoPadding, lowerPad, upperPad, pool, imageLayout, 0) + const std::vector& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad, + ImageLayoutKind imageLayout) + : Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector{true}, autoPadding, lowerPad, upperPad, pool, false, imageLayout, 0) { } PoolingNode(const ScriptableObjects::IConfigRecordPtr configp) @@ -491,20 +504,20 @@ class PoolingNodeBase : public ComputationNode, public NumInputs<1> public: PoolingNodeBase(DEVICEID_TYPE deviceId, const wstring& name) : Base(deviceId, name), - m_windowWidth(SIZE_MAX), - m_windowHeight(SIZE_MAX), - m_horizontalSubsample(SIZE_MAX), - m_verticalSubsample(SIZE_MAX), - m_imageLayoutKind(ImageLayoutKind::HWC) + m_windowWidth(SIZE_MAX), + m_windowHeight(SIZE_MAX), + m_horizontalSubsample(SIZE_MAX), + m_verticalSubsample(SIZE_MAX), + m_imageLayoutKind(ImageLayoutKind::HWC) { } PoolingNodeBase(DEVICEID_TYPE deviceId, const wstring& name, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind) : Base(deviceId, name), - m_windowWidth(windowWidth), - m_windowHeight(windowHeight), - m_horizontalSubsample(horizontalSubsample), - m_verticalSubsample(verticalSubsample), - m_imageLayoutKind(imageLayoutKind) + m_windowWidth(windowWidth), + m_windowHeight(windowHeight), + m_horizontalSubsample(horizontalSubsample), + m_verticalSubsample(verticalSubsample), + m_imageLayoutKind(imageLayoutKind) { } 
PoolingNodeBase(const ScriptableObjects::IConfigRecordPtr configp) @@ -517,8 +530,8 @@ public: void Save(File& fstream) const override { Base::Save(fstream); - uint32_t imageLayoutKind = (uint32_t) m_imageLayoutKind; - uint32_t windowWidth = (uint32_t) m_windowWidth; + uint32_t imageLayoutKind = (uint32_t)m_imageLayoutKind; + uint32_t windowWidth = (uint32_t)m_windowWidth; fstream << windowWidth << imageLayoutKind << m_windowHeight << m_horizontalSubsample << m_verticalSubsample; }