diff --git a/Examples/Image/Miscellaneous/CIFAR-10/TutorialImage.cntk b/Examples/Image/Miscellaneous/CIFAR-10/TutorialImage.cntk
index 914a24f07..c7f27e367 100644
--- a/Examples/Image/Miscellaneous/CIFAR-10/TutorialImage.cntk
+++ b/Examples/Image/Miscellaneous/CIFAR-10/TutorialImage.cntk
@@ -15,15 +15,6 @@ Train = [
     action = "train"
 
     BrainScriptNetworkBuilder = [
-ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
-[
-    W = LearnableParameter(outMap, inWCount, init = "gaussian", initValueScale = wScale)
-    b = ParameterTensor(1:1:outMap, initValue = bValue)
-    c = Convolution(W, inp, kW:kH:(inWCount/kW/kH), mapDims=outMap, stride=hStride:vStride:(inWCount/kW/kH), autoPadding = true:true:false)
-    p = Plus(c, b)
-    y = RectifiedLinear(p)
-].y
-
         imageShape = 32:32:3
         labelDim = 10
@@ -38,15 +29,40 @@ ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
         hStride1 = 1
         vStride1 = 1
         # weight[cMap1, kW1 * kH1 * ImageC]
-        conv1_act = ConvReLULayer(featScaled, cMap1, 75, kW1, kH1, hStride1, vStride1, 0.0043, 0)
+        #conv1_act = ConvReLULayer1(cMap1, 75, kW1, kH1, hStride1, vStride1, 0.0043, 0) (featScaled)
+        #conv1_act = ConvReLULayer1(featScaled, cMap1, 75, kW1, kH1, hStride1, vStride1, 0.0043, 0)
+        conv1_act = ConvolutionalLayer {cMap1, (5:5), activation = ReLU, init = "gaussian", initValueScale = 0.0043} (featScaled)
+
+ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
+[
+    #W = LearnableParameter(outMap, inWCount, init = "gaussian", initValueScale = wScale)
+    W = LearnableParameter(0, 0, init = "gaussian", initValueScale = wScale)
+    b = ParameterTensor(1:1:outMap, initValue = bValue)
+    c = Convolution(W, inp, kW:kH/*:(inWCount/kW/kH)*/, mapDims=outMap, stride=hStride:vStride/*:(inWCount/kW/kH)*/, autoPadding = true/*:true:false*/)
+    p = Plus(c, b)
+    y = RectifiedLinear(p)
+].y
+ConvReLULayer1(outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
+[
+    #W = LearnableParameter(outMap, inWCount, init = "gaussian", initValueScale = wScale)
+    W = LearnableParameter(0, 0, init = "gaussian", initValueScale = wScale)
+    b = ParameterTensor(1:1:outMap, initValue = bValue)
+    f(inp) = {
+        c = Convolution(W, inp, kW:kH/*:(inWCount/kW/kH)*/, mapDims=outMap, stride=hStride:vStride/*:(inWCount/kW/kH)*/, autoPadding = true/*:true:false*/)
+        p = Plus(c, b)
+        y = RectifiedLinear(p)
+    }.y
+].f
+
+
         # pool1
-        pool1W = 3
-        pool1H = 3
-        pool1hStride = 2
-        pool1vStride = 2
+        #pool1W = 3
+        #pool1H = 3
+        #pool1hStride = 2
+        #pool1vStride = 2
         #pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride)
-        pool1 = MaxPoolingLayer {(pool1W:pool1H), stride = (pool1hStride:pool1vStride)} (conv1_act)
+        pool1 = MaxPoolingLayer {(3:3), stride = (2:2)} (conv1_act)
 
         # conv2
         kW2 = 5
@@ -56,14 +72,15 @@ ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
         vStride2 = 1
         # weight[cMap2, kW2 * kH2 * cMap1]
         conv2_act = ConvReLULayer(pool1, cMap2, 800, kW2, kH2, hStride2, vStride2, 1.414, 0)
+        #conv2_act = ConvolutionalLayer {cMap2, (5:5), activation = ReLU, init = "gaussian", initValueScale = 1.414} (featScaled)
 
         # pool2
-        pool2W = 3
-        pool2H = 3
-        pool2hStride = 2
-        pool2vStride = 2
+        #pool2W = 3
+        #pool2H = 3
+        #pool2hStride = 2
+        #pool2vStride = 2
         #pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride)
-        pool2 = MaxPoolingLayer {(pool2W:pool2H), stride = (pool2hStride:pool2vStride)} (conv2_act)
+        pool2 = MaxPoolingLayer {(3:3), stride = (2:2)} (conv2_act)
 
         # conv3
         kW3 = 5
@@ -73,47 +90,19 @@ ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
         vStride3 = 1
         # weight[cMap3, kW3 * kH3 * cMap2]
         conv3_act = ConvReLULayer(pool2, cMap3, 800, kW3, kH3, hStride3, vStride3, 1.414, 0)
+        #conv3_act = ConvolutionalLayer {cMap3, (5:5), activation = ReLU, init = "gaussian", initValueScale = 1.414} (featScaled)
 
         # pool3
-        pool3W = 3
-        pool3H = 3
-        pool3hStride = 2
-        pool3vStride = 2
+        #pool3W = 3
+        #pool3H = 3
+        #pool3hStride = 2
+        #pool3vStride = 2
         #pool3 = MaxPooling(conv3_act, pool3W, pool3H, pool3hStride, pool3vStride)
-        pool3 = MaxPoolingLayer {(pool3W:pool3H), stride = (pool3hStride:pool3vStride)} (conv3_act)
+        pool3 = MaxPoolingLayer {(3:3), stride = (2:2)} (conv3_act)
 
-#_PoolingLayer {poolKind,     # "max" or "average"
-#               filterShape,  # e.g. (3:3)
-#               stride = 1, autoPadding = true,
-#               lowerPad = 0, upperPad = 0} = # TODO: support this
-#{
-#    f(x) = Pooling (x, poolKind, kernelShape, stride = stride, autoPadding = autoPadding, lowerPad = lowerPad, upperPad = upperPad)
-#}.f
-
-
-
-#DNNImageReLULayer(inW, inH, inC, outDim, x, wScale, bValue) =
-#[
-#    W = Parameter(outDim,inW*inH*inC, init = "gaussian", initValueScale = wScale)
-#    b = LearnableParameter(outDim, 1, initValue = bValue)
-#    t = Times(W, x)
-#    z = Plus(t, b)
-#    y = RectifiedLinear(z)
-#].y
-
         #h1 = DNNImageReLULayer(3, 3, cMap3, 64, pool3, 12, 0)
         h1 = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (pool3)
         h1_d = Dropout(h1)
 
-#DNNLastLayer(64, labelDim, x, wScale, bValue) =
-#[
-#    W = LearnableParameter(labelDim, 64, init = "gaussian", initValueScale = wScale)
-#    b = ParameterTensor(labelDim, initValue = bValue)
-#    t = Times(W, x)
-#    z = Plus(t, b)
-#].z
-
-
         #z = DNNLastLayer(64, labelDim, h1_d, 1.5, 0)
-
         z = LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5} (h1_d)
 }.z
diff --git a/Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs b/Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
index 720bdae58..a542e5b52 100644
--- a/Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
+++ b/Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
@@ -60,34 +60,52 @@ EmbeddingLayer {outDim,               # dimension of embeddi
 #  out : [ (shifting dims)] |   |  (output dim)  |  (sample dims) ]
 ConvolutionalLayer {numOutputChannels,      # e.g. (1) or BS.Constants.None
                     filterShape,            # e.g. (3:3)
+                    bias = true,
+                    activation = (x=>x),
                     init = "uniform",
+                    initValueScale = 1,
                     #reductionRank = 1,     # TODO: support this
                     stride = 1, autoPadding = true,
-                    #lowerPad = 0, upperPad = 0,  # TODO: support this
+                    lowerPad = 0, upperPad = 0,
                     #transpose = false,     # TODO: support this
                     maxTempMemSizeInSamples = 0} =
 {
     reductionRank = 1 # TODO: shall become an optional parameter
-    outputChannelsShape = Repeat (1, numOutputChannels)   # Repeat(1) turns a scalar into a 1-element array
+    outputChannelsShape = _AsArray (numOutputChannels)
     outputRank = Length (outputChannelsShape)
-    kernelShape = _ConcatArrays (filterShape, Repeat (reductionRank, 0)) # append reduction dims to filter dims
-    W = ParameterTensor{_ConcatArrays (kernelDims, outputChannelsShape), init=init}
-    autoPaddingPadded = _ConcatArrays (_ForceResizeArray (Length (kernelDims), autoPadding), Repeat (reductionRank, false)) # set padding flags for reduction dims to false
-    sharing = false # TODO: support this
-    f(x) = Convolution (W, x, kernelShape, mapDims = numOutputChannels, stride = stride, sharing = sharing, autoPadding = autoPaddingPadded, lowerPad = lowerPad, upperPad = upperPad, transpose = transpose, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
+    filterRank = Length (filterShape)
+    kernelShape = _ConcatArrays (filterShape, Repeat (reductionRank, 0)) # kernel := filter plus reductionDims
+    W = ParameterTensor{_ConcatArrays (kernelShape, outputChannelsShape), init = init, initValueScale = initValueScale}  # [ W x H x C x K ]
+    #W = ParameterTensor{(outputChannelsShape:0), init = init, initValueScale = initValueScale}  # old-style for backwards-compatible random initialization
+    b = ParameterTensor(_ConcatArrays (Repeat (Length (filterShape), 1), outputChannelsShape), initValue = 0)  # [ 1 x 1 x K ]
+    #stridePadded =
+    #    if (Length (_AsArray (stride))) == 1 then stride
+    #    else _ConcatArrays (stride, Repeat (reductionRank, 0)) # gets inferred
+    #FixShapes (vec, val) = # padding vectors must be either length 1 or match kernel dim including reduction dims
+    #    if Length (_AsArray (vec)) == 1 then vec
+    #    else _ConcatArrays (_ForceResizeArray (Length (kernelShape), vec), Repeat (reductionRank, val)) # set padding flags for reduction dims to false
+    #autoPaddingPadded = FixShapes (autoPadding, false)
+    #lowerPadPadded    = FixShapes (lowerPad, 0)
+    #upperPadPadded    = FixShapes (upperPad, 0)
+    sharing = true    # TODO: support this
+    transpose = false # TODO: support this
+    f(x) = {
+        c = Convolution (W, x, filterShape, mapDims = numOutputChannels, stride = stride, sharing = sharing, autoPadding = autoPadding, lowerPad = lowerPad, upperPad = upperPad, transpose = transpose, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
+        res = activation (if bias then c + b else c)
+    }.res
 }.f
 
 # MaxPoolingLayer, AveragePoolingLayer -- create a max- or average-pooling layer
 _PoolingLayer {poolKind,     # "max" or "average"
               filterShape,   # e.g. (3:3)
-              stride = 1, autoPadding = true,
+              stride = 1, autoPadding = false,
               lowerPad = 0, upperPad = 0} = # TODO: support this
 {
     f(x) = Pooling (x, poolKind, filterShape, stride = stride, autoPadding = autoPadding, lowerPad = lowerPad, upperPad = upperPad)
 }.f
-MaxPoolingLayer {filterShape, stride = 1, autoPadding = true, lowerPad = 0, upperPad = 0} =
+MaxPoolingLayer {filterShape, stride = 1, autoPadding = false, lowerPad = 0, upperPad = 0} =
     _PoolingLayer {"max", filterShape, stride = stride, autoPadding = autoPadding, lowerPad = lowerPad, upperPad = upperPad}
-AveragePoolingLayer {filterShape, stride = 1, autoPadding = true, lowerPad = 0, upperPad = 0} =
+AveragePoolingLayer {filterShape, stride = 1, autoPadding = false, lowerPad = 0, upperPad = 0} =
     _PoolingLayer {"average", filterShape, stride = stride, autoPadding = autoPadding, lowerPad = lowerPad, upperPad = upperPad}
 
 # RecurrentLSTMLayer -- create an LSTM layer
@@ -424,7 +442,7 @@ ReconcileDynamicAxis(dataInput, layoutInput, tag='') = new ComputationNode [ ope
 ReconcileMBLayout = ReconcileDynamicAxis # back compat
 CastAs (type, data) = ReconcileDynamicAxis (data, type) # read as CastAs(data) where the cast may consist of rearranging the data w.r.t. MBLayout or broadcasting across sequence items
 Convolution(weightNode, inputValueNode, kernelDims, mapDims = 0, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, transpose=false, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
-# ND pooling/unpooling
+# ND pooling/unpooling --why is autoPadding true? Normally one would want to reduce dimensions, no?
 Pooling(input, poolKind/*'max'|'average'*/, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'Pooling' ; inputs = (input); pool = poolKind ; kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
 MaxUnpooling(unpoolInput, poolInput, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxUnpooling' ; inputs = (unpoolInput : poolInput); kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
 # 2D pooling
@@ -826,7 +844,7 @@ RNNs =
     # This function also takes an optional auxiliary input, e.g. for suporting attention models.
     LSTMBlock (outputDim, cellShape=Constants.None, enableSelfStabilization=false) =
     [
-        cellDim = if Constants.IsNone (cellShape) then outputDim else cellDim
+        cellDim = if Constants.IsNone (cellShape) then outputDim else cellShape
         // parameter macros
         # note: each invocation comes with its own set of weights
         B{} = Parameters.BiasParam {cellDim}
diff --git a/Source/ComputationNetworkLib/ConvolutionalNodes.h b/Source/ComputationNetworkLib/ConvolutionalNodes.h
index d50056cdd..5210cae36 100644
--- a/Source/ComputationNetworkLib/ConvolutionalNodes.h
+++ b/Source/ComputationNetworkLib/ConvolutionalNodes.h
@@ -24,15 +24,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 // This follows "high performance convolutional neural networks for document processing" by Kumar Chellapilla, Sidde Puri, and Patrice Simard.
 // Each sample is stored as a column-major matrix (height, width) of float[numChannels] (r00, g00, b00, r10, g10, b10, r01, g01, b01, r11, g11, b11).
 //
-//     - input  : [C  x W  x H  x T]  or ARRAY[1..T] OF ARRAY[1..H]  OF ARRAY[1..W]  OF ARRAY[1..C]
-//     - output : [C' x W' x H' x T]  or ARRAY[1..T] OF ARRAY[1..H'] OF ARRAY[1..W'] OF ARRAY[1..C']
-//     - filter : [C' x W" x H" x C ] or ARRAY[1..C] OF ARRAY[1..H"] OF ARRAY[1..W"] OF ARRAY[1..C']
+//     - input  : [C  x W  x H  x T]  or ARRAY[1..T] OF ARRAY[1..H]  OF ARRAY[1..W]  OF ARRAY[1..C]
+//     - output : [K  x W' x H' x T]  or ARRAY[1..T] OF ARRAY[1..H'] OF ARRAY[1..W'] OF ARRAY[1..K]
+//     - filter : [K  x W" x H" x C ] or ARRAY[1..C] OF ARRAY[1..H"] OF ARRAY[1..W"] OF ARRAY[1..K]
 //
 // * cudnn ("CHW") mode (works both GPU and CPU): Channels are planes
 //
-//     - input  : [W  x H  x C  x T]  or ARRAY[1..T] OF ARRAY[1..C]  OF ARRAY[1..H]  OF ARRAY[1..W]
-//     - output : [W' x H' x C' x T]  or ARRAY[1..T] OF ARRAY[1..C'] OF ARRAY[1..H'] OF ARRAY[1..W']
-//     - filter : [W" x H" x C  x C' ] or ARRAY[1..C'] OF ARRAY[1..C] OF ARRAY[1..H] OF ARRAY[1..W]
+//     - input  : [W  x H  x C  x T]  or ARRAY[1..T] OF ARRAY[1..C]  OF ARRAY[1..H]  OF ARRAY[1..W]
+//     - output : [W' x H' x K  x T]  or ARRAY[1..T] OF ARRAY[1..K]  OF ARRAY[1..H'] OF ARRAY[1..W']
+//     - filter : [W" x H" x C  x K ] or ARRAY[1..K] OF ARRAY[1..C] OF ARRAY[1..H] OF ARRAY[1..W]
 //
 // where:
 //  - using ' for output and " for filter
@@ -41,7 +41,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 //  - C = input channels
 //      - 3 for color images, 1 for B&W images
 //      - for hidden layer: dimension of activation vector for each pixel
-//  - C' = output channels = dimension of activation vector for each pixel (also called N by NVidia, inconsistently)
+//  - K = output channels = dimension of activation vector for each pixel (also called N by NVidia, inconsistently)
 //
 // For ND-convolution/pooling only second format ('cudnn') is supported.
 //
@@ -149,6 +149,41 @@ public:
     size_t MaxTempMemSizeInSamples() const { return m_maxTempMemSizeInSamples; }
     PoolKind PoolingKind() const { return m_poolKind; }
 
+private:
+    // bottomlessly expand shape to filterRank, then expand to inputRank using defaults or given 'from' values
+    template <class V, class T>
+    static void FixVectorShape(size_t filterRank, size_t inputRank, V& shape, T deflt, const V& from = V())
+    {
+        if (shape.size() == 0)
+            return; // let ComputeOutputShape() deal with this special case
+        // repeat the last value until we have the same rank as the filter
+        while (shape.size() < filterRank)
+            shape.push_back(shape.back());
+        // increase to input rank
+        // If 'from' is given then clone the value from there. This is meant to be the input dimensions for convolution.
+        while (shape.size() < inputRank)
+            shape.push_back(shape.size() < from.size() ? from[shape.size()] : deflt);
+    }
+    static void FixTensorShape(size_t filterRank, size_t inputRank, TensorShape& shape, size_t deflt, const TensorShape& from = TensorShape())
+    {
+        auto dims = shape.GetDims();
+        FixVectorShape(filterRank, inputRank, dims, deflt, from.GetDims());
+        shape = TensorShape(dims);
+    }
+protected:
+    // infer reduction dimensions if not given
+    void InferReductionDims(const TensorShape& inputShape, const TensorShape& fromShape)
+    {
+        // If kernel has a lower rank than the input then the remaining dimensions are to be reduced over.
+        size_t filterRank = m_kernelShape.size();
+        FixTensorShape(filterRank, inputShape.size(), m_kernelShape, 1, fromShape); // convolve over red dim; pool over 1
+        FixTensorShape(filterRank, inputShape.size(), m_stride,      1, fromShape); // stride for reduction dims is red dim or 1
+        FixVectorShape(filterRank, inputShape.size(), m_autoPad,     false);        // no padding for reduction dims
+        FixTensorShape(filterRank, inputShape.size(), m_lowerPad,    0);
+        FixTensorShape(filterRank, inputShape.size(), m_upperPad,    0);
+        FixVectorShape(filterRank, inputShape.size(), m_sharing,     true);
+    }
+
 protected:
     TensorShape m_kernelShape;
     TensorShape m_mapCount;
@@ -369,6 +404,8 @@ public:
         else
         {
             inputShape = GetInputSampleLayout(inputIdx);
+            // infer reduction dimensions if not given
+            InferReductionDims(inputShape, inputShape);
             if (!m_transpose)
             {
                 outputShape = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
@@ -385,6 +422,25 @@ public:
         // ConvolveGeometry always uses CHW.
         SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
 
+        // update LearnableParameter if it has 0 dimensions (to be inferred)
+        // Typically this would be the #inputChannels (C).
+        if (Input(0)->GetSampleLayout().GetNumElements() == 0)
+        {
+            // BUGBUG: Inference does not support sharing. Problem is that we have the information too late.
+            //         In this case, users will have to specify the correct dimensions. Good luck.
+#if 1       // old style for back compat with previous results. Randomization will differ.
+            if (Input(0)->GetSampleLayout().GetRank() == 2)
+                Input(0)->ValidateInferInputDimsFrom(TensorShape(m_mapCount.GetNumElements(), m_kernelShape.GetNumElements()));
+            else
+#endif
+            {
+                auto weightShape = m_kernelShape.GetDims();
+                for (auto outDim : m_mapCount.GetDims())
+                    weightShape.push_back(outDim);
+                Input(0)->ValidateInferInputDimsFrom(TensorShape(weightShape));
+            }
+        }
+
         if (isFinalValidationPass)
         {
             if (m_convEng == nullptr)
@@ -397,10 +453,11 @@ public:
                                                 ConvolutionEngineKind::All, NodeName());
             }
-            if (Input(0)->GetAsMatrixNumCols() != m_kernelShape.GetNumElements() ||
-                Input(0)->GetAsMatrixNumRows() != m_convEng->Geometry()->KernelCount())
+            if (Input(0)->GetSampleLayout().GetNumElements() != m_kernelShape.GetNumElements() * m_convEng->Geometry()->KernelCount())
             {
-                LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [kernelCount, kernelWidth * kernelHeight * inputChannels]",
+                //LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [kernelCount, kernelWidth * kernelHeight * inputChannels]",
+                //           Input(0)->NodeName().c_str(), (int)m_convEng->Geometry()->KernelCount(), (int)m_kernelShape.GetNumElements());
+                LogicError("Convolution weight matrix %ls should have dimension [(filter shape) x (input channels) x (output channels)]",
                            Input(0)->NodeName().c_str(), (int)m_convEng->Geometry()->KernelCount(), (int)m_kernelShape.GetNumElements());
             }
         }
@@ -489,22 +546,6 @@ public:
         return m_poolKind == PoolKind::Max;
     }
 
-private:
-    // add 'reductionDims' dimensions to 'shape', copying from 'from' or 'deflt'
-    template <class V, class T>
-    static void FixVectorShape(size_t reductionDims, V& shape, T deflt)
-    {
-        size_t targetRank = shape.size() + reductionDims;
-        if (shape.size() < targetRank)
-            shape.resize(targetRank, deflt);
-        // else let ComputeOutputShape() deal with the failure
-    }
-    static void FixTensorShape(size_t reductionDims, TensorShape& shape, size_t deflt)
-    {
-        auto dims = shape.GetDims();
-        FixVectorShape(reductionDims, dims, deflt);
-        shape = TensorShape(dims);
-    }
 public:
     void Validate(bool isFinalValidationPass) override
     {
@@ -519,26 +560,10 @@ public:
                        "and make sure input data layout is CHW", NodeName().c_str(), OperationName().c_str(), NodeName().c_str());
         }
-        auto inputShape = GetInputSampleLayout(0);
-        // make kernel shape etc. look like convolution parameters, i.e. create nominal reduction dimensions
-        // In older versions, it was expected that pooling takes kernel shapes like convolution,
-        // which included the reduction dim(s). It makes more sense to not require users to
-        // include them for pooing, which the padding below accounts for.
-        if (inputShape.size() > m_kernelShape.size()) // user specified only the pooling-area shape: add the missing dims
-        {
-            size_t reductionDims = inputShape.size() - m_kernelShape.size(); // number of missing dims--these are reduction dims
-            FixTensorShape(reductionDims, m_kernelShape, 1); // pool over 1 in reduction dimension
-            if (m_stride.GetRank() != 1)
-                FixTensorShape(reductionDims, m_stride, 1); // stride for reduction dims is 1
-            if (m_autoPad.size() != 1)
-                FixVectorShape(reductionDims, m_autoPad, false); // no padding for reduction dims
-            if (m_lowerPad.GetRank() != 1)
-                FixTensorShape(reductionDims, m_lowerPad, 0);
-            if (m_upperPad.GetRank() != 1)
-                FixTensorShape(reductionDims, m_upperPad, 0);
-            if (m_sharing.size() != 1)
-                FixVectorShape(reductionDims, m_sharing, false); // dummy
-        }
+        const auto& inputShape = GetInputSampleLayout(0);
+
+        // infer reduction dimensions if not given
+        InferReductionDims(inputShape, TensorShape());
 
         auto outDims = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
                                                             m_sharing, m_autoPad, m_lowerPad, m_upperPad);
@@ -634,6 +659,10 @@ public:
         }
 
         auto inputShape = GetInputSampleLayout(0);
+
+        // infer reduction dimensions if not given
+        InferReductionDims(inputShape, TensorShape());
+
         // Same as in case of deconvolution, node input (inputShape) is really the output of the max pooling
         // and node output (outDims) is pooling input.
         auto outputShape = ConvolveGeometry::ComputeInputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
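
Usage sketch (illustrative only, not part of the patch): with these changes the reduction (input-channel) dimension no longer has to be spelled out by the user. ConvolutionalLayer passes only the filter shape to Convolution(), InferReductionDims() pads the kernel/stride/padding/sharing shapes up to the input rank during validation, and a LearnableParameter declared with 0 dimensions has its shape inferred as [(filter shape) x (input channels) x (output channels)]. A minimal BrainScript fragment in the style of the TutorialImage.cntk hunk above; cMap1, labelDim, and featScaled are assumed to be defined elsewhere in that config:

    conv1_act = ConvolutionalLayer {cMap1, (5:5), activation = ReLU, init = "gaussian", initValueScale = 0.0043} (featScaled)
    pool1     = MaxPoolingLayer {(3:3), stride = (2:2)} (conv1_act)   # pooling layers now default to autoPadding = false
    h1        = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (pool1)
    z         = LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5} (Dropout(h1))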