implemented dimension inference for Convolution()
This commit is contained in:
Parent: afb0175f45
Commit: 55673988af
@@ -15,15 +15,6 @@ Train = [
    action = "train"

    BrainScriptNetworkBuilder = [
        ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
        [
            W = LearnableParameter(outMap, inWCount, init = "gaussian", initValueScale = wScale)
            b = ParameterTensor(1:1:outMap, initValue = bValue)
            c = Convolution(W, inp, kW:kH:(inWCount/kW/kH), mapDims=outMap, stride=hStride:vStride:(inWCount/kW/kH), autoPadding = true:true:false)
            p = Plus(c, b)
            y = RectifiedLinear(p)
        ].y

        imageShape = 32:32:3
        labelDim = 10

@@ -38,15 +29,40 @@ ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
        hStride1 = 1
        vStride1 = 1
        # weight[cMap1, kW1 * kH1 * ImageC]
        conv1_act = ConvReLULayer(featScaled, cMap1, 75, kW1, kH1, hStride1, vStride1, 0.0043, 0)
        #conv1_act = ConvReLULayer1(cMap1, 75, kW1, kH1, hStride1, vStride1, 0.0043, 0) (featScaled)
        #conv1_act = ConvReLULayer1(featScaled, cMap1, 75, kW1, kH1, hStride1, vStride1, 0.0043, 0)
        conv1_act = ConvolutionalLayer {cMap1, (5:5), activation = ReLU, init = "gaussian", initValueScale = 0.0043} (featScaled)

        ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
        [
            #W = LearnableParameter(outMap, inWCount, init = "gaussian", initValueScale = wScale)
            W = LearnableParameter(0, 0, init = "gaussian", initValueScale = wScale)
            b = ParameterTensor(1:1:outMap, initValue = bValue)
            c = Convolution(W, inp, kW:kH/*:(inWCount/kW/kH)*/, mapDims=outMap, stride=hStride:vStride/*:(inWCount/kW/kH)*/, autoPadding = true/*:true:false*/)
            p = Plus(c, b)
            y = RectifiedLinear(p)
        ].y
        ConvReLULayer1(outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
        [
            #W = LearnableParameter(outMap, inWCount, init = "gaussian", initValueScale = wScale)
            W = LearnableParameter(0, 0, init = "gaussian", initValueScale = wScale)
            b = ParameterTensor(1:1:outMap, initValue = bValue)
            f(inp) = {
                c = Convolution(W, inp, kW:kH/*:(inWCount/kW/kH)*/, mapDims=outMap, stride=hStride:vStride/*:(inWCount/kW/kH)*/, autoPadding = true/*:true:false*/)
                p = Plus(c, b)
                y = RectifiedLinear(p)
            }.y
        ].f


        # pool1
        pool1W = 3
        pool1H = 3
        pool1hStride = 2
        pool1vStride = 2
        #pool1W = 3
        #pool1H = 3
        #pool1hStride = 2
        #pool1vStride = 2
        #pool1 = MaxPooling(conv1_act, pool1W, pool1H, pool1hStride, pool1vStride)
        pool1 = MaxPoolingLayer {(pool1W:pool1H), stride = (pool1hStride:pool1vStride)} (conv1_act)
        pool1 = MaxPoolingLayer {(3:3), stride = (2:2)} (conv1_act)

        # conv2
        kW2 = 5

@@ -56,14 +72,15 @@ ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
        vStride2 = 1
        # weight[cMap2, kW2 * kH2 * cMap1]
        conv2_act = ConvReLULayer(pool1, cMap2, 800, kW2, kH2, hStride2, vStride2, 1.414, 0)
        #conv2_act = ConvolutionalLayer {cMap2, (5:5), activation = ReLU, init = "gaussian", initValueScale = 1.414} (featScaled)

        # pool2
        pool2W = 3
        pool2H = 3
        pool2hStride = 2
        pool2vStride = 2
        #pool2W = 3
        #pool2H = 3
        #pool2hStride = 2
        #pool2vStride = 2
        #pool2 = MaxPooling(conv2_act, pool2W, pool2H, pool2hStride, pool2vStride)
        pool2 = MaxPoolingLayer {(pool2W:pool2H), stride = (pool2hStride:pool2vStride)} (conv2_act)
        pool2 = MaxPoolingLayer {(3:3), stride = (2:2)} (conv2_act)

        # conv3
        kW3 = 5

@@ -73,47 +90,19 @@ ConvReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) =
        vStride3 = 1
        # weight[cMap3, kW3 * kH3 * cMap2]
        conv3_act = ConvReLULayer(pool2, cMap3, 800, kW3, kH3, hStride3, vStride3, 1.414, 0)
        #conv3_act = ConvolutionalLayer {cMap3, (5:5), activation = ReLU, init = "gaussian", initValueScale = 1.414} (featScaled)

        # pool3
        pool3W = 3
        pool3H = 3
        pool3hStride = 2
        pool3vStride = 2
        #pool3W = 3
        #pool3H = 3
        #pool3hStride = 2
        #pool3vStride = 2
        #pool3 = MaxPooling(conv3_act, pool3W, pool3H, pool3hStride, pool3vStride)
        pool3 = MaxPoolingLayer {(pool3W:pool3H), stride = (pool3hStride:pool3vStride)} (conv3_act)
        pool3 = MaxPoolingLayer {(3:3), stride = (2:2)} (conv3_act)

        #_PoolingLayer {poolKind,    # "max" or "average"
        #               filterShape, # e.g. (3:3)
        #               stride = 1, autoPadding = true,
        #               lowerPad = 0, upperPad = 0} = # TODO: support this
        #{
        #    f(x) = Pooling (x, poolKind, kernelShape, stride = stride, autoPadding = autoPadding, lowerPad = lowerPad, upperPad = upperPad)
        #}.f

        #DNNImageReLULayer(inW, inH, inC, outDim, x, wScale, bValue) =
        #[
        #    W = Parameter(outDim, inW*inH*inC, init = "gaussian", initValueScale = wScale)
        #    b = LearnableParameter(outDim, 1, initValue = bValue)
        #    t = Times(W, x)
        #    z = Plus(t, b)
        #    y = RectifiedLinear(z)
        #].y
        #h1 = DNNImageReLULayer(3, 3, cMap3, 64, pool3, 12, 0)
        h1 = DenseLayer {64, activation = ReLU, init = "gaussian", initValueScale = 12} (pool3)
        h1_d = Dropout(h1)

        #DNNLastLayer(64, labelDim, x, wScale, bValue) =
        #[
        #    W = LearnableParameter(labelDim, 64, init = "gaussian", initValueScale = wScale)
        #    b = ParameterTensor(labelDim, initValue = bValue)
        #    t = Times(W, x)
        #    z = Plus(t, b)
        #].z

        #z = DNNLastLayer(64, labelDim, h1_d, 1.5, 0)

        z = LinearLayer {labelDim, init = "gaussian", initValueScale = 1.5} (h1_d)
    }.z
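
The point of the config change above: the old-style ConvReLULayer required the caller to hand-compute inWCount = kW * kH * inputChannels, while the new variant passes only kW:kH and leaves W as LearnableParameter(0, 0, ...) to be inferred. A standalone check of that arithmetic (plain C++, not CNTK code; the cMap1 = 32 value is deduced from the 800 constant, not shown directly in this excerpt):

#include <cstdio>

int main()
{
    printf("conv1: 5*5*3  = %d (matches inWCount 75,  input channels 3)\n", 5 * 5 * 3);
    printf("conv2: 5*5*32 = %d (matches inWCount 800, so cMap1 = 32)\n", 5 * 5 * 32);
    return 0;
}
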
@@ -60,34 +60,52 @@ EmbeddingLayer {outDim, # dimension of embedding
# out : [ (shifting dims) | (output dim) | (sample dims) ]
ConvolutionalLayer {numOutputChannels, # e.g. (1) or BS.Constants.None
                    filterShape,       # e.g. (3:3)
                    bias = true,
                    activation = (x=>x),
                    init = "uniform",
                    initValueScale = 1,
                    #reductionRank = 1, # TODO: support this
                    stride = 1, autoPadding = true,
                    #lowerPad = 0, upperPad = 0, # TODO: support this
                    lowerPad = 0, upperPad = 0,
                    #transpose = false, # TODO: support this
                    maxTempMemSizeInSamples = 0} =
{
    reductionRank = 1 # TODO: shall become an optional parameter
    outputChannelsShape = Repeat (1, numOutputChannels) # Repeat(1) turns a scalar into a 1-element array
    outputChannelsShape = _AsArray (numOutputChannels)
    outputRank = Length (outputChannelsShape)
    kernelShape = _ConcatArrays (filterShape, Repeat (reductionRank, 0)) # append reduction dims to filter dims
    W = ParameterTensor{_ConcatArrays (kernelDims, outputChannelsShape), init=init}
    autoPaddingPadded = _ConcatArrays (_ForceResizeArray (Length (kernelDims), autoPadding), Repeat (reductionRank, false)) # set padding flags for reduction dims to false
    sharing = false # TODO: support this
    f(x) = Convolution (W, x, kernelShape, mapDims = numOutputChannels, stride = stride, sharing = sharing, autoPadding = autoPaddingPadded, lowerPad = lowerPad, upperPad = upperPad, transpose = transpose, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
    filterRank = Length (filterShape)
    kernelShape = _ConcatArrays (filterShape, Repeat (reductionRank, 0)) # kernel := filter plus reduction dims
    W = ParameterTensor{_ConcatArrays (kernelShape, outputChannelsShape), init = init, initValueScale = initValueScale} # [ W x H x C x K ]
    #W = ParameterTensor{(outputChannelsShape:0), init = init, initValueScale = initValueScale} # old-style for backwards-compatible random initialization
    b = ParameterTensor(_ConcatArrays (Repeat (Length (filterShape), 1), outputChannelsShape), initValue = 0) # [ 1 x 1 x K ]
    #stridePadded =
    #    if (Length (_AsArray (stride))) == 1 then stride
    #    else _ConcatArrays (stride, Repeat (reductionRank, 0)) # gets inferred
    #FixShapes (vec, val) = # padding vectors must be either length 1 or match kernel dim including reduction dims
    #    if Length (_AsArray (vec)) == 1 then vec
    #    else _ConcatArrays (_ForceResizeArray (Length (kernelShape), vec), Repeat (reductionRank, val)) # set padding flags for reduction dims to false
    #autoPaddingPadded = FixShapes (autoPadding, false)
    #lowerPadPadded = FixShapes (lowerPad, 0)
    #upperPadPadded = FixShapes (upperPad, 0)
    sharing = true # TODO: support this
    transpose = false # TODO: support this
    f(x) = {
        c = Convolution (W, x, filterShape, mapDims = numOutputChannels, stride = stride, sharing = sharing, autoPadding = autoPadding, lowerPad = lowerPad, upperPad = upperPad, transpose = transpose, maxTempMemSizeInSamples = maxTempMemSizeInSamples)
        res = activation (if bias then c + b else c)
    }.res
}.f
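
As the comments above note, the kernel shape is built as filterShape plus reductionRank zeros, and a 0 dimension means "to be inferred" (the C++ changes further down fill it in during validation). A minimal standalone sketch of that convention (not the CNTK API; the 16-channel input shape is chosen purely for illustration):

#include <cstdio>
#include <vector>

// Replace to-be-inferred (0) dimensions with the matching input dimension.
static std::vector<size_t> InferZeros(std::vector<size_t> shape, const std::vector<size_t>& from)
{
    for (size_t i = 0; i < shape.size() && i < from.size(); i++)
        if (shape[i] == 0)
            shape[i] = from[i];
    return shape;
}

int main()
{
    std::vector<size_t> kernelShape = {3, 3, 0};    // filterShape (3:3) ++ Repeat(reductionRank, 0)
    std::vector<size_t> inputShape  = {28, 28, 16}; // hypothetical 16-channel feature map
    auto k = InferZeros(kernelShape, inputShape);
    printf("inferred kernel: %zu x %zu x %zu\n", k[0], k[1], k[2]); // 3 x 3 x 16
    return 0;
}
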

# MaxPoolingLayer, AveragePoolingLayer -- create a max- or average-pooling layer
_PoolingLayer {poolKind,    # "max" or "average"
               filterShape, # e.g. (3:3)
               stride = 1, autoPadding = true,
               stride = 1, autoPadding = false,
               lowerPad = 0, upperPad = 0} = # TODO: support this
{
    f(x) = Pooling (x, poolKind, filterShape, stride = stride, autoPadding = autoPadding, lowerPad = lowerPad, upperPad = upperPad)
}.f
MaxPoolingLayer {filterShape, stride = 1, autoPadding = true, lowerPad = 0, upperPad = 0} =
MaxPoolingLayer {filterShape, stride = 1, autoPadding = false, lowerPad = 0, upperPad = 0} =
    _PoolingLayer {"max", filterShape, stride = stride, autoPadding = autoPadding, lowerPad = lowerPad, upperPad = upperPad}
AveragePoolingLayer {filterShape, stride = 1, autoPadding = true, lowerPad = 0, upperPad = 0} =
AveragePoolingLayer {filterShape, stride = 1, autoPadding = false, lowerPad = 0, upperPad = 0} =
    _PoolingLayer {"average", filterShape, stride = stride, autoPadding = autoPadding, lowerPad = lowerPad, upperPad = upperPad}

# RecurrentLSTMLayer -- create an LSTM layer

@@ -424,7 +442,7 @@ ReconcileDynamicAxis(dataInput, layoutInput, tag='') = new ComputationNode [ ope
ReconcileMBLayout = ReconcileDynamicAxis # back compat
CastAs (type, data) = ReconcileDynamicAxis (data, type) # read as CastAs<type>(data) where the cast may consist of rearranging the data w.r.t. MBLayout or broadcasting across sequence items
Convolution(weightNode, inputValueNode, kernelDims, mapDims = 0, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, transpose = false, imageLayout = 'CHW', maxTempMemSizeInSamples = 0, tag = '') = new ComputationNode [ operation = 'Convolution' ; inputs = (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
# ND pooling/unpooling
# ND pooling/unpooling --why is autoPadding true? Normally one would want to reduce dimensions, no?
Pooling(input, poolKind/*'max'|'average'*/, kernelDims, stride = 1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout = 'CHW', tag = '') = new ComputationNode [ operation = 'Pooling' ; inputs = (input); pool = poolKind ; kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
MaxUnpooling(unpoolInput, poolInput, kernelDims, stride = 1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout = 'CHW', tag = '') = new ComputationNode [ operation = 'MaxUnpooling' ; inputs = (unpoolInput : poolInput); kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
# 2D pooling

@@ -826,7 +844,7 @@ RNNs =
    # This function also takes an optional auxiliary input, e.g. for supporting attention models.
    LSTMBlock (outputDim, cellShape=Constants.None, enableSelfStabilization=false) =
    [
        cellDim = if Constants.IsNone (cellShape) then outputDim else cellDim
        cellDim = if Constants.IsNone (cellShape) then outputDim else cellShape
        // parameter macros
        # note: each invocation comes with its own set of weights
        B{} = Parameters.BiasParam {cellDim}

@@ -25,14 +25,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Each sample is stored as a column-major matrix (height, width) of float[numChannels] (r00, g00, b00, r10, g10, b10, r01, g01, b01, r11, g11, b11).
//
//  - input  : [C x W x H x T]    or ARRAY[1..T] OF ARRAY[1..H]  OF ARRAY[1..W]  OF ARRAY[1..C]
//  - output : [C' x W' x H' x T] or ARRAY[1..T] OF ARRAY[1..H'] OF ARRAY[1..W'] OF ARRAY[1..C']
//  - filter : [C' x W" x H" x C] or ARRAY[1..C] OF ARRAY[1..H"] OF ARRAY[1..W"] OF ARRAY[1..C']
//  - output : [K x W' x H' x T]  or ARRAY[1..T] OF ARRAY[1..H'] OF ARRAY[1..W'] OF ARRAY[1..K]
//  - filter : [K x W" x H" x C]  or ARRAY[1..C] OF ARRAY[1..H"] OF ARRAY[1..W"] OF ARRAY[1..K]
//
//  * cudnn ("CHW") mode (works both GPU and CPU): Channels are planes
//
//  - input  : [W x H x C x T]    or ARRAY[1..T] OF ARRAY[1..C]  OF ARRAY[1..H]  OF ARRAY[1..W]
//  - output : [W' x H' x C' x T] or ARRAY[1..T] OF ARRAY[1..C'] OF ARRAY[1..H'] OF ARRAY[1..W']
//  - filter : [W" x H" x C x C'] or ARRAY[1..C'] OF ARRAY[1..C] OF ARRAY[1..H] OF ARRAY[1..W]
//  - output : [W' x H' x K x T]  or ARRAY[1..T] OF ARRAY[1..K]  OF ARRAY[1..H'] OF ARRAY[1..W']
//  - filter : [W" x H" x C x K]  or ARRAY[1..K] OF ARRAY[1..C] OF ARRAY[1..H] OF ARRAY[1..W]
//
// where:
//  - using ' for output and " for filter

@@ -41,7 +41,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//  - C = input channels
//      - 3 for color images, 1 for B&W images
//      - for hidden layer: dimension of activation vector for each pixel
//  - C' = output channels = dimension of activation vector for each pixel (also called N by NVidia, inconsistently)
//  - K = output channels = dimension of activation vector for each pixel (also called N by NVidia, inconsistently)
//
// For ND-convolution/pooling only second format ('cudnn') is supported.
//
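
A standalone sketch of the two storage orders described above; the index formulas follow directly from the ARRAY[..] nesting (C moves fastest in legacy mode, W moves fastest in cudnn mode, where channels are whole planes):

#include <cstdio>

// legacy mode: ARRAY[1..H] OF ARRAY[1..W] OF ARRAY[1..C] -> c moves fastest
static size_t IndexHWC(size_t c, size_t w, size_t h, size_t C, size_t W) { return c + C * (w + W * h); }
// cudnn mode:  ARRAY[1..C] OF ARRAY[1..H] OF ARRAY[1..W] -> w moves fastest
static size_t IndexCHW(size_t c, size_t w, size_t h, size_t W, size_t H) { return w + W * (h + H * c); }

int main()
{
    const size_t W = 32, H = 32, C = 3;
    // green channel (c = 1) of pixel (w = 1, h = 0)
    printf("legacy [C x W x H]: %zu\n", IndexHWC(1, 1, 0, C, W)); // 4: channels interleaved per pixel
    printf("cudnn  [W x H x C]: %zu\n", IndexCHW(1, 1, 0, W, H)); // 1025: one full 32x32 plane per channel
    return 0;
}
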
@@ -149,6 +149,41 @@ public:
    size_t MaxTempMemSizeInSamples() const { return m_maxTempMemSizeInSamples; }
    PoolKind PoolingKind() const { return m_poolKind; }

private:
    // bottomlessly expand shape to filterRank, then expand to inputRank using defaults or given 'from' values
    template <class V, typename T>
    static void FixVectorShape(size_t filterRank, size_t inputRank, V& shape, T deflt, const V& from = V())
    {
        if (shape.size() == 0)
            return; // let ComputeOutputShape() deal with this special case
        // repeat the last value until we have the same rank as the filter
        while (shape.size() < filterRank)
            shape.push_back(shape.back());
        // increase to input rank
        // If 'from' is given then clone the value from there. This is meant to be the input dimensions for convolution.
        while (shape.size() < inputRank)
            shape.push_back(shape.size() < from.size() ? from[shape.size()] : deflt);
    }
    static void FixTensorShape(size_t filterRank, size_t inputRank, TensorShape& shape, size_t deflt, const TensorShape& from = TensorShape())
    {
        auto dims = shape.GetDims();
        FixVectorShape(filterRank, inputRank, dims, deflt, from.GetDims());
        shape = TensorShape(dims);
    }
protected:
    // infer reduction dimensions if not given
    void InferReductionDims(const TensorShape& inputShape, const TensorShape& fromShape)
    {
        // If the kernel has a lower rank than the input then the remaining dimensions are to be reduced over.
        size_t filterRank = m_kernelShape.size();
        FixTensorShape(filterRank, inputShape.size(), m_kernelShape, 1, fromShape); // convolve over red dim; pool over 1
        FixTensorShape(filterRank, inputShape.size(), m_stride,      1, fromShape); // stride for reduction dims is red dim or 1
        FixVectorShape(filterRank, inputShape.size(), m_autoPad,     false);        // no padding for reduction dims
        FixTensorShape(filterRank, inputShape.size(), m_lowerPad,    0);
        FixTensorShape(filterRank, inputShape.size(), m_upperPad,    0);
        FixVectorShape(filterRank, inputShape.size(), m_sharing,     true);
    }
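
A standalone usage sketch of the inference above, with the container types simplified to std::vector<size_t>: a (5:5) kernel against a 32 x 32 x 3 input picks up the channel dimension as its reduction dim, and the stride for that dim becomes the dim itself (step over all channels at once):

#include <cstdio>
#include <vector>

// same expansion logic as above, instantiated for std::vector<size_t>
template <class V, typename T>
static void FixVectorShape(size_t filterRank, size_t inputRank, V& shape, T deflt, const V& from = V())
{
    if (shape.size() == 0)
        return;
    while (shape.size() < filterRank)
        shape.push_back(shape.back());
    while (shape.size() < inputRank)
        shape.push_back(shape.size() < from.size() ? from[shape.size()] : deflt);
}

int main()
{
    std::vector<size_t> input  = {32, 32, 3}; // W x H x C
    std::vector<size_t> kernel = {5, 5};      // user specified only the filter shape
    std::vector<size_t> stride = {1, 1};
    size_t filterRank = kernel.size();
    FixVectorShape(filterRank, input.size(), kernel, (size_t)1, input); // convolve over the reduction dim
    FixVectorShape(filterRank, input.size(), stride, (size_t)1, input); // stride for the reduction dim is the dim itself
    printf("kernel: %zu %zu %zu\n", kernel[0], kernel[1], kernel[2]);   // 5 5 3
    printf("stride: %zu %zu %zu\n", stride[0], stride[1], stride[2]);   // 1 1 3
    return 0;
}
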

protected:
    TensorShape m_kernelShape;
    TensorShape m_mapCount;

@@ -369,6 +404,8 @@ public:
        else
        {
            inputShape = GetInputSampleLayout(inputIdx);
            // infer reduction dimensions if not given
            InferReductionDims(inputShape, inputShape);
            if (!m_transpose)
            {
                outputShape = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,

@@ -385,6 +422,25 @@ public:
        // ConvolveGeometry always uses CHW.
        SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());

        // update LearnableParameter if it has 0 dimensions (to be inferred)
        // Typically this would be the #inputChannels (C).
        if (Input(0)->GetSampleLayout().GetNumElements() == 0)
        {
            // BUGBUG: Inference does not support sharing. Problem is that we have the information too late.
            // In this case, users will have to specify the correct dimensions. Good luck.
#if 1       // old style for back compat with previous results. Randomization will differ.
            if (Input(0)->GetSampleLayout().GetRank() == 2)
                Input(0)->ValidateInferInputDimsFrom(TensorShape(m_mapCount.GetNumElements(), m_kernelShape.GetNumElements()));
            else
#endif
            {
                auto weightShape = m_kernelShape.GetDims();
                for (auto outDim : m_mapCount.GetDims())
                    weightShape.push_back(outDim);
                Input(0)->ValidateInferInputDimsFrom(TensorShape(weightShape));
            }
        }
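
The two branches above produce different weight layouts for the same parameter count. A standalone sketch of the shapes ValidateInferInputDimsFrom would receive, using kernel 5 x 5 x 3 and a map count of 32 (the conv1 numbers from the config; assumed here for illustration):

#include <cstdio>
#include <vector>

int main()
{
    std::vector<size_t> kernel = {5, 5, 3}; // kernel shape after reduction-dim inference
    size_t mapCount = 32;                   // K, e.g. cMap1

    size_t kernelElems = 1;
    for (auto d : kernel)
        kernelElems *= d;

    // rank-2 branch (back compat): [K, kW * kH * C]
    printf("2D weight: [%zu, %zu]\n", mapCount, kernelElems); // [32, 75]

    // general branch: kernel dims extended by the map dims
    std::vector<size_t> weightShape = kernel;
    weightShape.push_back(mapCount);
    printf("ND weight: [%zu x %zu x %zu x %zu]\n", weightShape[0], weightShape[1], weightShape[2], weightShape[3]); // 5 x 5 x 3 x 32
    return 0;
}
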

        if (isFinalValidationPass)
        {
            if (m_convEng == nullptr)

@@ -397,10 +453,11 @@ public:
                                                    ConvolutionEngineKind::All, NodeName());
            }

            if (Input(0)->GetAsMatrixNumCols() != m_kernelShape.GetNumElements() ||
                Input(0)->GetAsMatrixNumRows() != m_convEng->Geometry()->KernelCount())
            if (Input(0)->GetSampleLayout().GetNumElements() != m_kernelShape.GetNumElements() * m_convEng->Geometry()->KernelCount())
            {
                LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [kernelCount, kernelWidth * kernelHeight * inputChannels]",
                //LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [kernelCount, kernelWidth * kernelHeight * inputChannels]",
                //           Input(0)->NodeName().c_str(), (int)m_convEng->Geometry()->KernelCount(), (int)m_kernelShape.GetNumElements());
                LogicError("Convolution weight matrix %ls should have dimension [(filter shape) x (input channels) x (output channels)]",
                           Input(0)->NodeName().c_str(), (int)m_convEng->Geometry()->KernelCount(), (int)m_kernelShape.GetNumElements());
            }
        }

@@ -489,22 +546,6 @@ public:
        return m_poolKind == PoolKind::Max;
    }

private:
    // add 'reductionDims' dimensions to 'shape', copying from 'from' or 'deflt'
    template <class V, typename T>
    static void FixVectorShape(size_t reductionDims, V& shape, T deflt)
    {
        size_t targetRank = shape.size() + reductionDims;
        if (shape.size() < targetRank)
            shape.resize(targetRank, deflt);
        // else let ComputeOutputShape() deal with the failure
    }
    static void FixTensorShape(size_t reductionDims, TensorShape& shape, size_t deflt)
    {
        auto dims = shape.GetDims();
        FixVectorShape(reductionDims, dims, deflt);
        shape = TensorShape(dims);
    }
public:
    void Validate(bool isFinalValidationPass) override
    {

@@ -519,26 +560,10 @@ public:
                       "and make sure input data layout is CHW", NodeName().c_str(), OperationName().c_str(), NodeName().c_str());
        }

        auto inputShape = GetInputSampleLayout(0);
        // make kernel shape etc. look like convolution parameters, i.e. create nominal reduction dimensions
        // In older versions, it was expected that pooling takes kernel shapes like convolution,
        // which included the reduction dim(s). It makes more sense to not require users to
        // include them for pooling, which the padding below accounts for.
        if (inputShape.size() > m_kernelShape.size()) // user specified only the pooling-area shape: add the missing dims
        {
            size_t reductionDims = inputShape.size() - m_kernelShape.size(); // number of missing dims--these are reduction dims
            FixTensorShape(reductionDims, m_kernelShape, 1); // pool over 1 in reduction dimension
            if (m_stride.GetRank() != 1)
                FixTensorShape(reductionDims, m_stride, 1); // stride for reduction dims is 1
            if (m_autoPad.size() != 1)
                FixVectorShape(reductionDims, m_autoPad, false); // no padding for reduction dims
            if (m_lowerPad.GetRank() != 1)
                FixTensorShape(reductionDims, m_lowerPad, 0);
            if (m_upperPad.GetRank() != 1)
                FixTensorShape(reductionDims, m_upperPad, 0);
            if (m_sharing.size() != 1)
                FixVectorShape(reductionDims, m_sharing, false); // dummy
        }
        const auto& inputShape = GetInputSampleLayout(0);

        // infer reduction dimensions if not given
        InferReductionDims(inputShape, TensorShape());

        auto outDims = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
                                                            m_sharing, m_autoPad, m_lowerPad, m_upperPad);

@@ -634,6 +659,10 @@ public:
        }

        auto inputShape = GetInputSampleLayout(0);

        // infer reduction dimensions if not given
        InferReductionDims(inputShape, TensorShape());

        // Same as in case of deconvolution, node input (inputShape) is really the output of the max pooling
        // and node output (outDims) is pooling input.
        auto outputShape = ConvolveGeometry::ComputeInputShape(inputShape, m_kernelShape, m_mapCount, m_stride,