Disabled TensorView for PlusNode::BackpropTo(), as it causes a result difference in the Image/QuickE2E test;
GetTensorShape() now adds the column dimension as one more tensor dimension.
Parent: 4ce3b1a8eb
Commit: c87e2f7550
@@ -37,6 +37,7 @@ using namespace std;
 wstring computationNodes = // TODO: use actual TypeName() here? would first need to make it a wide string; we should also extract those two methods into the base macro
     L"LearnableParameter(rows, cols, needGradient = true, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (rows : cols) ] /*plus the function args*/ ]\n"
     L"Parameter = LearnableParameter // deprecated \n"
     L"ParameterTensor(dims, needGradient = true, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
     // ^^ already works; vv untested
     L"Input(rows, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ dims = (rows) ] ; isImage = false /*plus the function args*/ ]\n" // note: naming a little inconsistent // TODO: re-test after flag change
     L"SparseInput(rows, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ dims = (rows) ] ; isImage = false /*plus the function args*/ ]\n"
@@ -457,8 +457,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 };
 static inline ImageLayoutKind ImageLayoutKindFrom(const wstring & s)
 {
-    if (s == L"CHW") return ImageLayoutKind::CHW;
-    else if (s == L"HWC") return ImageLayoutKind::HWC;
+    if (s == L"CHW" || s == L"cudnn") return ImageLayoutKind::CHW;
+    else if (s == L"HWC" || s == L"legacy") return ImageLayoutKind::HWC;
     else InvalidArgument("ImageLayoutKindFrom: Unknown ImageLayoutKind '%ls', must be 'CHW' (cudnn) or 'HWC' (CNTK legacy)", s.c_str());
 }
 static inline TensorShape ImageLayout(size_t width, size_t height, size_t channels, ImageLayoutKind imageLayoutKind)
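The hunk above accepts "cudnn" and "legacy" as aliases for the two layout names. A minimal standalone sketch of the resulting mapping (the enum and the error handling are stubbed here for illustration; the real ones live in the CNTK headers):

#include <cassert>
#include <stdexcept>
#include <string>

enum class ImageLayoutKind { CHW, HWC }; // stub of the CNTK enum

static ImageLayoutKind ImageLayoutKindFrom(const std::wstring& s)
{
    if (s == L"CHW" || s == L"cudnn")  return ImageLayoutKind::CHW;  // cudnn-style layout
    if (s == L"HWC" || s == L"legacy") return ImageLayoutKind::HWC;  // CNTK legacy layout
    throw std::invalid_argument("must be 'CHW' (cudnn) or 'HWC' (CNTK legacy)");
}

int main()
{
    assert(ImageLayoutKindFrom(L"cudnn") == ImageLayoutKind::CHW);
    assert(ImageLayoutKindFrom(L"legacy") == ImageLayoutKind::HWC);
    return 0;
}

This keeps configs such as the Image/QuickE2E one changed below valid whether they say imageLayout="HWC" or imageLayout="legacy".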
@@ -72,6 +72,7 @@ namespace Microsoft {
 size_t rows0 = Input(0)->GetNumRows(), cols0 = Input(0)->GetNumCols();
 size_t rows1 = Input(1)->GetNumRows(), cols1 = Input(1)->GetNumCols();

 #if 1//ndef ENABLE_TENSORVIEW
 // TODO: This test will go away once we switch to full tensor lib.
 if (isFinalValidationPass && !(
     (rows0 == rows1 && (Input(0)->GetMBLayout() == Input(1)->GetMBLayout() || cols0 == cols1)) || // matching size (obvious case)
@@ -81,6 +82,9 @@ namespace Microsoft {
 {
     LogicError("The Matrix dimensions in the %ls %ls operation do not match.", NodeName().c_str(), OperationName().c_str());
 }
+#else
+rows0; rows1;
+#endif

 // result has tensor shape with dimensions being the max over both
 let shape0 = GetInputSampleLayout(0);
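The comment "result has tensor shape with dimensions being the max over both" describes elementwise broadcasting: each output dimension is the maximum of the corresponding input dimensions, with missing or size-1 dimensions broadcasting. A rough sketch using plain vectors in place of TensorShape (not the actual CNTK implementation):

#include <algorithm>
#include <cstdio>
#include <vector>

// Toy stand-in for the broadcast-shape rule: pad the shorter shape with 1s,
// then take the per-dimension max. Size-1 dims thus broadcast against larger ones.
std::vector<size_t> BroadcastShape(std::vector<size_t> a, std::vector<size_t> b)
{
    size_t rank = std::max(a.size(), b.size());
    a.resize(rank, 1); // missing trailing dims act as 1
    b.resize(rank, 1);
    std::vector<size_t> result(rank);
    for (size_t k = 0; k < rank; k++)
        result[k] = std::max(a[k], b[k]);
    return result;
}

int main()
{
    auto r = BroadcastShape({ 3, 1 }, { 1, 5 }); // -> [3 x 5]
    printf("[%zu x %zu]\n", r[0], r[1]);
    return 0;
}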
@@ -204,6 +208,8 @@ namespace Microsoft {
 for (size_t i = 0; i < GetNumInputs(); i++)
 {
     size_t rank = Input(i)->GetAndValidateSampleLayout().GetRank();
+    if (!HasMBLayout()) // no MBLayout: last dim is column dimension
+        rank++;
     if (maxRank < rank)
         maxRank = rank;
 }
@@ -215,8 +221,9 @@ namespace Microsoft {
 TensorShape ComputationNodeBase::GetTensorShape(size_t rank, const FrameRange & fr) const
 {
     //GetAndValidateSampleLayout(); // no need to validate because rank comes from DetermineElementwiseTensorRank() which validates all
-    if (!HasMBLayout()) // no MBLayout: just return sample layout (if other participants have layout, tensor lib will broadcast)
-        return GetSampleLayout(); // .Pad(rank); // no need for padding
+    if (!HasMBLayout())
+        return GetSampleLayout().Append(GetSampleLayout().GetRank(), GetNumCols()); // last dim is column dimension
+    // TODO: This is not nice! Instead, if no MBLayout then have the sample layout explain the whole matrix.
     else if (fr.IsAllFrames())
     {
         // we have an MBLayout, and fr refers to the entire MB
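This hunk is the second half of the commit message: when a node has no MBLayout, GetTensorShape() now appends the matrix column count as one extra tensor dimension instead of returning the bare sample layout, e.g. a [3 x 4] sample layout on a node with 7 columns yields [3 x 4 x 7]. A toy illustration with a vector standing in for TensorShape::Append (the names here are placeholders, not the CNTK API):

#include <cstddef>
#include <vector>

// Append the column dimension as the last tensor dimension, mirroring what the
// new GetTensorShape() does for nodes without an MBLayout.
static std::vector<size_t> AppendColumnDim(std::vector<size_t> sampleLayout, size_t numCols)
{
    sampleLayout.push_back(numCols); // last dim is the column dimension
    return sampleLayout;
}

int main()
{
    auto shape = AppendColumnDim({ 3, 4 }, 7); // -> [3 x 4 x 7]
    return shape.size() == 3 ? 0 : 1;
}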
@@ -26,8 +26,8 @@
 #include <sstream>
 #include <iostream>

-#define ENABLE_TENSORVIEW // flip this switch once the tensor lib is confirmed to be working
-#define ENABLE_BROADCASTING_ELEMENTTIMES // if set then ScaleNode and Row/ColumnElementTimes are redirected to ElementTimes
+#define ENABLE_TENSORVIEW // flip this switch once the tensor lib is confirmed to be working
+#define ENABLE_BROADCASTING_ELEMENTTIMES // if set then ScaleNode and Row/ColumnElementTimes are redirected to ElementTimes

 #define DEFAULT_HIDDEN_ACTIVATION 0.1
@@ -43,7 +43,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

 virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & fr) override
 {
-#ifdef ENABLE_TENSORVIEW
+#if 0//def ENABLE_TENSORVIEW
     size_t rank = DetermineElementwiseTensorRank();
     auto gradient = GradientTensorFor(rank, fr);
     auto inputGradient = Input(inputIndex)->GradientTensorFor(rank, fr.AllowBroadcast());
@@ -28,10 +28,11 @@ train = [
 convW = Parameter(outMap, inWCount, init="uniform", initValueScale=wScale, initOnCPUOnly=false)
 conv = Convolution(convW, inp, kW, kH, outMap, hStride, vStride, zeroPadding=false)
 convB = Parameter(outMap, 1, init="fixedValue", value=bValue)
+#convB = ParameterTensor((1 : 1 : outMap : 1/*col dim*/), init="fixedValue", value=bValue)
 convPlusB = Plus(conv, convB);
 out = RectifiedLinear(convPlusB);
 ]

 DNNSigmoidLayer(inDim, outDim, x, parmScale) = [ // Sigmoid non-linearity
 W = Parameter(outDim, inDim, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
 b = Parameter(outDim, 1, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
@@ -39,7 +40,7 @@ train = [
 z = Plus(t, b)
 out = Sigmoid(z)
 ]

 DNNLayer(inDim, outDim, x, parmScale) = [ // no non-linearity, as input for SoftMax
 W = Parameter(outDim, inDim, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
 b = Parameter(outDim, 1, init="uniform", initValueScale=parmScale, initOnCPUOnly=false)
@@ -50,8 +51,8 @@ train = [
 imageW = 28
 imageH = 28
 labelDim = 10

-features = ImageInput(imageW, imageH, 1, imageLayout="HWC", tag="feature")
+features = ImageInput(imageW, imageH, 1, imageLayout="legacy", tag="feature")
 featScale = Constant(0.00390625)
 featScaled = Scale(featScale, features)
 labels = Input(labelDim, tag="label")
@@ -94,7 +95,7 @@ train = [
 # DNNSigmoidLayer and DNNLayer are defined in Macros.ndl
 h1 = DNNSigmoidLayer(512, h1Dim, pool2, 1).out
 ol = DNNLayer(h1Dim, labelDim, h1, 1).out

 ce = CrossEntropyWithSoftmax(labels, ol, tag="criterion")
 err = ErrorPrediction(labels, ol, tag="eval")
 outputNodes = ol