addressed CR feedback; modified an e2e test to cover inferInputRankToMap
Parent: 7aa9b83f40
Commit: bd5f9cb3f4
@@ -32,13 +32,19 @@
 # Note: outDim may describe a tensor as well.
 LinearLayer {outDim, bias = true, init='heNormal', initValueScale=1, inputRank=None, mapRank=None} =
 {
-    inputShape = if BS.Constants.IsNone (inputRank) then Inferred else Repeat (inputRank, Inferred)
+    # inputRank given: number of zeroes to add to W (mapRank must not be given)
+    # mapRank given: expand W to leave exactly mapRank axes (inputRank must not be given)
+    # none given: expand W to all (same as mapRank=0)
+    inputShape =
+        if      BS.Constants.IsNone (inputRank) then Inferred # not given: one Inferred, which will get expanded
+        else if !BS.Constants.IsNone (mapRank)  then Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
+        else Repeat (inputRank, Inferred)
     W = ParameterTensor {_ConcatArrays (outDim, inputShape), init=init, initValueScale=initValueScale}
     b = ParameterTensor {outDim, initValue=0}
     outputRank = Length (_AsArray (outDim)) # support outputs with tensor layouts
     inferInputRankToMap =
-        if      BS.Constants.IsNone (mapRank)    then -1 # means not specified
-        else if !BS.Constants.IsNone (inputRank) then Fail ("'inputRank' and 'mapRank' cannot be specified at the same time.")
+        if      !BS.Constants.IsNone (inputRank) then -1 # means not specified
+        else if  BS.Constants.IsNone (mapRank)   then 0  # default to 'use all input dims'
         else mapRank
     apply (x) =
         if bias
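inputRank and mapRank are mutually exclusive by design: inputRank says how many input axes W absorbs, while mapRank says how many input axes are left over as map axes. A minimal BrainScript sketch of the intended call sites (the dimensions and variable names below are illustrative assumptions, not part of this commit):

    h0 = LinearLayer {1024} (features)               # neither given: same as mapRank=0, W expands over all input axes
    h1 = LinearLayer {1024, inputRank=1} (features)  # W absorbs exactly one input axis
    h2 = LinearLayer {1024, mapRank=1} (features)    # leave exactly one input axis un-absorbed
    #bad = LinearLayer {1024, inputRank=1, mapRank=1} (features)  # would hit the Fail() branch above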
@@ -238,7 +238,7 @@ class TimesNodeBase : public ComputationNode<ElemType>, public NumInputs<2>
     typedef ComputationNode<ElemType> Base; UsingComputationNodeMembers; using Base::OperationName; \

 public:
-    TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = 1)
+    TimesNodeBase(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = -1)
         : Base(deviceId, name), m_outputRank(outputRank), m_inferInputRankToMap(inferInputRankToMap)
     {
     }
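Changing the constructor default from 1 to -1 makes "not specified" representable: -1 keeps the legacy shape-inference behavior, while 0 and positive values are explicit requests. A hedged sketch of what the three kinds of values would mean at a BrainScript Times call site (the parameter shapes here are assumptions for illustration):

    W  = ParameterTensor {(512:Inferred)}       # second axis left to be inferred
    y0 = Times (W, x)                           # default -1: legacy inference, no rank expansion
    y1 = Times (W, x, inferInputRankToMap=0)    # 0: infer W so that all static axes of x are absorbed
    y2 = Times (W, x, inferInputRankToMap=1)    # 1: leave one axis of x un-absorbed as a map axis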
@@ -249,8 +249,8 @@ public:
         if (flags & CopyNodeFlags::copyNodeValue)
         {
             auto node = dynamic_pointer_cast<TimesNodeBase<ElemType, m_transpose>>(nodeP);
-            node->m_outputRank = m_outputRank;
-            node->m_inferInputRankToMap = m_inferInputRankToMap;
+            node->m_outputRank = m_outputRank;
+            node->m_inferInputRankToMap = m_inferInputRankToMap;
         }
     }
@@ -271,7 +271,7 @@ public:
         if (modelVersion >= CNTK_MODEL_VERSION_11)
             fstream >> m_inferInputRankToMap;
         else
-            m_inferInputRankToMap = 1;
+            m_inferInputRankToMap = -1;
     }

 private:
@@ -525,7 +525,7 @@ class TimesNode : public TimesNodeBase<ElemType, false>
     static const std::wstring TypeName() { return L"Times"; }

 public:
-    TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = 1)
+    TimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1, int inferInputRankToMap = -1)
         : Base(deviceId, name, outputRank, inferInputRankToMap)
     {
     }
@@ -558,7 +558,7 @@ class TransposeTimesNode : public TimesNodeBase<ElemType, true>
 public:
     DeclareConstructorFromConfigWithNumInputs(TransposeTimesNode);
     TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
-        : Base(deviceId, name, outputRank, /*inferInputRankToMap=*/1)
+        : Base(deviceId, name, outputRank, /*inferInputRankToMap=*/-1)
     {
     }
 };
@@ -90,8 +90,8 @@ speechTrain = [
     // parameter macros--these carry their own weight matrices
     B() = BS.Parameters.BiasParam (cellDim)

-    W(v) = BS.Parameters.WeightParam (cellDim, inputDim) * Stabilize (v, enabled=enableSelfStabilization)              // input-to-hidden
-    H(h) = BS.Parameters.WeightParam (cellDim, outputDim) * Stabilize (h, enabled=enableSelfStabilization)             // hidden-to-hidden
+    W(v) = BS.Parameters.WeightParam (cellDim, Inferred/*inputDim*/) * Stabilize (v, enabled=enableSelfStabilization)  // input-to-hidden
+    H(h) = BS.Parameters.WeightParam (cellDim, Inferred/*outputDim*/) * Stabilize (h, enabled=enableSelfStabilization) // hidden-to-hidden
     C(c) = BS.Parameters.DiagWeightParam (cellDim) .* Stabilize (c, enabled=enableSelfStabilization)                   // cell-to-hidden (note: applied elementwise)

     // note: the W(x) here are all different, they all come with their own set of weights; same for H(dh), C(dc), and B()
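Replacing the explicit inputDim/outputDim with Inferred means each instantiation of these macros gets its weight dimension from whatever tensor is actually multiplied into it during validation, which is exactly the inference path this commit changes. A short sketch of the effect (the dimension names are assumed from the surrounding config, not asserted):

    W(v) = BS.Parameters.WeightParam (cellDim, Inferred) * Stabilize (v, enabled=enableSelfStabilization)
    # used as W(x)  where x has dim featDim:    that instance's weight is inferred as (cellDim x featDim)
    # used as W(dh) where dh has dim hiddenDim: that instance's weight is inferred as (cellDim x hiddenDim)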
@@ -133,10 +133,11 @@ speechTrain = [
         else /*BS.RNNs.*/ RecurrentLSTMP (hiddenDim, cellDim=innerCellDim, /*S*/ (LSTMoutput[k-1]), inputDim=hiddenDim, enableSelfStabilization=useSelfStabilization).h

     // and add a softmax layer on top
-    W = BS.Parameters.WeightParam (labelDim, hiddenDim)
+    W = BS.Parameters.WeightParam (labelDim, Inferred)
     B = BS.Parameters.BiasParam (labelDim)

-    z = W * S(LSTMoutput[numLSTMLayers]) + B; // top-level input to Softmax
+    // (unnecessarily using explicit Times with inferInputRankToMap in order to have a test for inferInputRankToMap parameter)
+    z = Times (W, S(LSTMoutput[numLSTMLayers]), inferInputRankToMap=0) + B; // top-level input to Softmax
     ].z

     // features
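This is the e2e coverage mentioned in the commit message: W's input dimension is now Inferred, and the explicit Times with inferInputRankToMap=0 asks the node to absorb all static axes of the LSTM output when filling that dimension in. Assuming S(LSTMoutput[numLSTMLayers]) is a per-frame vector of dimension hiddenDim, the new form reduces to the old one:

    # old, fully explicit:  W = BS.Parameters.WeightParam (labelDim, hiddenDim)
    #                       z = W * S(LSTMoutput[numLSTMLayers]) + B
    # new: Inferred is resolved to hiddenDim because all input axes are mapped
    W = BS.Parameters.WeightParam (labelDim, Inferred)
    z = Times (W, S(LSTMoutput[numLSTMLayers]), inferInputRankToMap=0) + B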