added new node Where (only BS)

This commit is contained in:
Parent: 52929f9f26
Commit: 63c50f6364

Makefile | 1 +

@@ -529,6 +529,7 @@ CNTK_SRC =\
 	$(SOURCEDIR)/CNTK/SynchronousExecutionEngine.cpp \
 	$(SOURCEDIR)/CNTK/tests.cpp \
 	$(SOURCEDIR)/ComputationNetworkLib/ComputationNode.cpp \
+	$(SOURCEDIR)/ComputationNetworkLib/ReshapingNodes.cpp \
 	$(SOURCEDIR)/ComputationNetworkLib/ComputationNetwork.cpp \
 	$(SOURCEDIR)/ComputationNetworkLib/ComputationNetworkEvaluation.cpp \
 	$(SOURCEDIR)/ComputationNetworkLib/ComputationNetworkAnalysis.cpp \

@@ -98,6 +98,7 @@ SumElements(matrix, tag='') = new ComputationNode [ operation = 'SumElements' ;
 Tanh(z, tag='') = new ComputationNode [ operation = 'Tanh' ; inputs = z /*plus the function args*/ ]
 TimeReverse(vectorSequence, tag='') = new ComputationNode [ operation = 'TimeReverse' ; inputs = vectorSequence /*plus the function args*/ ]
 TransposeTimes(leftMatrix, rightMatrix, tag='') = new ComputationNode [ operation = 'TransposeTimes' ; inputs = (leftMatrix : rightMatrix) /*plus the function args*/ ]
+Where(cond, tag='') = new ComputationNode [ operation = 'Where' ; inputs = cond /*plus the function args*/ ]
 
 ##############################################################################
 # common macros

@@ -364,6 +364,22 @@ public:
         return false;
     }
 
+    // -------------------------------------------------------------------
+    // indexing
+    // -------------------------------------------------------------------
+
+    // get the matrix-column index for a given time step in a given sequence
+    size_t GetColumnIndex(const SequenceInfo& seq, size_t t) const
+    {
+        if (t > seq.GetNumTimeSteps())
+            LogicError("GetColumnIndex: t out of sequence bounds.");
+        ptrdiff_t tIn = (ptrdiff_t)t + seq.tBegin;
+        if (tIn < 0 || (size_t)tIn >= GetNumTimeSteps())
+            LogicError("GetColumnIndex: Attempted to access a time step in a portion of the sequence that is not included in the current minibatch."); // we may encounter this for truncated BPTT
+        size_t col = (size_t)tIn * GetNumParallelSequences() + seq.s;
+        return col;
+    }
+
 private:
     // we are trying to access content--this verifies that the structure is consistent
     // All frames must now be declared.

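Editor's note, not part of the commit: GetColumnIndex reflects CNTK's packed minibatch layout, in which the data matrix interleaves the S parallel sequences column-wise, so time step t of the sequence in slot s lands in column t * S + s. A minimal standalone C++ sketch of just that arithmetic, with invented dimensions:

    #include <cstddef>
    #include <cstdio>

    int main()
    {
        const size_t S = 3; // hypothetical number of parallel sequences
        const size_t T = 4; // hypothetical number of time steps in the layout
        for (size_t t = 0; t < T; t++)
            for (size_t s = 0; s < S; s++)
                std::printf("slot %zu, time %zu -> column %zu\n", s, t, t * S + s);
        return 0;
    }
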
@@ -95,6 +95,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
     else if (nodeType == OperationNameOf(TimesNode)) return New<TimesNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(TransposeDimensionsNode)) return New<TransposeDimensionsNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(TransposeTimesNode)) return New<TransposeTimesNode<ElemType>>(forward<_Types>(_Args)...);
+    else if (nodeType == OperationNameOf(WhereNode)) return New<WhereNode<ElemType>>(forward<_Types>(_Args)...);
     // legacy names we also support for back compat of model-files
     else if (nodeType == L"ColumnElementTimes") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == L"Delay") return New<PastValueNode<ElemType>>(forward<_Types>(_Args)...);

@@ -170,8 +170,9 @@
     <ClCompile Include="ComputationNetworkEvaluation.cpp" />
     <ClCompile Include="ComputationNetworkScripting.cpp" />
     <ClCompile Include="ComputationNode.cpp" />
+    <ClCompile Include="ReshapingNodes.cpp" />
     <ClCompile Include="stdafx.cpp" />
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets" />
 </Project>

@@ -37,6 +37,9 @@
     <ClCompile Include="ComputationNetworkScripting.cpp">
       <Filter>Network</Filter>
     </ClCompile>
+    <ClCompile Include="ReshapingNodes.cpp">
+      <Filter>Nodes</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\Common\Include\fileutil.h">

@@ -276,6 +276,7 @@ public:
     ComputationNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
         : m_deviceId(deviceId), m_outputNeededDuringBackprop(true), m_learningRateMultiplier(0), m_gradientInitialized(false), m_nodeName(name == L"" ? CreateUniqNodeName() : name)
     {
+        // TODO: should m_learningRateMultiplier be set to 0? Or should every node have a way to add its own say on the learning rate for all its inputs?
     }
     virtual ~ComputationNodeBase()
     {

@@ -137,20 +137,8 @@ public:
         Input(0)->GradientFor(fr) -= GradientFor(fr);
     }
 
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
-    {
-        // The NegateNode does not require its output value for computing
-        // the gradients of its input nodes
-        return false;
-    }
-
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        // The NegateNode does not require any of it's input's values for computing
-        // the gradients of its input nodes
-        UNREFERENCED_PARAMETER(childIndex);
-        return false;
-    }
+    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
 
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {

@@ -460,10 +448,7 @@ public:
         inputGradient.AddElementwiseProductOf(gradient, otherInputValue);
     }
 
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        return true;
-    }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return true; }
 
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {

@@ -689,20 +674,8 @@ public:
         sliceInputGrad += sliceOutputGrad; // here the assumption is that sliceOutputGrad is a row vector
     }
 
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
-    {
-        // The SumColumnElementsNode does not require its output value for computing
-        // the gradients of its input nodes
-        return false;
-    }
-
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        // The SumColumnElementsNode does not require any of it's input's values for computing
-        // the gradients of its input nodes
-        UNREFERENCED_PARAMETER(childIndex);
-        return false;
-    }
+    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
 
     virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
     {

@@ -230,13 +230,7 @@ public:
     {
     }
 
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        // The plus node does not require any of it's input's values for computing
-        // the gradients of its input nodes
-        UNREFERENCED_PARAMETER(childIndex);
-        return false;
-    }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
 
     /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
     {

@@ -304,13 +298,7 @@ public:
     {
     }
 
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        // The plus node does not require any of it's input's values for computing
-        // the gradients of its input nodes
-        UNREFERENCED_PARAMETER(childIndex);
-        return false;
-    }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
 
     /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
     {

@@ -232,20 +232,8 @@ public:
         }
     }
 
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
-    {
-        // The DelayedValueNode does not require its output value for computing
-        // the gradients of its input nodes
-        return false;
-    }
-
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        // The DelayedValueNode does not require any of it's input's values for computing
-        // the gradients of its input nodes
-        UNREFERENCED_PARAMETER(childIndex);
-        return false;
-    }
+    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
 
     virtual void EndForwardProp() override // called after last iteration step of ForwardProp()
     {

@@ -0,0 +1,175 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// ReshapingNodes.cpp -- collection of nodes that reshape or sub-sample matrices leading to layout changes
+//
+
+#include "Basics.h"
+#include "ReshapingNodes.h"
+#include "Matrix.h"
+#include "ComputationNode.h"
+#include "Sequences.h"
+
+#include <unordered_set>
+#include <map>
+#include <string>
+#include <vector>
+#include <stdexcept>
+#include <list>
+#include <memory>
+#include <algorithm>
+#include <assert.h>
+
+namespace Microsoft { namespace MSR { namespace CNTK {
+
+// -----------------------------------------------------------------------
+// Where(bitVector) -- extract indices of non-0 values in a sequence
+// -----------------------------------------------------------------------
+
+// TODO: move to MBLayout as a static method
+// packing algorithm
+//  - width: maximum width of structure; set to maximum over sequence lengths
+//  - inputSequences: vector of input SequenceInfo records (only seqId and GetNumTimeSteps() are used)
+//  - [out] *pMBLayout: MBLayout that describes the created packed sequence set
+//  - placement, rowAllocations: temp buffers (passed in to be able to optimize memory allocations)
+template<typename SequenceInfoVector>
+static void PackSequences(const SequenceInfoVector& inputSequences,
+                          /*ref->out*/MBLayoutPtr pMBLayout,
+                          /*temp buffer*/std::vector<std::pair<size_t, size_t>>& placement,
+                          /*temp buffer*/std::vector<size_t>& rowAllocations)
+{
+    placement.resize(inputSequences.size()); // [sequence index] result goes here (entries are invalid for gaps)
+    // determine width of MBLayout
+    size_t width = 0;
+    for (size_t i = 0; i < inputSequences.size(); i++)
+        if (inputSequences[i].seqId == GAP_SEQUENCE_ID)
+            continue;
+        else if (width < inputSequences[i].GetNumTimeSteps())
+            width = inputSequences[i].GetNumTimeSteps();
+    // allocate
+    rowAllocations.clear(); // [row] we build rows one by one
+    for (size_t i = 0; i < inputSequences.size(); i++)
+    {
+        if (inputSequences[i].seqId == GAP_SEQUENCE_ID)
+            continue;
+        let len = inputSequences[i].GetNumTimeSteps();
+        // first see if we find a row that has enough space
+        size_t s;
+        for (s = 0; s < rowAllocations.size(); s++)
+            if (rowAllocations[s] + len <= width)
+                break; // yep, it fits
+        // if we did not find an s that fits, then create a new one
+        if (s == rowAllocations.size())
+            rowAllocations.push_back(0);
+        // sequence goes to (s, rowAllocations[s])
+        placement[i] = make_pair(s, rowAllocations[s]);
+        // and allocate it
+        rowAllocations[s] += len;
+    }
+    // create MBLayout
+    pMBLayout->Init(rowAllocations.size(), width);
+    for (size_t i = 0; i < inputSequences.size(); i++)
+    {
+        if (inputSequences[i].seqId == GAP_SEQUENCE_ID)
+            continue;
+        size_t s, tBegin; tie(s, tBegin) = placement[i];
+        pMBLayout->AddSequence(inputSequences[i].seqId, s, (ptrdiff_t)tBegin, tBegin + inputSequences[i].GetNumTimeSteps());
+    }
+    // need to fill the gaps as well
+    for (size_t s = 0; s < rowAllocations.size(); s++)
+        pMBLayout->AddGap(s, (size_t)rowAllocations[s], width);
+}
+
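Editor's note, not part of the commit: PackSequences above is a greedy first-fit packer. Each sequence goes into the first row that still has room for its length, and a new row is opened only when none does. The same idea on plain length vectors, as a self-contained sketch with made-up inputs (it skips the GAP_SEQUENCE_ID handling the real code does):

    #include <cstddef>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main()
    {
        std::vector<size_t> lengths = { 4, 2, 3, 1 };     // hypothetical result-sequence lengths
        size_t width = 4;                                 // maximum over the lengths
        std::vector<size_t> rowFill;                      // [row] columns used so far
        std::vector<std::pair<size_t, size_t>> placement; // [i] -> (row, tBegin)
        for (size_t len : lengths)
        {
            size_t s = 0;
            while (s < rowFill.size() && rowFill[s] + len > width) // first-fit search
                s++;
            if (s == rowFill.size()) // nothing fits: open a new row
                rowFill.push_back(0);
            placement.push_back(std::make_pair(s, rowFill[s]));
            rowFill[s] += len;
        }
        for (size_t i = 0; i < lengths.size(); i++)
            std::printf("seq %zu (len %zu) -> row %zu, tBegin %zu\n",
                        i, lengths[i], placement[i].first, placement[i].second);
        return 0;
    }
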
+// wrapper class to pass MBLayout sequence vector to PackSequences()
+struct SequenceLengthVector
+{
+    typedef vector<vector<size_t>> SequenceVector;
+    typedef MBLayout::SequenceInfo SequenceInfo;
+    const SequenceVector& sequenceVector;     // the new index sequences (we only care about their lengths)
+    const vector<SequenceInfo>& sequenceInfo; // original sequence info (for seqId)
+    SequenceLengthVector(const vector<SequenceInfo>& sequenceInfo, const SequenceVector& sequenceVector) : sequenceInfo(sequenceInfo), sequenceVector(sequenceVector) { }
+    size_t size() const { return sequenceInfo.size(); }
+    MBLayout::SequenceInfo operator[](size_t i) const // return a descriptor of the new sequence
+    {
+        SequenceInfo seq;
+        seq.seqId = sequenceInfo[i].seqId;
+        seq.s = i;
+        seq.tBegin = 0;
+        seq.tEnd = sequenceVector[i].size();
+        return seq;
+    }
+    void operator=(const SequenceLengthVector&) = delete;
+};
+
+// TODO: Where should the MBLayout be created--in BeginForwardProp() or ForwardProp()?
+// BeginForwardProp() should generally have no access to the actual values,
+// while ForwardProp() might be too late. We may have to define the semantics here.
+// BUGBUG: This is the first node with value-dependent MBLayout. It resizes Value(), which we otherwise always do before.
+template <class ElemType>
+/*virtual*/ void WhereNode<ElemType>::ForwardPropNonLooping() /*override*/
+{
+    // gather all sequences
+    let& inMBLayout = Input(0)->GetMBLayout();
+    let& input = Input(0)->Value();
+    let& sequences = inMBLayout->GetAllSequences();
+    auto& indexSequences = m_indexSequenceBuffer;
+    if (indexSequences.size() < sequences.size())
+        indexSequences.resize(sequences.size());
+    for (size_t i = 0; i < sequences.size(); i++)
+    {
+        let& seq = sequences[i];
+        if (seq.seqId == GAP_SEQUENCE_ID)
+            continue;
+        auto& indexSequence = indexSequences[i];
+        indexSequence.clear();
+        for (size_t t = 0; t < seq.GetNumTimeSteps(); t++)
+            if (input(0, inMBLayout->GetColumnIndex(seq, t))) // this is the condition check that this node performs; the meat
+                indexSequence.push_back(t);
+        // Note: The above accesses m_value directly on the CPU, putting it into BOTH state, possibly for other consumers as well.
+    }
+    // create a new MBLayout
+    let& outMBLayout = GetMBLayout();
+    PackSequences(SequenceLengthVector(sequences, indexSequences), outMBLayout, /*temp*/m_placementBuffer, /*temp*/m_rowAllocationsBuffer);
+    // copy to output
+    vector<ElemType> buf(outMBLayout->GetNumCols(), numeric_limits<ElemType>::quiet_NaN()); // STL cannot easily avoid initializing, so we might as well init with NaN for gaps
+    for (size_t i = 0; i < sequences.size(); i++)
+    {
+        let& seq = outMBLayout->GetAllSequences()[i];
+        if (seq.seqId == GAP_SEQUENCE_ID) // gaps will keep the NaN
+            continue;
+        let& indexSequence = indexSequences[i];
+        for (size_t t = 0; t < seq.GetNumTimeSteps(); t++)
+            buf[outMBLayout->GetColumnIndex(seq, t)] = (ElemType)indexSequence[t];
+    }
+    Value().SetValue(outMBLayout->GetNumParallelSequences(), outMBLayout->GetNumTimeSteps(), Input(0)->Value().GetDeviceId(), buf.data(), MatrixFormat::matrixFormatColMajor);
+}
+
+template <class ElemType>
+/*virtual*/ void WhereNode<ElemType>::BackpropToNonLooping(size_t /*inputIndex*/) /*override*/
+{
+    // we cannot backprop through a condition
+    // Can we?
+    return;
+}
+
+template <class ElemType>
+/*virtual*/ void WhereNode<ElemType>::Validate(bool isFinalValidationPass) /*override*/
+{
+    ComputationNodeBase::Validate(isFinalValidationPass);
+    // we generate our own MBLayout
+    if (isFinalValidationPass && !Input(0)->HasMBLayout())
+        InvalidArgument("%ls %ls operation can only operate on minibatch data (which have a layout).", NodeName().c_str(), OperationName().c_str());
+    if (!m_pMBLayout)
+        m_pMBLayout = make_shared<MBLayout>(); // this generates a new layout
+    // we map scalars to scalars
+    if (isFinalValidationPass && Input(0)->GetSampleLayout().GetNumElements() != 1)
+        InvalidArgument("%ls %ls operation can only operate on scalar input.", NodeName().c_str(), OperationName().c_str());
+    SetDims(TensorShape(1), true);
+}
+
+template class WhereNode<float>;
+template class WhereNode<double>;
+
+}}}

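Editor's note, not part of the commit: stripped of the MBLayout machinery, the per-sequence core of Where is just "collect the time indices where the scalar condition is non-zero". A self-contained sketch:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // indices of non-zero entries of one condition sequence (the "meat" of WhereNode)
    static std::vector<size_t> whereIndices(const std::vector<float>& cond)
    {
        std::vector<size_t> idx;
        for (size_t t = 0; t < cond.size(); t++)
            if (cond[t] != 0)
                idx.push_back(t);
        return idx;
    }

    int main()
    {
        const std::vector<float> cond = { 0, 1, 0, 1, 1 }; // hypothetical condition sequence
        for (size_t t : whereIndices(cond))
            std::printf("%zu ", t); // prints: 1 3 4
        std::printf("\n");
        return 0;
    }
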
@@ -561,20 +561,8 @@ public:
         Input(0)->GradientFor(fr).AddToRowRepeatValuesOf(GradientFor(fr), m_numRepeat);
     }
 
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
-    {
-        // The RowRepeatNode does not require its output value for computing
-        // the gradients of its input nodes
-        return false;
-    }
-
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        // The RowRepeatNode does not require any of it's input's values for computing
-        // the gradients of its input nodes
-        UNREFERENCED_PARAMETER(childIndex);
-        return false;
-    }
+    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
 
 private:
     size_t m_numRepeat;

@@ -583,6 +571,40 @@ private:
 template class RowRepeatNode<float>;
 template class RowRepeatNode<double>;
 
+// -----------------------------------------------------------------------
+// WhereNode -- extract indices of non-0 values in a sequence
+// As this implies a runtime-value dependent reduction in dimension, it can
+// only be applied to time sequences, and not other tensor dimensions.
+// The result will have a different MBLayout reflecting the shortened result sequences.
+// -----------------------------------------------------------------------
+
+template <class ElemType>
+class WhereNode : public ComputationNodeNonLooping<ElemType>, public NumInputs<1>
+{
+    typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
+    static const std::wstring TypeName() { return L"Where"; }
+
+public:
+    DeclareConstructorFromConfigWithNumInputs(WhereNode);
+    WhereNode(DEVICEID_TYPE deviceId, const wstring& name) :
+        Base(deviceId, name)
+    {
+        m_learningRateMultiplier = 0.0f; // we cannot backprop; this will disable it
+        // TODO: This ^^ is a bit of a hack. Do we need a better mechanism for nodes to tell that they cannot backprop? We will have more of those.
+        // This might not even work; need to track down how this is inferred/propagated upwards. It is really only for LearnableParameters.
+    }
+
+    virtual void /*ComputationNodeNonLooping::*/ ForwardPropNonLooping() override;
+    virtual void /*ComputationNodeNonLooping::*/ BackpropToNonLooping(size_t /*inputIndex*/) override;
+    virtual void Validate(bool isFinalValidationPass) override;
+
+private:
+    // buffers for creating the result sequences (kept as object state to avoid memory allocations)
+    std::vector<std::vector<size_t>> m_indexSequenceBuffer;   // [sequenceIndex][t] for creating the result sequences
+    std::vector<size_t> m_rowAllocationsBuffer;               // [row] for determining new MBLayout packing
+    std::vector<std::pair<size_t, size_t>> m_placementBuffer; // [sequenceIndex] assigned location for a sequence
+};
+
 // -----------------------------------------------------------------------
 // DiagonalNode -- extract diagonal elements of a square matrix into a row vector
 // -----------------------------------------------------------------------

@@ -590,12 +612,8 @@ template class RowRepeatNode<double>;
 template <class ElemType>
 class DiagonalNode : public ComputationNodeNonLooping<ElemType>, public NumInputs<1>
 {
-    typedef ComputationNodeNonLooping<ElemType> Base;
-    UsingComputationNodeMembersBoilerplate;
-    static const std::wstring TypeName()
-    {
-        return L"Diagonal";
-    }
+    typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
+    static const std::wstring TypeName() { return L"Diagonal"; }
 
 public:
     DeclareConstructorFromConfigWithNumInputs(DiagonalNode);

@@ -642,7 +660,7 @@ public:
         m_pMBLayout = nullptr;
 
         if (isFinalValidationPass && Input(0)->HasMBLayout())
-            InvalidArgument("%ls %ls operation cannot operate on minibatch data (which have a layout)", NodeName().c_str(), OperationName().c_str());
+            InvalidArgument("%ls %ls operation cannot operate on minibatch data (which have a layout).", NodeName().c_str(), OperationName().c_str());
 
         size_t dim = Input(0)->GetAsMatrixNumCols();
         if (isFinalValidationPass && dim != Input(0)->GetAsMatrixNumRows())

@@ -106,20 +106,8 @@ public:
         }
     }
 
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
-    {
-        // The GMMLogLikelihoodNode does not require its output value for computing
-        // the gradients of its input nodes
-        return false;
-    }
-
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        // The GMMLogLikelihoodNode does not require any of it's input's values for computing
-        // the gradients of its input nodes
-        UNREFERENCED_PARAMETER(childIndex);
-        return false;
-    }
+    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
 
     void BackpropToUnnormedPrior(Matrix<ElemType>& unnormedPriorGradientValues, const Matrix<ElemType>& gradientValues,
                                  const Matrix<ElemType>& prior, const Matrix<ElemType>& posterior, Matrix<ElemType>& temp)

@@ -1454,20 +1454,8 @@ public:
         sliceInput0Grad += sliceOutputGrad;
     }
 
-    virtual bool OutputUsedInComputingInputNodesGradients() const override
-    {
-        // The DropoutNode does not require its output value for computing
-        // the gradients of its input nodes
-        return false;
-    }
-
-    virtual bool InputUsedInComputingInputNodesGradients(size_t childIndex) const override
-    {
-        // The DropoutNode does not require any of it's input's values for computing
-        // the gradients of its input nodes
-        UNREFERENCED_PARAMETER(childIndex);
-        return false;
-    }
+    virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
+    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
 
     virtual void UpdateFunctionMBSize() override
     {

@@ -852,16 +852,10 @@ void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, E
 {
     Resize(numRows, numCols);
 
-    if (IsEmpty())
+    if (!IsEmpty())
     {
-        InvalidArgument("NumRows or NumCols is 0. Nothing to copy");
-    }
-    else
-    {
-        if (!(matrixFlags & matrixFormatRowMajor)) // compatible to internal structure
-        {
+        if (!(matrixFlags & matrixFormatRowMajor)) // compatible with internal structure
             memcpy(m_pArray, pArray, GetNumElements() * sizeof(ElemType));
-        }
         else // need to transpose
         {
             auto& us = *this;

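Editor's note, not part of the commit: the memcpy fast path above is only valid when the source already matches the internal column-major storage; a row-major source must be transposed element by element, which is what the branch below this hunk does. A tiny self-contained illustration with made-up dimensions:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main()
    {
        const size_t numRows = 2, numCols = 3;
        const double rowMajor[] = { 1, 2, 3, 4, 5, 6 };  // source laid out row-major
        std::vector<double> colMajor(numRows * numCols); // internal layout is column-major
        for (size_t i = 0; i < numRows; i++)
            for (size_t j = 0; j < numCols; j++)
                colMajor[j * numRows + i] = rowMajor[i * numCols + j]; // transpose copy
        for (double v : colMajor)
            std::printf("%g ", v); // prints: 1 4 2 5 3 6
        std::printf("\n");
        return 0;
    }
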
@@ -900,9 +894,6 @@ void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, E
 template <class ElemType>
 void CPUMatrix<ElemType>::SetDiagonalValue(const ElemType v)
 {
-    if (IsEmpty())
-        LogicError("SetDiagonalValue: Matrix is empty.");
-
     if (GetNumRows() != GetNumCols())
         LogicError("SetDiagonalValue: NumRows and NumCols do not agree.");
 

@@ -124,27 +124,12 @@ private:
     void ShallowCopyFrom(const Matrix<ElemType>& other);
 
 public:
-    MatrixType GetMatrixType() const
-    {
-        return m_matrixType;
-    }
-    MatrixFormat GetFormat() const
-    {
-        return m_baseMatrix->GetFormat();
-    }
-    bool OwnBuffer() const
-    {
-        return m_baseMatrix->OwnBuffer();
-    }
+    MatrixType GetMatrixType() const { return m_matrixType; }
+    MatrixFormat GetFormat() const { return m_baseMatrix->GetFormat(); }
+    bool OwnBuffer() const { return m_baseMatrix->OwnBuffer(); }
     int GetDeviceId() const; // -1 if CPU, otherwise GPU CUDA device id
-    DEVICEID_TYPE GetPreferredDeviceId() const
-    {
-        return m_preferredDeviceId;
-    }; // -1 if CPU, otherwise GPU CUDA device id
-    void SetPreferredDeviceId(DEVICEID_TYPE preferredDeviceId)
-    {
-        m_preferredDeviceId = preferredDeviceId;
-    }
+    DEVICEID_TYPE GetPreferredDeviceId() const { return m_preferredDeviceId; }; // -1 if CPU, otherwise GPU CUDA device id
+    void SetPreferredDeviceId(DEVICEID_TYPE preferredDeviceId) { m_preferredDeviceId = preferredDeviceId; }
     // Moves matrix from device id_from to device with id_to.
     // If emptyTransfer=true, then no data is ever moved, just corresponding GPU/CPU matrices are deleted and then created using empty constructor
     void TransferFromDeviceToDevice(int id_from, int id_to, bool ismoved = false, /*if false then keep source and set location to BOTH*/ bool emptyTransfer = false, bool updatePreferredDevice = true) const;

@@ -235,12 +220,12 @@ public:
     void SetValue(const Matrix<ElemType>& deepCopyFrom, const MatrixFormat format = matrixFormatSparseCSR); // BUGBUG: default for 'format' is unexpected
     void SetValue(const size_t numRows, const size_t numCols, int deviceId, ElemType* pArray, const size_t matrixFlags = matrixFlagNormal);
     void SetValue(const size_t rIdx, const size_t cIdx, ElemType val); // set matrix sparsely
-    void SetValue(const size_t numRows, const size_t numCols, std::initializer_list<ElemType> l)
+    void SetValue(const size_t numRows, const size_t numCols, std::initializer_list<ElemType> l) // SetValue(2,3, {1,2,3, 4,5,6});
     {
         std::vector<ElemType> vals(l);
         assert(vals.size() == numRows * numCols);
         SetValue(numRows, numCols, GetDeviceId(), vals.data(), matrixFormatRowMajor);
-    } // SetValue(2,3, {1,2,3, 4,5,6});
+    }
     static ElemType MakeNan(size_t payload);
     void Invalidate()
     {

@@ -271,35 +256,35 @@ public:
     Matrix<ElemType>& AssignTransposeOf(const Matrix<ElemType>& a);
 
     Matrix<ElemType>& operator+=(const ElemType alpha);
-    Matrix<ElemType> operator+(const ElemType alpha) const;
+    Matrix<ElemType> operator+(const ElemType alpha) const;
     Matrix<ElemType>& AssignSumOf(const ElemType alpha, const Matrix<ElemType>& a);
 
     Matrix<ElemType>& operator+=(const Matrix<ElemType>& a);
-    Matrix<ElemType> operator+(const Matrix<ElemType>& a) const;
+    Matrix<ElemType> operator+(const Matrix<ElemType>& a) const;
     Matrix<ElemType>& AssignSumOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
 
     Matrix<ElemType>& operator-=(const ElemType alpha);
-    Matrix<ElemType> operator-(const ElemType alpha) const;
+    Matrix<ElemType> operator-(const ElemType alpha) const;
     Matrix<ElemType>& AssignDifferenceOf(const ElemType alpha, const Matrix<ElemType>& a);
     Matrix<ElemType>& AssignDifferenceOf(const Matrix<ElemType>& a, const ElemType alpha);
 
     Matrix<ElemType>& operator-=(const Matrix<ElemType>& a);
-    Matrix<ElemType> operator-(const Matrix<ElemType>& a) const;
+    Matrix<ElemType> operator-(const Matrix<ElemType>& a) const;
     Matrix<ElemType>& AssignDifferenceOf(const Matrix<ElemType>& a, const Matrix<ElemType>& b);
 
     Matrix<ElemType>& operator*=(const ElemType alpha);
-    Matrix<ElemType> operator*(const ElemType alpha) const;
+    Matrix<ElemType> operator*(const ElemType alpha) const;
     Matrix<ElemType>& AssignProductOf(const ElemType alpha, const Matrix<ElemType>& a);
 
-    Matrix<ElemType> operator*(const Matrix<ElemType>& a) const;
+    Matrix<ElemType> operator*(const Matrix<ElemType>& a) const;
     Matrix<ElemType>& AssignProductOf(const Matrix<ElemType>& a, const bool transposeA, const Matrix<ElemType>& b, const bool transposeB); // this = a * b
     Matrix<ElemType>& Assign1x1ProductOf(const Matrix<ElemType>& a1x1, const Matrix<ElemType>& b); // this = a * b, where a is 1x1
 
     Matrix<ElemType>& operator/=(ElemType alpha);
-    Matrix<ElemType> operator/(ElemType alpha) const;
+    Matrix<ElemType> operator/(ElemType alpha) const;
 
     Matrix<ElemType>& operator^=(ElemType alpha); // element-wise power
-    Matrix<ElemType> operator^(ElemType alpha) const; // element-wise power
+    Matrix<ElemType> operator^(ElemType alpha) const; // element-wise power
     Matrix<ElemType>& AssignElementPowerOf(const Matrix<ElemType>& a, const ElemType power);
 
     // TODO: There are several functions below that perform an in-place operation