renamed several methods/names to more familiar or concise names:

EvaluateThisNode() -> ForwardProp(); ComputeInputPartial() -> BackpropTo(); SaveToFile() -> Save() (where else to save? In the bank?); LoadFromFile() -> Load(); ImageLayout -> TensorShape; m_imageLayout -> m_sampleLayout
2015-12-04 16:08:30 -08:00 · 2015-12-04 16:08:30 -08:00 · d3765547f9
--- a/Common/Include/DataTensor.h
+++ b/Common/Include/DataTensor.h
@ -13,13 +13,13 @@
 namespace Microsoft { namespace MSR { namespace CNTK {

    // -----------------------------------------------------------------------
-    // ImageLayout -- tensor descriptor to describe the inner layout of a data vector that holds a tensor
+    // TensorShape -- tensor descriptor to describe the inner layout of a data vector that holds a tensor
    //
    // Minibatches are stored as Matrices. While the column dimension represents multiple data vectors, and may have
    // an inner structure (time, parallel sequences) described by the MBLayout, the row dimension represents data
    // vectors that hold tensors of data.
    //
-    // The ImageLayout describes the inner tensor structure of these vectors, as a column-major tensor of arbitrary number of dimensions.
+    // The TensorShape describes the inner tensor structure of these vectors, as a column-major tensor of an arbitrary number of dimensions.
    //
    // Specifically, when the image is an image, then this is a 3-dimensional tensor with dimensions ( channels, width, height ),
    // which represents the column-major interpretation of a transposed row-by-row-scanned image where each pixel stores (R,G,B) as a float3.
@ -29,25 +29,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {

    // TODO: really support lengths other than 3, e.g. fix serialization code to handle variable-length descriptors
    // TODO: rename to DataLayout
-    // TODO: must match ComputationNode::m_numRows; or, rather, the ImageLayout is how m_numRows is stored??
+    // TODO: must match ComputationNode::m_numRows; or, rather, the TensorShape is how m_numRows is stored??
    // TODO: move this elsewhere, maybe a separate header Tensors.h?
-    struct ImageLayout
+    struct TensorShape
    {
    public:
        // BUGBUG: This initialization is not correct. This must match GetNumRows(). We probably cannot have all three members here.
        // Idea: We could construct this thing with a ref to the enclosing ComputationNode, and replace 'width' by an expression.
-        ImageLayout() : m_tensorDims(3, 1) { }
+        TensorShape() : m_tensorDims(3, 1) { }
        template<class VEC>
-        ImageLayout(const VEC & dims) { m_tensorDims.reserve(dims.size()); m_tensorDims.assign(dims.begin(), dims.end()); }
-        ImageLayout(std::vector<size_t> && dims) : m_tensorDims(std::move(dims)) { }
+        TensorShape(const VEC & dims) { m_tensorDims.reserve(dims.size()); m_tensorDims.assign(dims.begin(), dims.end()); }
+        TensorShape(std::vector<size_t> && dims) : m_tensorDims(std::move(dims)) { }

        void Invalidate() { m_tensorDims.assign(3, SIZE_MAX); } // TODO: clean up the valid/invalid situation (this is currently done inconsistently)

        // TODO: need move constructor/assignment?

-        bool operator==(const ImageLayout & other) const { return m_tensorDims == other.m_tensorDims; }
+        bool operator==(const TensorShape & other) const { return m_tensorDims == other.m_tensorDims; }

-        void SaveToFile(File& fstream) const
+        void Save(File& fstream) const
        {
 #if 1
            // saving as 32-bit ints. This allows to continue to support the old format (size_t W, H, C)
@ -55,7 +55,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            for (auto dim : m_tensorDims)
            {
                if (dim > UINT32_MAX)
-                    LogicError("ImageLayout::SaveToFile(): Tensor dimension out of bounds (> 4G).");
+                    LogicError("TensorShape::Save(): Tensor dimension out of bounds (> 4G).");
                fstream << (uint32_t)dim;
            }
 #else
@ -64,7 +64,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            fstream << m_tensorDims[1] << m_tensorDims[2] << m_tensorDims[0]; // currently stored in order W, H, C. TODO: general tensor format will be different
 #endif
        }
-        void LoadFromFile(File& fstream)
+        void Load(File& fstream)
        {
 #if 1
            // format: uint32_t n, dim[0], dim[1], ..., dim[n-1]
@ -113,14 +113,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {

    // When constructing an image tensor with the usual W, H, C format, use the following function instead.
    // This will sort the three parameters into the correct order.
-    static inline ImageLayout ImageLayoutWHC(size_t width, size_t height, size_t channels)
+    static inline TensorShape ImageLayoutWHC(size_t width, size_t height, size_t channels)
    {
-        return ImageLayout(std::vector<size_t> { channels, width, height });
+        return TensorShape(std::vector<size_t> { channels, width, height });
    }
    // and use this one when the data is a plain vector
-    static inline ImageLayout ImageLayoutVector(size_t n)
+    static inline TensorShape ImageLayoutVector(size_t n)
    {
-        return ImageLayout(std::vector<size_t> { 1, 1, n });    // for now storing it as a 3D object as well  --TODO: fix this
+        return TensorShape(std::vector<size_t> { 1, 1, n });    // for now storing it as a 3D object as well  --TODO: fix this
    }
    // TODO: we need a constructor from config; that will generalize

--- a/Common/Include/Sequences.h
+++ b/Common/Include/Sequences.h
@ -63,7 +63,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // In the special case of frame randomization, every frame is stored as a single-frame sequence.
    //
    // If we describe this in terms of tensors, a data matrix with sample layout (I,J,K) and
-    // MBLayout (S,T) can be interpreted as ImageLayout(I,J,K,T,S) (note that S is last, not T).
+    // MBLayout (S,T) can be interpreted as TensorShape(I,J,K,T,S) (note that S is last, not T).
    //
    // Sequences can also be concatenated, to fill the space better. For this case,
    // this object stores about every frame whether it is at the start or end of a sequence.
@ -91,7 +91,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // Contract between ComputationNode, ComputationNetwork, and MBLayout:
    //  - if a node has no MBLayout, m_{function,gradient}Values are not samples (they are not activations or input data), but e.g. model parameters
    //  - ComputationNode::GetNumCols() == MBLayout::GetNumTimeSteps() * MBLayout::GetNumParallelSequences()
-    //  - ComputationNetwork ensures that m_{function,gradient}Values are allocated correctly before calling EvaluateThisNode() on a node
+    //  - ComputationNetwork ensures that m_{function,gradient}Values are allocated correctly before calling ForwardProp() on a node
    // NOTE: This class represents an ongoing abstraction of an originally distributed/code-duped way of defining and accessing the MB layout.
    //       Some code below represents the actual use cases I encountered. Not all are, I believe, needed to be as they are; this class could be simplified/streamlined much further.
    //       Some wackiness below is explained by this.
--- a/DataReader/HTKMLFReader/HTKMLFWriter.cpp
+++ b/DataReader/HTKMLFReader/HTKMLFWriter.cpp
@ -133,7 +133,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            
            assert(outputData.GetNumRows()==dim); dim;

-            SaveToFile(outFile,outputData);
+            Save(outFile,outputData);
        }

        outputFileIndex++;
@ -142,7 +142,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    }

    template<class ElemType>
-    void HTKMLFWriter<ElemType>::SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData)
+    void HTKMLFWriter<ElemType>::Save(std::wstring& outputFile, Matrix<ElemType>& outputData)
    {
        msra::dbn::matrix output;
        output.resize(outputData.GetNumRows(),outputData.GetNumCols());
--- a/DataReader/HTKMLFReader/HTKMLFWriter.h
+++ b/DataReader/HTKMLFReader/HTKMLFWriter.h
@ -25,7 +25,7 @@ private:
    std::map<std::wstring,size_t> outputNameToTypeMap;
    unsigned int sampPeriod;
    size_t outputFileIndex;
-    void SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
+    void Save(std::wstring& outputFile, Matrix<ElemType>& outputData);
    ElemType * m_tempArray;
    size_t m_tempArraySize;

--- a/DataReader/Kaldi2Reader/HTKMLFWriter.cpp
+++ b/DataReader/Kaldi2Reader/HTKMLFWriter.cpp
@ -11,16 +11,7 @@

 #include "htkfeatio.h"                  // for reading HTK features

-//#ifndef __unix__
 #include "ssematrix.h"
-//#endif
-//#include "msra_mgram.h"                 // for unigram scores of ground-truth path in sequence training
-
-//#include "rollingwindowsource.h"        // minibatch sources
-//#include "utterancesource.h"
-//#include "readaheadsource.h"
-//#include "chunkevalsource.h"
-//#include "minibatchiterator.h"

 #define DATAWRITER_EXPORTS  // creating the exports here
 #include "DataWriter.h"
@ -224,7 +215,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    }

    template<class ElemType>
-    void HTKMLFWriter<ElemType>::SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData)
+    void HTKMLFWriter<ElemType>::Save(std::wstring& outputFile, Matrix<ElemType>& outputData)
    {
        msra::dbn::matrix output;
        output.resize(outputData.GetNumRows(),outputData.GetNumCols());
--- a/DataReader/Kaldi2Reader/HTKMLFWriter.h
+++ b/DataReader/Kaldi2Reader/HTKMLFWriter.h
@ -25,7 +25,7 @@ private:
    std::map<std::wstring,size_t> outputNameToTypeMap;
    unsigned int sampPeriod;
    size_t outputFileIndex;
-    void SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
+    void Save(std::wstring& outputFile, Matrix<ElemType>& outputData);
    void SaveToKaldiFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
    ElemType * m_tempArray;
    size_t m_tempArraySize;
--- a/DataReader/KaldiReader/HTKMLFWriter.cpp
+++ b/DataReader/KaldiReader/HTKMLFWriter.cpp
@ -208,7 +208,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    }

    template<class ElemType>
-    void HTKMLFWriter<ElemType>::SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData)
+    void HTKMLFWriter<ElemType>::Save(std::wstring& outputFile, Matrix<ElemType>& outputData)
    {
        msra::dbn::matrix output;
        output.resize(outputData.GetNumRows(),outputData.GetNumCols());
--- a/DataReader/KaldiReader/HTKMLFWriter.h
+++ b/DataReader/KaldiReader/HTKMLFWriter.h
@ -25,7 +25,7 @@ private:
    std::map<std::wstring,size_t> outputNameToTypeMap;
    unsigned int sampPeriod;
    size_t outputFileIndex;
-    void SaveToFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
+    void Save(std::wstring& outputFile, Matrix<ElemType>& outputData);
    void SaveToKaldiFile(std::wstring& outputFile, Matrix<ElemType>& outputData);
    ElemType * m_tempArray;
    size_t m_tempArraySize;
--- a/DataReader/LMSequenceReader/SequenceWriter.cpp
+++ b/DataReader/LMSequenceReader/SequenceWriter.cpp
@ -116,14 +116,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
            wstring outFile = outputFiles[outputName];

-            SaveToFile(outFile, outputData, idx4word[iter->first], nBests[outputName]);
+            Save(outFile, outputData, idx4word[iter->first], nBests[outputName]);
        }

        return true;
    }

    template<class ElemType>
-    void LMSequenceWriter<ElemType>::SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest)
+    void LMSequenceWriter<ElemType>::Save(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest)
    {
        size_t nT = outputData.GetNumCols();
        size_t nD = min(idx2wrd.size(), outputData.GetNumRows());
--- a/DataReader/LMSequenceReader/SequenceWriter.h
+++ b/DataReader/LMSequenceReader/SequenceWriter.h
@ -52,7 +52,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        map<wstring, int> nBests;
        bool compare_val(const ElemType& first, const ElemType& second);

-        void SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest = 1);
+        void Save(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest = 1);

        void ReadLabelInfo(const wstring & vocfile,
            map<string, int> & word4idx,
--- a/DataReader/LUSequenceReader/LUSequenceWriter.cpp
+++ b/DataReader/LUSequenceReader/LUSequenceWriter.cpp
@ -102,14 +102,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Matrix<ElemType>& outputData = *(static_cast<Matrix<ElemType>*>(iter->second));
            wstring outFile = outputFiles[outputName];
            
-            SaveToFile(outFile,outputData, idx4word[outputName], nBests[outputName]);
+            Save(outFile,outputData, idx4word[outputName], nBests[outputName]);
        }

        return true;
    }

    template<class ElemType>
-    void LUSequenceWriter<ElemType>::SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest)
+    void LUSequenceWriter<ElemType>::Save(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest)
    {
        size_t nT = outputData.GetNumCols();
        size_t nD = min(idx2wrd.size(), outputData.GetNumRows());
--- a/DataReader/LUSequenceReader/LUSequenceWriter.h
+++ b/DataReader/LUSequenceReader/LUSequenceWriter.h
@ -27,7 +27,7 @@ private:
    map<wstring, int> nBests;
    bool compare_val(const ElemType& first, const ElemType& second);

-    void SaveToFile(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest = 1);
+    void Save(std::wstring& outputFile, const Matrix<ElemType>& outputData, const map<int, string>& idx2wrd, const int& nbest = 1);

    void ReadLabelInfo(const wstring & vocfile, 
            map<string, int> & word4idx,
--- a/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Programmer_Chapter.lyx
+++ b/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Programmer_Chapter.lyx
@ -2794,7 +2794,7 @@ CEMATRIX, const std::wstring name = L"")

 \begin_layout Plain Layout

-    LoadFromFile(fstream, modelVersion, deviceId);         
+    Load(fstream, modelVersion, deviceId);         
 \end_layout

 \begin_layout Plain Layout
@ -2852,10 +2852,10 @@ The first constructor creates the node based on a deviceId and a node name.
 The second constructor creates a node from a file.
 It passes in a file stream to read data from and a modelVersion value to
 control how to load the file, in addition to the deviceId and node name.
- In this example, the actual code to load the node is in the LoadFromFile(fstrea
+ In this example, the actual code to load the node is in the Load(fstrea
 m, modelVersion, deviceId) function implemented in the base class.
 For some complicated nodes with additional node states, you need to implement
- your own LoadFromFile function for your newly added node.
+ your own Load function for your newly added node.
 \end_layout

 \begin_layout Standard
@ -3241,7 +3241,7 @@ Forward Evaluation

 \begin_layout Standard
 For each node type you need to implement two forward computation functions
- EvaluateThisNode(), which evaluate the whole minibatch, and EvaluateThisNode(co
+ ForwardProp(), which evaluates the whole minibatch, and ForwardProp(co
 nst size_t timeIdxInSeq), which is used in the recurrent networks to evaluate
 the timeIdxInSeq-th sample for all the sequences in the minibatch.
 \end_layout
@ -3253,7 +3253,7 @@ status open

 \begin_layout Plain Layout

-virtual void EvaluateThisNode()           
+virtual void ForwardProp()           
 \end_layout

 \begin_layout Plain Layout
@ -3263,7 +3263,7 @@ virtual void EvaluateThisNode()

 \begin_layout Plain Layout

-    EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->
+    ForwardPropS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->
 FunctionValues());
 \end_layout

@ -3278,7 +3278,7 @@ FunctionValues());

 \begin_layout Plain Layout

-virtual void EvaluateThisNode(const size_t timeIdxInSeq)           
+virtual void ForwardProp(const size_t timeIdxInSeq)           
 \end_layout

 \begin_layout Plain Layout
@ -3301,7 +3301,7 @@ imeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);

 \begin_layout Plain Layout

-    EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1
+    ForwardPropS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1
 Value);         
 \end_layout

@ -3316,7 +3316,7 @@ Value);

 \begin_layout Plain Layout

-static void WINAPI EvaluateThisNodeS(Matrix<ElemType>& functionValues, const
+static void WINAPI ForwardPropS(Matrix<ElemType>& functionValues, const
 Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
 \end_layout

@ -3351,14 +3351,14 @@ status open

 \begin_layout Plain Layout

-EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>&
+ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>&
 input0, const Matrix<ElemType>& input1)
 \end_layout

 \end_inset

 which contains the actual evaluation code.
- In the EvaluateThisNode(const size_t timeIdxInSeq) function you will notice
+ In the ForwardProp(const size_t timeIdxInSeq) function you will notice
 the calls to the ColumnSlice function.
 As the name suggests, this function returns a column slice of a matrix.
 \end_layout
@ -3379,9 +3379,9 @@ Gradient Computation

 \begin_layout Standard
 Similar to the forward computation, for each node type you need to implement
- two gradient computation functions ComputeInputPartial(const size_t inputIndex)
+ two gradient computation functions BackpropTo(const size_t inputIndex)
 , which computes the gradient for the whole minibatch with regard to the
- inputIndex-th input, and ComputeInputPartial(const size_t inputIndex, const
+ inputIndex-th input, and BackpropTo(const size_t inputIndex, const
 size_t timeIdxInSeq), which is used in the recurrent networks to compute
 the gradient of the timeIdxInSeq-th sample for all the sequences in the
 minibatch.
@ -3394,7 +3394,7 @@ status open

 \begin_layout Plain Layout

-virtual void ComputeInputPartial(const size_t inputIndex)
+virtual void BackpropTo(const size_t inputIndex)
 \end_layout

 \begin_layout Plain Layout
@ -3435,7 +3435,7 @@ virtual void ComputeInputPartial(const size_t inputIndex)

 \begin_layout Plain Layout

-        ComputeInputPartialLeft(Inputs(1)->FunctionValues(), Inputs(0)->Gradient
+        BackpropToLeft(Inputs(1)->FunctionValues(), Inputs(0)->Gradient
 Values(), GradientValues());             
 \end_layout

@ -3456,7 +3456,7 @@ Values(), GradientValues());

 \begin_layout Plain Layout

-    ComputeInputPartialRight(Inputs(0)->FunctionValues(), Inputs(1)->GradientVal
+    BackpropToRight(Inputs(0)->FunctionValues(), Inputs(1)->GradientVal
 ues(), GradientValues());             
 \end_layout

@ -3477,7 +3477,7 @@ ues(), GradientValues());

 \begin_layout Plain Layout

-virtual void ComputeInputPartial(const size_t inputIndex, const size_t timeIdxIn
+virtual void BackpropTo(const size_t inputIndex, const size_t timeIdxIn
 Seq)         
 \end_layout

@ -3531,7 +3531,7 @@ e(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);

 \begin_layout Plain Layout

-        ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(),
+        BackpropToLeft(sliceInput1Value, Inputs(0)->GradientValues(),
 sliceOutputGrad);
 \end_layout

@ -3564,7 +3564,7 @@ Seq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);

 \begin_layout Plain Layout

-        ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad,
+        BackpropToRight(Inputs(0)->FunctionValues(), sliceInput1Grad,
 sliceOutputGrad);
 \end_layout

@ -3585,7 +3585,7 @@ Seq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);

 \begin_layout Plain Layout

-static void WINAPI ComputeInputPartialLeft(const Matrix<ElemType>& inputFunction
+static void WINAPI BackpropToLeft(const Matrix<ElemType>& inputFunction
 Values, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientV
 alues)
 \end_layout
@ -3613,7 +3613,7 @@ es, inputFunctionValues);

 \begin_layout Plain Layout

-static void WINAPI ComputeInputPartialRight(const Matrix<ElemType>& inputFunctio
+static void WINAPI BackpropToRight(const Matrix<ElemType>& inputFunctio
 nValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>&
 gradientValues)           
 \end_layout
@ -3650,7 +3650,7 @@ status open

 \begin_layout Plain Layout

-ComputeInputPartialLeft(const Matrix<ElemType>& inputFunctionValues, Matrix<Elem
+BackpropToLeft(const Matrix<ElemType>& inputFunctionValues, Matrix<Elem
 Type>& inputGradientValues, const Matrix<ElemType>& gradientValues)
 \end_layout

@ -3666,7 +3666,7 @@ status open

 \begin_layout Plain Layout

-ComputeInputPartialRight(const Matrix<ElemType>& inputFunctionValues, Matrix<Ele
+BackpropToRight(const Matrix<ElemType>& inputFunctionValues, Matrix<Ele
 mType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
 \end_layout

@ -4488,7 +4488,7 @@ if (PreCompute(net,trainSetDataReader, FeatureNodes,labelNodes,inputMatrices)

 \begin_layout Plain Layout

-    net.SaveToFile(GetModelNameForEpoch(int(startEpoch)-1));            
+    net.Save(GetModelNameForEpoch(int(startEpoch)-1));            
 
 \end_layout

--- a/MachineLearning/CNTK/CNTK.cpp
+++ b/MachineLearning/CNTK/CNTK.cpp
@ -108,7 +108,7 @@ void DumpNodeInfo(const ConfigParameters& config)
    bool printValues = config(L"printValues", true);

    ComputationNetwork net(-1);  //always use CPU
-    net.LoadFromFile<ElemType>(modelPath);
+    net.Load<ElemType>(modelPath);
    net.DumpNodeInfoToFile(nodeName, printValues, outputFile, nodeNameRegexStr);
 }

@ -530,11 +530,11 @@ void  DoParameterSVD(const ConfigParameters& config)


    ComputationNetwork net(deviceID);
-    net.LoadFromFile<ElemType>(modelPath);
+    net.Load<ElemType>(modelPath);

    net.PerformSVDecomposition<ElemType>(svdconfig, AlignedSize);
    if (!outputmodelPath.empty())
-        net.SaveToFile(outputmodelPath);
+        net.Save(outputmodelPath);

 }

@ -813,7 +813,7 @@ public:
        if (!m_net || m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load   --TODO: why all these options?
        {
            auto net = make_shared<ComputationNetwork>(m_deviceId);
-            net->LoadFromFile<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
+            net->Load<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
            m_net = net;
        }
        m_net->ResetEvalTimeStamp();
@ -1372,7 +1372,7 @@ void DoConvertFromDbn(const ConfigParameters& config)

    auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
    ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
-    net->SaveToFile(modelPath);
+    net->Save(modelPath);
 }

 // do topological plot of computation network 
@ -1407,7 +1407,7 @@ void DoTopologyPlot(const ConfigParameters& config)
    }

    ComputationNetwork net(-1);
-    net.LoadFromFile<ElemType>(modelPath);
+    net.Load<ElemType>(modelPath);
    net.PlotNetworkTopology(outdot);
    fprintf(stderr, "Output network description in dot language to %S\n", outdot.c_str());

--- a/MachineLearning/CNTK/ModelEditLanguage.cpp
+++ b/MachineLearning/CNTK/ModelEditLanguage.cpp
@ -125,7 +125,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
        std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams);

        auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
-        cn->LoadFromFile<ElemType>(params[0]);
+        cn->Load<ElemType>(params[0]);
        OverrideModelNameAndSetDefaultModel(cn);
    }
    else if (EqualInsensitive(name, "LoadModelWithName"))
@ -137,7 +137,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
        std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams);

        auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
-        cn->LoadFromFile<ElemType>(params[1]);
+        cn->Load<ElemType>(params[1]);
        OverrideModelNameAndSetDefaultModel(cn, params[0]);
    }
    else if (EqualInsensitive(name, "LoadNDLSnippet"))
@ -188,7 +188,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
        // validate the network before we save it out
        ProcessNDLScript(m_netNdlDefault, ndlPassAll, true);

-        cn->SaveToFile(fileName);
+        cn->Save(fileName);
    }
    else if (EqualInsensitive(name, "SaveModel"))
    {
@ -207,7 +207,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa

        // validate and finish the second pass through NDL if any in-line NDL was defined
        ProcessNDLScript(netNdl, ndlPassAll, true);
-        netNdl->cn->SaveToFile(fileName);
+        netNdl->cn->Save(fileName);
    }
    else if (EqualInsensitive(name, "SetDefaultModel"))
    {
--- a/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/CompositeComputationNodes.h
@ -43,7 +43,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            if (inputIndex > 1)
                InvalidArgument("Parallel operation only takes two input.");
@ -60,20 +60,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Matrix<ElemType> tmpMat(m_deviceId);
            tmpMat.AssignRowSliceValuesOf(GradientValues(), startidx, nrows);

-            ComputeInputPartialS(tmpMat, child->GradientValues());
+            BackpropToS(tmpMat, child->GradientValues());
        }

-        /*TODO: merge with call site*/void ComputeInputPartialS(Matrix<ElemType>& gradientValues, Matrix<ElemType>& inputGradientValues)
+        /*TODO: merge with call site*/void BackpropToS(Matrix<ElemType>& gradientValues, Matrix<ElemType>& inputGradientValues)
        {
            inputGradientValues += gradientValues;
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
-            EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
+            ForwardPropS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
        }

-        /*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, Matrix<ElemType>& inputFunctionValues0, Matrix<ElemType>& inputFunctionValues1)
+        /*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, Matrix<ElemType>& inputFunctionValues0, Matrix<ElemType>& inputFunctionValues1)
        {
            size_t rows0 = inputFunctionValues0.GetNumRows(), cols0 = inputFunctionValues0.GetNumCols();
            size_t rows1 = inputFunctionValues1.GetNumRows(), cols1 = inputFunctionValues1.GetNumCols();
@ -142,7 +142,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            SetDims(nInput0 + nInput1, nT);
            UpdateFunctionValuesSize();

-            EvaluateThisNode(FrameRange(m_pMBLayout));
+            ForwardProp(FrameRange(m_pMBLayout));

            /// check with expected values
            if (!ISCLOSE(FunctionValues()(0, 0), 1, EPSILON) ||
@ -167,8 +167,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            GradientValues()(3, 1) = 5;
            GradientValues()(3, 2) = 6;

-            ComputeInputPartial(0, FrameRange(m_pMBLayout));
-            ComputeInputPartial(1, FrameRange(m_pMBLayout));
+            BackpropTo(0, FrameRange(m_pMBLayout));
+            BackpropTo(1, FrameRange(m_pMBLayout));

            /// check with expected values
            if (!ISCLOSE(Inputs(0)->GradientValues()(0, 0), 1, EPSILON)
@ -222,16 +222,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        virtual bool RequiresPreCompute() const override { return true; }

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);
            fstream << m_hasComputed;
            fstream << FunctionValues();   // TODO: why serialize if not yet computed?
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
            fstream >> m_hasComputed;
            LoadFunctionValues(fstream);
         }
@ -302,9 +302,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_numSamples(SIZE_MAX)
        { }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
            m_numSamples = SIZE_MAX;
        }
    
@ -334,7 +334,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void ComputeInputPartialNonLooping(size_t /*inputIndex*/) override
+        virtual void BackpropToNonLooping(size_t /*inputIndex*/) override
        {
            //LogicError("Mean operation should not be involved in the gradient calculation.");
        }
@ -384,10 +384,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                UpdateFunctionValuesSize();
                FunctionValues().SetValue(0);
            }
-            // no else branch because EvaluateThisNodeNonLooping() already leaves a valid mean in m_functionValues
+            // no else branch because ForwardPropNonLooping() already leaves a valid mean in m_functionValues
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            if (m_hasComputed)
@ -475,7 +475,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            if (m_hasComputed)
@ -552,21 +552,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange &) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange &) override
        {
            InvalidArgument("PerDimMeanVarNormalizationNode should only be called in the evaluation stage.");
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            //only feature (input0) and output needs to be sliced
            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-            EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues());
+            ForwardPropS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues());
        }

-        /*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0,
+        /*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0,
                                             const Matrix<ElemType>& input1, const Matrix<ElemType>& input2)
        {
 #if DUMPOUTPUT
@ -662,22 +662,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange &) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange &) override
        {
            InvalidArgument("PerDimMeanVarDeNormalizationNode should only be called in the evaluation stage.");
        }

        //(feature-mean).*InvStdDev
-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            //only feature (input0) and output needs to be sliced
            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-            EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues());
+            ForwardPropS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues());
        }

-        /*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0,
+        /*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0,
                                             const Matrix<ElemType>& input1, const Matrix<ElemType>& input2)
        {
    #if DUMPOUTPUT
@ -795,16 +795,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        virtual bool HasComputed() const = 0;
        virtual void MarkComputed(const bool hasComputed) = 0;

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);
            fstream << m_hasComputed;
            fstream << FunctionValues();
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
            fstream >> m_hasComputed;
            LoadFunctionValues(fstream);
        }
@ -874,7 +874,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        virtual bool HasComputed() const { return m_hasComputed; }
        virtual void MarkComputed(const bool hasComputed) { m_hasComputed = hasComputed; }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            assert(inputIndex == 0); inputIndex;
            VerifyDims(Inputs(0));
@ -888,7 +888,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            // BUGBUG: We must flip the layout, too.
            if (GetNumParallelSequences() != 1)
@ -949,7 +949,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            SetDims(nOutput, nT);
            UpdateFunctionValuesSize();
            Inputs(0)->FunctionValues().TransferToDeviceIfNotThere( m_deviceId, true);
-            EvaluateThisNode(FrameRange(m_pMBLayout));
+            ForwardProp(FrameRange(m_pMBLayout));

            /// check with expected values
            if (!ISCLOSE(FunctionValues()(0, 0), 3, EPSILON) ||
@ -970,7 +970,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            GradientValues()(0, 2) = 3;
            GradientValues().TransferToDeviceIfNotThere( m_deviceId, true);

-            ComputeInputPartial(0, FrameRange(m_pMBLayout));
+            BackpropTo(0, FrameRange(m_pMBLayout));

            /// check with expected values
            if (!ISCLOSE(Inputs(0)->GradientValues()(0, 0), 4, EPSILON) ||
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp
@ -73,7 +73,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // serialization
    // -----------------------------------------------------------------------

-    void ComputationNetwork::SaveToFile(const wstring& fileName, const FileOptions fileFormat) const
+    void ComputationNetwork::Save(const wstring& fileName, const FileOptions fileFormat) const
    {
        // In case of parallel training only the main node should we saving the model to prevent
        // the parallel training nodes from colliding to write the same file
@ -106,7 +106,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        for (auto nodeIter = m_nameToNodeMap.begin(); nodeIter != m_nameToNodeMap.end(); nodeIter++)
        {
            ComputationNodeBasePtr nodePtr = nodeIter->second;
-            nodePtr->SaveToFile(fstream);
+            nodePtr->Save(fstream);
        }

        fstream.PutMarker(FileMarker::fileMarkerEndSection, L"ENodeList");
@ -204,7 +204,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            fstream >> opName >> nodeName;
            ComputationNodeBasePtr nodePtr = GetNodeFromName(nodeName);
            // TODO: don't we have a load constructor? Then when to call which? Document the calling sequence
-            nodePtr->LoadFromFile(fstream, modelVersion);
+            nodePtr->Load(fstream, modelVersion);
        }

        fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodeList");
@ -217,7 +217,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }
    }

-    template<class ElemType> void ComputationNetwork::LoadFromFile(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork)
+    template<class ElemType> void ComputationNetwork::Load(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork)
    {
        ClearNet();

@ -250,7 +250,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                fprintf(stderr, "Unknown ComputationNode type %ls (node name %ls)\n", opName.c_str(), nodeName.c_str());
                InvalidArgument("Invalid node type.");
            }
-            newNode->LoadFromFile(fstream, modelVersion);
+            newNode->Load(fstream, modelVersion);
            AddNodeToNet(newNode);
        }
        fstream.GetMarker(FileMarker::fileMarkerEndSection, L"ENodeList");
@ -1127,13 +1127,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    }

    template void ComputationNetwork::InitLearnableParameters<float>(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const float initValueScale, bool initOnCPUOnly);
-    template void ComputationNetwork::LoadFromFile<float>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
+    template void ComputationNetwork::Load<float>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
    template void ComputationNetwork::PerformSVDecomposition<float>(const map<wstring, float>& SVDConfig, size_t alignedsize);
    template /*static*/void ComputationNetwork::SetDropoutRate<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
    template void ComputationNetwork::SetSeqParam<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);

    template void ComputationNetwork::InitLearnableParameters<double>(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const double initValueScale, bool initOnCPUOnly);
-    template void ComputationNetwork::LoadFromFile<double>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
+    template void ComputationNetwork::Load<double>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
    template void ComputationNetwork::PerformSVDecomposition<double>(const map<wstring, float>& SVDConfig, size_t alignedsize);
    template /*static*/void ComputationNetwork::SetDropoutRate<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
    template void ComputationNetwork::SetSeqParam<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.h
@ -7,18 +7,16 @@
 #pragma once

 // TODOs:
-//  - need Matrix::RowSlice() (problem: currently has no 'lead' dimension separate from numRows) --> add stride to ImageLayout
+//  - need Matrix::RowSlice() (problem: currently has no 'lead' dimension separate from numRows) --> add stride to TensorShape
 //  - BUGBUG (in the future): Once we have > 1 layout in the system, all nodes must compare their actual layouts upon Evaluate().
 //    Example: TimeReverse must create a new layout. A second TimeReverse ideally would revert back, but can't know. Hence, all consumers of layouts must compare upon Evaluate().
 //    -> solve by including a layout in the FrameRange directly; then DataSlice() can compare compatibility
 //  - automatic inference of time window w.r.t. delay nodes (and related nodes such as a temporal pooling)
 //  - have overrides of RuntimeError etc. in ComputationNode, which prepend the error string with the node name and operation
 //  - code prettification:
-//     - sort all node implementations' methods into the same order; esp, EvaluateThisNode() comes before partial
+//     - sort all node implementations' methods into the same order; esp. ForwardProp() comes before BackpropTo()
 //     - sort important nodes first; move unused/experimental nodes into source files named accordingly
 //  - renaming:
-//     EvaluateThisNode()           -> ForwardProp()        // the familiar names
-//     ComputeInputPartial()        -> BackpropTo()
 //     OnEvaluateBeginIteration()   -> BeginForwardProp()   // and similar functions likewise
 //     Inputs()                     -> Input()              // or In()? or GetInput()?
 //     Children()                   -> Inputs()
@ -29,15 +27,11 @@
 //     DataSlice(frameRange)        -> DataFor(t)           // also more lightweight; 'slice' is an implementation detail
 //     ValueSlice(.)                -> OutputFor(t)
 //     GradientSlice(.)             -> GradientFor(t)
-//     LoadFromFile()               -> Load()               // keep it simpler (where else would one load from?)
-//     SaveToFile()                 -> Save()
-//     ImageLayout                  -> TensorShape          // general tensor descriptor
-//     m_imageLayout                -> SampleLayout
 //  - finish the job:
-//     - everywhere complete folding EvaluateThisNodeS() into EvaluateThisNode(FrameRange()), same for partial
+//     - everywhere complete folding ForwardPropS() into ForwardProp(FrameRange()), same for BackpropTo()
 //     - revise node constructors, merge by means of default parameters
 //  - known issues that need actual test cases to be fixed:
-//     - CRFNode::ComputeInputPartial() fails for >1 parallel sequence due to DataSlice() not being able to return whole sequences
+//     - CRFNode::BackpropTo() fails for >1 parallel sequence due to DataSlice() not being able to return whole sequences
 //     - implement reading of MB Layout in Binary, DSSM, and LivbSVM readers    --is DSSM already done?

 // The basic idea of this implementation is learned from Brian Guenter <bguenter@microsoft.com>
@ -77,7 +71,7 @@ protected:
    // SEQTraversalFlowControlNode -- FlowControlNode to traverse a (sub-)network time step by time step
    //
    // This is to implement recurrent loops. All nodes inside a loop are listed
-    // inside this node. This node's EvaluateThisNode() function will execute
+    // inside this node. This node's ForwardProp() function will execute
    // them inside a loop over all time steps of the recurrence.
    // For every time step, the entire chain of nodes is called, with the time index
    // passed as a FrameRange object.
@ -92,10 +86,10 @@ protected:
        //  - change m_recurrentInfo to use shared_ptrs to ComputationNodeBase
        virtual const std::wstring OperationName() const override { return L"SEQTraversalFlowControlNode"; }
        virtual void OnEvaluateBeginIteration() override;
-        virtual void EvaluateThisNode(const FrameRange &) override;
+        virtual void ForwardProp(const FrameRange &) override;
        virtual void OnEvaluateEndIteration() override;
        virtual void OnComputeGradientBeginIteration() override;
-        virtual void ComputeInputPartial(const size_t inputIndex, const FrameRange &) override { NOT_IMPLEMENTED; } // ugh, call ComputeGradientForChildren() instead
+        virtual void BackpropTo(const size_t inputIndex, const FrameRange &) override { NOT_IMPLEMENTED; } // ugh, call ComputeGradientForChildren() instead
        virtual void OnComputeGradientEndIteration() override;
        virtual void ComputeGradientForChildren(const FrameRange & frameRange, bool childrenInThisLoop, bool childrenInOuterLoop) override;
        virtual void RequestMatricesBeforeEval(MatrixPool& matrixPool);
@ -122,7 +116,7 @@ protected:
    // PARTraversalFlowControlNode -- FlowControlNode that traverses a (sub-)network
    //
    // This node contains a list of nodes in a (sub-)network. This node's
-    // EvaluateThisNode() method will execute all those nodes once in PAR mode,
+    // ForwardProp() method will execute all those nodes once in PAR mode,
    // that is, by passing a FrameRange object that represents to operate
    // on all frames in the node simultaneously.
    //
@ -135,10 +129,10 @@ protected:
    public:
        virtual const std::wstring OperationName() const override { return L"PARTraversalFlowControlNode"; }
        virtual void OnEvaluateBeginIteration() override { }
-        virtual void EvaluateThisNode(const FrameRange &) override;
+        virtual void ForwardProp(const FrameRange &) override;
        virtual void OnEvaluateEndIteration() override { }
        virtual void OnComputeGradientBeginIteration() override { }
-        virtual void ComputeInputPartial(const size_t inputIndex, const FrameRange &) override { NOT_IMPLEMENTED; } // ugh, call ComputeGradientForChildren() instead
+        virtual void BackpropTo(const size_t inputIndex, const FrameRange &) override { NOT_IMPLEMENTED; } // ugh, call ComputeGradientForChildren() instead
        virtual void OnComputeGradientEndIteration() override { }
        virtual void ComputeGradientForChildren(const FrameRange & frameRange, bool childrenInThisLoop, bool childrenInOuterLoop) override;
        virtual void RequestMatricesBeforeEval(MatrixPool& matrixPool);
@ -348,7 +342,7 @@ public:
    // serialization
    // -----------------------------------------------------------------------

-    void SaveToFile(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary) const;
+    void Save(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary) const;
 private:
    void SaveToFileImpl(const std::wstring& fileName, const FileOptions fileFormat) const;
 public:
@ -358,7 +352,7 @@ public:
    // design BUGBUG: binary files do not know whether they are float or double.
    // TODO: modify file format to know this; then eliminate the <ElemType> dependency (and in some future, allow nodes to be different)
    template<class ElemType>
-    void LoadFromFile(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary,
+    void Load(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary,
                      const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr);

    // static helper to instantiate a network from a file
@ -368,7 +362,7 @@ public:
                                                const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr)
    {
        auto net = make_shared<ComputationNetwork>(deviceId);
-        net->LoadFromFile<ElemType>(fileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
+        net->Load<ElemType>(fileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
        return net;
    }

--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetworkAnalysis.cpp
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetworkAnalysis.cpp
@ -24,19 +24,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // The methods below determine evaluation order, which is tricky in presence of recurrent loops.
    // TODO: Can this be moved to a separate class?

-    // MAIN ENTRY POINT for network recurrent-loop analysis. All other functions below are called from this one.
-
-    // forms the recurrent loop that 'rootNode' participates in
-    // TODO: This function is not lazy, i.e. not cached. BuildAndValidateSubNetwork() caches, but others don't. Not sure why/how that's OK--won't we reassign loop ids?
+    // FormRecurrentLoops() -- MAIN ENTRY POINT for network recurrent-loop analysis. All other functions in this CPP are called only from this one.
+    // This function analyzes the network for recurrent loops present in the computation of 'rootNode'.
    // This sets/updates:
    //  - m_recurrentInfo
    //  - ComputationNode::m_isPartOfLoop and m_loopId
    // Is often called before ValidateNetwork() on a root; will be called from inside ValidateNetwork() as well.
-    // This function is called for multiple nodes, e.g. eval and training criterion. I.e. it must be able to add to a previous result. E.g. it does not clear the m_visited flags at start. This seems brittle.
-    // BUGBUG: m_visited is also used by ValidateSubNetwork(). Hence, it may be in unexpected state when calling into this multiple times.
-    // BUGBUG: This currently does not handle nested loops. To handle that:
-    //  - loops are isolated by a ReconcileMBLayout--loop determination should see right through it, and then include everything inside
-    //  - ...? Need to figure this out.
+    // This function is called for multiple nodes, e.g. eval and training criterion. I.e. it must be able to add to a previous result. E.g. it does not clear the m_visited flags at start.
+    // Note: This function is not lazy, i.e. not cached. BuildAndValidateSubNetwork() caches, but others don't.
    void ComputationNetwork::FormRecurrentLoops(const ComputationNodeBasePtr& rootNode)
    {
        // determine the strongly connected cliques -> m_recurrentInfo[]
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetworkBuilder.cpp
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetworkBuilder.cpp
@ -183,14 +183,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    }

    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring & inputName,
-                                                                                                                        const ImageLayout & imageLayout,
+                                                                                                                        const TensorShape & imageLayout,
                                                                                                                        const size_t numImages)
    {
        return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout, numImages));
    }

    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring & inputName,
-                                                                                                                              const ImageLayout & imageLayout,
+                                                                                                                              const TensorShape & imageLayout,
                                                                                                                              const size_t numImages)
    {
        return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout, numImages));
@ -547,7 +547,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

    template<class ElemType> shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Reshape(const ComputationNodePtr a,
                                                                                                                const size_t numRows,
-                                                                                                                const ImageLayout & imageLayout,
+                                                                                                                const TensorShape & imageLayout,
                                                                                                                const std::wstring nodeName)
    {
        return net.AddNodeToNetAndAttachInputs(New<ReshapeNode<ElemType>>(net.GetDeviceId(), nodeName, numRows, imageLayout), a);
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetworkBuilder.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetworkBuilder.h
@ -40,8 +40,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        ComputationNodePtr CreateSparseLearnableParameter(const std::wstring & paramName, const size_t rows, const size_t cols, const size_t size = 0);
        ComputationNodePtr CreateInputNode(const std::wstring & inputName, const size_t rows, const size_t cols);
        ComputationNodePtr CreateSparseInputNode(const std::wstring & inputName, const size_t rows, const size_t cols);
-        ComputationNodePtr CreateInputNode(const std::wstring & inputName, const ImageLayout & imageLayout, const size_t numImages);
-        ComputationNodePtr CreateSparseInputNode(const std::wstring & inputName, const ImageLayout & imageLayout, const size_t numImages);
+        ComputationNodePtr CreateInputNode(const std::wstring & inputName, const TensorShape & imageLayout, const size_t numImages);
+        ComputationNodePtr CreateSparseInputNode(const std::wstring & inputName, const TensorShape & imageLayout, const size_t numImages);
        ComputationNodePtr CreatePairNetworkNode(const std::wstring & inputName, const size_t rows, const size_t cols);
        ComputationNodePtr CreateConvolutionNode(const std::wstring & nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0);
        ComputationNodePtr CreateMaxPoolingNode(const std::wstring & nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample);
@ -52,32 +52,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // TODO: These next three functions are wrappers around CreateXXXNode(). Remove these.
        ComputationNodePtr Parameter(const size_t rows, size_t cols, const std::wstring nodeName = L"") { return CreateLearnableParameter(nodeName, rows, cols); } // TODO: remove
        ComputationNodePtr Input(const size_t rows, const size_t cols, const std::wstring nodeName = L"") { return CreateInputNode(nodeName, rows, cols); } // TODO: remove
-        ComputationNodePtr Input(const ImageLayout & imageLayout, const size_t numImages, const std::wstring nodeName = L"") { return CreateInputNode(nodeName, imageLayout, numImages); } // TODO: remove
+        ComputationNodePtr Input(const TensorShape & imageLayout, const size_t numImages, const std::wstring nodeName = L"") { return CreateInputNode(nodeName, imageLayout, numImages); } // TODO: remove
        // The following functions create nodes and link them to the network and their inputs.
        // TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
        ComputationNodePtr PairNetwork(const ComputationNodePtr & a, const std::wstring nodeName = L"");
        ComputationNodePtr Convolution(const ComputationNodePtr weight,
-            const ComputationNodePtr inputValues,
-            const size_t kernelWidth,
-            const size_t kernelHeight,
-            const size_t outputChannels,
-            const size_t horizontalSubsample,
-            const size_t verticalSubsample,
-            const bool zeroPadding = false,
-            const std::wstring nodeName = L"",
-            const size_t maxTempMemSizeInSamples = 0);
+                                       const ComputationNodePtr inputValues,
+                                       const size_t kernelWidth,
+                                       const size_t kernelHeight,
+                                       const size_t outputChannels,
+                                       const size_t horizontalSubsample,
+                                       const size_t verticalSubsample,
+                                       const bool zeroPadding = false,
+                                       const std::wstring nodeName = L"",
+                                       const size_t maxTempMemSizeInSamples = 0);
        ComputationNodePtr MaxPooling(const ComputationNodePtr inputValues,
-            const size_t windowWidth,
-            const size_t windowHeight,
-            const size_t horizontalSubsample,
-            const size_t verticalSubsample,
-            const std::wstring nodeName = L"");
+                                      const size_t windowWidth,
+                                      const size_t windowHeight,
+                                      const size_t horizontalSubsample,
+                                      const size_t verticalSubsample,
+                                      const std::wstring nodeName = L"");
        ComputationNodePtr AveragePooling(const ComputationNodePtr inputValues,
-            const size_t windowWidth,
-            const size_t windowHeight,
-            const size_t horizontalSubsample,
-            const size_t verticalSubsample,
-            const std::wstring nodeName = L"");
+                                          const size_t windowWidth,
+                                          const size_t windowHeight,
+                                          const size_t horizontalSubsample,
+                                          const size_t verticalSubsample,
+                                          const std::wstring nodeName = L"");
        ComputationNodePtr ErrorPrediction(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
        ComputationNodePtr PerDimMeanVarNormalization(const ComputationNodePtr feature, const ComputationNodePtr mean, const ComputationNodePtr InvStdDev, const std::wstring nodeName = L"");
        ComputationNodePtr PerDimMeanVarDeNormalization(const ComputationNodePtr feature, const ComputationNodePtr mean, const ComputationNodePtr InvStdDev, const std::wstring nodeName = L"");
@ -122,7 +122,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        ComputationNodePtr Plus(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
        ComputationNodePtr Minus(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
        ComputationNodePtr Dropout(const ComputationNodePtr a, const std::wstring nodeName = L"");
-        ComputationNodePtr Reshape(const ComputationNodePtr a, const size_t num_rows, const ImageLayout & imageLayout, const std::wstring nodeName = L"");
+        ComputationNodePtr Reshape(const ComputationNodePtr a, const size_t num_rows, const TensorShape & imageLayout, const std::wstring nodeName = L"");
        ComputationNodePtr RowRepeat(const ComputationNodePtr a, const size_t num_repeat, const std::wstring nodeName = L"");
        ComputationNodePtr Diagonal(const ComputationNodePtr a, const std::wstring nodeName = L"");
        ComputationNodePtr PastValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, const size_t col_size, size_t timeStep, const std::wstring nodeName = L"");
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNetworkEvaluation.cpp
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNetworkEvaluation.cpp
@ -28,7 +28,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // MAIN ENTRY POINT for evaluating one minibatch (forward prop)
    // TODO: pass a set of nodes instead of only one
    // TODO: rename to ForwardProp()? To make it very clear?
-    // This calls EvaluateThisNode() on all nodes in order of data flow through the network.
+    // This calls ForwardProp() on all nodes in order of data flow through the network.
    // By default, the network is applied concurrently on all frames in a minibatch in parallel (PAR mode, a "map" operation)
    // Recurrent loops deviate:
    //  - a recurrent loop is the loop of nodes that make up computation for one time step (e.g. Times -> Plus -> Sigmoid -> Delay)
@ -43,7 +43,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            LogicError("Evaluate for node %ls %ls: BuildAndValidateSubNetwork() has not been called on this node.", rootNode->NodeName().c_str(), rootNode->OperationName().c_str());

        // traverse all nodes in the pre-determined evaluation order
-        GetOuterLoopNode(rootNode)->EvaluateThisNode(FrameRange(nullptr));
+        GetOuterLoopNode(rootNode)->ForwardProp(FrameRange(nullptr));
    }

    // MAIN ENTRY POINT for evaluation followed by gradient computation (forward prop then back prop)
@ -125,7 +125,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }
    }
-    /*virtual*/ void ComputationNetwork::PARTraversalFlowControlNode::EvaluateThisNode(const FrameRange & frameRange) /*override*/
+    /*virtual*/ void ComputationNetwork::PARTraversalFlowControlNode::ForwardProp(const FrameRange & frameRange) /*override*/
    {
        for (auto & node : m_nestedNodes)
        {
@ -136,7 +136,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                    assert(recInfo->m_sourceNode->GetMBLayout() == node->GetMBLayout());

                node->OnEvaluateBeginIteration();
-                node->EvaluateThisNode(frameRange.WithLayout(node->GetMBLayout()));
+                node->ForwardProp(frameRange.WithLayout(node->GetMBLayout()));
                node->OnEvaluateEndIteration();

                node->UpdateEvalTimeStamp();
@ -195,7 +195,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // This evaluates all nodes in this FlowControlNode in SEQ mode: process the loop frame by frame in a nested loop.
    // This is where the time axis changes.
    // TODO: Once we do nested loops, then the FrameRange argument to this will refer to the outer loop.
-    /*virtual*/ void ComputationNetwork::SEQTraversalFlowControlNode::EvaluateThisNode(const FrameRange &) /*override*/
+    /*virtual*/ void ComputationNetwork::SEQTraversalFlowControlNode::ForwardProp(const FrameRange &) /*override*/
    {
        // get layout associated with this loop
        // All nodes share the same layout.
@ -209,7 +209,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            for (auto & node : m_nestedNodes)
            {
-                node->EvaluateThisNode(t);
+                node->ForwardProp(t);
                node->UpdateEvalTimeStamp();
            }
        } 
--- a/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ComputationNode.h
@ -79,11 +79,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        virtual void UpdateFunctionMBSize() = 0;                // recalculate our column dimension from MBLayout

        virtual void OnEvaluateBeginIteration() = 0;
-        virtual void EvaluateThisNode(const FrameRange &) = 0;  // forward prop for one minibatch
-        virtual void OnEvaluateEndIteration() = 0;              // called after last iteration step of EvaluateThisNode()
+        virtual void ForwardProp(const FrameRange &) = 0;  // forward prop for one minibatch
+        virtual void OnEvaluateEndIteration() = 0;              // called after last iteration step of ForwardProp()

        virtual void OnComputeGradientBeginIteration() = 0;     // called before first iteration step of ComputeGradient()
-        virtual void ComputeInputPartial(const size_t inputIndex, const FrameRange &) = 0;
+        virtual void BackpropTo(const size_t inputIndex, const FrameRange &) = 0;
        virtual void OnComputeGradientEndIteration() = 0;       // called after last iteration step of ComputeGradient()

        // --- these are meant to be overridden by ControlFlowNodes
@ -103,8 +103,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        virtual void Validate(bool isFinalValidationPass) = 0;          // main base validation function
        virtual void InferImageDimsFromInputs() = 0;
-        virtual void SaveToFile(File& fstream) const = 0;
-        virtual void LoadFromFile(File& /*fstream*/, size_t /*modelVersion*/) = 0;
+        virtual void Save(File& fstream) const = 0;
+        virtual void Load(File& /*fstream*/, size_t /*modelVersion*/) = 0;
        virtual void CopyTo(ComputationNodeBasePtr node, const std::wstring& newName, const CopyNodeFlags flags) const = 0;

        // --- optional overrides that describe a feature or property of the node
@ -271,7 +271,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                node->m_nodeName = newName;

                node->m_inputImageLayout = m_inputImageLayout;
-                node->m_imageLayout = m_imageLayout;
+                node->m_sampleLayout = m_sampleLayout;

                ComputationNetworkOwnedNodeState::CopyTo(*node);
                TimeStamp::CopyTo(*node);
@ -283,12 +283,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // TODO: make sure this does not get implemented in any of the base classes
        DEVICEID_TYPE GetDeviceId() const { return m_deviceId; }    // TODO: remove, only used from copy constructor which will go away

-        virtual void SaveToFile(File& fstream) const
+        virtual void Save(File& fstream) const
        {
            fstream << OperationName() << NodeName();
        }

-        virtual void LoadFromFile(File& /*fstream*/, size_t /*modelVersion*/)
+        virtual void Load(File& /*fstream*/, size_t /*modelVersion*/)
        {
            // it is assumed that OperationName and NodeName have already been consumed--some asymmetry between Save and Load
            // base class has nothing to load
@ -299,7 +299,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        size_t GetNumRows() const { return m_numRows; }
        size_t GetNumCols() const { return m_numCols; }
        pair<size_t, size_t> GetDims() { return make_pair(GetNumRows(), GetNumCols()); }
-        // TODO: add an overload SetDims(ImageLayout, cols)
+        // TODO: add an overload SetDims(TensorShape, cols)
        virtual // for now virtual as this still updates m_functionValues
        void SetDims(size_t rows, size_t cols)
        {
@ -485,7 +485,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                    const char * mbSizeMark = child->m_pMBLayout ? "MBSize " : "";
                    if (IsChildAnImage(i))  //image
                        fprintf(stderr, "%ls[%lu {W=%lu, H=%lu, C=%lu}, %s%lu]", child->NodeName().c_str(), child->GetNumRows(),
-                                child->m_imageLayout.GetWidth(), child->m_imageLayout.GetHeight(), child->m_imageLayout.GetNumChannels(), mbSizeMark, child->GetNumCols());
+                                child->m_sampleLayout.GetWidth(), child->m_sampleLayout.GetHeight(), child->m_sampleLayout.GetNumChannels(), mbSizeMark, child->GetNumCols());
                    else
                        fprintf(stderr, "%ls[%lu, %s%lu]", child->NodeName().c_str(), child->GetNumRows(), mbSizeMark, child->GetNumCols());
                }
@ -522,12 +522,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        bool IsChildAnImage(const size_t index) const
        {
-            return m_inputs[index]->m_imageLayout.GetWidth() != 1 || m_inputs[index]->m_imageLayout.GetNumChannels() != 1;
+            return m_inputs[index]->m_sampleLayout.GetWidth() != 1 || m_inputs[index]->m_sampleLayout.GetNumChannels() != 1;
        }

-        const ImageLayout & GetImageLayout() const { return m_imageLayout; }
+        const TensorShape & GetImageLayout() const { return m_sampleLayout; }

-        pair<ImageLayout, ImageLayout> GetImageLayouts() const { return make_pair(m_inputImageLayout, m_imageLayout); }   // helper for Validate()
+        pair<TensorShape, TensorShape> GetImageLayouts() const { return make_pair(m_inputImageLayout, m_sampleLayout); }   // helper for Validate()

        const size_t ChildrenSize() const { return m_inputs.size(); }     // TODO: rename to NumChildren() or NumInputs(); and inside here where we use m_inputs, use m_inputs.size() as well

@ -542,13 +542,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        virtual void ClearGradientForChildren() = 0;

-        virtual void /*IComputationNode::*/OnEvaluateBeginIteration() override             // called before first iteration step of EvaluateThisNode()
+        virtual void /*IComputationNode::*/OnEvaluateBeginIteration() override             // called before first iteration step of ForwardProp()
        {
 #ifdef TRACK_GAP_NANS
            fprintf(stderr, "OnEvaluateBeginIteration: %ls %ls operation\n", NodeName().c_str(), OperationName().c_str());
 #endif
        }
-        virtual void /*IComputationNode::*/OnEvaluateEndIteration() override               // called after last iteration step of EvaluateThisNode()
+        virtual void /*IComputationNode::*/OnEvaluateEndIteration() override               // called after last iteration step of ForwardProp()
        {
 #ifdef TRACK_GAP_NANS
            fprintf(stderr, "OnEvaluateEndIteration: %ls %ls operation\n", NodeName().c_str(), OperationName().c_str());
@ -577,9 +577,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            const auto & child = m_inputs[index];
            if (child != nullptr)
-                m_inputImageLayout = child->m_imageLayout;
+                m_inputImageLayout = child->m_sampleLayout;
            if (outputSameAsInput)
-                m_imageLayout = m_inputImageLayout;
+                m_sampleLayout = m_inputImageLayout;
        }

        void InferMBLayoutFromInputsForStandardCase();
@ -742,8 +742,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // If the matrix is minibatch data (inputs, activations, labels), then matrix columns are samples.
        // Note that the actual matrix storage does not always exist.
        size_t m_numRows, m_numCols;        // matrix dimension of function values and gradients
-        ImageLayout m_inputImageLayout;     // how to interpret each column in the input as an image
-        ImageLayout m_imageLayout;    // and the output
+        TensorShape m_inputImageLayout;     // how to interpret each column in the input as an image
+        TensorShape m_sampleLayout;    // and the output
        // TODO: Why is the input layout not just the layout of the input node?
        MBLayoutPtr m_pMBLayout;

@ -950,7 +950,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }

        // update size (#columns) of node to match MBLayout
-        // This must be called right before EvaluateThisNode() the first time for a given minibatch.
+        // This must be called right before ForwardProp() the first time for a given minibatch.
        // Currently overridden by
        //  - InputValue, which verifies instead of resizing (since Resize() is specified to be destructive, it should not call it).
        //  - LearnableParameters
@ -1126,11 +1126,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            FunctionValues().Resize(m_numRows, m_numCols);
        }

-        // this is called before a node's EvaluateThisNode() function is called (in loops: for the first time)
+        // this is called before a node's ForwardProp() function is called (in loops: for the first time)
        // This is where we
        //  - update the node dimension based on actual MB size
        //  - (re-)allocate the m_functionValues matrix, which may be shared across nodes and thus have changed dimensions
-        virtual void /*IComputationNode::*/OnEvaluateBeginIteration() override             // called before first iteration step of EvaluateThisNode()
+        virtual void /*IComputationNode::*/OnEvaluateBeginIteration() override             // called before first iteration step of ForwardProp()
        {
            Base::OnEvaluateBeginIteration();

@ -1162,24 +1162,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        virtual void /*IComputationNode::*/OnComputeGradientBeginIteration() override
        {
            Base::OnComputeGradientBeginIteration();
-
-#if 0       // TODO: If you get a NaN failure, feel free to put this back in
-            // many gradients are reduction operations
-            // They touch both in-flowing gradients and function values, so we must set both to 0.
-            // BUGBUG: This masks a bug: Nodes should do that by themselves, like in EvaluateThisNode(), but they currently don't.
-            if (m_needsGradient)
-            {
-                MaskMissingValuesColumnsToZero(FrameRange(m_pMBLayout));
-                if (m_gradientInitialized)
-                    MaskMissingGradientColumnsToZero(FrameRange(m_pMBLayout));
-            }
-            bool anyChildNeedsGradient = false;
-            for (size_t i = 0; i < m_inputs.size(); i++)
-                anyChildNeedsGradient |= Inputs(i)->m_needsGradient;
-            if (anyChildNeedsGradient)
-                for (size_t i = 0; i < m_inputs.size(); i++)
-                    Inputs(i)->MaskMissingValuesColumnsToZero(FrameRange(Inputs(i)->GetMBLayout()));
-#endif
        }

 #ifdef _DEBUG
@ -1201,7 +1183,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }
 #endif

-        // this is the entry point from Network; while it will call virtual ComputeInputPartial() into the actual node implementation
+        // this is the entry point from Network; while it will call virtual BackpropTo() into the actual node implementation
        // TODO: move to -Base (or -Network?)
        void ComputeGradientForChildren(const FrameRange & frameRange, bool childrenInThisLoop, bool childrenInOuterLoop) override
        {
@ -1243,8 +1225,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #endif
                    }

-                    //fprintf(stderr, "ComputeInputPartial %d %d %ls %ls\n", (int)frameRange.timeIdxInSeq, (int)i, NodeName().c_str(), OperationName().c_str());
-                    ComputeInputPartial(i, frameRange);     // this computes partial wrt to the child and sums the gradient value in the child
+                    //fprintf(stderr, "BackpropTo %d %d %ls %ls\n", (int)frameRange.timeIdxInSeq, (int)i, NodeName().c_str(), OperationName().c_str());
+                    BackpropTo(i, frameRange);     // this computes partial wrt the child and sums the gradient value in the child
                }
 #ifdef DISPLAY_DEBUG
                else fprintf (stderr, "    [%lu]: %s(%s) (no gradient needed so don't compute for)\n", i, child->OperationName().c_str(), child->NodeName().c_str());
@ -1406,24 +1388,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }

        // these two implement the ComputationNode<> interface
-        void EvaluateThisNode(const FrameRange & frameRange) override final
+        void ForwardProp(const FrameRange & frameRange) override final
        {
            if (frameRange.IsAllFrames())
-                EvaluateThisNodeNonLooping();
+                ForwardPropNonLooping();
            else
                LogicError("%s node should never be in a loop.", typeid(*this).name());
        }
-        void ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override final
+        void BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override final
        {
            if (frameRange.IsAllFrames())
-                ComputeInputPartialNonLooping(inputIndex);
+                BackpropToNonLooping(inputIndex);
            else
                LogicError("%s node should never be in a loop.", typeid(*this).name());
        }

        // non-looping node types instead implement these functions
-        virtual void EvaluateThisNodeNonLooping() = 0;
-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) = 0;
+        virtual void ForwardPropNonLooping() = 0;
+        virtual void BackpropToNonLooping(size_t inputIndex) = 0;
    };

    // =======================================================================
@ -1442,8 +1424,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        virtual ComputationNodeBase * NewThis(DEVICEID_TYPE deviceId, const wstring & name) override { NOT_IMPLEMENTED; }
        virtual void Validate(bool isFinalValidationPass) override { NOT_IMPLEMENTED; }          // main base validation function
        virtual void InferImageDimsFromInputs() override { NOT_IMPLEMENTED; }
-        virtual void SaveToFile(File& fstream) const override { NOT_IMPLEMENTED; }
-        virtual void LoadFromFile(File& /*fstream*/, size_t /*modelVersion*/) override { NOT_IMPLEMENTED; }
+        virtual void Save(File& fstream) const override { NOT_IMPLEMENTED; }
+        virtual void Load(File& /*fstream*/, size_t /*modelVersion*/) override { NOT_IMPLEMENTED; }
        virtual void CopyTo(ComputationNodeBasePtr node, const std::wstring& newName, const CopyNodeFlags flags) const override { NOT_IMPLEMENTED; }
        virtual ComputationNodeBasePtr Duplicate(const std::wstring& newName, const CopyNodeFlags flags) override { NOT_IMPLEMENTED; }
        //virtual void SetDims(size_t rows, size_t cols) override { NOT_IMPLEMENTED; }
@ -1515,9 +1497,9 @@ protected: \
    using Base::m_pMBLayout; using Base::GetNumTimeSteps; using Base::GetNumParallelSequences; \
    using Base::MaskMissingColumnsToZero; using Base::MaskMissingValuesColumnsToZero; using Base::MaskMissingGradientColumnsToZero; using Base::InvalidateMissingValuesColumns; using Base::InvalidateMissingGradientColumns; \
    using Base::DataSlice; using Base::ValueSlice; using Base::GradientValues; using Base::GradientValuesPtr; using Base::GradientSlice; using Base::MaskedValueSlice; using Base::MaskedGradientSlice; \
-    using Base::EvaluateThisNode; using Base::ComputeInputPartial; \
+    using Base::ForwardProp; using Base::BackpropTo; \
    using Base::m_inputs; using Base::m_deviceId; using Base::m_functionValues; using Base::m_gradientValues; \
-    using Base::m_inputImageLayout; using Base::m_imageLayout; \
+    using Base::m_inputImageLayout; using Base::m_sampleLayout; \
    using Base::m_parameterUpdateRequired; using Base::m_nodeName; \
    using Base::CreateMatrixIfNull; using Base::RequestMatrixFromPool; using Base::ReleaseMatrixToPool; \
    using Base::CreateUniqId; \
@ -1529,9 +1511,9 @@ protected: \
    using Base::HasMBLayout; using Base::GetMBLayout; using Base::LinkToMBLayout; \
    using Base::Inputs; using Base::SetInput; \
    using Base::IsChildAnImage; using Base::IsEqualTo; using Base::IsFuncValueOlderThanInputs; using Base::IsLeaf; using Base::SetParameterUpdateRequired; \
-    using Base::LoadFromFile; \
+    using Base::Load; \
    using Base::PrintNodeValuesToFile; using Base::PrintSelfBeforeValidation; \
-    using Base::SaveToFile; using Base::UpdateFunctionMBSize; \
+    using Base::Save; using Base::UpdateFunctionMBSize; \
    using Base::RequestMatricesBeforeEval; using Base::ReleaseMatricesAfterEval; \
    using Base::RequestMatricesBeforeGradientComp; using Base::ReleaseMatricesAfterGradientComp; \
    using Base::Validate; using Base::ValidateUnaryMap; using Base::ValidateBinaryZip; using Base::ValidateUnaryReduce; using Base::ValidateBinaryReduce; using Base::ValidateInferBinaryChildrenDims; using Base::ValidateInferChildDims; \
--- a/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ConvolutionalNodes.h
@ -45,7 +45,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_horizontalSubsample(SIZE_MAX), m_verticalSubsample(SIZE_MAX),
            m_zeroPadding(false), m_maxTempMemSizeInSamples(SIZE_MAX)
        {
-            m_imageLayout = ImageLayoutWHC(1, 1, 0);           // TODO: what is this magic #channels == 0? Can this even be initialized at this time, or only inferred?
+            m_sampleLayout = ImageLayoutWHC(1, 1, 0);           // TODO: what is this magic #channels == 0? Can this even be initialized at this time, or only inferred?
        }
        ConvolutionNode(DEVICEID_TYPE deviceId, const wstring & name, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0) :
            Base(deviceId, name),
@ -53,7 +53,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_horizontalSubsample(horizontalSubsample), m_verticalSubsample(verticalSubsample),
            m_zeroPadding(zeroPadding), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
        {
-            m_imageLayout = ImageLayoutWHC(1, 1, outputChannels);
+            m_sampleLayout = ImageLayoutWHC(1, 1, outputChannels);
        }
        ConvolutionNode(const ScriptableObjects::IConfigRecordPtr configp) :
            ConvolutionNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"kernelWidth"), configp->Get(L"kernelHeight"), configp->Get(L"outputChannels"),
@ -64,21 +64,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            AttachInputs(configp, this->GetExpectedNumInputs());
        }

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);
            fstream <<  m_kernelWidth << m_kernelHeight << m_horizontalSubsample << m_verticalSubsample;
-            fstream << m_imageLayout.GetNumChannels();
+            fstream << m_sampleLayout.GetNumChannels();
            fstream << m_zeroPadding << m_maxTempMemSizeInSamples;
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
            fstream >> m_kernelWidth >> m_kernelHeight >> m_horizontalSubsample >> m_verticalSubsample; 
            size_t outputChannels;
            fstream >> outputChannels;
-            m_imageLayout = ImageLayoutWHC(1, 1, outputChannels);
+            m_sampleLayout = ImageLayoutWHC(1, 1, outputChannels);
            fstream >> m_zeroPadding >> m_maxTempMemSizeInSamples;
        }

@ -102,38 +102,26 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        //void ComputeInputPartialMap(const size_t inputIndex) 
-        //{
-        //    if (inputIndex > 1)
-        //        InvalidArgument("Convolution operation only takes two inputs.");
-        //
-        //    if (inputIndex == 0)  //derivative with regard to the weight matrix
-        //        ComputeInputPartialOverWeight(GradientValues(), Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), m_tempMatrix, true);
-        //    else  // derivative with regard to the input feature
-        //        ComputeInputPartialOverInputFeature(GradientValues(), Inputs(1)->GradientValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), m_tempMatrix);
-        //}
-
-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            //if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(Inputs(1)->GetNumParallelSequences(), m_pMBLayout));

            if (inputIndex == 0)  //derivative with regard to the weight matrix
-                ComputeInputPartialOverWeight(sliceOutputGrad, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix, !frameRange.IsAllFrames());
+                BackpropToOverWeight(sliceOutputGrad, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix, !frameRange.IsAllFrames());
            else if (inputIndex == 1)  // derivative with regard to the input feature
            {
                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
-                ComputeInputPartialOverInputFeature(sliceOutputGrad, sliceInput1Grad, Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix);
+                BackpropToOverInputFeature(sliceOutputGrad, sliceInput1Grad, Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix);
            }
        }

    private:
-        void ComputeInputPartialOverWeight(Matrix<ElemType> &gradientValues,
+        void BackpropToOverWeight(Matrix<ElemType> &gradientValues,
            Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &/*input0*/, const Matrix<ElemType> &input1, Matrix<ElemType> &tempMatrix, const bool inLoop)
        {
            size_t packedInputRows = m_kernelWidth * m_kernelHeight * m_inputImageLayout.GetNumChannels();
-            size_t packedInputColsPerSample = m_imageLayout.GetWidth() * m_imageLayout.GetHeight();
+            size_t packedInputColsPerSample = m_sampleLayout.GetWidth() * m_sampleLayout.GetHeight();
            size_t outputSizePerChannel = packedInputColsPerSample;
            //size_t packedInputDim = packedInputRows * packedInputColsPerSample; // size of each packed input sample
            //size_t inputDim = m_inputImageLayout.GetWidth() * m_inputImageLayout.GetHeight() * m_inputImageLayout.GetNumChannels();  //size of each input sample
@ -145,7 +133,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            //const Matrix<ElemType> & weightMatrix = input0;
            //inputGradientValues.Resize(weightMatrix.GetNumRows(), weightMatrix.GetNumCols()); //should have been resized when preparing gradient computation

-            gradientValues.Reshape(m_imageLayout.GetNumChannels(), outputSizePerChannel * batchSize);  //reshape to match the longernal operation
+            gradientValues.Reshape(m_sampleLayout.GetNumChannels(), outputSizePerChannel * batchSize);  //reshape to match the internal operation

            size_t subBatchSize = min(batchSize, maxTempMemSizeInSamples);
            size_t numSubBatches = (batchSize + subBatchSize - 1) / subBatchSize;
@ -165,7 +153,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                    inputSubBatch.SwitchToMatrixType(MatrixType::DENSE, inputSubBatch.GetFormat(), true);
                    tempMatrix.AssignPackedConvolutionInput(inputSubBatch,
                                                            m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputImageLayout.GetNumChannels(),
-                                                            m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_imageLayout.GetNumChannels(),
+                                                            m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels(),
                                                            m_kernelWidth, m_kernelHeight, m_horizontalSubsample, m_verticalSubsample,
                                                            m_zeroPadding);

@ -174,14 +162,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                }
            }

-            gradientValues.Reshape(m_imageLayout.GetNumChannels() * outputSizePerChannel, batchSize);  //change back
+            gradientValues.Reshape(m_sampleLayout.GetNumChannels() * outputSizePerChannel, batchSize);  //change back
        }

        //compute gradient over the packed input and then convert the result to the original input
-        void ComputeInputPartialOverInputFeature(Matrix<ElemType> &gradientValues, const Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &input1, Matrix<ElemType> &tempMatrix)
+        void BackpropToOverInputFeature(Matrix<ElemType> &gradientValues, const Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &input1, Matrix<ElemType> &tempMatrix)
        {
            size_t packedInputRows = m_kernelWidth * m_kernelHeight * m_inputImageLayout.GetNumChannels();
-            size_t packedInputColsPerSample = m_imageLayout.GetWidth() * m_imageLayout.GetHeight();
+            size_t packedInputColsPerSample = m_sampleLayout.GetWidth() * m_sampleLayout.GetHeight();
            size_t outputSizePerChannel = packedInputColsPerSample;
            //size_t packedInputDim = packedInputRows * packedInputColsPerSample; // size of each packed input sample
            //size_t inputDim = m_inputImageLayout.GetWidth() * m_inputImageLayout.GetHeight() * m_inputImageLayout.GetNumChannels();  //size of each input sample
@ -192,7 +180,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            const Matrix<ElemType> & weightMatrix = input0;

-            gradientValues.Reshape(m_imageLayout.GetNumChannels(), outputSizePerChannel * batchSize);  //reshape to match the longernal operation
+            gradientValues.Reshape(m_sampleLayout.GetNumChannels(), outputSizePerChannel * batchSize);  //reshape to match the internal operation

            size_t subBatchSize = min(batchSize, maxTempMemSizeInSamples);
            size_t numSubBatches = (batchSize + subBatchSize - 1) / subBatchSize;
@ -210,25 +198,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                Matrix<ElemType> inputGradientSubBatch = inputGradientValues.ColumnSlice(startSampleID, smallBatchSize);
                tempMatrix.UnpackConvolutionInput(inputGradientSubBatch,
                                                  m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputImageLayout.GetNumChannels(),
-                                                  m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_imageLayout.GetNumChannels(),
+                                                  m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels(),
                                                  m_kernelWidth, m_kernelHeight, m_horizontalSubsample, m_verticalSubsample,
                                                  m_zeroPadding);
            }

-            gradientValues.Reshape(m_imageLayout.GetNumChannels() * outputSizePerChannel, batchSize);  //change back
+            gradientValues.Reshape(m_sampleLayout.GetNumChannels() * outputSizePerChannel, batchSize);  //change back
        }
    public:

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
-            //if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
-            EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix);
+            ForwardPropS(sliceOutputValue, Inputs(0)->FunctionValues(), sliceInput1Value, *m_tempMatrix);
        }

    private:
-        void EvaluateThisNodeS(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0, 
+        void ForwardPropS(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0, 
                               const Matrix<ElemType> &input1, Matrix<ElemType> &tempMatrix)
        {
 #if NANCHECK
@ -236,7 +223,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            input1.HasNan("Convolution-input1");
 #endif
            size_t packedInputRows = m_kernelWidth * m_kernelHeight * m_inputImageLayout.GetNumChannels();
-            size_t packedInputColsPerSample = m_imageLayout.GetWidth() * m_imageLayout.GetHeight();
+            size_t packedInputColsPerSample = m_sampleLayout.GetWidth() * m_sampleLayout.GetHeight();
            size_t outputSizePerChannel = packedInputColsPerSample;
            //size_t packedInputDim = packedInputRows * packedInputColsPerSample; // size of each packed input sample
            //size_t inputDim = m_inputImageLayout.GetWidth() * m_inputImageLayout.GetHeight() * m_inputImageLayout.GetNumChannels();  //size of each input sample
@ -246,7 +233,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            size_t maxTempMemSizeInSamples = (m_maxTempMemSizeInSamples == 0? batchSize : m_maxTempMemSizeInSamples);

            const Matrix<ElemType> & weightMatrix = input0;
-            assert(weightMatrix.GetNumCols() == packedInputRows && weightMatrix.GetNumRows() == m_imageLayout.GetNumChannels());
+            assert(weightMatrix.GetNumCols() == packedInputRows && weightMatrix.GetNumRows() == m_sampleLayout.GetNumChannels());

            // GPU and 1-dimensional image
            bool m_1DConvolutionOnGPUSparse = (m_inputImageLayout.GetHeight() == 1
@ -257,7 +244,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            // Reshaping is only necessary if we are going to use the unpacking trick
            if (!m_1DConvolutionOnGPUSparse)
-                functionValues.Reshape(m_imageLayout.GetNumChannels(), outputSizePerChannel * batchSize);
+                functionValues.Reshape(m_sampleLayout.GetNumChannels(), outputSizePerChannel * batchSize);

            size_t subBatchSize = min(batchSize, maxTempMemSizeInSamples); 
            size_t numSubBatches = (batchSize+subBatchSize-1)/subBatchSize; 
@ -298,14 +285,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                    tempMatrix.Resize(packedInputRows, packedInputColsPerSample * smallBatchSize);
                    tempMatrix.AssignPackedConvolutionInput(inputSubBatch,
                                                        m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputImageLayout.GetNumChannels(),
-                                                        m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_imageLayout.GetNumChannels(),
+                                                        m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels(),
                                                        m_kernelWidth, m_kernelHeight, m_horizontalSubsample, m_verticalSubsample, m_zeroPadding);

                    Matrix<ElemType>::Multiply(weightMatrix, false, tempMatrix, false, outputSubBatch);
                }
            }

-            functionValues.Reshape(m_imageLayout.GetNumChannels() * outputSizePerChannel, batchSize);  //each sample becomes a column
+            functionValues.Reshape(m_sampleLayout.GetNumChannels() * outputSizePerChannel, batchSize);  //each sample becomes a column

 #if NANCHECK
            functionValues.HasNan("Convolution");
@ -331,10 +318,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            size_t weightCols = m_kernelWidth * m_kernelHeight * m_inputImageLayout.GetNumChannels();

            if (Inputs(0)->FunctionValues().HasNoElements())
-                ValidateInferChildDims(0, m_imageLayout.GetNumChannels(), weightCols);
+                ValidateInferChildDims(0, m_sampleLayout.GetNumChannels(), weightCols);

-            if (isFinalValidationPass && (Inputs(0)->GetNumCols() != weightCols || Inputs(0)->GetNumRows() != m_imageLayout.GetNumChannels()))
-                LogicError("convolutionWeight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]", m_inputs[0]->NodeName().c_str(), (int)m_imageLayout.GetNumChannels(), (int)weightCols);
+            if (isFinalValidationPass && (Inputs(0)->GetNumCols() != weightCols || Inputs(0)->GetNumRows() != m_sampleLayout.GetNumChannels()))
+                LogicError("convolutionWeight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]", m_inputs[0]->NodeName().c_str(), (int)m_sampleLayout.GetNumChannels(), (int)weightCols);

            size_t inputDim = m_inputImageLayout.GetWidth() * m_inputImageLayout.GetHeight() * m_inputImageLayout.GetNumChannels();
            if (Inputs(1)->GetNumRows() == 0)
@ -343,7 +330,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            if (isFinalValidationPass && Inputs(1)->GetNumRows() != inputDim)
                LogicError("each column of input to the convolution node %ls is a sample and should have dimension %d, which is inputWidth * inputHeight * inputChannels", NodeName().c_str(), (int)inputDim);

-            size_t outputDim = m_imageLayout.GetWidth() * m_imageLayout.GetHeight() * m_imageLayout.GetNumChannels();
+            size_t outputDim = m_sampleLayout.GetWidth() * m_sampleLayout.GetHeight() * m_sampleLayout.GetNumChannels();
            SetDims(outputDim, Inputs(1)->GetNumCols());
        }

@ -358,15 +345,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            {
                const int kernelWidthCenter = m_kernelWidth % 2;
                const int kernelHeightCenter = m_kernelHeight % 2;
-                m_imageLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth()  - kernelWidthCenter)  / m_horizontalSubsample + 1,
+                m_sampleLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth()  - kernelWidthCenter)  / m_horizontalSubsample + 1,
                                                     (m_inputImageLayout.GetHeight() - kernelHeightCenter) / m_verticalSubsample   + 1,
-                                                     m_imageLayout.GetNumChannels());
+                                                     m_sampleLayout.GetNumChannels());
            }
            else
            {
-                m_imageLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth()  - m_kernelWidth)  / m_horizontalSubsample + 1,
+                m_sampleLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth()  - m_kernelWidth)  / m_horizontalSubsample + 1,
                                                     (m_inputImageLayout.GetHeight() - m_kernelHeight) / m_verticalSubsample   + 1,
-                                                     m_imageLayout.GetNumChannels());
+                                                     m_sampleLayout.GetNumChannels());
            }    
        }

@ -379,7 +366,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            fstream << string(str);
            sprintf(str, "Kernel[Width:%lu, Height:%lu]  SubSample[Horizontal:%lu, Vertical:%lu]\n", m_kernelWidth, m_kernelHeight, m_horizontalSubsample, m_verticalSubsample);
            fstream << string(str);
-            sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu]  \n", m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_imageLayout.GetNumChannels());
+            sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu]  \n", m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels());
            fstream << string(str);
            sprintf(str, "ZeroPadding=%ls  maxTempMemSizeInSamples=%lu\n", m_zeroPadding? L"true" : L"false", m_maxTempMemSizeInSamples);
            fstream << string(str);
@ -445,15 +432,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            AttachInputs(configp, this->GetExpectedNumInputs());
        }

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);
            fstream << m_windowWidth << m_windowHeight << m_horizontalSubsample << m_verticalSubsample;
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
            fstream >> m_windowWidth >> m_windowHeight >> m_horizontalSubsample >> m_verticalSubsample;
        }

@ -475,31 +462,29 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
        {
-            //if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
            Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-            ComputeInputPartialV(sliceOutputGrad, sliceInput0Grad, sliceInput0Value, sliceOutputValue);
+            BackpropToV(sliceOutputGrad, sliceInput0Grad, sliceInput0Value, sliceOutputValue);
        }

        // this function must be overriden by Max or AveragePoolingNode
-        virtual void ComputeInputPartialV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &functionValues) = 0;
+        virtual void BackpropToV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &functionValues) = 0;

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
-            //if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
-            EvaluateThisNodeV(sliceOutputValue, sliceInput0Value);
+            ForwardPropV(sliceOutputValue, sliceInput0Value);
        }

        // this function must be overriden by Max or AveragePoolingNode
-        virtual void EvaluateThisNodeV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) = 0;
+        virtual void ForwardPropV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) = 0;

        virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
        {
@ -512,7 +497,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            InferImageDimsFromInputs();

            m_inputSizePerSample = m_inputImageLayout.GetWidth() * m_inputImageLayout.GetHeight() * m_inputImageLayout.GetNumChannels();
-            m_outputSizePerSample = m_imageLayout.GetWidth() * m_imageLayout.GetHeight() * m_imageLayout.GetNumChannels();
+            m_outputSizePerSample = m_sampleLayout.GetWidth() * m_sampleLayout.GetHeight() * m_sampleLayout.GetNumChannels();

            if (Inputs(0)->GetNumRows() == 0)
                ValidateInferChildDims(0, m_inputSizePerSample, Inputs(0)->GetNumCols());
@ -530,7 +515,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            if (m_inputImageLayout.GetWidth() < m_windowWidth || m_inputImageLayout.GetHeight() < m_windowHeight)
                InvalidArgument("PoolingNodeBase: inputWidth must >= windowWidth and inputHeight must >= windowHeight.");

-            m_imageLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth()  - m_windowWidth)  / m_horizontalSubsample + 1,
+            m_sampleLayout = ImageLayoutWHC((m_inputImageLayout.GetWidth()  - m_windowWidth)  / m_horizontalSubsample + 1,
                                                 (m_inputImageLayout.GetHeight() - m_windowHeight) / m_verticalSubsample   + 1,
                                                 m_inputImageLayout.GetNumChannels());
        }
@ -544,7 +529,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            fstream << string(str);
            sprintf(str, "PoolingWindow[Width:%lu, Height:%lu]  SubSampling[Horizontal:%lu, Vertical:%lu]\n", m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
            fstream << string(str);
-            sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu]  \n", m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_imageLayout.GetNumChannels());
+            sprintf(str, "Output[Width:%lu, Height:%lu, Channels:%lu]  \n", m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_sampleLayout.GetNumChannels());
            fstream << string(str);
            sprintf(str, "TotalSizePerSample[Input:%lu, Output:%lu]  \n", m_inputSizePerSample, m_outputSizePerSample);
            fstream << string(str);
@ -581,19 +566,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(configp)
        { }

-        virtual void ComputeInputPartialV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &functionValues) override
+        virtual void BackpropToV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &input0, const Matrix<ElemType> &functionValues) override
        {
            inputGradientValues.AddMaxPoolingGradient(gradientValues, input0, functionValues, m_inputImageLayout.GetNumChannels(),
                                                      m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputSizePerSample, 
-                                                      m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_outputSizePerSample, 
+                                                      m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_outputSizePerSample, 
                                                      m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
        }

-        virtual void EvaluateThisNodeV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) override
+        virtual void ForwardPropV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) override
        {
            functionValues.AssignMaxPoolingResult(input0, m_inputImageLayout.GetNumChannels(),
                                                  m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputSizePerSample, 
-                                                  m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_outputSizePerSample, 
+                                                  m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_outputSizePerSample, 
                                                  m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
        }
    };
@ -619,19 +604,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(configp)
        { }

-        virtual void ComputeInputPartialV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &/*input0*/, const Matrix<ElemType> &/*functionValues*/) override
+        virtual void BackpropToV(const Matrix<ElemType> &gradientValues, Matrix<ElemType> &inputGradientValues, const Matrix<ElemType> &/*input0*/, const Matrix<ElemType> &/*functionValues*/) override
        {
            inputGradientValues.AddAveragePoolingGradient(gradientValues, m_inputImageLayout.GetNumChannels(),
                                                          m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputSizePerSample, 
-                                                          m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_outputSizePerSample, 
+                                                          m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_outputSizePerSample, 
                                                          m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
        }

-        virtual void EvaluateThisNodeV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) override
+        virtual void ForwardPropV(Matrix<ElemType> &functionValues, const Matrix<ElemType> &input0) override
        {
            functionValues.AssignAveragePoolingResult(input0, m_inputImageLayout.GetNumChannels(),
                                                      m_inputImageLayout.GetWidth(), m_inputImageLayout.GetHeight(), m_inputSizePerSample, 
-                                                      m_imageLayout.GetWidth(), m_imageLayout.GetHeight(), m_outputSizePerSample, 
+                                                      m_sampleLayout.GetWidth(), m_sampleLayout.GetHeight(), m_outputSizePerSample, 
                                                      m_windowWidth, m_windowHeight, m_horizontalSubsample, m_verticalSubsample);
        }
    };
--- a/MachineLearning/CNTKComputationNetworkLib/EsotericNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/EsotericNodes.h
@ -42,7 +42,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
          Base(deviceId, name)
        { }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            if (inputIndex == 0)
@ -57,7 +57,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            FunctionValues().VerifySize(1, 1);
            Inputs(0)->FunctionValues().VerifySize(1, 1);
@ -97,7 +97,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }
    };

@ -160,21 +160,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            stp = lastLbl;
        };

-        virtual void ComputeInputPartialNonLooping(size_t /*inputIndex*/) override  //scaled by 2*number of elements in the Matrix<ElemType>
+        virtual void BackpropToNonLooping(size_t /*inputIndex*/) override  //scaled by 2*number of elements in the Matrix<ElemType>
        {
            LogicError("SequenceDecoder is used for evaluation only.");
        }

        /// compute posterior probability of label y at position t
-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            DecideStartEndingOutputLab(Inputs(0)->FunctionValues(), mStartLab, mEndLab);
-            EvaluateThisNodeS(mAlpha, mBacktrace, FunctionValues(), Inputs(1)->FunctionValues(),
+            ForwardPropS(mAlpha, mBacktrace, FunctionValues(), Inputs(1)->FunctionValues(),
                              Inputs(2)->FunctionValues(), mStartLab, mEndLab);
        }

        // compute forward backward algorithm
-        void EvaluateThisNodeS(Matrix<ElemType>& alpha, Matrix<ElemType>& backtrace, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const size_t stt, const size_t stp)
+        void ForwardPropS(Matrix<ElemType>& alpha, Matrix<ElemType>& backtrace, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pos_scores, const Matrix<ElemType>& pair_scores, const size_t stt, const size_t stp)
        {
            /// to-do, each slice is for one sentence
            /// to-do, number of slices correspond to number of frames 
@ -283,7 +283,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }
    };

@ -334,7 +334,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }
        // BUGBUG: This node needs to serialize and CopyTo m_stride

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            if (frameRange.IsAllFrames()) { NOT_IMPLEMENTED; return; } // TODO: remove these one by one. And why is this not implemented?
            if (inputIndex > 2)
@ -350,7 +350,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                {
                    Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);

-                    //ComputeInputPartialLeft1(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
+                    //BackpropToLeft1(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);

                    size_t r = Inputs(0)->GetNumRows();
                    size_t T1 = Inputs(0)->GetNumCols() / GetNumParallelSequences();    // TODO: if T1 == GetNumTimeSteps() then we can simplify code below.
@ -363,7 +363,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                        auto mTmp2 = sliceInput1Value.ColumnSlice(k, 1);
                        auto mTmp3 = sliceOutputGrad.ColumnSlice(k, 1);

-                        ComputeInputPartialLeft1(mTmp2, mTmp1, mTmp3);
+                        BackpropToLeft1(mTmp2, mTmp1, mTmp3);

                        for (size_t t = 0; t < T1; t++)
                        {
@ -375,7 +375,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                {
                    Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange);

-                    //ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
+                    //BackpropToRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);

                    // process sequence by sequence
                    for (size_t k = 0; k < GetNumParallelSequences(); k++)
@ -390,7 +390,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                        auto mTmp2 = sliceInput1Grad.ColumnSlice(k, 1);
                        auto mTmp3 = sliceOutputGrad.ColumnSlice(k, 1);

-                        ComputeInputPartialRight(mTmp1, mTmp2, mTmp3);
+                        BackpropToRight(mTmp1, mTmp2, mTmp3);
                    }
                }
            }
@ -408,7 +408,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                        mTmp1.Resize(d, T1);
                        Matrix<ElemType> mTmp2 = sliceInput1Value.ColumnSlice(k, 1);
                        Matrix<ElemType> mTmp3 = sliceOutputGrad.ColumnSlice(k, 1);
-                        ComputeInputPartialLeft(mTmp2, mTmp1, mTmp3);
+                        BackpropToLeft(mTmp2, mTmp1, mTmp3);

                        Matrix<ElemType> mTmp4(sliceInput1Value.GetDeviceId());
                        for (size_t t = 0; t < T1; t++)
@ -442,14 +442,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                        Matrix<ElemType> mTmp2 = sliceInput1Grad.ColumnSlice(k, 1);
                        Matrix<ElemType> mTmp3 = sliceOutputGrad.ColumnSlice(k, 1);

-                        ComputeInputPartialRight(mTmp1, mTmp2, mTmp3);
+                        BackpropToRight(mTmp1, mTmp2, mTmp3);
                    }
                }
            }
        }

        // TODO: the following two functions only differ in the order of argument use in the final MultiplyAndAdd()  --is that intended??
-        static /*TODO: merge with call site*/void ComputeInputPartialLeft1(const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
+        static /*TODO: merge with call site*/void BackpropToLeft1(const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
        {
 #if DUMPOUTPUT
            gradientValues.Print("Gradient-in");
@ -466,7 +466,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #endif
        }

-        static /*TODO: merge with call site*/void ComputeInputPartialLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
+        static /*TODO: merge with call site*/void BackpropToLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
        {
 #if DUMPOUTPUT   
            gradientValues.Print("Gradient-in");   
@ -484,7 +484,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #endif
        }

-        static /*TODO: merge with call site*/void ComputeInputPartialRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
+        static /*TODO: merge with call site*/void BackpropToRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
        {
 #if DUMPOUTPUT   
            gradientValues.Print("Gradient-in");   
@ -497,7 +497,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #endif
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            size_t rows0 = Inputs(0)->GetNumRows(), cols1 = Inputs(1)->GetNumCols();
            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
@ -632,7 +632,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            InferImageDimsFromInput(1, false); //the second one is the input since it's column wize

            //after multiplication the structure is lost
-            m_imageLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
+            m_sampleLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
        }
    };

--- a/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/EvaluationCriterionNodes.h
@ -31,12 +31,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void ComputeInputPartialNonLooping(size_t /*inputIndex*/) override
+        virtual void BackpropToNonLooping(size_t /*inputIndex*/) override
        {
            LogicError("%ls operation is used for evaluation only.", OperationName().c_str());
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            Inputs(0)->ValueSlice(frameRange).VectorMax(*m_maxIndexes0, *m_maxValues, true);
@ -96,7 +96,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

        virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
--- a/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/InputAndParamNodes.h
@ -40,13 +40,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        {
            m_parameterUpdateRequired = true;
-            m_imageLayout = ImageLayoutWHC(1, SIZE_MAX, 1);
+            m_sampleLayout = ImageLayoutWHC(1, SIZE_MAX, 1);
        }
        LearnableParameter(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols) :
            Base(deviceId, name)
        {
            m_parameterUpdateRequired = true;
-            m_imageLayout = ImageLayoutWHC(1, rows, 1);
+            m_sampleLayout = ImageLayoutWHC(1, rows, 1);
            // TODO: Is ^^ this a wise choice? These are often weight matrices, where rows, not columns, are multiplied with input vectors.
            CreateMatrixIfNull(m_functionValues);
            SetDims(rows, cols);
@ -81,17 +81,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                RuntimeError("init must be one of the values of [ uniform | gaussian | fixedValue | fromFile ]");
        }

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);
            fstream << m_parameterUpdateRequired;
            fstream << GetNumRows() << GetNumCols(); 
            fstream << FunctionValues();
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);

            size_t rows, cols;
            fstream >> m_parameterUpdateRequired;
@ -100,7 +100,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            SetDims(rows, cols);
            LoadFunctionValues(fstream);

-            m_imageLayout = ImageLayoutWHC(1, rows, 1);
+            m_sampleLayout = ImageLayoutWHC(1, rows, 1);
        }

        // initialize with random numbers
@ -158,8 +158,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // computation functions don't do anything for parameter nodes
        virtual void UpdateFunctionMBSize() override { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange &) override { }
-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange &) override { }
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange &) override { }
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange &) override { }

        virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
        {
@ -208,9 +208,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_gradientValues->Resize(rows, cols, size);
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            LearnableParameter<ElemType>::LoadFromFile(fstream, modelVersion);
+            LearnableParameter<ElemType>::Load(fstream, modelVersion);
            CreateMatrixIfNull(m_gradientValues);
            m_gradientValues->Resize(GetNumRows(), GetNumCols());
        }
@ -245,7 +245,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // constructor without dimensions: invalidates the sample layout and calls Init(0, 0, isSparse)
        InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, bool isSparse) :
            Base(deviceId, name)
        {
-            m_imageLayout.Invalidate();
+            m_sampleLayout.Invalidate();
            Init(0, 0, isSparse);
        }
        InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols, bool isSparse) :
@ -254,10 +254,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            if (rows * cols == 0)
                LogicError("This InputValue dimension is 0.");

-            m_imageLayout = ImageLayoutVector(rows);
+            m_sampleLayout = ImageLayoutVector(rows);
            Init(rows, cols, isSparse);
        }
-        InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, const ImageLayout & imageLayout, size_t numImages, bool isSparse) :
+        InputValueBase(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & imageLayout, size_t numImages, bool isSparse) :
            Base(deviceId, name)
        {
            size_t rows = imageLayout.GetNumElements();
@ -266,7 +266,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            if (rows * cols == 0)
                LogicError("This InputValue dimension is 0.");

-            m_imageLayout = imageLayout;
+            m_sampleLayout = imageLayout;

            Init(rows, cols, isSparse);
        }
@ -279,37 +279,37 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            {
                size_t rows = configp->Get(L"rows");
                size_t cols = configp->Get(L"cols");
-                m_imageLayout = ImageLayoutVector(rows);    // no tensor, just a vector
+                m_sampleLayout = ImageLayoutVector(rows);    // no tensor, just a vector
                Init(rows, cols, isSparse);
            }
            else
            {
-                m_imageLayout = ImageLayoutWHC(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"));
-                size_t rows = m_imageLayout.GetNumElements();
+                m_sampleLayout = ImageLayoutWHC(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"));
+                size_t rows = m_sampleLayout.GetNumElements();
                size_t cols = configp->Get(L"numImages");         // this is actually the MB size
                Init(rows, cols, isSparse);
            }
        }
    public:

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
            // written in this order: base-class data, rows, cols, then the sample layout
-            Base::SaveToFile(fstream);
-            size_t rows = GetNumRows();                     // using explicitly typed variables to be 100% symmetrical to LoadFromFile()
+            Base::Save(fstream);
+            size_t rows = GetNumRows();                     // using explicitly typed variables to be 100% symmetrical to Load()
            size_t cols = m_pMBLayout ? 0 : GetNumCols();   // if this Input depends on MB size, we write it as having 0 dimensions
            fstream << rows << cols;
-            m_imageLayout.SaveToFile(fstream);
+            m_sampleLayout.Save(fstream);
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
            // read in this order: base-class data, rows, cols, then the sample layout; finally Init() is called with the loaded values
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);

            size_t rows, cols;
            fstream >> rows >> cols;
            if (m_pMBLayout)    // some older files retained the #columns when saving, which is meaningless
                cols = 0;
-            m_imageLayout.LoadFromFile(fstream);
+            m_sampleLayout.Load(fstream);
            // m_isSparse is passed through unchanged; it is not read from fstream in this function
            Init(rows, cols, m_isSparse);
        }

@ -320,8 +320,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                VerifyDims(GetNumRows(), m_pMBLayout->GetNumCols());
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange &) override { }
-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange &) { }
        // both passes have empty bodies for this node
        // NOTE(review): the backprop declaration carries no 'override' (before and after the rename), unlike the forward one
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange &) override { }
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange &) { }

        virtual void DumpNodeInfo(const bool printValues, File& fstream) const override
        {
@ -358,7 +358,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // forwarding constructors: both pass their arguments to Base with 'false' as the last (isSparse) argument
        InputValue(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols) :
            Base(deviceId, name, rows, cols, false)
        { }
-        InputValue(DEVICEID_TYPE deviceId, const wstring & name, const ImageLayout & imageLayout, size_t numImages) :
+        InputValue(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & imageLayout, size_t numImages) :
            Base(deviceId, name, imageLayout, numImages, false)
        { }
        InputValue(const ScriptableObjects::IConfigRecordPtr configp) :
@ -387,7 +387,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // forwarding constructors: both pass their arguments to Base with 'true' as the last (isSparse) argument
        SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name, size_t rows, size_t cols) :
            Base(deviceId, name, rows, cols, true)
        { }
-        SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name, const ImageLayout & imageLayout, size_t numImages) :
+        SparseInputValue(DEVICEID_TYPE deviceId, const wstring & name, const TensorShape & imageLayout, size_t numImages) :
            Base(deviceId, name, imageLayout, numImages, true)
        { }
        SparseInputValue(const ScriptableObjects::IConfigRecordPtr configp) :
@ -414,7 +414,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & t) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & t) override
        {
            if (inputIndex == 0)        // left derivative (embedding matrix)
            {
@ -422,18 +422,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                Matrix<ElemType> sliceInput1Value = Inputs(1)->MaskedValueSlice(t);
                Matrix<ElemType> sliceOutputGrad = MaskedGradientSlice(t);

-                ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
+                BackpropToLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
            }
            else if (inputIndex == 1)   // right derivative (input)
            {
                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(t);
                Matrix<ElemType> sliceOutputGrad = GradientSlice(t);

-                ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
+                BackpropToRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
            }
        }

-        /*TODO: merge with call site*/void ComputeInputPartialLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& gradientValues)  
+        /*TODO: merge with call site*/void BackpropToLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& gradientValues)  
        {
            size_t rows1 = inputFunctionValues.GetNumRows(), cols1 = inputFunctionValues.GetNumCols();
            size_t rowsp = gradientValues.GetNumRows(), colsp = gradientValues.GetNumCols();
@ -448,8 +448,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            gradientValues.Reshape(rowsp, colsp);
        }

-        /*TODO: merge with call site*/void ComputeInputPartialRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& gradientValues)  
-            {
+        /*TODO: merge with call site*/void BackpropToRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& gradientValues)  
+        {
            size_t rows1 =inputGradientValues.GetNumRows(), cols1 = inputGradientValues.GetNumCols();
            size_t rowsp = gradientValues.GetNumRows(), colsp = gradientValues.GetNumCols();
            int wordsInEachSample = rows1 / inputFunctionValues.GetNumCols();
@ -463,7 +463,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            gradientValues.Reshape(rowsp, colsp);
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & t) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & t) override
        {
            // input0 is the weight (each column is an embedding of one word), input 1 contains m_bnrLooked words in each column (sample)
            Matrix<ElemType> functionValues = ValueSlice(t);
@ -522,7 +522,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                SetDims(nInput, nOutput);
                UpdateFunctionValuesSize();

-                EvaluateThisNode(FrameRange(m_pMBLayout));
+                ForwardProp(FrameRange(m_pMBLayout));

                /// check with expected values
                FunctionValues().TransferFromDeviceToDevice(m_deviceId, CPUDEVICE, true);
@ -541,7 +541,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                    Inputs(i)->GradientValues().SetValue(0);
                }
                for (size_t i = 0; i < 2; i++)
-                    ComputeInputPartial(i, FrameRange(m_pMBLayout));
+                    BackpropTo(i, FrameRange(m_pMBLayout));

                // check with expected values
                if (!ISCLOSE(Inputs(1)->GradientValues()(0, 0), 2, EPSILON) /// bi
@ -599,14 +599,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_gradientValues->SetValue(0.0f);
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
            // Init(1, 1) runs BEFORE the base class reads anything from fstream
            Init(1, 1); // TODO: this looks wrong; should the dimension not come from the loaded model data?
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
        }

        /// to-do: need to change to the new way of resetting state
-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            if (inputIndex > 0)
                InvalidArgument("PairNetwork operation only takes one input.");
@ -614,9 +614,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Matrix<ElemType>::ScaleAndAdd(1.0, GradientValues(), Inputs(inputIndex)->GradientValues());
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            assert(m_functionValues->GetNumRows() == GradientValues().GetNumRows()); // original used m_functionValues->GetNumRows() for loop dimension
            assert(m_pMBLayout);

@ -624,14 +624,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Matrix<ElemType>::ScaleAndAdd(1.0, GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout)), mTmp);
        }

-        void EvaluateThisNodeMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
-            m_functionValues->SetValue(Inputs(0)->FunctionValues());
-        }
-
-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
-        {
-            //if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
            // the whole-matrix EvaluateThisNodeMap() helper is deleted by this hunk, not renamed
            // sets mTmp (from ValueSlice(frameRange)) to input 0's ValueSlice for the same frameRange;
            // presumably mTmp aliases this node's value storage -- TODO confirm against ValueSlice()
            Matrix<ElemType> mTmp = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            mTmp.SetValue(Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout)));
        }
--- a/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/LinearAlgebraNodes.h
@ -40,7 +40,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            Matrix<ElemType> gradientValues = GradientSlice(frameRange);
            Matrix<ElemType> functionValues = ValueSlice(frameRange);
@ -100,7 +100,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #endif
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override  
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override  
        {
            Matrix<ElemType> functionValues = ValueSliceToDense(frameRange, false); // Switch to dense as a work-around because ColumnSlice doesn't support all the sparse formats
            Matrix<ElemType> inputFunctionValues0 = Inputs(0)->ValueSlice(frameRange.AllowBroadcast());
@ -190,7 +190,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            Matrix<ElemType> gradientValues = GradientSlice(frameRange);
            Matrix<ElemType> functionValues = ValueSlice(frameRange);
@ -232,7 +232,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                LogicError("%ls %ls operation's Validate() function let invalid dimensions slip by.", NodeName().c_str(), OperationName().c_str());
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            Matrix<ElemType> functionValues = ValueSlice(frameRange);
            Matrix<ElemType> inputFunctionValues0 = Inputs(0)->ValueSlice(frameRange.AllowBroadcast());
@ -302,7 +302,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            if (inputIndex == 0)        // left derivative
            {
@ -317,7 +317,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override  
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override  
        {
            //ValueSlice(frameRange).AssignProductOf(Inputs(0)->FunctionValues().Get00Element(), Inputs(1)->ValueSlice(frameRange));
            ValueSlice(frameRange).Assign1x1ProductOf(Inputs(0)->FunctionValues()/*1x1*/, Inputs(1)->ValueSlice(frameRange));
@ -361,12 +361,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
        {
            // subtracts this node's gradient slice from input 0's gradient slice; the input index parameter is unnamed and unused
            Inputs(0)->GradientSlice(frameRange) -= GradientSlice(frameRange);
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override 
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override 
        {
            // presumably stores (0 - input 0's value slice) into this node's value slice -- confirm AssignDifferenceOf() argument order
            ValueSlice(frameRange).AssignDifferenceOf(0, Inputs(0)->ValueSlice(frameRange));
        }
@ -398,7 +398,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            if (inputIndex == 0)    // left derivative
            {
@ -421,7 +421,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            size_t rows0 = Inputs(0)->GetNumRows(), cols1 = Inputs(1)->GetNumCols();
            VerifyDims(rows0, cols1);
@ -478,7 +478,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            InferImageDimsFromInput(1, false); //the second one is the input since it's columnwise

            //after multiplication the structure is lost
-            m_imageLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
+            m_sampleLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
        }

        virtual void AllocateGradientMatricesForChildren(MatrixPool& matrixPool) override
@ -516,7 +516,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            if (inputIndex == 0)  //left derivative
            {
@ -524,18 +524,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                Matrix<ElemType> sliceOutputGrad = MaskedGradientSlice(frameRange);
                Matrix<ElemType> sliceInput1Value = Inputs(1)->MaskedValueSlice(frameRange);

-                ComputeInputPartialLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
+                BackpropToLeft(sliceInput1Value, Inputs(0)->GradientValues(), sliceOutputGrad);
            }
            else  //right derivative
            {
                Matrix<ElemType> sliceInput1Grad = Inputs(1)->GradientSlice(frameRange);
                Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);

-                ComputeInputPartialRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
+                BackpropToRight(Inputs(0)->FunctionValues(), sliceInput1Grad, sliceOutputGrad);
            }
        }

-        /*TODO: merge with call site*/void ComputeInputPartialLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
+        /*TODO: merge with call site*/void BackpropToLeft(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
        {
 #if DUMPOUTPUT
            gradientValues.Print("Gradient-in");
@ -554,7 +554,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #endif
        }

-        /*TODO: merge with call site*/void ComputeInputPartialRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
+        /*TODO: merge with call site*/void BackpropToRight(Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
        {
 #if DUMPOUTPUT
            gradientValues.Print("Gradient-in");
@ -568,7 +568,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #endif
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
@ -607,7 +607,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            InferImageDimsFromInput(1, false); //the second one is the input since it's column wize

            //after multiplication the structure is lost
-            m_imageLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
+            m_sampleLayout = ImageLayoutWHC(1, Inputs(0)->GetNumRows(), 1);
        }
    };

@ -629,7 +629,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange);
            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);
@ -641,13 +641,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            sliceInput0Grad.AddElementProductOf(sliceOutputGrad, sliceInput1Value);
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override  
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override  
        {
            // slices both inputs and the output by frameRange, then calls AssignElementProductOf on the output slice
            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);

-            //EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
+            //ForwardPropS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
            sliceOutputValue.AssignElementProductOf(sliceInput0Value, sliceInput1Value);
        }

@ -683,24 +683,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            // whole-matrix variant (no frame range): rejects inputIndex > 1, then dispatches to the left/right helper.
            // Each helper receives the OTHER input's function values, the selected input's gradient,
            // this node's gradient, and m_tempMatrix.
            if (inputIndex > 1)
                InvalidArgument("RowElementTimes operation only takes two inputs.");

            if (inputIndex == 0)
            {
-                ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), *m_tempMatrix);
+                BackpropToLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), *m_tempMatrix);
            }
            else
            {
-                ComputeInputPartialRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), *m_tempMatrix);
+                BackpropToRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), *m_tempMatrix);
            }
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            Matrix<ElemType> sliceInput0Grad = Inputs(inputIndex)->GradientSlice(frameRange);
            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);

@ -708,16 +708,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            if (inputIndex == 0)
            {
-                ComputeInputPartialLeftS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
+                BackpropToLeftS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
            }
            else
            {
-                ComputeInputPartialRightS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
+                BackpropToRightS(sliceInput1Value, sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
            }
        }

        //left (input 0) is a matrix
-        /*TODO: merge with call site*/void ComputeInputPartialLeftS(Matrix<ElemType>& input1FunctionValues,
+        /*TODO: merge with call site*/void BackpropToLeftS(Matrix<ElemType>& input1FunctionValues,
            Matrix<ElemType>& input0GradientValues, 
            const Matrix<ElemType>& gradientValues, 
            Matrix<ElemType>& tempMatrix)
@ -732,7 +732,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }

        //right (input 1) is a row vector
-        /*TODO: merge with call site*/void ComputeInputPartialRightS(Matrix<ElemType>& input0FunctionValues, 
+        /*TODO: merge with call site*/void BackpropToRightS(Matrix<ElemType>& input0FunctionValues, 
            Matrix<ElemType>& input1GradientValues, 
            const Matrix<ElemType>& gradientValues, 
            Matrix<ElemType>& tempMatrix)
@ -744,22 +744,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            input1GradientValues.HasNan("RowElementTimes");
 #endif
        }
-        void EvaluateThisNodeMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
+        void ForwardPropMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
        {
            // whole-matrix variant: passes the full FunctionValues() of this node and of both inputs to the S helper
            // NOTE(review): the only call to this helper visible in this hunk is commented out
-            EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
+            ForwardPropS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
-            //if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
+            //if (frameRange.IsAllFrames()) { ForwardPropMap(); return; }
            // the all-frames shortcut above is disabled; both inputs and the output are always sliced by frameRange
            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);

-            EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
+            ForwardPropS(sliceOutputValue, sliceInput0Value, sliceInput1Value);
        }

-        /*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
+        /*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
        {
            functionValues.SetValue(input0);
            functionValues.RowElementMultiplyWith(input1);
@ -825,41 +825,41 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            // whole-matrix variant (no frame range): rejects inputIndex > 1, then dispatches to the left/right helper.
            // Each helper receives the OTHER input's function values, the selected input's gradient,
            // this node's gradient, and m_tempMatrix.
            if (inputIndex > 1)
                InvalidArgument("ColumnElementTimes operation only takes two inputs.");

            if (inputIndex == 0)
            {
-                ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), *m_tempMatrix);
+                BackpropToLeftS(Inputs(1)->FunctionValues(), Inputs(0)->GradientValues(), GradientValues(), *m_tempMatrix);
            }
            else
            {
-                ComputeInputPartialRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), *m_tempMatrix);
+                BackpropToRightS(Inputs(0)->FunctionValues(), Inputs(1)->GradientValues(), GradientValues(), *m_tempMatrix);
            }
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            // per-frame-range path: only this node's gradient and input 0's data are sliced by frameRange;
            // input 1's FunctionValues()/GradientValues() are passed whole in both branches
            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);

            if (inputIndex == 0)
            {
                Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange);

-                ComputeInputPartialLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
+                BackpropToLeftS(Inputs(1)->FunctionValues(), sliceInput0Grad, sliceOutputGrad, *m_tempMatrix);
            }
            else
            {
                Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
-                ComputeInputPartialRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, *m_tempMatrix);
+                BackpropToRightS(sliceInput0Value, Inputs(1)->GradientValues(), sliceOutputGrad, *m_tempMatrix);
            }
        }

        //left (input 0) is a matrix
-        /*TODO: merge with call site*/void ComputeInputPartialLeftS(Matrix<ElemType>& input1FunctionValues,
+        /*TODO: merge with call site*/void BackpropToLeftS(Matrix<ElemType>& input1FunctionValues,
            Matrix<ElemType>& input0GradientValues,
            const Matrix<ElemType>& gradientValues,
            Matrix<ElemType>& tempMatrix)
@ -874,7 +874,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }

        //right (input 1) is a col vector
-        /*TODO: merge with call site*/void ComputeInputPartialRightS(Matrix<ElemType>& input0FunctionValues,
+        /*TODO: merge with call site*/void BackpropToRightS(Matrix<ElemType>& input0FunctionValues,
            Matrix<ElemType>& input1GradientValues,
            const Matrix<ElemType>& gradientValues,
            Matrix<ElemType>& tempMatrix)
@ -886,21 +886,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            input1GradientValues.HasNan("ColumnElementTimes");
 #endif
        }
-        void EvaluateThisNodeMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
+        void ForwardPropMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
        {
            // whole-matrix variant: passes the full FunctionValues() of this node and of both inputs to the S helper
            // NOTE(review): the only call to this helper visible in this hunk is commented out
-            EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
+            ForwardPropS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues());
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
-            //if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
+            //if (frameRange.IsAllFrames()) { ForwardPropMap(); return; }
            // the all-frames shortcut above is disabled; input 0 and the output are sliced by frameRange,
            // while input 1's FunctionValues() is passed whole
            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);

-            EvaluateThisNodeS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
+            ForwardPropS(sliceOutputValue, sliceInput0Value, Inputs(1)->FunctionValues());
        }

-        /*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
+        /*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& input0, const Matrix<ElemType>& input1)
        {
            functionValues.SetValue(input0);
            functionValues.ColumnElementMultiplyWith(input1);
@ -974,7 +974,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            if (inputIndex == 0)    // left derivative
            {
@ -993,20 +993,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        ///*TODO: merge with call site*/void ComputeInputPartialLeft(Matrix<ElemType>& temp, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)  
+        ///*TODO: merge with call site*/void BackpropToLeft(Matrix<ElemType>& temp, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)  
        //{
        //    temp.AssignInnerProductOf(gradientValues, inputFunctionValues, false);
        //    inputGradientValues += temp;
        //}
        //
-        ///*TODO: merge with call site*/void ComputeInputPartialRight(Matrix<ElemType>& temp, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)  
+        ///*TODO: merge with call site*/void BackpropToRight(Matrix<ElemType>& temp, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)  
        //{
        //    temp.SetValue(gradientValues);
        //    temp.ColumnElementMultiplyWith(inputFunctionValues);
        //    inputGradientValues += temp;
        //}

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override  
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override  
        {
            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
@ -1094,13 +1094,13 @@ private:
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
        {
            // BUGBUG: In the future we may want to allow this to operate on a scalar that is one step of an outer time loop.
            Inputs(0)->GradientSlice(frameRange) += GradientValues(); // here the assumption is that gradientValues are 1x1 matrix
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            FunctionValues().AssignSumOfElements(Inputs(0)->MaskedValueSlice(frameRange));  // since we are reducing over frames, we must first mask gaps in input to zero
        }
@ -1118,7 +1118,7 @@ private:
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }
    };

@ -1141,7 +1141,7 @@ private:
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
        {
            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange);
            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);
@ -1149,12 +1149,12 @@ private:
            sliceInputGrad += sliceOutputGrad; // here the assumption is that gradientValues is a row vector
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange);
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);

-            //EvaluateThisNodeS(sliceOutputValue, sliceInputValue);
+            //ForwardPropS(sliceOutputValue, sliceInputValue);
            Matrix<ElemType>::VectorSum(sliceInputValue, sliceOutputValue, true);
        }

@ -1171,7 +1171,7 @@ private:
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }
    };

@ -1194,7 +1194,7 @@ private:
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNodeNonLooping::*/ComputeInputPartialNonLooping(size_t /*inputIndex*/) override
+        virtual void /*ComputationNodeNonLooping::*/BackpropToNonLooping(size_t /*inputIndex*/) override
        {
            Matrix<ElemType>& inputGradientValues = Inputs(0)->GradientValues();
            const Matrix<ElemType>& gradientValues = GradientValues();
@ -1210,7 +1210,7 @@ private:
 #endif
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
 #if DUMPOUTPUT
            Inputs(0)->FunctionValues().Print("TransposeNode- Input0");
@ -1242,7 +1242,7 @@ private:
            InferImageDimsFromInput(0, false); // the second one is the input since it's column wize

            // after transposition, the structure is lost
-            m_imageLayout = ImageLayoutWHC(1, Inputs(0)->GetNumCols(), 1);
+            m_sampleLayout = ImageLayoutWHC(1, Inputs(0)->GetNumCols(), 1);
        }
    };

@ -1277,7 +1277,7 @@ private:
        {
            InferImageDimsFromInput(0, true);

-            m_imageLayout = ImageLayoutWHC(1, m_imageLayout.GetHeight(), 1);
+            m_sampleLayout = ImageLayoutWHC(1, m_sampleLayout.GetHeight(), 1);

            if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
                fprintf(stderr, "WARNING: Diagonal operation cannot inherit image size information from its child. Image size info is lost.\n");
@ -1329,7 +1329,7 @@ private:
            InferImageDimsFromInputs();
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            Inputs(0)->FunctionValues().AssignDiagonalValuesTo(FunctionValues());
 #if NANCHECK
@ -1337,7 +1337,7 @@ private:
 #endif
        }

-        virtual void /*ComputationNodeNonLooping::*/ComputeInputPartialNonLooping(size_t /*inputIndex*/) override
+        virtual void /*ComputationNodeNonLooping::*/BackpropToNonLooping(size_t /*inputIndex*/) override
        {
            Matrix<ElemType>& inputGradientValues = Inputs(0)->GradientValues();
            const Matrix<ElemType>& gradientValues = GradientValues();
@ -1374,7 +1374,7 @@ private:
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            // functionValues, invNorm0, invNorm1 - output from the EvaluateNode() method
            // temp, rightTerm, leftTerm - temporary matrices
@ -1397,7 +1397,7 @@ private:
        }


-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override 
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override 
        {
            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
@ -1433,7 +1433,7 @@ private:
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

        virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1477,7 +1477,7 @@ private:
            ReleaseMatrixToPool(m_temp, matrixPool);
        }
 private:
-        // invNorm nodes tranfer data between EvaluateThisNode and ComputeInputPartial
+        // invNorm nodes transfer data between ForwardProp and BackpropTo
        shared_ptr<Matrix<ElemType>> m_invNorm0;
        shared_ptr<Matrix<ElemType>> m_invNorm1;
        // the rest are temporaries, values don't need to be maintained
@ -1504,7 +1504,7 @@ private:
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange);

@ -1524,7 +1524,7 @@ private:
            }
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override  
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override  
        {
            ValueSlice(frameRange).AssignKhatriRaoProductOf(Inputs(0)->ValueSlice(frameRange), Inputs(1)->ValueSlice(frameRange));
        }
@ -1553,12 +1553,12 @@ private:

        virtual void InferImageDimsFromInputs()  
        {
-            //since it's symmetrical any one of the input may be the true input. 
-            //since we dont' use the input image size info in the operation, the input part doesn't matter.
+            // since it's symmetrical, any one of the inputs may be the true input.
+            // since we don't use the input image size info in the operation, the input part doesn't matter.
            InferImageDimsFromInput(1, false); 

-            //after KhatriRaoProduct the structure is lost
-            m_imageLayout = ImageLayoutWHC(1, m_functionValues->GetNumRows(), 1);
+            // after KhatriRaoProduct the structure is lost
+            m_sampleLayout = ImageLayoutWHC(1, m_functionValues->GetNumRows(), 1);
        }
    };

@ -1580,31 +1580,31 @@ private:
            Base(deviceId, name)
        { }

-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            if (inputIndex > 1)
                InvalidArgument("CosDistanceWithNegativeSamples operation only takes grdients on the first two inputs.");

-            ComputeInputPartialS(inputIndex, *m_invNorm0, *m_invNorm1, FunctionValues(), *m_temp, *m_rightTerm, *m_leftTerm, *m_invNormSquare, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), Inputs(inputIndex)->GradientValues(), GradientValues());
+            BackpropToS(inputIndex, *m_invNorm0, *m_invNorm1, FunctionValues(), *m_temp, *m_rightTerm, *m_leftTerm, *m_invNormSquare, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), Inputs(inputIndex)->GradientValues(), GradientValues());
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);
            Matrix<ElemType> sliceInputGrad = Inputs(inputIndex)->GradientSlice(frameRange);
            Matrix<ElemType> sliceThisGrad = GradientSlice(frameRange);

-            ComputeInputPartialS(inputIndex, *m_invNorm0, *m_invNorm1, sliceOutputValue, *m_temp, *m_rightTerm, *m_leftTerm, *m_invNormSquare, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), sliceInputGrad, sliceThisGrad);
+            BackpropToS(inputIndex, *m_invNorm0, *m_invNorm1, sliceOutputValue, *m_temp, *m_rightTerm, *m_leftTerm, *m_invNormSquare, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), sliceInputGrad, sliceThisGrad);
        }

        // functionValues, invNorm0, invNorm1 - output from the EvaluateNode() method
        // temp, rightTerm, leftTerm - temporary matrices
        // in0, in1, in2, in3 - input functionValues from other nodes
        // inputGradientValues(x) - gradients to update, where x matches inputIndex
-        /*TODO: merge with call site*/void ComputeInputPartialS(const size_t inputIndex, const Matrix<ElemType>& invNorm0, const Matrix<ElemType>& invNorm1, const Matrix<ElemType>& functionValues,
+        /*TODO: merge with call site*/void BackpropToS(const size_t inputIndex, const Matrix<ElemType>& invNorm0, const Matrix<ElemType>& invNorm1, const Matrix<ElemType>& functionValues,
            Matrix<ElemType>& temp, Matrix<ElemType>& rightTerm, Matrix<ElemType>& leftTerm, Matrix<ElemType>& invNormSquare, // the temporary variables
            const Matrix<ElemType>& in0, const Matrix<ElemType>& in1, const Matrix<ElemType>& in2, const Matrix<ElemType>& in3,
            Matrix<ElemType>& inputGradientValues, Matrix<ElemType>& thisGradientValues)
@ -1701,22 +1701,22 @@ private:
            }
        }

-        void EvaluateThisNodeMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
+        void ForwardPropMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
        {
-            EvaluateThisNodeS(*m_invNorm0, *m_invNorm1, FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), *m_leftTerm, *m_rightTerm);
+            ForwardPropS(*m_invNorm0, *m_invNorm1, FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), *m_leftTerm, *m_rightTerm);
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
-            //if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
+            //if (frameRange.IsAllFrames()) { ForwardPropMap(); return; }
            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange);
            Matrix<ElemType> sliceInput1Value = Inputs(1)->ValueSlice(frameRange);
            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange);

-            EvaluateThisNodeS(*m_invNorm0, *m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), *m_leftTerm, *m_rightTerm);
+            ForwardPropS(*m_invNorm0, *m_invNorm1, sliceOutputValue, sliceInput0Value, sliceInput1Value, Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), *m_leftTerm, *m_rightTerm);
        }

-        /*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& invNorm0, Matrix<ElemType>& invNorm1, Matrix<ElemType>& functionValues, Matrix<ElemType>& in0, Matrix<ElemType>& in1, Matrix<ElemType>& in2, Matrix<ElemType>& in3, Matrix<ElemType>& leftTermTemp, Matrix<ElemType>& rightTermTemp)
+        /*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& invNorm0, Matrix<ElemType>& invNorm1, Matrix<ElemType>& functionValues, Matrix<ElemType>& in0, Matrix<ElemType>& in1, Matrix<ElemType>& in2, Matrix<ElemType>& in3, Matrix<ElemType>& leftTermTemp, Matrix<ElemType>& rightTermTemp)
        {
            invNorm0.AssignVectorNorm2Of(in0, true); // seems to modify input (in0)
            invNorm0.AssignElementInverseOf(invNorm0);
@ -1779,7 +1779,7 @@ private:
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

        virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1826,7 +1826,7 @@ private:
            ReleaseMatrixToPool(m_temp, matrixPool);
        }
 private:
-        // invNorm nodes tranfer data between EvaluateThisNode and ComputeInputPartial
+        // invNorm nodes transfer data between ForwardProp and BackpropTo
        shared_ptr<Matrix<ElemType>> m_invNorm0;
        shared_ptr<Matrix<ElemType>> m_invNorm1;
        shared_ptr<Matrix<ElemType>> m_leftTerm;
--- a/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/NonlinearityNodes.h
@ -43,24 +43,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }

        // TODO: with FrameRange, this code has now been reduced so much that there is no need to have these overrides here; they can just be implemented in the derived classes directly.
-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            assert(inputIndex == 0); inputIndex;
            auto gradient = Inputs(0)->GradientSlice(frameRange);
-            ComputeInputPartialV(*m_gradient, Inputs(0)->ValueSlice(frameRange), gradient, GradientSlice(frameRange));
+            BackpropToV(*m_gradient, Inputs(0)->ValueSlice(frameRange), gradient, GradientSlice(frameRange));
        }

        // derived class implement the actual non-linear operation
-        virtual void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) = 0;
+        virtual void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) = 0;

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            auto values = ValueSlice(frameRange);
-            EvaluateThisNodeV(values, Inputs(0)->ValueSlice(frameRange));
+            ForwardPropV(values, Inputs(0)->ValueSlice(frameRange));
        }

        // derived class implement the actual non-linear operation
-        virtual void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) = 0;
+        virtual void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) = 0;

        virtual void /*ComputationNodeBase::*/Validate(bool isFinalValidationPass) override
        {
@ -111,7 +111,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            NonlinearityNodeBase<ElemType>(deviceId, name)
        { }

-        void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) override
+        void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) override
        {
            gradient.AssignLinearRectifierDerivativeOf(inputFunctionValues);
 #if DUMPOUTPUT
@ -123,7 +123,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #endif
        }

-        void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
+        void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
        {
            functionValues.AssignTruncateBottomOf(inputFunctionValues, 0);
 #if NANCHECK
@ -154,15 +154,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }

        // we should get rid of this code dup, need to unify the -V functions
-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            assert(inputIndex == 0); inputIndex;
-            ComputeInputPartialS(*m_gradient, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
+            BackpropToS(*m_gradient, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            assert(inputIndex == 0); inputIndex;

            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -170,19 +170,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-            ComputeInputPartialS(*m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
+            BackpropToS(*m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
        }

        // should be:
-        /*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
+        /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
        // but is:
-        /*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
+        /*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
        {
            gradient.AssignSigmoidDerivativeOf(functionValues);
            inputGradientValues.AddElementProductOf(gradientValues, gradient);
        }

-        /*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
+        /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
        {
            functionValues.AssignSigmoidOf(inputFunctionValues);
 #if NANCHECK
@ -210,15 +210,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }

        // TODO: unify signature & get rid of code dup
-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            assert(inputIndex == 0); inputIndex;
-            ComputeInputPartialS(*m_gradient, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
+            BackpropToS(*m_gradient, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            assert(inputIndex == 0); inputIndex;

            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -226,13 +226,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-            ComputeInputPartialS(*m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
+            BackpropToS(*m_gradient, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
        }

        // should be:
-        /*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
+        /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
        // but is:
-        /*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
+        /*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
        {
            gradient.AssignElementProductOf(functionValues, functionValues); // v .* v
            gradient.AssignDifferenceOf(1, gradient); // 1-v^2
@ -240,7 +240,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            inputGradientValues.AddElementProductOf(gradientValues, gradient); // += d .* ((1-v) .* v))
        }

-        /*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
+        /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
        {
            functionValues.AssignTanhOf(inputFunctionValues);
 #if NANCHECK
@ -268,15 +268,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }

        // TODO: get rid of code dup
-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            assert(inputIndex == 0); inputIndex;
-            ComputeInputPartialS(*m_gradient, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), GradientValues());
+            BackpropToS(*m_gradient, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), GradientValues());
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            assert(inputIndex == 0); inputIndex;

            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -284,20 +284,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-            ComputeInputPartialS(*m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
+            BackpropToS(*m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
        }

        // should be:
-        /*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
+        /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
        // but is:
-        /*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& gradientValues)
+        /*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& gradientValues)
        {
            gradient.AssignElementInverseOf(inputFunctionValues); // 1/x (x is input to log(x))

            inputGradientValues.AddElementProductOf(gradientValues, gradient);
        }

-        /*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
+        /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
        {
            functionValues.AssignLogOf(inputFunctionValues);
 #if NANCHECK
@ -324,7 +324,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            NonlinearityNodeBase<ElemType>(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            assert(inputIndex == 0); inputIndex;

@ -335,9 +335,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_gradient->AssignExpOf(sliceInputValue); // Exp(x) is its own partial
            sliceInputGrad.AddElementProductOf(sliceOutputGrad, *m_gradient);
        }
-        virtual void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { NOT_IMPLEMENTED; }   // not needed
+        virtual void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { NOT_IMPLEMENTED; }   // not needed

-        void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
+        void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues) override
        {
            functionValues.AssignExpOf(inputFunctionValues);
 #if NANCHECK
@ -365,15 +365,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }

        // TODO: code dup
-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            assert(inputIndex == 0); inputIndex;
-            ComputeInputPartialS(*m_gradient, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), GradientValues());
+            BackpropToS(*m_gradient, Inputs(0)->GradientValues(), Inputs(0)->FunctionValues(), GradientValues());
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            assert(inputIndex == 0); inputIndex;

            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -381,19 +381,19 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            Matrix<ElemType> sliceInputValue = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-            ComputeInputPartialS(*m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
+            BackpropToS(*m_gradient, sliceInputGrad, sliceInputValue, sliceOutputGrad);
        }

        // should be:
-        /*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
+        /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
        // but is:
-        /*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& gradientValues)
+        /*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& gradientValues)
        {
            gradient.AssignNegativeSineOf(inputFunctionValues); // -sin(x) (x is input to Cosine(x))
            inputGradientValues.AddElementProductOf(gradientValues, gradient);
        }

-        /*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
+        /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
        {
            functionValues.AssignCosineOf(inputFunctionValues);
 #if NANCHECK
@ -423,15 +423,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }

        // TODO: code dup
-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            assert(inputIndex == 0); inputIndex;
-            ComputeInputPartialS(*m_gradient, *m_diff, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
+            BackpropToS(*m_gradient, *m_diff, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            assert(inputIndex == 0); inputIndex;

            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -439,13 +439,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-            ComputeInputPartialS(*m_gradient, *m_diff, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
+            BackpropToS(*m_gradient, *m_diff, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
        }

        // should be:
-        /*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
+        /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
        // but is:
-        /*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& diff, Matrix<ElemType>& inputGradientValues,
+        /*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& diff, Matrix<ElemType>& inputGradientValues,
            const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
        {
            gradient.AssignInnerProductOf(gradientValues, functionValues, true);
@ -454,7 +454,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            inputGradientValues.AddElementProductOf(diff, functionValues);
        }

-        /*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
+        /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
        {
            functionValues.AssignLogSoftmaxOf(inputFunctionValues, true);
            functionValues.InplaceExp();
@ -513,15 +513,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }

        // TODO: code dup
-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            assert(inputIndex == 0); inputIndex;
-            ComputeInputPartialS(*m_gradient, *m_softmax, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
+            BackpropToS(*m_gradient, *m_softmax, Inputs(0)->GradientValues(), GradientValues(), FunctionValues());
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            assert(inputIndex == 0); inputIndex;

            Matrix<ElemType> sliceInputGrad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
@ -529,13 +529,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            Matrix<ElemType> sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-            ComputeInputPartialS(*m_gradient, *m_softmax, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
+            BackpropToS(*m_gradient, *m_softmax, sliceInputGrad, sliceOutputGrad, sliceOutputValue);
        }

        // should be:
-        /*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
+        /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) { gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  LogicError("wrong signature :( need to unify code more"); }
        // but is:
-        /*virtual*/ void ComputeInputPartialS(Matrix<ElemType>& gradient, Matrix<ElemType>& softmax, Matrix<ElemType>& inputGradientValues,
+        /*virtual*/ void BackpropToS(Matrix<ElemType>& gradient, Matrix<ElemType>& softmax, Matrix<ElemType>& inputGradientValues,
            const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& functionValues)
        {
            softmax.AssignExpOf(functionValues);
@ -544,7 +544,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Matrix<ElemType>::AddScaledDifference(1.0, gradientValues, softmax, inputGradientValues);
        }

-        /*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
+        /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
        {
            functionValues.AssignLogSoftmaxOf(inputFunctionValues, true);
 #if NANCHECK
@ -602,30 +602,30 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            ComputationNode<ElemType>(deviceId, name)
        { }

-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            switch (inputIndex)
            {
            case 0:
-                ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), GradientValues(), *m_prior, *m_posterior, *m_temp);
+                BackpropToUnnormedPrior(Inputs(0)->GradientValues(), GradientValues(), *m_prior, *m_posterior, *m_temp);
                break;
            case 1:
-                ComputeInputPartialMean(Inputs(1)->GradientValues(), GradientValues(), *m_normedDeviationVectors, *m_posterior, *m_temp);
+                BackpropToMean(Inputs(1)->GradientValues(), GradientValues(), *m_normedDeviationVectors, *m_posterior, *m_temp);
                break;
            case 2:
-                ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), GradientValues(), *m_normedDeviation, *m_posterior, *m_temp);
+                BackpropToLogStddev(Inputs(2)->GradientValues(), GradientValues(), *m_normedDeviation, *m_posterior, *m_temp);
                break;
            case 3:
-                ComputeInputPartialFeature(Inputs(3)->GradientValues(), GradientValues(), *m_normedDeviationVectors, *m_posterior, *m_temp);
+                BackpropToFeature(Inputs(3)->GradientValues(), GradientValues(), *m_normedDeviationVectors, *m_posterior, *m_temp);
                break;
            default:
                InvalidArgument("GMMLogLikelihoodNode only takes four inputs.");
            }
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            //get the right slice 
            const size_t colsPrior = Inputs(0)->GetNumCols();

@ -637,12 +637,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            case 0:
            {
                if (colsPrior == 1)
-                        ComputeInputPartialUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, *m_prior, slicePosterior, *m_temp);
+                        BackpropToUnnormedPrior(Inputs(0)->GradientValues(), sliceGradientValue, *m_prior, slicePosterior, *m_temp);
                else
                {
                    Matrix<ElemType> sliceUnnormedPriorGradient = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
                        Matrix<ElemType> slicePrior = DataSlice(*m_prior, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
-                        ComputeInputPartialUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, *m_temp);
+                        BackpropToUnnormedPrior(sliceUnnormedPriorGradient, sliceGradientValue, slicePrior, slicePosterior, *m_temp);
                }
            }
            break;
@ -650,11 +650,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            {
                      Matrix<ElemType> sliceNormedDeviationVectors = DataSlice(*m_normedDeviationVectors, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
                if (colsPrior == 1)
-                        ComputeInputPartialMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
+                        BackpropToMean(Inputs(1)->GradientValues(), sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
                else
                {
                    Matrix<ElemType> sliceMeanGradient = Inputs(1)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
-                        ComputeInputPartialMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
+                        BackpropToMean(sliceMeanGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
                }
            }
            break;
@ -662,11 +662,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            {
                    Matrix<ElemType> sliceNormedDeviation = DataSlice(*m_normedDeviation, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
                if (colsPrior == 1)
-                        ComputeInputPartialLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, *m_temp);
+                        BackpropToLogStddev(Inputs(2)->GradientValues(), sliceGradientValue, sliceNormedDeviation, slicePosterior, *m_temp);
                else
                {
                    Matrix<ElemType> sliceLotStddevGradient = Inputs(2)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
-                    ComputeInputPartialLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, *m_temp);
+                    BackpropToLogStddev(sliceLotStddevGradient, sliceGradientValue, sliceNormedDeviation, slicePosterior, *m_temp);
                }
            }
            break;
@ -674,7 +674,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            {
                Matrix<ElemType> sliceNormedDeviationVectors = DataSlice(*m_normedDeviationVectors, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
                Matrix<ElemType> sliceFeatureGradient = Inputs(3)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
-                ComputeInputPartialFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
+                BackpropToFeature(sliceFeatureGradient, sliceGradientValue, sliceNormedDeviationVectors, slicePosterior, *m_temp);
            }
            break;
            default:
@ -682,7 +682,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        /*TODO: merge with call site*/void ComputeInputPartialUnnormedPrior(Matrix<ElemType>& unnormedPriorGradientValues, const Matrix<ElemType>& gradientValues,
+        /*TODO: merge with call site*/void BackpropToUnnormedPrior(Matrix<ElemType>& unnormedPriorGradientValues, const Matrix<ElemType>& gradientValues,
            const Matrix<ElemType>& prior, const Matrix<ElemType>& posterior, Matrix<ElemType>& temp)
        {
            temp.AssignDifferenceOf(posterior, prior);
@ -695,7 +695,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                RuntimeError("GMMLogLikelihoodNode: UnnormedPrior should either have same number of columns as the features or have only one column.");
        }

-        /*TODO: merge with call site*/void ComputeInputPartialMean(Matrix<ElemType>& meanGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviationVectors,
+        /*TODO: merge with call site*/void BackpropToMean(Matrix<ElemType>& meanGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviationVectors,
            Matrix<ElemType>& posterior, Matrix<ElemType>& temp)
        {
            size_t numComponent = posterior.GetNumRows();
@ -721,7 +721,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                RuntimeError("GMMLogLikelihoodNode: stddev should either have same number of columns as the features or have only one column.");
        }

-        /*TODO: merge with call site*/void ComputeInputPartialLogStddev(Matrix<ElemType>& logStddevGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviation,
+        /*TODO: merge with call site*/void BackpropToLogStddev(Matrix<ElemType>& logStddevGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviation,
            const Matrix<ElemType>& posterior, Matrix<ElemType>& temp)
        {
            size_t numComponent = posterior.GetNumRows();
@ -738,7 +738,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                RuntimeError("GMMLogLikelihoodNode: stddev should either have same number of columns as the features or have only one column.");
        }

-        /*TODO: merge with call site*/void ComputeInputPartialFeature(Matrix<ElemType>& featureGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviationVectors,
+        /*TODO: merge with call site*/void BackpropToFeature(Matrix<ElemType>& featureGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& normedDeviationVectors,
            Matrix<ElemType>& posterior, Matrix<ElemType>& temp)
        {
            size_t numComponent = posterior.GetNumRows();
@ -776,17 +776,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }

        //input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature
-        void EvaluateThisNodeMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
+        void ForwardPropMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
        {
            // all internal matrices will be automatically resized since all of them are assigned to a value so no resize is needed here.
-            EvaluateThisNodeS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(),
+            ForwardPropS(FunctionValues(), Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(),
                *m_prior, *m_stddev, *m_normedDeviationVectors, *m_normedDeviation, *m_posterior, *m_temp);
        }

        //input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature
-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
-            //if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
+            //if (frameRange.IsAllFrames()) { ForwardPropMap(); return; }
            size_t colsPrior = Inputs(0)->GetNumCols();
            size_t numSamples = Inputs(3)->GetNumCols();

@ -799,7 +799,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            if (colsPrior == 1)
            {
-                EvaluateThisNodeS(sliceOutputValue, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), sliceFeature,
+                ForwardPropS(sliceOutputValue, Inputs(0)->FunctionValues(), Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), sliceFeature,
                    *m_prior, *m_stddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, *m_temp);
            }
            else if (colsPrior == numSamples)
@ -811,7 +811,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                Matrix<ElemType> slicePrior = DataSlice(*m_prior, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
                Matrix<ElemType> sliceStddev = DataSlice(*m_stddev, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-                EvaluateThisNodeS(sliceOutputValue, sliceUnnormedPrior, sliceMean, sliceLogstddev, sliceFeature,
+                ForwardPropS(sliceOutputValue, sliceUnnormedPrior, sliceMean, sliceLogstddev, sliceFeature,
                    slicePrior, sliceStddev, sliceNormedDeviationVectors, sliceNormedDeviation, slicePosterior, *m_temp);
            }
            else  //should not reach the code since validation should fail already
@ -820,7 +820,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        //input0=unnormedPrior, input1=mean, input2=logstddev, input3=feature
        //If we want to speed up we need to replace following code with a several specialized GPU functions
-        /*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& unnormedPrior, const Matrix<ElemType>& mean, Matrix<ElemType>& logstddev,
+        /*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType>& functionValues, const Matrix<ElemType>& unnormedPrior, const Matrix<ElemType>& mean, Matrix<ElemType>& logstddev,
            const Matrix<ElemType>& feature, Matrix<ElemType>& prior, Matrix<ElemType>& stddev, Matrix<ElemType>& normedDeviationVectors,
            Matrix<ElemType>& normedDeviation, Matrix<ElemType>& posterior, Matrix<ElemType>& temp)
        {
@ -933,7 +933,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(3, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

        virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1004,16 +1004,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_randomSeed = (unsigned long)CreateUniqId();
        }

-        void ComputeInputPartialMap(const size_t inputIndex)
+        void BackpropToMap(const size_t inputIndex)
        {
            if (inputIndex > 0)
                InvalidArgument("Dropout operation only takes one input.");
-            ComputeInputPartialS(m_dropoutRate, Inputs(0)->GradientValues(), *m_maskOfDropout, GradientValues());
+            BackpropToS(m_dropoutRate, Inputs(0)->GradientValues(), *m_maskOfDropout, GradientValues());
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
-            if (frameRange.IsAllFrames()) { ComputeInputPartialMap(inputIndex); return; } // TODO: remove these one by one
+            if (frameRange.IsAllFrames()) { BackpropToMap(inputIndex); return; } // TODO: remove these one by one
            Matrix<ElemType> sliceInput0Grad = Inputs(0)->GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            Matrix<ElemType> sliceOutputGrad = GradientSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

@ -1023,10 +1023,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                sliceMask = DataSlice(*m_maskOfDropout, frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            }

-            ComputeInputPartialS(m_dropoutRate, sliceInput0Grad, sliceMask, sliceOutputGrad);
+            BackpropToS(m_dropoutRate, sliceInput0Grad, sliceMask, sliceOutputGrad);
        }

-        /*TODO: merge with call site*/void ComputeInputPartialS(const double dropoutRate, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& maskOfDropout, const Matrix<ElemType>& gradientValues)
+        /*TODO: merge with call site*/void BackpropToS(const double dropoutRate, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& maskOfDropout, const Matrix<ElemType>& gradientValues)
        {
            if (dropoutRate > 0)
            {
@ -1038,13 +1038,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        void EvaluateThisNodeMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
+        void ForwardPropMap()    // TODO: This is a stop-gap; in most cases, we should just be able to delete this (but need to review one by one)
        {
-            EvaluateThisNodeS(m_dropoutRate, m_randomSeed, FunctionValues(), *m_maskOfDropout, Inputs(0)->FunctionValues());
+            ForwardPropS(m_dropoutRate, m_randomSeed, FunctionValues(), *m_maskOfDropout, Inputs(0)->FunctionValues());
        }
-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
-            //if (frameRange.IsAllFrames()) { EvaluateThisNodeMap(); return; }
+            //if (frameRange.IsAllFrames()) { ForwardPropMap(); return; }
            Matrix<ElemType> sliceInput0Value = Inputs(0)->ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));
            Matrix<ElemType> sliceOutputValue = Matrix <ElemType>();

@ -1058,10 +1058,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            sliceOutputValue = ValueSlice(frameRange/*TODO: delete this:*/.Check_t(GetNumParallelSequences(), m_pMBLayout));

-            EvaluateThisNodeS(m_dropoutRate, m_randomSeed, sliceOutputValue, sliceMask, sliceInput0Value);
+            ForwardPropS(m_dropoutRate, m_randomSeed, sliceOutputValue, sliceMask, sliceInput0Value);
        }

-        /*TODO: merge with call site*/void EvaluateThisNodeS(const double dropoutRate, unsigned long& randomSeed, Matrix<ElemType>& functionValues, Matrix<ElemType>& maskOfDropout, const Matrix<ElemType>& inputFunctionValues)
+        /*TODO: merge with call site*/void ForwardPropS(const double dropoutRate, unsigned long& randomSeed, Matrix<ElemType>& functionValues, Matrix<ElemType>& maskOfDropout, const Matrix<ElemType>& inputFunctionValues)
        {
            if (dropoutRate > 0)
            {
@ -1168,23 +1168,23 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void ComputeInputPartial(const size_t /*inputIndex*/)  //TODO: this is still needed?
+        virtual void BackpropTo(const size_t /*inputIndex*/)  //TODO: this is still needed?
        {
            LogicError("Hardmax is not differentiable and is used for evaluation only.");
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & /*frameRange*/) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & /*frameRange*/) override
        {
            LogicError("Hardmax is not differentiable and is used for evaluation only.");
        }

-        /*virtual*/ void ComputeInputPartialV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) 
+        /*virtual*/ void BackpropToV(Matrix<ElemType>& gradient, const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues) 
        { 
            gradient; inputFunctionValues;  inputGradientValues;  gradientValues;  
            LogicError("wrong signature :( need to unify code more"); 
        }

-        /*virtual*/ void EvaluateThisNodeV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
+        /*virtual*/ void ForwardPropV(Matrix<ElemType>& functionValues, const Matrix<ElemType>& inputFunctionValues)
        {
            //TODO: temp solution, we need to write a math function specifically for this
            functionValues.AssignHardmaxOf(inputFunctionValues, true);
--- a/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/RecurrentNodes.h
@ -129,9 +129,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_attachInputsFn = [](){ LogicError("LateAttachingNode::AttachInputs: must only be called once"); };
        }
    public:
-        void SaveToFile(File& fstream) const
+        void Save(File& fstream) const
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);

            fstream << m_timeStep;
            fstream << GetNumRows() << GetNumCols();
@ -139,10 +139,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            fstream << m_initialActivationValue;
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
            // the node has already been initialized e.g. w.r.t. direction and sequence flags
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);

            fstream >> m_timeStep;

@ -208,7 +208,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }
    public:

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            assert(inputIndex == 0); inputIndex;

@ -220,7 +220,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                // recursive call to ourselves
                FrameRangeIteration range(m_pMBLayout, -dir);
                for (auto t = range.rbegin(); t != range.rend(); t++)   // note: reverse iterator
-                    ComputeInputPartial(inputIndex, t);
+                    BackpropTo(inputIndex, t);
                return;
            }

@ -253,13 +253,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void OnEvaluateBeginIteration() override      // called before first iteration step of EvaluateThisNode()
+        virtual void OnEvaluateBeginIteration() override      // called before first iteration step of ForwardProp()
        {
            Base::OnEvaluateBeginIteration();
            CacheMBLayout();
        }

-        virtual void OnEvaluateEndIteration() override        // called after last iteration step of EvaluateThisNode()
+        virtual void OnEvaluateEndIteration() override        // called after last iteration step of ForwardProp()
        {
            // In BPTT, we carry over left-to-right state across minibatches.
            // It is kept in m_delayedActivation, m_delayedActivationMBLayout.
@ -282,7 +282,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

        // This function assumes OnEvaluateBegin/EndIteration() to be called before/after the iteration loop.
        // TODO: In the future, there may be value for one more way of handling the boundary condition: Fill as 'NoInput'. Then we can use this to implement rolling windows (albeit inefficiently). Would require to unshare the layout.
-        virtual void EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void ForwardProp(const FrameRange & frameRange) override
        {
            assert(m_pMBLayout);

@ -294,7 +294,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                // recursive call to ourselves
                FrameRangeIteration range(m_pMBLayout, -dir);
                for (auto t = range.begin(); t != range.end(); t++)
-                    EvaluateThisNode(t);
+                    ForwardProp(t);
                return;
            }

@ -381,7 +381,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            hist.TransferFromDeviceToDevice(m_deviceId, device, true);

            // need a layout as well
-            // EvaluateThisNode() expects it to have the same number of parallel sequences.
+            // ForwardProp() expects it to have the same number of parallel sequences.
            if (!m_delayedActivationMBLayout) m_delayedActivationMBLayout = make_shared<MBLayout>();
            m_delayedActivationMBLayout->Init(GetNumParallelSequences(), hist.GetNumCols() / GetNumParallelSequences());
        }
@ -631,16 +631,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
        }

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);
            fstream << m_inputDim << m_outputDim;
            fstream << m_DefaultState;
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
            if (modelVersion == 2)
                fstream >> m_inputDim >> m_outputDim;
            fstream >> m_DefaultState;
@ -672,7 +672,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            if (inputIndex > 4)
                InvalidArgument("LSTM operation only takes five inputs.");
@ -1063,7 +1063,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            size_t nT = Inputs(0)->GetNumCols();
            size_t outputDim = Inputs(1)->GetNumRows();
@ -1117,7 +1117,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

                    PrepareHistory(timeIdxInSeq, mSlicePrevOutput, mSlicePrevState, FunctionValues(), m_State, m_PastOutput, m_PastState, GetNumParallelSequences(), m_DefaultState, &m_pMBLayout->GetM());

-                    EvaluateThisNodeS(Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), Inputs(4)->FunctionValues(),
+                    ForwardPropS(Inputs(1)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(3)->FunctionValues(), Inputs(4)->FunctionValues(),
                            sliceObs, mSlicePrevOutput, mSlicePrevState, sliceOutput, sliceState, sliceGi, sliceGf, sliceGo, sliceTanhState, sliceTanhInput, m_tempMatrix);
                }

@ -1313,7 +1313,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #endif
        }

-        /*TODO: merge with call site*/void EvaluateThisNodeS(
+        /*TODO: merge with call site*/void ForwardPropS(
            const Matrix<ElemType>& mInputGate,
            const Matrix<ElemType> &mForgetGate, const Matrix<ElemType> &mOutputGate,
            const Matrix<ElemType> &mCellWgt,
@ -1490,7 +1490,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                SetDims(nOutput, nT);

                m_DefaultState = 0.0;
-                EvaluateThisNode(FrameRange(m_pMBLayout));
+                ForwardProp(FrameRange(m_pMBLayout));

                // check with expected values
                if (!ISCLOSE(FunctionValues()(0, 0), 0.0335975, EPSILON) ||
@ -1510,7 +1510,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                    Inputs(i)->GradientValues().SetValue(0);
                }
                for (size_t i = 0; i < 5; i++)
-                    ComputeInputPartial(i, FrameRange(m_pMBLayout));
+                    BackpropTo(i, FrameRange(m_pMBLayout));

                // check with expected values
                if (!ISCLOSE(Inputs(1)->GradientValues()(0, 0), 0.07843818, EPSILON) // bi
--- a/MachineLearning/CNTKComputationNetworkLib/ReshapingNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/ReshapingNodes.h
@ -129,7 +129,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    // -----------------------------------------------------------------------
    // ReshapeNode (input) -- reinterpret input matrix as having different dimensions
    // where the new row dimension is given, and the column dimension is inferred.
-    // Also optionally associate a different ImageLayout with the data.
+    // Also optionally associate a different TensorShape with the data.
    //
    // If input has no layout, then this reshapes the input matrix
    // from (rows x cols) to (newRows x (cols / newRows * rows)).
@ -149,13 +149,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
    //       E.g. ReinterpretRowStackAsSequence and ReinterpretSequenceAsRowStack.
    // BUGBUG: This is not actually implemented yet. Instead, it goes from 1 to K steps or from K to 1 step. This is temporary/experimental, until the plumbing for nesting is there.
    //
-    // Thirdly, ReshapeNode can also be used to update only the ImageLayout. In that case, the MBLayout is kept as is.
+    // Thirdly, ReshapeNode can also be used to update only the TensorShape. In that case, the MBLayout is kept as is.
    //
    // Note: The new row dimension must be a straight multiple or divisor of the current row dimension.
    // To reshape to a non-multiple go to row dim 1 first.
    //
    // Unlike most other nodes, this node has intimate inside knowledge of MBLayouts and frameRanges.
-    // TODO: Changing the ImageLayout does not seem to belong here.
+    // TODO: Changing the TensorShape does not seem to belong here.
    // -----------------------------------------------------------------------

    template<class ElemType>
@ -164,7 +164,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        typedef ReinterpretNodeBase<ElemType> Base; UsingReinterpretNodeBaseMembers;
        static const std::wstring TypeName() { return L"Reshape"; }
    public:
-        ReshapeNode(DEVICEID_TYPE deviceId, const wstring & name, size_t numRows = 0, const ImageLayout & imageLayout = ImageLayoutWHC(0,0,0)) :
+        ReshapeNode(DEVICEID_TYPE deviceId, const wstring & name, size_t numRows = 0, const TensorShape & imageLayout = ImageLayoutWHC(0,0,0)) :
            Base(deviceId, name),
            m_numTargetRows(numRows),
            m_targetImageLayout(imageLayout)
@ -186,18 +186,18 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);
            fstream << m_numTargetRows;
-            m_targetImageLayout.SaveToFile(fstream);
+            m_targetImageLayout.Save(fstream);
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
            fstream >> m_numTargetRows;
-            m_targetImageLayout.LoadFromFile(fstream);
+            m_targetImageLayout.Load(fstream);
        }

        virtual void InferImageDimsFromInputs()
@ -207,13 +207,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {

            if (m_targetImageLayout.GetWidth() == 0 || m_targetImageLayout.GetHeight() == 0 || m_targetImageLayout.GetNumChannels() == 0)
            {
-                m_imageLayout = ImageLayoutWHC(1, 1, m_numTargetRows);
+                m_sampleLayout = ImageLayoutWHC(1, 1, m_numTargetRows);
                if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
                    fprintf(stderr, "WARNING: Reshape operation cannot inherit image size information from its child. Image size info is lost.\n");
            }
            else
            {
-                m_imageLayout = m_targetImageLayout;
+                m_sampleLayout = m_targetImageLayout;
            }
        }

@ -251,7 +251,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }

            SetDims(m_numTargetRows, newCols);
-            if (factor() == 1)          // canonical case: no reshaping actually (e.g. only changing the ImageLayout)
+            if (factor() == 1)          // canonical case: no reshaping actually (e.g. only changing the TensorShape)
                m_pMBLayout = Inputs(0)->GetMBLayout();
            else if (Inputs(0)->HasMBLayout())
            {
@ -308,7 +308,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        // notes:
        //  - input and output have different time base and different layouts (unless the canonical case of factor() == 1)
        //  - frameRange refers to *functionValues*, not the inputs
-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            size_t rows = Inputs(0)->GetNumRows(), cols = Inputs(0)->GetNumCols();
            size_t newCols = cols * rows / m_numTargetRows;
@ -335,7 +335,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
        {
            size_t rows = Inputs(0)->GetNumRows(), cols = Inputs(0)->GetNumCols();
            size_t newCols = cols * rows / m_numTargetRows;
@ -359,7 +359,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        size_t m_numTargetRows;
        bool weStack() const { return m_numTargetRows > Inputs(0)->GetNumRows(); }        // do we stack (multiple frames into one)
        size_t factor() const { return m_numTargetRows > Inputs(0)->GetNumRows() ? m_numTargetRows / Inputs(0)->GetNumRows() : Inputs(0)->GetNumRows() / m_numTargetRows; }   // factor by which we stack or unstack
-        ImageLayout m_targetImageLayout;
+        TensorShape m_targetImageLayout;

        void InferImageDimensions()
        {
@ -437,13 +437,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
        {
            Inputs(0)->GradientSlice(frameRange.WithLayout(Inputs(0)->GetMBLayout())) += GradientSlice(frameRange);
            // TODO: Once we do in-place, the above must include a copy-to-self check (pay special attention to adding vs. copying).
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            // enforce compatibility of 'dataInput' with 'layoutInput'
            // TODO: how to deal with boundary flags?
@ -507,24 +507,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            node->m_sliceHeight = m_sliceHeight;
        }

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);
            fstream << m_startIndex << m_sliceHeight;
        }
        
-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
            fstream >> m_startIndex >> m_sliceHeight;
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
        {
            Inputs(0)->GradientSlice(frameRange).AddToRowSliceValuesOf(GradientSlice(frameRange), m_startIndex, m_sliceHeight);
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            ValueSlice(frameRange).AssignRowSliceValuesOf(Inputs(0)->ValueSlice(frameRange), m_startIndex, m_sliceHeight);
        }
@ -544,7 +544,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        virtual void InferImageDimsFromInputs()
        {
            InferImageDimsFromInput(0, true);
-            m_imageLayout = ImageLayoutWHC(m_imageLayout.GetWidth(), m_sliceHeight, m_imageLayout.GetNumChannels());
+            m_sampleLayout = ImageLayoutWHC(m_sampleLayout.GetWidth(), m_sliceHeight, m_sampleLayout.GetNumChannels());

            // warn that this node will destroy the image size information from the child
            if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
@ -584,12 +584,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t inputIndex, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t inputIndex, const FrameRange & frameRange) override
        {
            Inputs(inputIndex)->GradientSlice(frameRange).AddWithRowSliceValuesOf(GradientSlice(frameRange), m_startRowIndices[inputIndex], Inputs(inputIndex)->GetNumRows());
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            for (size_t inputIndex = 0; inputIndex < ChildrenSize(); inputIndex++)
                ValueSlice(frameRange).AssignToRowSliceValuesOf(Inputs(inputIndex)->ValueSlice(frameRange), m_startRowIndices[inputIndex], Inputs(inputIndex)->GetNumRows());
@ -622,7 +622,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        virtual void InferImageDimsFromInputs()
        {
            InferImageDimsFromInput(0, true);
-            m_imageLayout = ImageLayoutWHC(m_imageLayout.GetWidth(), GetNumRows(), m_imageLayout.GetNumChannels());
+            m_sampleLayout = ImageLayoutWHC(m_sampleLayout.GetWidth(), GetNumRows(), m_sampleLayout.GetNumChannels());

            // warn that this node will destroy the image size information from the child
            if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
@ -666,22 +666,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);
            fstream << m_numRepeat;
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
            fstream >> m_numRepeat;
        }

        virtual void InferImageDimsFromInputs()
        {
            InferImageDimsFromInput(0, true);
-            m_imageLayout = ImageLayoutWHC(m_imageLayout.GetWidth(), m_inputImageLayout.GetHeight() * m_numRepeat, m_imageLayout.GetNumChannels());
+            m_sampleLayout = ImageLayoutWHC(m_sampleLayout.GetWidth(), m_inputImageLayout.GetHeight() * m_numRepeat, m_sampleLayout.GetNumChannels());

            // warn that this node will destroy the image size information from the child
            if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
@ -727,13 +727,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            InferImageDimsFromInputs();
        }

-        virtual void /*ComputationNode::*/EvaluateThisNode(const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/ForwardProp(const FrameRange & frameRange) override
        {
            //if (!isNoop())    // if m_numRepeat == 1 then virtual FunctionValues() will return the child   --TODO: do this as an in-place optimization instead
            ValueSlice(frameRange).AssignRepeatOf(Inputs(0)->ValueSlice(frameRange), m_numRepeat, 1);
        }

-        virtual void /*ComputationNode::*/ComputeInputPartial(const size_t /*inputIndex*/, const FrameRange & frameRange) override
+        virtual void /*ComputationNode::*/BackpropTo(const size_t /*inputIndex*/, const FrameRange & frameRange) override
        {
            Inputs(0)->GradientSlice(frameRange).AddToRowRepeatValuesOf(GradientSlice(frameRange), m_numRepeat);
        }
--- a/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h
+++ b/MachineLearning/CNTKComputationNetworkLib/TrainingCriterionNodes.h
@ -34,7 +34,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
 #if 1
@ -53,7 +53,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_leftMinusRight->Resize(Inputs(0)->GetNumRows(), Inputs(0)->GetNumCols());
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            m_leftMinusRight->AssignDifferenceOf(Inputs(0)->ValueSlice(frameRange), Inputs(1)->ValueSlice(frameRange));
@ -75,7 +75,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }       

        virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -125,7 +125,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            // left input is scalar
@ -172,7 +172,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_softmaxOfRight->Resize(m_logSoftmaxOfRight->GetNumRows(), m_logSoftmaxOfRight->GetNumCols());
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override   //-sum(left_i * log(softmax_i(right)))
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override   //-sum(left_i * log(softmax_i(right)))
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            // first compute the softmax (column-wise)
@ -202,7 +202,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

        virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -250,28 +250,28 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            //left Node must be a scalar
            if (inputIndex == 0)  //left derivative
            {
-                ComputeInputPartialLeft(*m_logOfRight, Inputs(0)->GradientSlice(frameRange), GradientValues());
+                BackpropToLeft(*m_logOfRight, Inputs(0)->GradientSlice(frameRange), GradientValues());
            }
            else
            {
-                ComputeInputPartialRight(*m_leftDivRight, Inputs(0)->ValueSlice(frameRange), Inputs(1)->ValueSlice(frameRange), Inputs(1)->GradientSlice(frameRange), GradientValues());
+                BackpropToRight(*m_leftDivRight, Inputs(0)->ValueSlice(frameRange), Inputs(1)->ValueSlice(frameRange), Inputs(1)->GradientSlice(frameRange), GradientValues());
            }
        }

-        /*TODO: merge with call site*/void ComputeInputPartialLeft(const Matrix<ElemType>& logOfRight, Matrix<ElemType> inputGradientValues, 
+        /*TODO: merge with call site*/void BackpropToLeft(const Matrix<ElemType>& logOfRight, Matrix<ElemType> inputGradientValues, 
            const Matrix<ElemType>& gradientValues)  
        {
            //Matrix<ElemType>::ScaleAndAdd(-gradientValues.Get00Element(), logOfRight, inputGradientValues);
            Matrix<ElemType>::Multiply1x1AndWeightedAdd(-1.0f, gradientValues/*1x1*/, logOfRight, 1.0f, inputGradientValues);
        }

-        /*TODO: merge with call site*/void ComputeInputPartialRight(Matrix<ElemType>& leftDivRight, 
+        /*TODO: merge with call site*/void BackpropToRight(Matrix<ElemType>& leftDivRight, 
            const Matrix<ElemType> inputFunctionValues0, const Matrix<ElemType> inputFunctionValues1,
            Matrix<ElemType> inputGradientValues, const Matrix<ElemType>& gradientValues)
        {
@ -289,7 +289,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }

        //-sum(left_i * log(right_i))
-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            m_logOfRight->SetValue(Inputs(1)->ValueSlice(frameRange));
@ -313,7 +313,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

        virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -375,14 +375,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override // scale by number of cols (or samples)
+        virtual void BackpropToNonLooping(size_t inputIndex) override // scale by number of cols (or samples)
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            assert(inputIndex == 0); inputIndex;
-            ComputeInputPartialS(*m_gradientOfL1Norm, Inputs(0)->GradientSlice(frameRange), GradientValues(), Inputs(0)->ValueSlice(frameRange));
+            BackpropToS(*m_gradientOfL1Norm, Inputs(0)->GradientSlice(frameRange), GradientValues(), Inputs(0)->ValueSlice(frameRange));
        }

-        /*TODO: merge with call site*/void ComputeInputPartialS(Matrix<ElemType>& gradientOfL1Norm, 
+        /*TODO: merge with call site*/void BackpropToS(Matrix<ElemType>& gradientOfL1Norm, 
            Matrix<ElemType> inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& inputFunctionValues)  
        {
            gradientOfL1Norm.AssignSignOf(inputFunctionValues);
@ -395,7 +395,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            m_gradientOfL1Norm->Resize(Inputs(0)->GetNumRows(), Inputs(0)->GetNumCols());
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override  
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override  
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            VerifyDims(1, 1);
@ -414,7 +414,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

        virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -464,20 +464,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override // scale by number of cols (or samples)
+        virtual void BackpropToNonLooping(size_t inputIndex) override // scale by number of cols (or samples)
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            assert(inputIndex == 0); inputIndex;
-            ComputeInputPartialS(Inputs(0)->GradientSlice(frameRange), GradientValues(), Inputs(0)->ValueSlice(frameRange), FunctionValues());
+            BackpropToS(Inputs(0)->GradientSlice(frameRange), GradientValues(), Inputs(0)->ValueSlice(frameRange), FunctionValues());
        }

-        /*TODO: merge with call site*/void ComputeInputPartialS(Matrix<ElemType> inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& functionValues)  
+        /*TODO: merge with call site*/void BackpropToS(Matrix<ElemType> inputGradientValues, const Matrix<ElemType>& gradientValues, const Matrix<ElemType>& inputFunctionValues, const Matrix<ElemType>& functionValues)  
        {
            ElemType v = gradientValues.Get00Element() / (functionValues.Get00Element() + EPS_IN_INVERSE);  // TODO: GPU inefficiency
            inputGradientValues.AddWithScaleOf(v, inputFunctionValues);
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override  
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override  
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            VerifyDims(1,1);
@ -496,7 +496,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }
    };

@ -535,15 +535,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }
        // ^^ TODO: we can merge these two

-        virtual void SaveToFile(File& fstream) const override
+        virtual void Save(File& fstream) const override
        {
-            Base::SaveToFile(fstream);
+            Base::Save(fstream);
            fstream << m_evalMode;
        }

-        virtual void LoadFromFile(File& fstream, size_t modelVersion) override
+        virtual void Load(File& fstream, size_t modelVersion) override
        {
-            Base::LoadFromFile(fstream, modelVersion);
+            Base::Load(fstream, modelVersion);
            fstream >> m_evalMode;
            if (m_evalMode > NCEEvalMode::None)
            {
@ -558,14 +558,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        /**
        compute gradients to input observations, the weights to the observations, and the class log posterior probabilities
        */
-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            m_needRecomputeGradientToSoftmaxInput = false;
            //gradient computation@yinggongzhao
            //inputIndex should be 2 this time
            if (m_evalMode != NCEEvalMode::None)
-                LogicError("ComputeInputPartial should only be called in training mode");
+                LogicError("BackpropTo should only be called in training mode");
            if (inputIndex == 0)
                InvalidArgument("ComputeInput partial should not be called for label");
            //                                                                              samples+probs                   hidden                  embedding
@ -573,12 +573,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }

 #if 0   // TODO: delete this. Seems copy-paste leftover?
-        /*TODO: merge with call site*/void ComputeInputPartialRight(const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
+        /*TODO: merge with call site*/void BackpropToRight(const Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
        {
            Matrix<ElemType>::MultiplyAndAdd(inputFunctionValues, false, gradientValues, true, inputGradientValues);
        }

-        /*TODO: merge with call site*/void ComputeInputPartialLeft(const Matrix<ElemType>& obs, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
+        /*TODO: merge with call site*/void BackpropToLeft(const Matrix<ElemType>& obs, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
        {
            Matrix<ElemType>::MultiplyAndAdd(obs, false, gradientValues, false, inputGradientValues);
        }
@ -595,7 +595,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            // TODO (this does not really break it since for full matrices, class Matrix will resize by itself)
        }

-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override   //-sum(left_i * log(softmax_i(right)))
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override   //-sum(left_i * log(softmax_i(right)))
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            if (Inputs(0)->HasMBLayout() && Inputs(0)->GetMBLayout()->HasGaps())
@ -668,7 +668,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        virtual void InferImageDimsFromInputs()
        {
            InferImageDimsFromInput(0, false);
-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

    protected:
@ -720,7 +720,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        /**
        compute gradients to input observations, the weights to the observations, and the class log posterior probabilites
        */
-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            // this should never be called for input[0], which is controlled through the needGradient flag
            if (inputIndex != 1 && inputIndex != 2 && inputIndex != 3)
@ -824,10 +824,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }

        // -sum(left_i * log(softmax_i(right)))
-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            if (Inputs(0)->FunctionValues().GetDeviceId() != CPUDEVICE)
-                LogicError("ClassBasedCrossEntropyWithSoftmax (EvaluateThisNodeNonLooping()): The label matrix is not using CPU device. This will make computation slow, even though the label data is probably saved on GPU. Because of the external loop over time with explicit class id retrieved from the label matrix, the computation will be very slow if the label matrix is saved on GPU. However, this is only a constraint for label matrix and other matrices such as data are suggested to reside on GPU. ");
+                LogicError("ClassBasedCrossEntropyWithSoftmax (ForwardPropNonLooping()): The label matrix is not using CPU device. This will make computation slow, even though the label data is probably saved on GPU. Because of the external loop over time with explicit class id retrieved from the label matrix, the computation will be very slow if the label matrix is saved on GPU. However, this is only a constraint for label matrix and other matrices such as data are suggested to reside on GPU. ");

            // (the below is left-over from refactoring)
            Matrix<ElemType>& functionValues = FunctionValues();
@ -857,7 +857,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                size_t rgt_bnd = (size_t)lbl_t(3, 0);
                size_t nbr_wrd = (rgt_bnd - lft_bnd);   // number of words in the class
                if (nbr_wrd == 0)
-                    LogicError("ClassBasedCrossEntropyWithSoftmax (EvaluateThisNodeNonLooping()): Encountered a class of size 0. This sample seems to lack an NoInput flag.");
+                    LogicError("ClassBasedCrossEntropyWithSoftmax (ForwardPropNonLooping()): Encountered a class of size 0. This sample seems to lack an NoInput flag.");

                sz += nbr_wrd;
            }
@ -909,7 +909,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {

                // add  the word's class-conditional log posterior
                if (y_t < lft_bnd || y_t >= rgt_bnd)
-                    LogicError("ClassBasedCrossEntropyWithSoftmax (EvaluateThisNodeNonLooping()): Word index out of bounds of class-member index range (word not a class member).");
+                    LogicError("ClassBasedCrossEntropyWithSoftmax (ForwardPropNonLooping()): Word index out of bounds of class-member index range (word not a class member).");
                size_t idx_in_class = y_t - lft_bnd;
                Matrix<ElemType>::AddElementToElement(logSoftMax_t, 0, idx_in_class, functionValues, 0, 0);   // (1x1)

@ -955,7 +955,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

    protected:
@ -1016,7 +1016,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        { }

        /// compute posterior probability of label y at position t
-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            size_t nrow = Inputs(0)->GetNumRows();
@ -1036,7 +1036,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            {
                FrameRange sequenceRange = frameRange.Sequence(i);    // FrameRange to select one sequence
                // BUGBUG: This ^^ is neither supported nor correct, since this code does not handle gaps or start/end flags
-                EvaluateThisNodeS(
+                ForwardPropS(
                    DataSliceWithMBLayout(mPostProb, sequenceRange, Inputs(0)->GetMBLayout()),
                    DataSliceWithMBLayout(mAlpha,    sequenceRange, Inputs(0)->GetMBLayout()),
                    DataSliceWithMBLayout(mBeta,     sequenceRange, Inputs(0)->GetMBLayout()),
@ -1050,7 +1050,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            }
        }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override  //scaled by 2*number of colmns (samples) in the Matrix<ElemType>
+        virtual void BackpropToNonLooping(size_t inputIndex) override  //scaled by 2*number of colmns (samples) in the Matrix<ElemType>
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            // inputIndex 0 should not get us here, it should be prevented by the needGradient flag of input[0]
@ -1083,7 +1083,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }

        // compute forward backward algorithm
-        /*TODO: merge with call site*/void EvaluateThisNodeS(Matrix<ElemType> postprob, Matrix<ElemType> alpha, Matrix<ElemType> beta, Matrix<ElemType> & functionValues, const Matrix<ElemType> & lbls, const Matrix<ElemType> & pos_scores, const Matrix<ElemType> & pair_scores, int& firstLbl, int& lastLbl, const int iStep = 1)
+        /*TODO: merge with call site*/void ForwardPropS(Matrix<ElemType> postprob, Matrix<ElemType> alpha, Matrix<ElemType> beta, Matrix<ElemType> & functionValues, const Matrix<ElemType> & lbls, const Matrix<ElemType> & pos_scores, const Matrix<ElemType> & pair_scores, int& firstLbl, int& lastLbl, const int iStep = 1)
        {
            /// to-do, each slice is for one sentence
            /// to-do, number of slices correspond to number of frames 
@ -1236,7 +1236,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

        virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1283,17 +1283,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }
        
        //compute gradients to input observations, the weights to the observations, and the class log posterior probabilites
-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            //auto t_start_time = Timer::MilliSecondElapsed();
            //left Node must be a scalar
            if (inputIndex == 0)  //left derivative
            {
-                ComputeInputPartialLeft(*m_logSoftmaxOfRight, Inputs(inputIndex)->GradientValues(), GradientValues());
+                BackpropToLeft(*m_logSoftmaxOfRight, Inputs(inputIndex)->GradientValues(), GradientValues());
            }
            else if (inputIndex == 1)
            {
-                ComputeInputPartialRight(*m_softmaxOfRight, Inputs(0)->FunctionValues(), Inputs(inputIndex)->GradientValues(),
+                BackpropToRight(*m_softmaxOfRight, Inputs(0)->FunctionValues(), Inputs(inputIndex)->GradientValues(),
                                         GradientValues(), *m_gammaFromLattice, m_fsSmoothingWeight, m_frameDropThreshold);
 #ifdef _DEBUG
                Inputs(inputIndex)->InvalidateMissingGradientColumns(FrameRange(Inputs(inputIndex)->GetMBLayout()));
@ -1312,7 +1312,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                RuntimeError("SequenceWithSoftmaxNode criterion only takes with respect to label, DNN output and log likelihood.");
        }

-        static void WINAPI ComputeInputPartialLeft(const Matrix<ElemType>& logSoftmaxOfRight, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
+        static void WINAPI BackpropToLeft(const Matrix<ElemType>& logSoftmaxOfRight, Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues)
        {
 #if DUMPOUTPUT
            logSoftmaxOfRight.Print("SequenceWithSoftmaxNode Partial-logSoftmaxOfRight");
@ -1327,7 +1327,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 #endif
        }

-        static void WINAPI ComputeInputPartialRight(const Matrix<ElemType>& softmaxOfRight, const Matrix<ElemType>& inputFunctionValues,
+        static void WINAPI BackpropToRight(const Matrix<ElemType>& softmaxOfRight, const Matrix<ElemType>& inputFunctionValues,
                                                    Matrix<ElemType>& inputGradientValues, const Matrix<ElemType>& gradientValues,
                                                    const Matrix<ElemType> & gammaFromLattice, double hsmoothingWeight, double frameDropThresh)
        {
@ -1346,7 +1346,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }

        // -sum(left_i * log(softmax_i(right)))
-        virtual void EvaluateThisNodeNonLooping()
+        virtual void ForwardPropNonLooping()
        {
            // Initialize m_gammaCalculator
            // TODO: Would this lend itself to a unique_ptr instead of the init flag?
@ -1407,7 +1407,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        {
            InferImageDimsFromInput(0, false);

-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

        virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -1517,13 +1517,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            Base(deviceId, name)
        { }

-        virtual void ComputeInputPartialNonLooping(size_t inputIndex) override
+        virtual void BackpropToNonLooping(size_t inputIndex) override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            if (inputIndex != 1)
                InvalidArgument("%ls %ls operation cannot compute the gradient for its first inpute.", NodeName().c_str(), OperationName().c_str());

-            //ComputeInputPartialRight(m_temp, Inputs(0)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(inputIndex)->GradientValues(), GradientValues(), m_classZeroLabels, m_result);
+            //BackpropToRight(m_temp, Inputs(0)->FunctionValues(), Inputs(2)->FunctionValues(), Inputs(inputIndex)->GradientValues(), GradientValues(), m_classZeroLabels, m_result);
            // Create vector with 1 for class 1, and -1 for class 0
            m_temp->AssignDifferenceOf(Inputs(0)->ValueSlice(frameRange), *m_classZeroLabels);  // TODO: need a slice for m_classZeroLabels?

@ -1547,7 +1547,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        }

        //-sum(left * log(right) + (1-left)*log(1-right)) (optionally * weight)
-        virtual void /*ComputationNodeNonLooping::*/EvaluateThisNodeNonLooping() override
+        virtual void /*ComputationNodeNonLooping::*/ForwardPropNonLooping() override
        {
            FrameRange frameRange(Inputs(0)->GetMBLayout());
            
@ -1634,7 +1634,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        virtual void InferImageDimsFromInputs()
        {
            InferImageDimsFromInput(0, false);
-            m_imageLayout = ImageLayout();
+            m_sampleLayout = TensorShape();
        }

        virtual void CopyTo(const ComputationNodePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
--- a/MachineLearning/CNTKSGDLib/MultiNetworksEvaluator.h
+++ b/MachineLearning/CNTKSGDLib/MultiNetworksEvaluator.h
@ -572,7 +572,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            for (auto nodeIter = batchComputeNodes.begin(); nodeIter != batchComputeNodes.end(); nodeIter++)
            {
                ComputationNodeBasePtr node = *nodeIter;
-                node->EvaluateThisNode(FrameRange(node->GetMBLayout(), atTime));
+                node->ForwardProp(FrameRange(node->GetMBLayout(), atTime));
                if (node->GetNumCols() != node->GetNumParallelSequences())
                    RuntimeError("preComputeActivityAtTime: the function values has to be a single column matrix ");
            }
--- a/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h
+++ b/MachineLearning/CNTKSGDLib/MultiNetworksSGD.h
@ -286,8 +286,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                /// PreCompute(net, encoderTrainSetDataReader, encoderFeatureNodes, encoderlabelNodes, encoderInputMatrices) || 
                startEpoch == 0)
            {
-                encoderNet->SaveToFile(GetEncoderModelNameForEpoch(int(startEpoch) - 1));
-                decoderNet->SaveToFile(GetDecoderModelNameForEpoch(int(startEpoch) - 1));
+                encoderNet->Save(GetEncoderModelNameForEpoch(int(startEpoch) - 1));
+                decoderNet->Save(GetDecoderModelNameForEpoch(int(startEpoch) - 1));
            }

            bool learnRateInitialized = false;
@ -421,8 +421,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                            }
                            else
                            {
-                                decoderNet->SaveToFile(GetDecoderModelNameForEpoch(i, true));
-                                encoderNet->SaveToFile(GetEncoderModelNameForEpoch(i, true));
+                                decoderNet->Save(GetDecoderModelNameForEpoch(i, true));
+                                encoderNet->Save(GetEncoderModelNameForEpoch(i, true));
                                fprintf(stderr, "Finished training and saved final model\n\n");
                                break;
                            }
@ -456,8 +456,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                }

                //persist model and check-point info
-                decoderNet->SaveToFile(GetDecoderModelNameForEpoch(i));
-                encoderNet->SaveToFile(GetEncoderModelNameForEpoch(i));
+                decoderNet->Save(GetDecoderModelNameForEpoch(i));
+                encoderNet->Save(GetEncoderModelNameForEpoch(i));

                size_t dummyMinibatchSize = 0;
                this->LoadCheckPointInfo(i,
@ -599,7 +599,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                for (size_t k = 0; k < iNumNetworks; k++)
                {
                    wstring tmpstr = msra::strfun::wstrprintf(L".%d", k);
-                    nets[k]->SaveToFile(GetModelNameForEpoch(int(startEpoch) - 1, false, tmpstr));
+                    nets[k]->Save(GetModelNameForEpoch(int(startEpoch) - 1, false, tmpstr));
                }
            }

@ -749,7 +749,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                                //persist model and check-point info
                                for (size_t k = 0; k < iNumNetworks; k++)
                                {
-                                    nets[k]->SaveToFile(GetModelNameForEpoch(i, true, msra::strfun::wstrprintf(L".%d", k)));
+                                    nets[k]->Save(GetModelNameForEpoch(i, true, msra::strfun::wstrprintf(L".%d", k)));
                                }
                                fprintf(stderr, "Finished training and saved final model\n\n");
                                break;
@ -786,7 +786,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                //persist model and check-point info
                for (size_t k = 0; k < iNumNetworks; k++)
                {
-                    nets[k]->SaveToFile(GetModelNameForEpoch(i, false, msra::strfun::wstrprintf(L".%d", k)));
+                    nets[k]->Save(GetModelNameForEpoch(i, false, msra::strfun::wstrprintf(L".%d", k)));
                }

                this->SaveCheckPointInfo(i, totalSamplesSeen, learnRatePerSample, smoothedGradients, prevCriterion, 0);
--- a/MachineLearning/CNTKSGDLib/SGD.cpp
+++ b/MachineLearning/CNTKSGDLib/SGD.cpp
@ -444,7 +444,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
        if (startEpoch < 0)
        {
            // Loads models.
-            origNet->LoadFromFile<ElemType>(origModelFileName);
+            origNet->Load<ElemType>(origModelFileName);

            // Processes feature nodes.
            std::vector<ComputationNodeBasePtr> & sequenceFeatureNodes = sequenceNet->FeatureNodes();
@ -706,7 +706,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                g_mpi->WaitAll();
            }

-            net->SaveToFile(GetModelNameForEpoch(int(startEpoch) - 1));
+            net->Save(GetModelNameForEpoch(int(startEpoch) - 1));
        }

        // BUGBUG: This is where the trainSetDataReader->GetNumParallelSequences() is used to further normalize
@ -814,7 +814,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                        i + 1, learnRatePerSample, m_minLearnRate);
                if (m_autoLearnRateSearchType != LearningRateSearchAlgorithm::None)
                {
-                    net->SaveToFile(m_modelPath);
+                    net->Save(m_modelPath);
                }
                break;
            }
@ -1026,7 +1026,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
                        }
                        else
                        {
-                            net->SaveToFile(GetModelNameForEpoch(i, true));
+                            net->Save(GetModelNameForEpoch(i, true));

                            fprintf(stderr, "Finished training and saved final model\n\n");
                            break;
@ -1081,7 +1081,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
            // persist model and check-point info
            if ((g_mpi == nullptr) || g_mpi->IsMainNode())
            {
-                net->SaveToFile(GetModelNameForEpoch(i));
+                net->Save(GetModelNameForEpoch(i));
                SaveCheckPointInfo(i, totalSamplesSeen, learnRatePerSample, smoothedGradients, prevCriterion, chosenMinibatchSize);
                if (!m_keepCheckPointFiles)
                {
--- a/Math/MathPerformanceTests/MathPerformanceTests.cpp
+++ b/Math/MathPerformanceTests/MathPerformanceTests.cpp
@ -43,7 +43,7 @@ void SetToInitStateValueForResetSeg(const Matrix<ElemType>& sentenceBegin,
 }

 template<class ElemType>
-void rnnEvaluateThisNodeSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& colBegin, const Matrix<ElemType>& needToCompute)
+void rnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues, Matrix<ElemType>& colBegin, const Matrix<ElemType>& needToCompute)
 {
    size_t ncol = functionValues.GetNumCols();
    size_t ntime = ncol / mNbr;
@ -74,7 +74,7 @@ void rnnEvaluateThisNodeSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matri
 }

 template<class ElemType>
-void oldRnnEvaluateThisNodeSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues)
+void oldRnnForwardPropSRP(Matrix<ElemType>& functionValues, size_t mNbr, Matrix<ElemType>& pastActivity, Matrix<ElemType>& inputFunctionValues)
 {
    size_t ncol = functionValues.GetNumCols();
    size_t ntime = ncol / mNbr;
@ -88,13 +88,13 @@ void oldRnnEvaluateThisNodeSRP(Matrix<ElemType>& functionValues, size_t mNbr, Ma
            {
                reset = true;
            }
-            oldRNNEvaluateThisNodeSRP<ElemType>(timeIdxInSeq, 1, reset, (ElemType) 0.1, functionValues, pastActivity, inputFunctionValues, i, mNbr);
+            oldRNNForwardPropSRP<ElemType>(timeIdxInSeq, 1, reset, (ElemType) 0.1, functionValues, pastActivity, inputFunctionValues, i, mNbr);
        }
    }
 }

 template<class ElemType>
-void oldRNNEvaluateThisNodeSRP(const size_t timeIdxInSeq, const int delay, const bool reset, const ElemType default_activity, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pastActivity, const Matrix<ElemType>& inputFunctionValues, const size_t indexInBatch, const size_t mNbr)
+void oldRNNForwardPropSRP(const size_t timeIdxInSeq, const int delay, const bool reset, const ElemType default_activity, Matrix<ElemType>& functionValues, const Matrix<ElemType>& pastActivity, const Matrix<ElemType>& inputFunctionValues, const size_t indexInBatch, const size_t mNbr)
 {
    assert(delay > 0);

@ -128,7 +128,7 @@ void oldRNNEvaluateThisNodeSRP(const size_t timeIdxInSeq, const int delay, const
 The new way of resetting RNN state. 
 */
 template<class ElemType>
-void TestRnnEvaluateThisNodeSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
+void TestRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
 {
    Matrix<ElemType> functionValues(deviceID);
    Matrix<ElemType> colBegin(deviceID);
@ -144,16 +144,16 @@ void TestRnnEvaluateThisNodeSRP(size_t nRow = 100, size_t nCol = 1000, size_t mN
    needToCompute.SetValue(0);
    needToCompute.ColumnSlice(0, 1).SetValue(1);
    auto t_start = clock();
-    rnnEvaluateThisNodeSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues, colBegin, needToCompute);
+    rnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues, colBegin, needToCompute);
    auto t_end = clock();
-    std::cout << "testRnnEvaluateThisNodeSRP: " << 1.0*(t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
+    std::cout << "testRnnForwardPropSRP: " << 1.0*(t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
 }

 /**
 The old way of resetting RNN state, which used if statement. Also only supports up to two sentences within a minibatch
 */
 template<class ElemType>
-void TestOldRnnEvaluateThisNodeSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
+void TestOldRnnForwardPropSRP(size_t nRow = 100, size_t nCol = 1000, size_t mNbr = 10, DEVICEID_TYPE deviceID = 0)
 {
    Matrix<ElemType> functionValues(deviceID);
    Matrix<ElemType> colBegin(deviceID);
@ -165,9 +165,9 @@ void TestOldRnnEvaluateThisNodeSRP(size_t nRow = 100, size_t nCol = 1000, size_t
    pastActivity.Resize(nRow, nCol);
    inputFunctionValues.Resize(nRow, nCol);
    auto t_start = clock();
-    oldRnnEvaluateThisNodeSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues);
+    oldRnnForwardPropSRP<ElemType>(functionValues, mNbr, pastActivity, inputFunctionValues);
    auto t_end = clock();
-    std::cout << "TestOldRnnEvaluateThisNodeSRP: " << 1.0*(t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
+    std::cout << "TestOldRnnForwardPropSRP: " << 1.0*(t_end - t_start) / CLOCKS_PER_SEC << " seconds" << endl;
 }

 template<class ElemType>
@ -441,9 +441,9 @@ int wmain()
 {
    ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);

-    TestRnnEvaluateThisNodeSRP<float>();
+    TestRnnForwardPropSRP<float>();

-    TestOldRnnEvaluateThisNodeSRP<float>();
+    TestOldRnnForwardPropSRP<float>();

    //MandSTest<float>(100, 2);