From 9ba93ab84ead42194b5843bdad37401d2f6bed4b Mon Sep 17 00:00:00 2001 From: Amit Agarwal Date: Sun, 17 Jul 2016 16:44:44 -0700 Subject: [PATCH] CNTK v2 library: Added support for loading legacy v1 format models and saving models in v1 format --- Makefile | 1 + Source/CNTKv2LibraryDll/API/CNTKLibrary.h | 25 ++- Source/CNTKv2LibraryDll/BackCompat.cpp | 199 ++++++++++++++++++ .../CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj | 1 + .../CNTKv2LibraryDll.vcxproj.filters | 1 + Source/CNTKv2LibraryDll/Function.cpp | 11 +- Source/CNTKv2LibraryDll/Function.h | 5 +- Source/CNTKv2LibraryDll/Utils.h | 12 ++ .../ComputationNetwork.h | 2 + .../ComputationNetworkLib/ComputationNode.h | 3 +- .../InputAndParamNodes.h | 15 +- Source/ComputationNetworkLib/RecurrentNodes.h | 3 + Tests/UnitTests/V2LibraryTests/Common.h | 51 +++++ .../V2LibraryTests/FeedForwardTests.cpp | 69 +++--- .../V2LibraryTests/RecurrentFunctionTests.cpp | 70 +++--- .../UnitTests/V2LibraryTests/TensorTests.cpp | 4 +- 16 files changed, 407 insertions(+), 65 deletions(-) create mode 100644 Source/CNTKv2LibraryDll/BackCompat.cpp diff --git a/Makefile b/Makefile index 49daef149..e68351458 100644 --- a/Makefile +++ b/Makefile @@ -368,6 +368,7 @@ SEQUENCE_TRAINING_LIB_SRC +=\ endif CNTKLIBRARY_SRC =\ + $(SOURCEDIR)/CNTKv2LibraryDll/BackCompat.cpp \ $(SOURCEDIR)/CNTKv2LibraryDll/Common.cpp \ $(SOURCEDIR)/CNTKv2LibraryDll/Function.cpp \ $(SOURCEDIR)/CNTKv2LibraryDll/NDArrayView.cpp \ diff --git a/Source/CNTKv2LibraryDll/API/CNTKLibrary.h b/Source/CNTKv2LibraryDll/API/CNTKLibrary.h index b38e4fcb7..745af6849 100644 --- a/Source/CNTKv2LibraryDll/API/CNTKLibrary.h +++ b/Source/CNTKv2LibraryDll/API/CNTKLibrary.h @@ -758,6 +758,13 @@ namespace CNTK friend struct std::hash; public: + /// + /// Create an 'Input' Variable. + /// + Variable(const NDShape& shape, CNTK::DataType dataType, const wchar_t* name = L"") + : Variable(shape, dataType, std::wstring(name)) + {} + /// /// Create an 'Input' Variable. /// @@ -920,6 +927,11 @@ namespace CNTK return first.m_dataFields == second.m_dataFields; } + inline bool operator!=(const Variable& first, const Variable& second) + { + return !(first == second); + } + /// /// Denotes Parameter inputs of a Function. /// @@ -1396,8 +1408,19 @@ namespace CNTK /// E.g. When creating a classification model, typically the CrossEntropy loss Function and the ClassificationError Function comprise the two roots /// of the computation graph which can be "Combine"d to create a single Function with 2 outputs; viz. CrossEntropy loss and ClassificationError output. /// - CNTK_API FunctionPtr Combine(const std::initializer_list& operands, const std::wstring& name = L""); + CNTK_API FunctionPtr Combine(const std::vector& operands, const std::wstring& name = L""); + /// + /// Load a legacy CNTK v1 format model + /// + template + CNTK_API FunctionPtr LoadLegacyModel(const std::wstring& modelFile, const DeviceDescriptor& computeDevice = DeviceDescriptor::DefaultDevice()); + + /// + /// Save a Composite Function instance to a file in CNTK legacy model format + /// + template + CNTK_API void SaveAsLegacyModel(const FunctionPtr& rootFunction, const std::wstring& modelFile); /// /// A serializable value represents one of: /// a) Boolean diff --git a/Source/CNTKv2LibraryDll/BackCompat.cpp b/Source/CNTKv2LibraryDll/BackCompat.cpp new file mode 100644 index 000000000..339812e68 --- /dev/null +++ b/Source/CNTKv2LibraryDll/BackCompat.cpp @@ -0,0 +1,199 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. 
See LICENSE.md file in the project root for full license information. +// + +#include "stdafx.h" +#include "CNTKLibrary.h" +#include "Function.h" +#include "ComputationNetworkBuilder.h" +#include "Utils.h" +#include "ComputationNode.h" +#include "InputAndParamNodes.h" +#include "NonlinearityNodes.h" +#include "LinearAlgebraNodes.h" +#include "RecurrentNodes.h" +#include "EvaluationNodes.h" +#include "TrainingNodes.h" + +using namespace Microsoft::MSR::CNTK; + +namespace CNTK +{ + template + Variable GetVariable(const ComputationNodeBasePtr& node, + std::unordered_map& nodeToVariableMap, + std::unordered_map& placeholderReplacements, + std::unordered_set& allPrimitiveFunctions) + { + auto iter = nodeToVariableMap.find(node); + if (iter != nodeToVariableMap.end()) + return iter->second; + + Variable var; + NDShape varShape = AsNDShape(node->GetSampleLayout()); + // The CNTK sample layouts may have trailing axes with dimension size of 1 which are automatically + // added when converting from NDShape to CNTK internal TensorShapes and are not present in the original + // shapes specified by the user. These should be truncated. + if (varShape.NumAxes() <= 2) + { + size_t numTrailingDimsToRemove = 0; + for (int i = varShape.NumAxes() - 1; i >= 0; --i) + { + if (varShape[i] == 1) + numTrailingDimsToRemove++; + else + break; + } + varShape = varShape.SubShape(0, varShape.NumAxes() - numTrailingDimsToRemove); + } + + if (node->IsLeaf()) + { + if (node->Is>()) + { + auto inputNode = node->As>(); + bool isSparse = node->Is>(); + if (node->HasMBLayout()) + { + // TODO: Currently only default dynamic axis is supported + const std::wstring defaultCNTKDynamicAxisName = L""; + if (inputNode->GetRequestedDynamicAxis() != defaultCNTKDynamicAxisName) + LogicError("Found dynamic axis named '%S' while currently only default dynamic axis named '%S' is supported!", node->GetMBLayout()->GetAxisName(), defaultCNTKDynamicAxisName); + + var = Variable(varShape, isSparse, AsDataType(), node->GetLearningRateMultiplier() != 0, node->GetName()); + } + else + { + // TODO: Allow creating inputs without a dynamic axis + LogicError("Found InputNode with no dynamic axis which is currently unsupported"); + } + } + else if (node->Is>()) + { + auto& matrix = node->As>()->Value(); + auto tensorView = new TensorView(std::make_shared>(matrix.AsReference()), node->GetSampleLayout()); + NDArrayViewPtr parameterValue = MakeSharedObject(AsDataType(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(matrix.GetFormat()), varShape, false, tensorView); + var = Parameter(parameterValue, node->GetName()); + } + else + LogicError("CNTK::LoadLegacyModel: Unsupported legacy CNTK node named '%S'", node->NodeName().c_str()); + } + else + { + // This is a non-leaf node and maps to a primitive Function + auto placeholderVar = Placeholder(varShape); + nodeToVariableMap[node] = placeholderVar; + + std::vector inputVars(node->GetNumInputs()); + for (size_t i = 0; i < inputVars.size(); ++i) + { + inputVars[i] = GetVariable(node->Input(i), nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions); + if (inputVars[i].IsPlaceholder()) + placeholderReplacements[Placeholder(inputVars[i])] = Variable(); + } + + PrimitiveOpType opType; + Dictionary primitiveFunctionConfigParameters; + if (node->OperationName() == OperationNameOf(TanhNode)) + opType = PrimitiveOpType::Tanh; + else if (node->OperationName() == OperationNameOf(SigmoidNode)) + opType = PrimitiveOpType::Sigmoid; + else if (node->OperationName() == OperationNameOf(ExpNode)) + 
opType = PrimitiveOpType::Exp; + else if (node->OperationName() == OperationNameOf(TimesNode)) + opType = PrimitiveOpType::Times; + else if (node->OperationName() == OperationNameOf(PlusNode)) + opType = PrimitiveOpType::Plus; + else if (node->OperationName() == OperationNameOf(PastValueNode)) + { + if (inputVars.size() == 1) + { + auto initialStateVar = Constant({}, node->As>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId())); + inputVars.insert(inputVars.begin(), initialStateVar); + } + primitiveFunctionConfigParameters[L"stepSize"] = DictionaryValue((size_t)node->As>()->TimeStep()); + opType = PrimitiveOpType::PastValue; + } + else if (node->OperationName() == OperationNameOf(FutureValueNode)) + { + if (inputVars.size() == 1) + { + auto initialStateVar = Constant({}, node->As>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId())); + inputVars.insert(inputVars.begin(), initialStateVar); + } + primitiveFunctionConfigParameters[L"stepSize"] = DictionaryValue((size_t)node->As>()->TimeStep()); + opType = PrimitiveOpType::FutureValue; + } + else if (node->OperationName() == OperationNameOf(CrossEntropyWithSoftmaxNode)) + { + std::swap(inputVars[0], inputVars[1]); + opType = PrimitiveOpType::CrossEntropyWithSoftmax; + } + else if (node->OperationName() == OperationNameOf(ErrorPredictionNode)) + { + std::swap(inputVars[0], inputVars[1]); + opType = PrimitiveOpType::ClassificationError; + } + else if (node->OperationName() == OperationNameOf(ElementTimesNode)) + opType = PrimitiveOpType::ElementTimes; + else if (node->OperationName() == OperationNameOf(SumElementsNode)) + opType = PrimitiveOpType::ReduceSum; + else + LogicError("Unsupported ComputationNode with OperationName='%S' found when loading legacy CNTK model", node->OperationName().c_str()); + + FunctionPtr primitiveFunction = MakeSharedObject(opType, inputVars, std::move(primitiveFunctionConfigParameters), node->GetName()); + allPrimitiveFunctions.insert(primitiveFunction); + var = primitiveFunction->Output(); + if (placeholderReplacements.find(placeholderVar) != placeholderReplacements.end()) + placeholderReplacements[placeholderVar] = var; + } + + nodeToVariableMap[node] = var; + return var; + } + + template + FunctionPtr LoadLegacyModel(const std::wstring& modelFile, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::DefaultDevice()*/) + { + ComputationNetworkPtr net = make_shared(AsCNTKImplDeviceId(computeDevice)); + net->Load(modelFile); + + // Now traverse the model and construct the Function graph + std::unordered_map nodeToVariableMap; + std::unordered_map placeholderReplacements; + std::unordered_set allPrimitiveFunctions; + std::vector rootFunctions; + auto& networkRoots = net->RootNodes(); + for (auto& rootNode : networkRoots) + { + if (rootNode->IsLeaf()) + continue; + + rootFunctions.push_back(GetVariable(rootNode, nodeToVariableMap, placeholderReplacements, allPrimitiveFunctions).Owner()); + } + + auto rootComposite = Combine(rootFunctions); + rootComposite->ReplacePlaceholders(placeholderReplacements); + + return rootComposite; + } + + template + void SaveAsLegacyModel(const FunctionPtr& rootFunction, const std::wstring& modelFile) + { + CompositeFunction* compositeFunction = dynamic_cast(rootFunction.get()); + if (compositeFunction == nullptr) + InvalidArgument("Primitive (aka non-composite) Function instances cannot be saved"); + + auto computationNetwork = compositeFunction->GetComputationNetwork(DeviceDescriptor::CPUDevice(), {}); + computationNetwork->Save(modelFile); + } + 
+ // Template instantiations + template CNTK_API FunctionPtr LoadLegacyModel(const std::wstring& modelFile, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::DefaultDevice()*/); + template CNTK_API FunctionPtr LoadLegacyModel(const std::wstring& modelFile, const DeviceDescriptor& computeDevice /*= DeviceDescriptor::DefaultDevice()*/); + + template CNTK_API void SaveAsLegacyModel(const FunctionPtr& rootFunction, const std::wstring& modelFile); + template CNTK_API void SaveAsLegacyModel(const FunctionPtr& rootFunction, const std::wstring& modelFile); +} diff --git a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj index bf18f452a..c097e592f 100644 --- a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj +++ b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj @@ -134,6 +134,7 @@ + false diff --git a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters index 1d2b139d1..6cc6e0220 100644 --- a/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters +++ b/Source/CNTKv2LibraryDll/CNTKv2LibraryDll.vcxproj.filters @@ -11,6 +11,7 @@ + diff --git a/Source/CNTKv2LibraryDll/Function.cpp b/Source/CNTKv2LibraryDll/Function.cpp index b538f05f4..dd9d19c96 100644 --- a/Source/CNTKv2LibraryDll/Function.cpp +++ b/Source/CNTKv2LibraryDll/Function.cpp @@ -126,7 +126,14 @@ namespace CNTK } else if (variable.IsInput()) { - // TODO: Specify dynamic axis + // TODO: Support inputs with > 1 dynamic axes + if (variable.DynamicAxes().size() != 1) + LogicError("Currently only Input variables with one dynamic axis are supported"); + + auto dynamicAxis = variable.DynamicAxes()[0]; + if (dynamicAxis != Axis::DefaultDynamicAxis()) + LogicError("Currently only Input variables with DefaultDynamicAxis are supported"); + if (IsSparseInput(variable)) computationNodePtr = builder.CreateSparseInputNode(variable.Name(), AsTensorShape(variable.Shape())); else @@ -872,7 +879,7 @@ namespace CNTK return CompositeFunction::Create(MakeSharedObject(PrimitiveOpType::Tanh, std::vector({ operand }), Dictionary(), name), name); } - FunctionPtr Combine(const std::initializer_list& operands, const std::wstring& name/* = L""*/) + FunctionPtr Combine(const std::vector& operands, const std::wstring& name/* = L""*/) { std::unordered_set uniqueOperands; std::vector inputs; diff --git a/Source/CNTKv2LibraryDll/Function.h b/Source/CNTKv2LibraryDll/Function.h index 814c09b80..2e60c8178 100644 --- a/Source/CNTKv2LibraryDll/Function.h +++ b/Source/CNTKv2LibraryDll/Function.h @@ -220,7 +220,7 @@ namespace CNTK { assert(inputs.size() == 2); - if (inputs[0].Shape().NumAxes() > 1) + if ((inputs[0].Shape().NumAxes() > 2) || ((inputs[0].Shape().NumAxes() > 1) && (inputs[0].Shape()[1] != 1))) InvalidArgument("The shape of input operands for the %s operation should have at most one axis", PrimitiveOpTypeName(op)); auto predictionShape = inputs[0].Shape(); @@ -292,6 +292,9 @@ namespace CNTK template friend inline std::shared_ptr MakeSharedObject(CtorArgTypes&& ...ctorArgs); + template + friend void SaveAsLegacyModel(const FunctionPtr& rootFunction, const std::wstring& modelFile); + public: static CompositeFunctionPtr Create(const FunctionPtr& rootFunction, const std::wstring& name = L"") { diff --git a/Source/CNTKv2LibraryDll/Utils.h b/Source/CNTKv2LibraryDll/Utils.h index c18d8e4d2..09cfbe2d3 100644 --- a/Source/CNTKv2LibraryDll/Utils.h +++ b/Source/CNTKv2LibraryDll/Utils.h @@ -78,6 +78,18 @@ namespace CNTK LogicError("Unknown DataType"); } + 
inline NDShape AsNDShape(const Microsoft::MSR::CNTK::TensorShape& tensorShape) + { + // The TensorShape should be flattenable to 1D + for (size_t i = 1; i < tensorShape.GetRank(); ++i) + { + if (!tensorShape.CanFlatten(i)) + InvalidArgument("AsNDShape() can only be called for TensorShapes that can be flattened to 1D"); + } + + return std::vector(tensorShape.GetDims().begin(), tensorShape.GetDims().end()); + } + inline Microsoft::MSR::CNTK::TensorShape AsTensorShape(const NDShape& viewShape) { const size_t maxNumAxesSupportedByTensorView = 12; diff --git a/Source/ComputationNetworkLib/ComputationNetwork.h b/Source/ComputationNetworkLib/ComputationNetwork.h index 06ba1b64f..2432181c4 100644 --- a/Source/ComputationNetworkLib/ComputationNetwork.h +++ b/Source/ComputationNetworkLib/ComputationNetwork.h @@ -522,6 +522,8 @@ public: } + const std::vector& RootNodes() const { return m_allRoots; } + // these are specified as such by the user const std::vector& FeatureNodes() const { return m_featureNodes ; } const std::vector& LabelNodes() const { return m_labelNodes ; } diff --git a/Source/ComputationNetworkLib/ComputationNode.h b/Source/ComputationNetworkLib/ComputationNode.h index aa5cf658a..18e487b74 100644 --- a/Source/ComputationNetworkLib/ComputationNode.h +++ b/Source/ComputationNetworkLib/ComputationNode.h @@ -38,7 +38,8 @@ #define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file #define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs #define CNTK_MODEL_VERSION_9 9 // Transpose flag in ConvolutionNode to support deconvolution. -#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_9 +#define CNTK_MODEL_VERSION_10 10 // Learning rate multiplier for input nodes. +#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_10 extern bool g_shareNodeValueMatrices; diff --git a/Source/ComputationNetworkLib/InputAndParamNodes.h b/Source/ComputationNetworkLib/InputAndParamNodes.h index aaccb75ff..9b25cd74e 100644 --- a/Source/ComputationNetworkLib/InputAndParamNodes.h +++ b/Source/ComputationNetworkLib/InputAndParamNodes.h @@ -162,7 +162,7 @@ class InputValueBase : public ComputationNode, public NumInputs<0>, pu typedef ComputationNode Base; UsingComputationNodeMembers; - void Init(const TensorShape& sampleLayout, bool isSparse, const std::wstring axisName) + void Init(const TensorShape& sampleLayout, bool isSparse, const std::wstring axisName, float learningRateMultiplier = 0) { m_isSparse = isSparse; MarkValueNonSharable(); @@ -171,7 +171,7 @@ class InputValueBase : public ComputationNode, public NumInputs<0>, pu SetDims(sampleLayout, HasMBLayout()); // also called when reloading a file. 
Then we have an MBLayout, otherwise not yet UpdateFunctionValuesSize(); // we must allocate the matrix so that the readers get objects with valid row dimensions (some readers expect that) - SetLearningRateMultiplier(0); + SetLearningRateMultiplier(learningRateMultiplier); m_dynamicAxisNodeName = axisName; } @@ -225,9 +225,9 @@ protected: Init(ImageDimensions::AsTensorShape(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"), ImageLayoutKindFrom(configp->Get(L"imageLayout"))), isSparse, axisName); } +public: virtual const std::wstring GetRequestedDynamicAxis() const { return m_dynamicAxisNodeName; } -public: virtual void Save(File& fstream) const override { Base::Save(fstream); @@ -239,6 +239,8 @@ public: unsigned int nrAxes = 1; fstream << nrAxes; fstream << m_dynamicAxisNodeName; + + fstream << m_learningRateMultiplier; } virtual void Load(File& fstream, size_t modelVersion) override @@ -268,7 +270,12 @@ public: } else m_dynamicAxisNodeName = L""; // Use default - Init(sampleLayout, m_isSparse, m_dynamicAxisNodeName); + + float learningRateMultiplier = 0; + if (modelVersion >= CNTK_MODEL_VERSION_10) + fstream >> learningRateMultiplier; + + Init(sampleLayout, m_isSparse, m_dynamicAxisNodeName, learningRateMultiplier); } // InputValue must not resize its inputs because that might destroy it. It should already have the correct size. diff --git a/Source/ComputationNetworkLib/RecurrentNodes.h b/Source/ComputationNetworkLib/RecurrentNodes.h index 5d1748c3f..88fa55c44 100644 --- a/Source/ComputationNetworkLib/RecurrentNodes.h +++ b/Source/ComputationNetworkLib/RecurrentNodes.h @@ -464,6 +464,9 @@ public: LogicError("Unrecognized direction in DelayedValueNodeBase"); } + int TimeStep() const { return m_timeStep; } + ElemType InitialActivationValue() const { return m_initialActivationValue; } + protected: ElemType m_initialActivationValue; // starting value for hidden activation vector at boundary Matrix m_delayedValue; // saves the activation of the previous step that this node points to diff --git a/Tests/UnitTests/V2LibraryTests/Common.h b/Tests/UnitTests/V2LibraryTests/Common.h index 444736c4f..3514437c7 100644 --- a/Tests/UnitTests/V2LibraryTests/Common.h +++ b/Tests/UnitTests/V2LibraryTests/Common.h @@ -2,6 +2,7 @@ #include #include +#include "CNTKLibrary.h" static const double relativeTolerance = 0.001f; static const double absoluteTolerance = 0.000001f; @@ -18,3 +19,53 @@ inline void FloatingPointVectorCompare(const std::vector& first, co throw std::runtime_error(message); } } + +#pragma warning(push) +#pragma warning(disable: 4996) + +template +inline void SaveAndReloadModel(CNTK::FunctionPtr& functionPtr, const std::vector& variables, const CNTK::DeviceDescriptor& device) +{ + static std::wstring s_tempModelPath = L"feedForward.net"; + + if ((_wunlink(s_tempModelPath.c_str()) != 0) && (errno != ENOENT)) + RuntimeError("Error deleting file '%ls': %s", s_tempModelPath.c_str(), strerror(errno)); + + std::unordered_map inputVarNames; + std::unordered_map outputVarNames; + + for (auto varPtr : variables) + { + auto retVal = varPtr->IsOutput() ? 
outputVarNames.insert({ varPtr->Owner()->Name(), varPtr }) : inputVarNames.insert({ varPtr->Name(), varPtr }); + if (!retVal.second) + RuntimeError("SaveAndReloadModel: Multiple variables having same name cannot be restored after save and reload"); + } + + SaveAsLegacyModel(functionPtr, s_tempModelPath); + functionPtr = LoadLegacyModel(s_tempModelPath, device); + + if (_wunlink(s_tempModelPath.c_str()) != 0) + RuntimeError("Error deleting file '%ls': %s", s_tempModelPath.c_str(), strerror(errno)); + + auto inputs = functionPtr->Inputs(); + for (auto inputVarInfo : inputVarNames) + { + auto newInputVar = *(std::find_if(inputs.begin(), inputs.end(), [inputVarInfo](const Variable& var) { + return (var.Name() == inputVarInfo.first); + })); + + *(inputVarInfo.second) = newInputVar; + } + + auto outputs = functionPtr->Outputs(); + for (auto outputVarInfo : outputVarNames) + { + auto newOutputVar = *(std::find_if(outputs.begin(), outputs.end(), [outputVarInfo](const Variable& var) { + return (var.Owner()->Name() == outputVarInfo.first); + })); + + *(outputVarInfo.second) = newOutputVar; + } +} + +#pragma warning(pop) diff --git a/Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp b/Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp index 401f4dd8a..dec82099b 100644 --- a/Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp +++ b/Tests/UnitTests/V2LibraryTests/FeedForwardTests.cpp @@ -18,7 +18,13 @@ FunctionPtr FullyConnectedDNNLayer(Variable input, size_t outputDim, const Devic return nonLinearity(plusFunction); } -FunctionPtr FullyConnectedFeedForwardClassifierNet(Variable input, size_t numOutputClasses, size_t hiddenLayerDim, size_t numHiddenLayers, const DeviceDescriptor& device, const std::function& nonLinearity) +FunctionPtr FullyConnectedFeedForwardClassifierNet(Variable input, + size_t numOutputClasses, + size_t hiddenLayerDim, + size_t numHiddenLayers, + const DeviceDescriptor& device, + const std::function& nonLinearity, + const std::wstring& outputName) { assert(numHiddenLayers >= 1); auto classifierRoot = FullyConnectedDNNLayer(input, hiddenLayerDim, device, nonLinearity); @@ -26,11 +32,12 @@ FunctionPtr FullyConnectedFeedForwardClassifierNet(Variable input, size_t numOut classifierRoot = FullyConnectedDNNLayer(classifierRoot, hiddenLayerDim, device, nonLinearity); auto outputTimesParam = Parameter(NDArrayView::RandomUniform({ numOutputClasses, hiddenLayerDim }, -0.5, 0.5, 1, device)); - classifierRoot = Times(outputTimesParam, classifierRoot); - return classifierRoot; + return Times(outputTimesParam, classifierRoot, outputName); } -void TestFeedForwardNetworkCreation(const DeviceDescriptor& device) +std::wstring s_tempModelPath = L"feedForward.net"; + +void TestFeedForwardNetworkCreation(const DeviceDescriptor& device, bool testSaveAndReLoad) { using namespace std::placeholders; @@ -39,14 +46,17 @@ void TestFeedForwardNetworkCreation(const DeviceDescriptor& device) const size_t numHiddenLayers = 6; const size_t hiddenLayersDim = 2048; - Variable inputVar({ inputDim }, DataType::Float, L"Features"); - auto classifierOutputFunction = FullyConnectedFeedForwardClassifierNet(inputVar, numOutputClasses, hiddenLayersDim, numHiddenLayers, device, std::bind(Sigmoid, _1, L"")); + Variable inputVar({ inputDim }, DataType::Float, L"features"); + auto classifierOutputFunction = FullyConnectedFeedForwardClassifierNet(inputVar, numOutputClasses, hiddenLayersDim, numHiddenLayers, device, std::bind(Sigmoid, _1, L""), L"classifierOutput"); + Variable classifierOutput = classifierOutputFunction; 
Variable labelsVar({ numOutputClasses }, DataType::Float, L"Labels"); - auto trainingLossFunction = CNTK::CrossEntropyWithSoftmax(classifierOutputFunction, labelsVar, L"LossFunction"); - auto predictionFunction = CNTK::ClassificationError(classifierOutputFunction, labelsVar, L"ClassificationError"); + auto trainingLossFunction = CNTK::CrossEntropyWithSoftmax(classifierOutput, labelsVar, L"LossFunction"); + Variable trainingLoss = trainingLossFunction; + auto predictionFunction = CNTK::ClassificationError(classifierOutput, labelsVar, L"ClassificationError"); + Variable prediction = predictionFunction; - auto ffNet = CNTK::Combine({ trainingLossFunction, predictionFunction, classifierOutputFunction }, L"ClassifierModel"); + auto ffNet = CNTK::Combine({ trainingLoss.Owner(), prediction.Owner(), classifierOutput.Owner() }, L"ClassifierModel"); // Now test the structure if (ffNet->Parameters().size() != ((numHiddenLayers * 2) + 1)) @@ -58,6 +68,9 @@ void TestFeedForwardNetworkCreation(const DeviceDescriptor& device) if (ffNet->Outputs().size() != 3) throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Output count"); + if (testSaveAndReLoad) + SaveAndReloadModel(ffNet, { &inputVar, &labelsVar, &trainingLoss, &prediction, &classifierOutput }, device); + // Run Forward and backward a few times size_t iterationCount = 4; unsigned int randSeed = 2; @@ -69,22 +82,22 @@ void TestFeedForwardNetworkCreation(const DeviceDescriptor& device) for (size_t i = 0; i < inputData.size(); ++i) inputData[i] = ((float)rand()) / RAND_MAX; - NDShape inputShape = { inputDim, 1, numSamples }; + NDShape inputShape = inputVar.Shape().AppendShape({ 1, numSamples }); ValuePtr inputValue = MakeSharedObject(MakeSharedObject(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true)); std::vector labelData(numOutputClasses * numSamples, 0); for (size_t i = 0; i < numSamples; ++i) labelData[(i*numOutputClasses) + (rand() % numOutputClasses)] = 1; - NDShape labelShape = { numOutputClasses, 1, numSamples }; + NDShape labelShape = labelsVar.Shape().AppendShape({ 1, numSamples }); ValuePtr labelValue = MakeSharedObject(MakeSharedObject(labelShape, labelData.data(), labelData.size(), DeviceDescriptor::CPUDevice(), true)); ValuePtr outputValue, predictionErrorValue; - std::unordered_map outputs = { { classifierOutputFunction->Output(), outputValue }, { predictionFunction->Output(), predictionErrorValue } }; - auto backpropState = ffNet->Forward({ { inputVar, inputValue }, { labelsVar, labelValue } }, outputs, device, { trainingLossFunction->Output() }); + std::unordered_map outputs = { { classifierOutput, outputValue }, { prediction, predictionErrorValue } }; + auto backpropState = ffNet->Forward({ { inputVar, inputValue }, { labelsVar, labelValue } }, outputs, device, { trainingLoss }); // Perform backprop - NDShape outputShape = trainingLossFunction->Output().Shape(); + NDShape outputShape = trainingLoss.Shape(); std::vector rootGradientsData(outputShape.TotalSize(), 1); ValuePtr rootGradientValue = MakeSharedObject(MakeSharedObject(outputShape, rootGradientsData.data(), rootGradientsData.size(), DeviceDescriptor::CPUDevice(), true)); std::unordered_map paramGradients; @@ -92,7 +105,7 @@ void TestFeedForwardNetworkCreation(const DeviceDescriptor& device) for (auto iter = allParams.begin(); iter != allParams.end(); ++iter) paramGradients[*iter] = nullptr; - ffNet->Backward(backpropState, { { trainingLossFunction->Output(), rootGradientValue } }, paramGradients); + 
ffNet->Backward(backpropState, { { trainingLoss, rootGradientValue } }, paramGradients); } } @@ -103,15 +116,19 @@ void TestTimesAndPlus(size_t inputDim, const DeviceDescriptor& device, size_t numIterations, bool usePreAllocatedOutputs, - bool outputOnSpecifiedDevice = false, + bool outputOnSpecifiedDevice, + bool testSaveAndReLoad, unsigned int seed = 1) { - Parameter timesParam(MakeSharedObject((ElementType)0.5, NDShape({ outputDim, inputDim }), device)); - Parameter plusParam(MakeSharedObject((ElementType)1.2, std::initializer_list({ outputDim }), device)); + Parameter timesParam(MakeSharedObject((ElementType)0.5, NDShape({ outputDim, inputDim }), device), L"timesParameters"); + Parameter plusParam(MakeSharedObject((ElementType)1.2, std::initializer_list({ outputDim }), device), L"plusParameters"); Variable inputVar({ inputDim }, AsDataType(), L"input"); auto timesAndPlusFunc = Plus(plusParam, Times(timesParam, inputVar)); + if (testSaveAndReLoad) + SaveAndReloadModel(timesAndPlusFunc, { &inputVar, ×Param, &plusParam }, device); + srand(seed); for (size_t iterIdx = 0; iterIdx < numIterations; ++iterIdx) { @@ -119,10 +136,10 @@ void TestTimesAndPlus(size_t inputDim, for (size_t i = 0; i < inputData.size(); ++i) inputData[i] = ((ElementType)rand()) / RAND_MAX; - NDShape inputShape = { inputDim, 1, numSamples }; + NDShape inputShape = inputVar.Shape().AppendShape({ 1, numSamples }); ValuePtr inputValue = MakeSharedObject(MakeSharedObject(inputShape, inputData.data(), inputData.size(), DeviceDescriptor::CPUDevice(), true)); - NDShape outputShape = { outputDim, 1, numSamples }; + NDShape outputShape = timesAndPlusFunc->Output().Shape().AppendShape({ 1, numSamples }); std::vector outputData(outputShape.TotalSize()); ValuePtr outputValue; if (usePreAllocatedOutputs) @@ -235,12 +252,14 @@ void TestTimesAndPlus(size_t inputDim, void FeedForwardTests() { - TestTimesAndPlus(4, 2, 5, DeviceDescriptor::CPUDevice(), 3, true, true); + TestTimesAndPlus(4, 2, 5, DeviceDescriptor::CPUDevice(), 3, true, true, true); #ifndef CPUONLY - TestTimesAndPlus(145, 32, 2, DeviceDescriptor::GPUDevice(0), 10, true, false); - TestTimesAndPlus(145, 15, 200, DeviceDescriptor::GPUDevice(0), 21, false); + TestTimesAndPlus(145, 32, 2, DeviceDescriptor::GPUDevice(0), 10, true, false, true); + TestTimesAndPlus(145, 15, 200, DeviceDescriptor::GPUDevice(0), 21, false, false, false); - TestFeedForwardNetworkCreation(DeviceDescriptor::GPUDevice(0)); + TestFeedForwardNetworkCreation(DeviceDescriptor::GPUDevice(0), true); + TestFeedForwardNetworkCreation(DeviceDescriptor::GPUDevice(0), false); #endif - TestFeedForwardNetworkCreation(DeviceDescriptor::CPUDevice()); + TestFeedForwardNetworkCreation(DeviceDescriptor::CPUDevice(), false); + TestFeedForwardNetworkCreation(DeviceDescriptor::CPUDevice(), true); } diff --git a/Tests/UnitTests/V2LibraryTests/RecurrentFunctionTests.cpp b/Tests/UnitTests/V2LibraryTests/RecurrentFunctionTests.cpp index 38a6506ad..e84fb0550 100644 --- a/Tests/UnitTests/V2LibraryTests/RecurrentFunctionTests.cpp +++ b/Tests/UnitTests/V2LibraryTests/RecurrentFunctionTests.cpp @@ -119,7 +119,7 @@ FunctionPtr LSTMPComponentWithSelfStabilization(Variable input, size_t outputDim } template -FunctionPtr LSTMNet(Variable features, size_t cellDim, size_t hiddenDim, size_t numOutputClasses, size_t numLSTMLayers, const DeviceDescriptor& device) +FunctionPtr LSTMNet(Variable features, size_t cellDim, size_t hiddenDim, size_t numOutputClasses, size_t numLSTMLayers, const DeviceDescriptor& device, const std::wstring& 
outputName) { assert(numLSTMLayers >= 1); auto classifierRoot = LSTMPComponentWithSelfStabilization(features, hiddenDim, cellDim, device); @@ -133,11 +133,11 @@ FunctionPtr LSTMNet(Variable features, size_t cellDim, size_t hiddenDim, size_t auto sW = Parameter({}, (ElementType)0.0, device); auto expsW = Exp(sW); - return Plus(Times(W, ElementTimes(expsW, classifierRoot)), b); + return Plus(Times(W, ElementTimes(expsW, classifierRoot)), b, outputName); } template -void TestRecurrentNetworkCreation(const DeviceDescriptor& device) +void TestRecurrentNetworkCreation(const DeviceDescriptor& device, bool testSaveAndReLoad) { const size_t inputDim = 937; const size_t numLSTMLayers = 3; @@ -146,11 +146,14 @@ void TestRecurrentNetworkCreation(const DeviceDescriptor& device) const size_t numOutputClasses = 9304; Variable features({ inputDim }, AsDataType(), L"features"); - auto classifierOutputFunction = LSTMNet(features, cellDim, hiddenDim, numOutputClasses, numLSTMLayers, device); + auto classifierOutputFunction = LSTMNet(features, cellDim, hiddenDim, numOutputClasses, numLSTMLayers, device, L"classifierOutput"); + Variable classifierOutput = classifierOutputFunction; Variable labelsVar = Variable({ numOutputClasses }, AsDataType(), L"labels"); auto trainingLossFunction = CrossEntropyWithSoftmax(classifierOutputFunction, labelsVar, L"lossFunction"); + Variable trainingLoss = trainingLossFunction; auto predictionFunction = ClassificationError(classifierOutputFunction, labelsVar, L"classificationError"); + Variable prediction = predictionFunction; auto LSTMClassifier = Combine({ trainingLossFunction, predictionFunction, classifierOutputFunction }, L"LSTMClassifier"); @@ -164,6 +167,9 @@ void TestRecurrentNetworkCreation(const DeviceDescriptor& device) if (LSTMClassifier->Parameters().size() != ((numLSTMLayers * 28) + 3)) throw std::runtime_error("TestFeedForwardNetworkCreation: Function does not have expected Parameter count"); + if (testSaveAndReLoad) + SaveAndReloadModel(LSTMClassifier, { &features, &labelsVar, &trainingLoss, &prediction, &classifierOutput }, device); + // Run Forward and backward a few times size_t iterationCount = 3; unsigned int randSeed = 2; @@ -206,11 +212,11 @@ void TestRecurrentNetworkCreation(const DeviceDescriptor& device) ValuePtr labelValue = Value::Create({ numOutputClasses }, labelsData, device, true); ValuePtr outputValue, predictionErrorValue; - std::unordered_map outputs = { { classifierOutputFunction->Output(), outputValue }, { predictionFunction->Output(), predictionErrorValue } }; - auto backpropState = LSTMClassifier->Forward({ { features, inputValue }, { labelsVar, labelValue } }, outputs, device, { trainingLossFunction->Output() }); + std::unordered_map outputs = { { classifierOutput, outputValue }, { prediction, predictionErrorValue } }; + auto backpropState = LSTMClassifier->Forward({ { features, inputValue }, { labelsVar, labelValue } }, outputs, device, { trainingLoss }); // Perform backprop - NDShape outputShape = trainingLossFunction->Output().Shape(); + NDShape outputShape = trainingLoss.Shape(); std::vector rootGradientsData(outputShape.TotalSize(), 1); ValuePtr rootGradientValue = MakeSharedObject(MakeSharedObject(outputShape, rootGradientsData.data(), rootGradientsData.size(), DeviceDescriptor::CPUDevice(), true)); std::unordered_map paramGradients; @@ -218,7 +224,7 @@ void TestRecurrentNetworkCreation(const DeviceDescriptor& device) for (auto iter = allParams.begin(); iter != allParams.end(); ++iter) paramGradients[*iter] = nullptr; - 
LSTMClassifier->Backward(backpropState, { { trainingLossFunction->Output(), rootGradientValue } }, paramGradients); + LSTMClassifier->Backward(backpropState, { { trainingLoss, rootGradientValue } }, paramGradients); } } @@ -228,6 +234,7 @@ void TestSimpleRecurrence(size_t inputDim, size_t maxAllowedSequenceLength, size_t numSequences, const DeviceDescriptor& device, + bool testSaveAndReLoad, size_t numIterations, bool useFutureValue, bool useSparseInputs, @@ -237,24 +244,29 @@ void TestSimpleRecurrence(size_t inputDim, if (useOneHotSparseInputs && !useSparseInputs) throw std::runtime_error("useOneHotSparseInputs option can only be true when useSparseInputs is true"); - Parameter timesParam(MakeSharedObject((ElementType)0.5, NDShape({ outputDim, inputDim }), device)); - Parameter plusParam(MakeSharedObject((ElementType)0.1, std::initializer_list({ outputDim }), device)); + Parameter timesParam(MakeSharedObject((ElementType)0.5, NDShape({ outputDim, inputDim }), device), L"timesParameters"); + Parameter plusParam(MakeSharedObject((ElementType)0.1, std::initializer_list({ outputDim }), device), L"plusParameters"); Variable inputVar({ inputDim }, useSparseInputs, AsDataType(), true, L"input"); auto placeholder = Placeholder({ outputDim }); - auto plusOutput = Plus(plusParam, Plus(placeholder, Times(timesParam, inputVar))); + auto plusOutputFunction = Plus(plusParam, Plus(placeholder, Times(timesParam, inputVar)), L"plusOutput"); FunctionPtr placeholderReplacement; if (useFutureValue) - placeholderReplacement = FutureValue(Constant({}, (ElementType)0.0, device), plusOutput, 1); + placeholderReplacement = FutureValue(Constant({}, (ElementType)0.0, device), plusOutputFunction, 1); else - placeholderReplacement = PastValue(Constant({}, (ElementType)0.0, device), plusOutput, 1); + placeholderReplacement = PastValue(Constant({}, (ElementType)0.0, device), plusOutputFunction, 1); - plusOutput = plusOutput->ReplacePlaceholders({ { placeholder, placeholderReplacement } }); + plusOutputFunction = plusOutputFunction->ReplacePlaceholders({ { placeholder, placeholderReplacement } }); + Variable plusOutput = plusOutputFunction; - auto reducedOutput = ReduceSum(plusOutput); + auto reducedOutputFunction = ReduceSum(plusOutput, L"sum"); + Variable reducedOutput = reducedOutputFunction; - auto rootFunc = Combine({ reducedOutput, plusOutput }); + auto rootFunc = Combine({ reducedOutputFunction, plusOutputFunction }); + + if (testSaveAndReLoad) + SaveAndReloadModel(rootFunc, { &inputVar, ×Param, &plusParam, &plusOutput, &reducedOutput }, device); srand(seed); for (size_t iterIdx = 0; iterIdx < numIterations; ++iterIdx) @@ -268,7 +280,7 @@ void TestSimpleRecurrence(size_t inputDim, maxActualSequenceLength = sequenceLengths[i]; } - NDShape inputShape = { inputDim, maxActualSequenceLength, numSequences }; + NDShape inputShape = inputVar.Shape().AppendShape({ maxActualSequenceLength, numSequences }); ValuePtr inputValue; size_t totalNumInputSamples = maxActualSequenceLength * numSequences; std::vector inputData(inputDim * totalNumInputSamples, useSparseInputs ? 
0 : std::numeric_limits::quiet_NaN()); @@ -330,12 +342,12 @@ void TestSimpleRecurrence(size_t inputDim, std::vector reducedOutputData(reducedOutputShape.TotalSize()); ValuePtr reducedOutputValue = MakeSharedObject(MakeSharedObject(reducedOutputShape, reducedOutputData.data(), reducedOutputData.size(), DeviceDescriptor::CPUDevice(), false)); - NDShape plusOutputShape = plusOutput->Output().Shape().AppendShape({ maxActualSequenceLength, numSequences }); + NDShape plusOutputShape = plusOutput.Shape().AppendShape({ maxActualSequenceLength, numSequences }); std::vector plusOutputData(plusOutputShape.TotalSize()); ValuePtr plusOutputValue = MakeSharedObject(MakeSharedObject(plusOutputShape, plusOutputData.data(), plusOutputData.size(), DeviceDescriptor::CPUDevice(), false), MakeSharedObject(inputValue->Mask()->Shape(), inputValue->Mask()->Device())); - std::unordered_map outputs = { { reducedOutput->Output(), reducedOutputValue }, { plusOutput->Output(), plusOutputValue } }; - auto backpropState = rootFunc->Forward({ { inputVar, inputValue } }, outputs, device, { plusOutput->Output() }); + std::unordered_map outputs = { { reducedOutput, reducedOutputValue }, { plusOutput, plusOutputValue } }; + auto backpropState = rootFunc->Forward({ { inputVar, inputValue } }, outputs, device, { plusOutput }); // Perform backprop std::vector rootGradientsData(plusOutputShape.TotalSize(), std::numeric_limits::quiet_NaN()); @@ -362,7 +374,7 @@ void TestSimpleRecurrence(size_t inputDim, ValuePtr inputGradientValue = MakeSharedObject(MakeSharedObject(inputShape, inputGradientData.data(), inputGradientData.size(), DeviceDescriptor::CPUDevice(), false), inputValue->Mask()->DeepClone()); std::unordered_map outGradients = { { inputVar, inputGradientValue }, { plusParam, plusParameterGradientValue }, { timesParam, timesParameterGradientValue } }; - rootFunc->Backward(backpropState, { { plusOutput->Output(), rootGradientValue } }, outGradients); + rootFunc->Backward(backpropState, { { plusOutput, rootGradientValue } }, outGradients); // Verify forward prop results std::vector expectedPlusOutputData(plusOutputShape.TotalSize(), 0); @@ -473,19 +485,19 @@ void TestSimpleRecurrence(size_t inputDim, void RecurrentFunctionTests() { - TestSimpleRecurrence(2, 1, 4, 1, DeviceDescriptor::CPUDevice(), 3, false, false); + TestSimpleRecurrence(2, 1, 4, 1, DeviceDescriptor::CPUDevice(), true, 3, false, false); #ifndef CPUONLY - TestSimpleRecurrence(11, 9, 16, 7, DeviceDescriptor::GPUDevice(0), 5, true, false); + TestSimpleRecurrence(11, 9, 16, 7, DeviceDescriptor::GPUDevice(0), true, 5, true, false); #endif - TestSimpleRecurrence(1000, 9, 16, 3, DeviceDescriptor::CPUDevice(), 2, true, true); + TestSimpleRecurrence(1000, 9, 16, 3, DeviceDescriptor::CPUDevice(), false, 2, true, true); #ifndef CPUONLY - TestSimpleRecurrence(5000, 200, 19, 6, DeviceDescriptor::GPUDevice(0), 3, false, true); - TestSimpleRecurrence(1000, 9, 16, 3, DeviceDescriptor::GPUDevice(0), 3, true, true, true); + TestSimpleRecurrence(5000, 200, 19, 6, DeviceDescriptor::GPUDevice(0), false, 3, false, true); + TestSimpleRecurrence(1000, 9, 16, 3, DeviceDescriptor::GPUDevice(0), true, 3, true, true, true); #endif - TestSimpleRecurrence(5000, 200, 19, 6, DeviceDescriptor::CPUDevice(), 2, false, true, true); + TestSimpleRecurrence(5000, 200, 19, 6, DeviceDescriptor::CPUDevice(), true, 2, false, true, true); #ifndef CPUONLY - TestRecurrentNetworkCreation(DeviceDescriptor::GPUDevice(0)); + TestRecurrentNetworkCreation(DeviceDescriptor::GPUDevice(0), true); #endif - 
TestRecurrentNetworkCreation(DeviceDescriptor::CPUDevice());
+    TestRecurrentNetworkCreation(DeviceDescriptor::CPUDevice(), false);
 }
diff --git a/Tests/UnitTests/V2LibraryTests/TensorTests.cpp b/Tests/UnitTests/V2LibraryTests/TensorTests.cpp
index 20e0c54cb..d092dc963 100644
--- a/Tests/UnitTests/V2LibraryTests/TensorTests.cpp
+++ b/Tests/UnitTests/V2LibraryTests/TensorTests.cpp
@@ -20,8 +20,8 @@ void TestTensorPlus(size_t numAxesLeftOperand, size_t numAxesRightOperand, const
     for (size_t i = std::min(numAxesLeftOperand, numAxesRightOperand); i < numAxesRightOperand; ++i)
         rightInputShape[i] = (rand() % maxDimSize) + 1;

-    Variable leftInputVar(leftInputShape, AsDataType<ElementType>(), L"leftInput");
-    Variable rightInputVar(rightInputShape, AsDataType<ElementType>(), L"rightInput");
+    Variable leftInputVar(leftInputShape, AsDataType<ElementType>(), true, L"leftInput");
+    Variable rightInputVar(rightInputShape, AsDataType<ElementType>(), true, L"rightInput");

     auto plusFunc = Plus(leftInputVar, rightInputVar);
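
Editorial usage note (not part of the patch): a minimal sketch of how the two entry points this change adds to CNTKLibrary.h, LoadLegacyModel and SaveAsLegacyModel, would be called. The file name and function name below are hypothetical; the float element type is one of the two instantiations (float, double) exported by BackCompat.cpp. This is an illustration under those assumptions, not code from the patch.

    #include "CNTKLibrary.h"

    void RoundTripThroughV1Format(const CNTK::FunctionPtr& model, const CNTK::DeviceDescriptor& device)
    {
        // Hypothetical file name used only for this sketch
        const std::wstring modelPath = L"model.v1.dnn";

        // Save the V2 Function in the legacy CNTK v1 on-disk format.
        // The root must be a composite Function; primitive Functions are rejected.
        CNTK::SaveAsLegacyModel<float>(model, modelPath);

        // Rebuild a V2 Function graph from the v1 model file on the given device.
        // Variable objects are recreated on load, so callers that held on to the old
        // inputs/outputs re-locate them by name (see the SaveAndReloadModel helper in Common.h).
        CNTK::FunctionPtr reloaded = CNTK::LoadLegacyModel<float>(modelPath, device);
    }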