moved memshare functions from header to ComputationNetworkEvaluation.cpp;

created new ComputationNetworkEditing.cpp
Frank Seide 2015-10-30 10:35:12 -07:00
Parent 1629ad9515
Commit 317b9466cd
7 changed files: 269 additions and 215 deletions

View file

@@ -192,6 +192,7 @@
<ClCompile Include="ComputationNetwork.cpp" />
<ClCompile Include="ComputationNetworkAnalysis.cpp" />
<ClCompile Include="ComputationNetworkBuilder.cpp" />
<ClCompile Include="ComputationNetworkEditing.cpp" />
<ClCompile Include="ComputationNetworkEvaluation.cpp" />
<ClCompile Include="ComputationNode.cpp" />
<ClCompile Include="NetworkBuilderFromConfig.cpp" />

View file

@@ -34,6 +34,9 @@
<ClCompile Include="ComputationNetworkAnalysis.cpp">
<Filter>Network</Filter>
</ClCompile>
<ClCompile Include="ComputationNetworkEditing.cpp">
<Filter>Network</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\Common\Include\basetypes.h">

View file

@@ -722,6 +722,68 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
// -----------------------------------------------------------------------
// unit test
// -----------------------------------------------------------------------
/**
Calls the unit test of each node.
This verifies the correctness of the node operations.
*/
bool ComputationNetwork::UnitTest(bool allowFragment)
{
vector<wstring> vErrors;
// currently this only validates nodes; we should validate everything we can
if (FeatureNodes().size() == 0 && !allowFragment)
RuntimeError("No Feature nodes specified");
// first, use the criterion nodes as root nodes
if (FinalCriterionNodes().size() > 0)
{
for (auto & node : FinalCriterionNodes())
{
if (!allowFragment)
FormRecurrentLoops(node);
//this->SetActualMiniBatchSizeFromFeatures();
if (!UnitTest(node))
vErrors.push_back(node->NodeName().c_str());
}
}
else if (!allowFragment)
RuntimeError("No Criterion nodes specified");
// now output nodes
if (OutputNodes().size() > 0)
{
for (auto & node : OutputNodes())
if (!UnitTest(node))
vErrors.push_back(node->NodeName().c_str());
}
else if (!allowFragment)
RuntimeError("No Output nodes specified");
// now evaluation nodes
if (EvaluationNodes().size() > 0)
{
for (auto & node : EvaluationNodes())
if (!UnitTest(node))
vErrors.push_back(node->NodeName().c_str());
}
return vErrors.empty();
}
bool ComputationNetwork::UnitTest(const ComputationNodeBasePtr& rootNode)
{
fprintf(stderr, "\n\n Unit test node %ls \n", rootNode->NodeName().c_str());
std::list<ComputationNodeBasePtr>& nodes = GetEvalOrder(rootNode, false);
for (auto & nodeIter : nodes)
if (!nodeIter->UnitTest())
return false;
fprintf(stderr, "\n\n");
return true;
}
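For reference, a hedged usage sketch of the two entry points above from a hypothetical caller; the network variable and its construction are assumptions, not part of this commit:
// hypothetical caller-side sketch: run the per-node unit tests over a network
ComputationNetwork net;
/* ... build or load 'net' ... */
if (!net.UnitTest(/*allowFragment=*/false))  // full network: feature, criterion, and output nodes must exist
    fprintf(stderr, "node unit tests failed\n");
net.UnitTest(/*allowFragment=*/true);        // network fragment: skip the node-presence checks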
// -----------------------------------------------------------------------
// topological plot [erw]
// -----------------------------------------------------------------------

View file

@@ -900,221 +900,16 @@ public:
StartEvaluateMinibatchLoop(nodes2);
}
//this function needs to be called before actual validation and execution, to
//predetermine how to share matrices to reduce memory usage.
//TODO: find a simple topological order and call AllocateEvalMatrices on that order directly,
//without passing in eval, out, and train nodes.
void AllocateAllEvalMatrices(std::vector<ComputationNodeBasePtr>& evalRootNodes,
std::vector<ComputationNodeBasePtr>& outValueRootNodes,
std::vector<ComputationNodeBasePtr>& trainRootNodes)
{
//allocate memory for forward computation
fprintf(stderr, "\n\nAllocating matrices for forward propagation.\n");
for (int i = 0; i < evalRootNodes.size(); i++)
AllocateEvalMatrices(evalRootNodes[i]);
for (int i = 0; i < outValueRootNodes.size(); i++)
AllocateEvalMatrices(outValueRootNodes[i]);
for (int i = 0; i < trainRootNodes.size(); i++)
AllocateEvalMatrices(trainRootNodes[i]);
void AllocateGradientMatrices(ComputationNodeBasePtr rootNode); // public since this is called by SGD
private:
void AllocateAllEvalMatrices(std::vector<ComputationNodeBasePtr>& evalRootNodes, std::vector<ComputationNodeBasePtr>& outValueRootNodes, std::vector<ComputationNodeBasePtr>& trainRootNodes);
void AllocateEvalMatrices(ComputationNodeBasePtr rootNode);
void ReleaseMatricesAfterEvalForChildren(ComputationNodeBasePtr n, std::map<ComputationNodeBasePtr, int>& parentCount);
void AllocateGradientMatricesForChildren(ComputationNodeBasePtr parentNode);
public:
}
void AllocateEvalMatrices(ComputationNodeBasePtr rootNode)
{
FormRecurrentLoops(rootNode);
std::list<ComputationNodeBasePtr>& allNodes = GetEvalOrder(rootNode, false);
//determine the number of parents of each node
std::map<ComputationNodeBasePtr, int> parentCount;
for (auto &n : allNodes)
{
for (int i = 0; i < n->ChildrenSize(); i++)
{
ComputationNodeBasePtr pNode = n->GetChildren()[i];
parentCount[pNode]++;
}
}
for (int i = 0; i < m_recurrentInfo.size(); i++)
m_recurrentInfo[i].m_completedEvaluate = false;
for (auto &nodeIter : allNodes)
{
if (nodeIter->IsPartOfLoop())
{
RecurrentInfo* recInfo = FindInRecurrentLoops(nodeIter);
assert(recInfo != nullptr);
if (recInfo->m_completedEvaluate == false)
{
const auto & recurrentNodes = recInfo->m_recurrentNodesForForward;
for (auto &nodeLoopIter : recurrentNodes)
{
nodeLoopIter->RequestMatricesBeforeEval(m_matrixPool);
}
recInfo->m_completedEvaluate = true;
for (auto &nodeLoopIter : recurrentNodes)
{
ReleaseMatricesAfterEvalForChildren(nodeLoopIter, parentCount);
}
}
}
else
{
nodeIter->RequestMatricesBeforeEval(m_matrixPool);
//we only release matrices for the children, since the root node's information will be used and should not be shared
//with others
ReleaseMatricesAfterEvalForChildren(nodeIter, parentCount);
}
}
}
void ReleaseMatricesAfterEvalForChildren(ComputationNodeBasePtr n, std::map<ComputationNodeBasePtr, int>& parentCount)
{
for (int i = 0; i < n->ChildrenSize(); i++)
{
ComputationNodeBasePtr pNode = n->GetChildren()[i];
parentCount[pNode]--;
if (parentCount[pNode] == 0)
pNode->ReleaseMatricesAfterEval(m_matrixPool);
}
}
void AllocateGradientMatrices(ComputationNodeBasePtr rootNode)
{
FormRecurrentLoops(rootNode);
//PopulateParents(rootNode);
std::list<ComputationNodeBasePtr>& allNodes = GetGradientCalcOrder(rootNode);
//determine children size
//std::map<ComputationNodeBasePtr, int> childrenCount;
//for (auto &nodeIter : allNodes)
//{
// childrenCount[nodeIter] = nodeIter->ChildrenSize();
//}
//now, simulate the gradient computation order to determine how to allocate matrices
for (int i = 0; i < m_recurrentInfo.size(); i++)
m_recurrentInfo[i].m_completedGradient = false;
//we need to call this here since we always compute gradients for children, and the root node is not a child of any other node
rootNode->RequestMatricesBeforeGradientComp(m_matrixPool);
for (auto &n : allNodes)
{
if (n->IsPartOfLoop())
{
std::vector<ComputationNodeBasePtr> recurrentNodes;
RecurrentInfo * recInfo = FindInRecurrentLoops(n);
if (recInfo && recInfo->m_completedGradient == false)
{
const auto & recurrentNodes = recInfo->m_recurrentNodesForForward;
//loops are computed sample by sample so we have to allocate them all
for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter)
{
AllocateGradientMatricesForChildren(*nodeIter);
}
recInfo->m_completedGradient = true;
for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter)
{
if ((*nodeIter)->NeedGradient())
{
(*nodeIter)->ReleaseMatricesAfterGradientComp(m_matrixPool);
}
}
}
}
else
{
AllocateGradientMatricesForChildren(n);
if ((n != rootNode) && n->NeedGradient()) //root node's information will be used and should not be shared with others; also it's small (1x1)
n->ReleaseMatricesAfterGradientComp(m_matrixPool);
}
}
}
//void ReleaseMatricesAfterGradientCompForParents(ComputationNodeBasePtr n, std::map<ComputationNodeBasePtr, int>& childrenCount)
//{
// for (int i = 0; i < n->ParentSize(); i++)
// {
// ComputationNodeBasePtr pNode = n->Parent(i);
// childrenCount[pNode] --;
// if (childrenCount[pNode] == 0)
// pNode->ReleaseMatricesAfterGradientComp(m_matrixPool);
// }
//}
void AllocateGradientMatricesForChildren(ComputationNodeBasePtr parentNode)
{
std::vector<ComputationNodeBasePtr> children = parentNode->GetChildren();
for (int i = 0; i < children.size(); i++)
{
if (children[i]->NeedGradient())
children[i]->RequestMatricesBeforeGradientComp(m_matrixPool);
}
}
/**
Calls the unit test of each node.
This verifies the correctness of the node operations.
*/
bool UnitTest(bool allowFragment = false)
{
vector<wstring> vErrors;
// currently this only validates nodes; we should validate everything we can
if (FeatureNodes().size() == 0 && !allowFragment)
RuntimeError("No Feature nodes specified");
// first, use the criterion nodes as root nodes
if (FinalCriterionNodes().size() > 0)
{
for (auto & node : FinalCriterionNodes())
{
if (!allowFragment)
FormRecurrentLoops(node);
//this->SetActualMiniBatchSizeFromFeatures();
if (!UnitTest(node))
vErrors.push_back(node->NodeName().c_str());
}
}
else if (!allowFragment)
RuntimeError("No Criterion nodes specified");
// now output nodes
if (OutputNodes().size() > 0)
{
for (auto & node : OutputNodes())
if (!UnitTest(node))
vErrors.push_back(node->NodeName().c_str());
}
else if (!allowFragment)
RuntimeError("No Output nodes specified");
// now evaluation nodes
if (EvaluationNodes().size() > 0)
{
for (auto & node : EvaluationNodes())
if (!UnitTest(node))
vErrors.push_back(node->NodeName().c_str());
}
return vErrors.empty();
}
bool UnitTest(const ComputationNodeBasePtr& rootNode)
{
fprintf(stderr, "\n\n Unit test node %ls \n", rootNode->NodeName().c_str());
std::list<ComputationNodeBasePtr>& nodes = GetEvalOrder(rootNode, false);
for (auto & nodeIter : nodes)
if (!nodeIter->UnitTest())
return false;
fprintf(stderr, "\n\n");
return true;
}
bool UnitTest(bool allowFragment = false);
bool UnitTest(const ComputationNodeBasePtr& rootNode);
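The comment above keeps AllocateGradientMatrices public because the SGD trainer calls it, while the remaining allocation helpers become private. A hedged sketch of that caller-side sequence; the variable names and surrounding setup are hypothetical:
// hypothetical trainer-side sketch; assumes the network is already built and validated
ComputationNetwork net;
ComputationNodeBasePtr criterionRoot = net.FinalCriterionNodes()[0]; // train root, as in UnitTest() above
net.AllocateGradientMatrices(criterionRoot); // pre-plan shared gradient matrices before backprop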
// -----------------------------------------------------------------------
// specialized operations

View file

@@ -0,0 +1,30 @@
//
// <copyright file="ComputationNetworkEditing.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#include "Basics.h"
#include "ComputationNode.h"
#include "ComputationNetwork.h"
//#include "ComputationNetworkBuilder.h" // used for load & save
//#include "LinearAlgebraNodes.h"
//#include "NonlinearityNodes.h"
//#include "ConvolutionalNodes.h"
//#include "RecurrentNodes.h"
//#include "ReshapingNodes.h"
//#include "TrainingCriterionNodes.h"
//#include "CompositeComputationNodes.h"
//#include "EvaluationCriterionNodes.h"
#include <string>
#include <vector>
#include <list>
#include <set>
using namespace std;
namespace Microsoft { namespace MSR { namespace CNTK {
}}}

View file

@@ -19,7 +19,7 @@ using namespace std;
namespace Microsoft { namespace MSR { namespace CNTK {
// This source file contains methods related to evaluation (forward prop, backprop) and network validation.
// This source file contains methods related to evaluation (forward prop, backprop), network validation, and matrix memory allocation (memory sharing).
// -----------------------------------------------------------------------
// evaluation
@@ -520,4 +520,166 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return m_built.find(rootNode) != m_built.end();
}
// -----------------------------------------------------------------------
// memory allocation
// -----------------------------------------------------------------------
//this function needs to be called before actual validation and execution, to
//predetermine how to share matrices to reduce memory usage.
//TODO: find a simple topological order and call AllocateEvalMatrices on that order directly,
//without passing in eval, out, and train nodes.
void ComputationNetwork::AllocateAllEvalMatrices(std::vector<ComputationNodeBasePtr>& evalRootNodes,
std::vector<ComputationNodeBasePtr>& outValueRootNodes,
std::vector<ComputationNodeBasePtr>& trainRootNodes)
{
//allocate memory for forward computation
fprintf(stderr, "\n\nAllocating matrices for forward propagation.\n");
for (int i = 0; i < evalRootNodes.size(); i++)
AllocateEvalMatrices(evalRootNodes[i]);
for (int i = 0; i < outValueRootNodes.size(); i++)
AllocateEvalMatrices(outValueRootNodes[i]);
for (int i = 0; i < trainRootNodes.size(); i++)
AllocateEvalMatrices(trainRootNodes[i]);
}
void ComputationNetwork::AllocateEvalMatrices(ComputationNodeBasePtr rootNode)
{
FormRecurrentLoops(rootNode);
std::list<ComputationNodeBasePtr>& allNodes = GetEvalOrder(rootNode, false);
//determine the number of parents of each node
std::map<ComputationNodeBasePtr, int> parentCount;
for (auto &n : allNodes)
{
for (int i = 0; i < n->ChildrenSize(); i++)
{
ComputationNodeBasePtr pNode = n->GetChildren()[i];
parentCount[pNode]++;
}
}
for (int i = 0; i < m_recurrentInfo.size(); i++)
m_recurrentInfo[i].m_completedEvaluate = false;
for (auto &nodeIter : allNodes)
{
if (nodeIter->IsPartOfLoop())
{
RecurrentInfo* recInfo = FindInRecurrentLoops(nodeIter);
assert(recInfo != nullptr);
if (recInfo->m_completedEvaluate == false)
{
const auto & recurrentNodes = recInfo->m_recurrentNodesForForward;
for (auto &nodeLoopIter : recurrentNodes)
{
nodeLoopIter->RequestMatricesBeforeEval(m_matrixPool);
}
recInfo->m_completedEvaluate = true;
for (auto &nodeLoopIter : recurrentNodes)
{
ReleaseMatricesAfterEvalForChildren(nodeLoopIter, parentCount);
}
}
}
else
{
nodeIter->RequestMatricesBeforeEval(m_matrixPool);
//we only release matrices for the children, since the root node's information will be used and should not be shared
//with others
ReleaseMatricesAfterEvalForChildren(nodeIter, parentCount);
}
}
}
void ComputationNetwork::ReleaseMatricesAfterEvalForChildren(ComputationNodeBasePtr n, std::map<ComputationNodeBasePtr, int>& parentCount)
{
for (int i = 0; i < n->ChildrenSize(); i++)
{
ComputationNodeBasePtr pNode = n->GetChildren()[i];
parentCount[pNode]--;
if (parentCount[pNode] == 0)
pNode->ReleaseMatricesAfterEval(m_matrixPool);
}
}
void ComputationNetwork::AllocateGradientMatrices(ComputationNodeBasePtr rootNode)
{
FormRecurrentLoops(rootNode);
//PopulateParents(rootNode);
std::list<ComputationNodeBasePtr>& allNodes = GetGradientCalcOrder(rootNode);
//determine children size
//std::map<ComputationNodeBasePtr, int> childrenCount;
//for (auto &nodeIter : allNodes)
//{
// childrenCount[nodeIter] = nodeIter->ChildrenSize();
//}
//now, simulate the gradient computation order to determine how to allocate matrices
for (int i = 0; i < m_recurrentInfo.size(); i++)
m_recurrentInfo[i].m_completedGradient = false;
//we need to call this here since we always compute gradients for children, and the root node is not a child of any other node
rootNode->RequestMatricesBeforeGradientComp(m_matrixPool);
for (auto &n : allNodes)
{
if (n->IsPartOfLoop())
{
std::vector<ComputationNodeBasePtr> recurrentNodes;
RecurrentInfo * recInfo = FindInRecurrentLoops(n);
if (recInfo && recInfo->m_completedGradient == false)
{
const auto & recurrentNodes = recInfo->m_recurrentNodesForForward;
//loops are computed sample by sample so we have to allocate them all
for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter)
{
AllocateGradientMatricesForChildren(*nodeIter);
}
recInfo->m_completedGradient = true;
for (auto nodeIter = recurrentNodes.rbegin(); nodeIter != recurrentNodes.rend(); ++nodeIter)
{
if ((*nodeIter)->NeedGradient())
{
(*nodeIter)->ReleaseMatricesAfterGradientComp(m_matrixPool);
}
}
}
}
else
{
AllocateGradientMatricesForChildren(n);
if ((n != rootNode) && n->NeedGradient()) //root node's information will be used and should not be shared with others; also it's small (1x1)
n->ReleaseMatricesAfterGradientComp(m_matrixPool);
}
}
}
//void ReleaseMatricesAfterGradientCompForParents(ComputationNodeBasePtr n, std::map<ComputationNodeBasePtr, int>& childrenCount)
//{
// for (int i = 0; i < n->ParentSize(); i++)
// {
// ComputationNodeBasePtr pNode = n->Parent(i);
// childrenCount[pNode] --;
// if (childrenCount[pNode] == 0)
// pNode->ReleaseMatricesAfterGradientComp(m_matrixPool);
// }
//}
void ComputationNetwork::AllocateGradientMatricesForChildren(ComputationNodeBasePtr parentNode)
{
std::vector<ComputationNodeBasePtr> children = parentNode->GetChildren();
for (int i = 0; i < children.size(); i++)
{
if (children[i]->NeedGradient())
children[i]->RequestMatricesBeforeGradientComp(m_matrixPool);
}
}
}}}
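The allocation pass above is, at heart, reference counting over the evaluation order: each node requests its output matrix from the shared pool before it is evaluated, and a child's matrix is handed back once its last parent has consumed it (root outputs are never released). A minimal self-contained sketch of that pattern, with toy Pool and Node types standing in for CNTK's MatrixPool and ComputationNode; everything here is illustrative, not the actual API:
#include <map>
#include <memory>
#include <stack>
#include <vector>
// Toy buffer pool standing in for CNTK's MatrixPool (illustrative only):
// hands out buffers and recycles released ones.
struct Pool
{
    std::stack<std::shared_ptr<std::vector<float>>> m_free;
    std::shared_ptr<std::vector<float>> Request(size_t n)
    {
        if (m_free.empty())
            return std::make_shared<std::vector<float>>(n);
        auto m = m_free.top(); m_free.pop();
        m->resize(n);
        return m;
    }
    void Release(std::shared_ptr<std::vector<float>> m) { m_free.push(std::move(m)); }
};
struct Node
{
    std::vector<Node*> m_children;
    std::shared_ptr<std::vector<float>> m_output;
};
// Same pattern as AllocateEvalMatrices()/ReleaseMatricesAfterEvalForChildren():
// count parents, request each node's output before "evaluating" it, and hand a
// child's output back to the pool once its last parent has consumed it.
void PlanEvalMemory(const std::vector<Node*>& evalOrder, Pool& pool)
{
    std::map<Node*, int> parentCount;
    for (auto* n : evalOrder)
        for (auto* c : n->m_children)
            parentCount[c]++;
    for (auto* n : evalOrder)
    {
        n->m_output = pool.Request(1024);             // RequestMatricesBeforeEval
        for (auto* c : n->m_children)
            if (--parentCount[c] == 0)                // last parent done with this child
                pool.Release(std::move(c->m_output)); // ReleaseMatricesAfterEval
    }
}
Nodes inside a recurrent loop get the same treatment, except (as in the real code above) the whole loop is allocated up front before any of its members are released, since loops are computed sample by sample.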

View file

@@ -416,6 +416,7 @@ CNTK_SRC =\
MachineLearning/CNTKComputationNetworkLib/ComputationNetwork.cpp \
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkEvaluation.cpp \
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkAnalysis.cpp \
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkEditing.cpp \
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkBuilder.cpp \
MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp \
MachineLearning/CNTKSGDLib/Profiler.cpp \