BrainScript: ComputationNetwork can now construct itself from BS (the special MakeRuntimeObject<ComputationNetwork>() specialization is gone);

new source file ComputationNetworkScripting.cpp;
changed ComputationNetwork::SetDeviceId(). It now just takes the argument and sets that as the device (resolving AUTOPLACEMATRIX to the best GPU). Before, it branched on the member m_deviceId instead of the argument, which looks like a bug;
in the LSTM test config, removed the remnants of the old NDL; it is now completely based on BS
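
In practical terms, construction from a config record moves from a free factory specialization into the class itself. A minimal sketch of the before/after calling pattern (the wrapper function is hypothetical; only the two signatures come from this diff):

#include <memory>
#include "ScriptableObjects.h"
#include "ComputationNetwork.h"

// hypothetical helper, not part of this commit
std::shared_ptr<Microsoft::MSR::CNTK::ComputationNetwork>
CreateNetworkFromConfig(const Microsoft::MSR::ScriptableObjects::IConfigRecordPtr configp)
{
    using namespace Microsoft::MSR::CNTK;
    // before: shared_ptr<Object> net = MakeRuntimeObject<ComputationNetwork>(configp);  // specialization now removed
    // after:  the constructor reads 'deviceId' from the record, collects all root ComputationNodes,
    //         resolves late-attaching inputs, and assigns each node to its node group based on its tag
    return std::make_shared<ComputationNetwork>(configp);
}
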
This commit is contained in:
Frank Seide 2015-11-16 09:57:01 -08:00
Parent 7ab6151416
Commit 2ca3f555a8
10 changed files with 161 additions and 255 deletions

View file

@@ -195,6 +195,7 @@
<ClCompile Include="ComputationNetworkBuilder.cpp" />
<ClCompile Include="ComputationNetworkEditing.cpp" />
<ClCompile Include="ComputationNetworkEvaluation.cpp" />
<ClCompile Include="ComputationNetworkScripting.cpp" />
<ClCompile Include="ComputationNode.cpp" />
<ClCompile Include="NetworkBuilderFromConfig.cpp" />
<ClCompile Include="stdafx.cpp" />

View file

@@ -37,6 +37,9 @@
<ClCompile Include="ComputationNetworkEditing.cpp">
<Filter>Network</Filter>
</ClCompile>
<ClCompile Include="ComputationNetworkScripting.cpp">
<Filter>Network</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\Common\Include\basetypes.h">

View file

@@ -159,16 +159,21 @@ public:
// construction
// -----------------------------------------------------------------------
ComputationNetwork(DEVICEID_TYPE deviceId = AUTOPLACEMATRIX) :
ComputationNetwork() :
m_randomSeedOffset(0),
m_deviceId(deviceId), m_pMBLayout(make_shared<MBLayout>())
m_pMBLayout(make_shared<MBLayout>())
{
}
ComputationNetwork(DEVICEID_TYPE deviceId) :
ComputationNetwork()
{
SetDeviceId(deviceId);
}
ComputationNetwork(const ScriptableObjects::IConfigRecordPtr configp); // construct from config
virtual ~ComputationNetwork()
{
ClearNet();
ClearNet(); // This will explicitly remove all nodes. This is needed to break circular references in loops.
}
// -----------------------------------------------------------------------
@@ -280,12 +285,11 @@ public:
// construction
// -----------------------------------------------------------------------
void SetDeviceId(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX)
void SetDeviceId(DEVICEID_TYPE deviceId)
{
if (m_deviceId == AUTOPLACEMATRIX)
m_deviceId = Matrix<float>::GetBestGPUDeviceId();
else
m_deviceId = deviceId;
if (deviceId == AUTOPLACEMATRIX)
deviceId = Matrix<float>::GetBestGPUDeviceId();
m_deviceId = deviceId;
m_deviceId = EnforceOneGPUOnly(m_deviceId); // see EnforceOneGPUOnly() for comment on what this is
}
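
To make the behavior change concrete, a hedged restatement with assumed calling code (only the old and new bodies above come from the diff):

ComputationNetwork net;   // old default constructor left m_deviceId at AUTOPLACEMATRIX
net.SetDeviceId(2);       // explicitly request device 2
// old logic: branches on the member; m_deviceId == AUTOPLACEMATRIX, so it calls
//            Matrix<float>::GetBestGPUDeviceId() and silently discards the requested '2'
// new logic: branches on the argument; '2' is not AUTOPLACEMATRIX, so m_deviceId becomes 2,
//            while passing AUTOPLACEMATRIX still auto-selects the best GPU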

View file

@@ -371,7 +371,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (FeatureNodes().size() == 0 && !allowFragment)
RuntimeError("No Feature nodes specified");
#if 1 // If it is not done here, it will causea crash. But it really only belongs into StartEvluationMinibatchLoop()
#if 1 // If it is not done here, it will cause a crash. But it really only belongs into StartEvaluationMinibatchLoop()
// TODO: allocation does not belong here. This is called e.g. after loading. Memory should be allocated only when actually evaluating.
// TODO: move into StartEvaluateMinibatchLoop(), but that is called for output nodes individually--can the process handle that?
AllocateAllEvalMatrices(EvaluationNodes(), OutputNodes(), FinalCriterionNodes());

View file

@@ -0,0 +1,117 @@
//
// <copyright file="ComputationNetworkScripting.cpp" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#include "Basics.h"
#include "ScriptableObjects.h"
#include "ComputationNode.h"
#include "InputAndParamNodes.h"
#include "RecurrentNodes.h"
#include "NonlinearityNodes.h"
#include "LinearAlgebraNodes.h"
#include "ConvolutionalNodes.h"
#include "ReshapingNodes.h"
#include "ComputationNetwork.h"
#include "ComputationNetworkBuilder.h"
#include <memory>
#include <deque>
#include <set>
#include <string>
#ifndef let
#define let const auto
#endif
using namespace std;
namespace Microsoft { namespace MSR { namespace CNTK {
using namespace Microsoft::MSR::ScriptableObjects;
// -------------------------------------------------------------------
// construction from config
// -------------------------------------------------------------------
// construct a ComputationNetwork from a ConfigRecord
ComputationNetwork::ComputationNetwork(const IConfigRecordPtr configp) :
ComputationNetwork()
{
let & config = *configp;
DEVICEID_TYPE deviceId = (DEVICEID_TYPE)(int)config[L"deviceId"];
SetDeviceId(deviceId);
deque<ComputationNodeBasePtr> workList;
// flatten the set of all nodes
// we collect all root ComputationNodes from the config record, and then expand into all their children by work-list processing
// TODO: This currently only supports nodes of the same ElemType. We could allow conversion operators.
for (let & id : config.GetMemberIds())
{
let & value = config[id];
if (value.Is<ComputationNodeBase>())
workList.push_back((const ComputationNodeBasePtr&)value);
}
// process work list
// Also call FinalizeInit where we must.
while (!workList.empty())
{
let node = workList.front();
workList.pop_front();
// add to set
let res = m_nameToNodeMap.insert(make_pair(node->NodeName(), node));
if (!res.second) // not inserted: we already got this one
if (res.first->second == node)
continue; // the same
else // oops, a different node with the same name
LogicError("ComputationNetwork: multiple nodes with the same NodeName() '%ls'", node->NodeName().c_str());
// If node derives from MustFinalizeInit() then it has unresolved inputs. Resolve them now.
// This may generate a whole new load of nodes, including nodes which in turn have late init.
// TODO: think this through whether it may generate circular references nevertheless
let lateAttachingNode = dynamic_pointer_cast<ILateAttachingNode>(node);
if (lateAttachingNode)
lateAttachingNode->LateAttachInputs();
// add it to the respective node group based on the tag
let nodeWithTag = dynamic_pointer_cast<WithTag>(node);
if (nodeWithTag)
{
wstring tag = nodeWithTag->GetTag();
if (tag == L"feature") FeatureNodes().push_back(node);
else if (tag == L"label") LabelNodes().push_back(node);
else if (tag == L"criterion" || tag == L"criteria") FinalCriterionNodes().push_back(node); // 'criteria' is wrong (plural); we keep it for compat
else if (!_wcsnicmp(tag.c_str(), L"eval", 4)) EvaluationNodes().push_back(node); // eval*
else if (tag == L"output") OutputNodes().push_back(node);
#if 0 // deprecated
else if (tag == L"pair") PairNodes().push_back(node); // TODO: I made this up; the original code in SynchronousExecutionEngine did not have this
#endif
else if (!tag.empty())
RuntimeError("ComputationNetwork: unknown tag '%ls'", tag.c_str());
// TODO: are there nodes without tag? Where do they go?
}
// traverse children: append them to the end of the work list
let & children = node->GetChildren();
for (auto & child : children)
workList.push_back(child); // (we could check whether c is in 'nodes' already here to optimize, but this way it is cleaner)
}
ValidateNetwork();
#if 1
wstring args = ToString();
fprintf(stderr, "%ls\n", args.c_str());
#endif
// these post-processing steps are done by the other network builders, but I don't know why they are necessary
FixupInputMinibatchSize(); // make sure dimensions are set up correctly
ResetEvalTimeStamp(); // (should not really be needed)
}
}}}

View file

@@ -1378,6 +1378,31 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::vector<ComputationNodeBasePtr> m_nestedNodes; // nodes tucked away in this node, in evaluation order
};
// =======================================================================
// ILateAttachingNode -- helper wrapper class for ComputationNodes that must AttachInputs() late due to circular references
// =======================================================================
// Instantiate with LateAttachingNode<node type>(lambda, args for node constructor).
// To resolve, call AttachInputs()
// TODO: This is a bit indirect. Can it be done more nicely?
struct ILateAttachingNode { virtual void LateAttachInputs() = 0; };
template<class N>
class LateAttachingNode : public N, public ILateAttachingNode
{
typedef typename N::OurElemType ElemType;
function<void(ComputationNode<ElemType>*)> attachInputs;
public:
// constructor
template<class... _Types>
LateAttachingNode(DEVICEID_TYPE deviceId, const wstring & name, const function<void(ComputationNode<ElemType>*)> & attachInputs, _Types&&... _Args) : attachInputs(attachInputs), N(deviceId, name, forward<_Types>(_Args)...) {}
// the one member that does the work
void /*ILateAttachingNode::*/LateAttachInputs()
{
attachInputs(dynamic_cast<N*>(this));
attachInputs = [](ComputationNode<ElemType>*){ LogicError("LateAttachingNode::AttachInputs: must only be called once"); };
}
};
// =======================================================================
// helper macro to ease access to base members in presence of C++ two-phase name lookup
// =======================================================================
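
A minimal usage sketch of the pattern described in the LateAttachingNode comment above. The node type (PlusNode<float>), the input nodes, and the two-argument AttachInputs() overload are illustrative assumptions; only the LateAttachingNode/ILateAttachingNode API itself comes from this diff:

void ExampleLateAttach()   // hypothetical
{
    typedef shared_ptr<ComputationNode<float>> NodePtr;
    NodePtr a, b;                              // inputs that exist only later (forward/circular reference)
    const DEVICEID_TYPE deviceId = CPUDEVICE;  // assumption: CPU for the example

    // create the node now, but defer wiring up its inputs until they exist
    auto z = make_shared<LateAttachingNode<PlusNode<float>>>(deviceId, L"z",
        [&a, &b](ComputationNode<float>* us) { us->AttachInputs(a, b); });

    // ... once 'a' and 'b' have been created, resolve the deferred attachment (exactly once)
    dynamic_pointer_cast<ILateAttachingNode>(z)->LateAttachInputs();
}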

View file

@@ -45,28 +45,6 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
// ComputationNode -- covers all standard nodes
// -------------------------------------------------------------------
// helper wrapper class for ComputationNodes that must AttachInputs() late due to circular references
// Instantiate with LateAttachingNode<node type>(lambda, args for node constructor).
// To resolve, call AttachInputs()
// TODO: This is a bit indirect. Can it be done more nicely?
struct ILateAttachingNode { virtual void LateAttachInputs() = 0; };
template<class N>
class LateAttachingNode : public N, public ILateAttachingNode
{
typedef typename N::OurElemType ElemType;
function<void(ComputationNode<ElemType>*)> attachInputs;
public:
// constructor
template<class... _Types>
LateAttachingNode(DEVICEID_TYPE deviceId, const wstring & name, const function<void(ComputationNode<ElemType>*)> & attachInputs, _Types&&... _Args) : attachInputs(attachInputs), N(deviceId, name, forward<_Types>(_Args)...) {}
// the one member that does the work
void /*ILateAttachingNode::*/LateAttachInputs()
{
attachInputs(dynamic_cast<N*>(this));
attachInputs = [](ComputationNode<ElemType>*){ LogicError("LateAttachingNode::AttachInputs: must only be called once"); };
}
};
template<class ElemType>
struct DualPrecisionHelpers<ElemType, ComputationNode<ElemType>>
{
@@ -227,88 +205,6 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
}
};
// -------------------------------------------------------------------
// ComputationNetwork
// -------------------------------------------------------------------
// initialize a ComputationNetwork from a ConfigRecord
template<>
/*static*/ shared_ptr<Object> MakeRuntimeObject<ComputationNetwork>(const IConfigRecordPtr configp)
{
let & config = *configp;
DEVICEID_TYPE deviceId = (DEVICEID_TYPE)(int)config[L"deviceId"];
auto net = make_shared<ComputationNetwork>(deviceId);
auto & m_nameToNodeMap = net->GetNameToNodeMap();
deque<ComputationNodeBasePtr> workList;
// flatten the set of all nodes
// we collect all root ComputationNodes from the config record, and then expand into all their children by work-list processing
// TODO: This currently only collects nodes of the same ElemType. We could allow conversion operators.
// TODO: Can we even make the ComputationNetwork independent of ElemType?? As long as the nodes themselves are hooked up properly that should be OK!
for (let & id : config.GetMemberIds())
{
let & value = config[id];
if (value.Is<ComputationNodeBase>())
workList.push_back((const ComputationNodeBasePtr&)value);
}
// process work list
// Also call FinalizeInit where we must.
while (!workList.empty())
{
let node = workList.front();
workList.pop_front();
// add to set
let res = m_nameToNodeMap.insert(make_pair(node->NodeName(), node));
if (!res.second) // not inserted: we already got this one
if (res.first->second == node)
continue; // the same
else // oops, a different node with the same name
LogicError("ComputationNetwork: multiple nodes with the same NodeName() '%ls'", node->NodeName().c_str());
// If node derives from MustFinalizeInit() then it has unresolved inputs. Resolve them now.
// This may generate a whole new load of nodes, including nodes which in turn have late init.
// TODO: think this through whether it may generate circular references nevertheless
let lateAttachingNode = dynamic_pointer_cast<ILateAttachingNode>(node);
if (lateAttachingNode)
lateAttachingNode->LateAttachInputs();
// add it to the respective node group based on the tag
let nodeWithTag = dynamic_pointer_cast<WithTag>(node);
if (nodeWithTag)
{
wstring tag = nodeWithTag->GetTag();
if (tag == L"feature") net->FeatureNodes().push_back(node);
else if (tag == L"label") net->LabelNodes().push_back(node);
else if (tag == L"criterion" || tag == L"criteria") net->FinalCriterionNodes().push_back(node); // 'criteria' is wrong (plural); we keep it for compat
else if (!_wcsnicmp(tag.c_str(), L"eval", 4)) net->EvaluationNodes().push_back(node); // eval*
else if (tag == L"output") net->OutputNodes().push_back(node);
#if 0 // deprecated
else if (tag == L"pair") net->PairNodes().push_back(node); // TODO: I made this up; the original code in SynchronousExecutionEngine did not have this
#endif
else if (!tag.empty())
RuntimeError("ComputationNetwork: unknown tag '%ls'", tag.c_str());
// TODO: are there nodes without tag? Where do they go?
}
// traverse children: append them to the end of the work list
let & children = node->GetChildren();
for (auto & child : children)
workList.push_back(child); // (we could check whether c is in 'nodes' already here to optimize, but this way it is cleaner)
}
net->ValidateNetwork();
#if 1
wstring args = net->ToString();
fprintf(stderr, "%ls\n", args.c_str());
#endif
// these post-processing steps are done by the other network builders, but I don't know why they are necessary
net->FixupInputMinibatchSize(); // make sure dimensions are set up correctly
net->ResetEvalTimeStamp(); // (should not really be needed)
return net;
}
// creates the lambda for creating an object that can exist as 'float' or 'double'
// Pass both types as the two template args.

View file

@@ -534,11 +534,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
InferImageDimsFromInput(0, true);
m_imageLayout = ImageLayoutWHC(m_imageLayout.GetWidth(), m_sliceHeight, m_imageLayout.GetNumChannels());
#if 1 // disabled for now since we now call Validate() inside the loop, and therefore get lots of these warnings. TODO: Bring it back once we no longer call Validate() from inside the loop.
// warn that this node will destroy the image size information from the child
if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
fprintf(stderr, "WARNING: RowSlice operation cannot inherit image size information from its child. Image size info is lost.\n");
#endif
}
private:

View file

@@ -418,6 +418,7 @@ CNTK_SRC =\
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkAnalysis.cpp \
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkEditing.cpp \
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkBuilder.cpp \
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkScripting.cpp \
MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp \
MachineLearning/CNTKSGDLib/Profiler.cpp \
MachineLearning/CNTKSGDLib/SGD.cpp \

View file

@@ -13,10 +13,6 @@ speechTrain=[
deviceId=$DeviceId$
traceLevel=1
#NDLNetworkBuilder=[
# networkDescription=$ConfigDir$/lstmp-3layer_WithSelfStab.ndl
#]
SGD=[
epochSize=20480
minibatchSize=20
@@ -48,142 +44,7 @@ speechTrain=[
]
]
# replicating the above with BrainScript --this is 100% converted from NDL
originalExperimentalNetworkBuilder=[
LSTMPComponentWithSelfStab(inputDim, outputDim, cellDim, inputx) =
[
Wxo = Parameter(cellDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1); # difference to NDL: 'uniform' must be quoted as a string
Wxi = Parameter(cellDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
Wxf = Parameter(cellDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
Wxc = Parameter(cellDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
bo = Parameter(cellDim, 1, init='fixedValue', value=0.0); # difference to NDL: 'fixedValue' must be quoted as a string and is case-sensitive
bc = Parameter(cellDim, 1, init='fixedValue', value=0.0);
bi = Parameter(cellDim, 1, init='fixedValue', value=0.0);
bf = Parameter(cellDim, 1, init='fixedValue', value=0.0);
Whi = Parameter(cellDim, outputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
Wci = Parameter(cellDim, 1, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
Whf = Parameter(cellDim, outputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
Wcf = Parameter(cellDim, 1, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
Who = Parameter(cellDim, outputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
Wco = Parameter(cellDim, 1, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
Whc = Parameter(cellDim, outputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
Wmr = Parameter(outputDim, cellDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
#we provide a scale value for each weight
sWxo = Parameter(1, 1, init='fixedValue', value=0.0);
sWxi = Parameter(1, 1, init='fixedValue', value=0.0);
sWxf = Parameter(1, 1, init='fixedValue', value=0.0);
sWxc = Parameter(1, 1, init='fixedValue', value=0.0);
sWhi = Parameter(1, 1, init='fixedValue', value=0.0);
sWci = Parameter(1, 1, init='fixedValue', value=0.0);
sWhf = Parameter(1, 1, init='fixedValue', value=0.0);
sWcf = Parameter(1, 1, init='fixedValue', value=0.0);
sWho = Parameter(1, 1, init='fixedValue', value=0.0);
sWco = Parameter(1, 1, init='fixedValue', value=0.0);
sWhc = Parameter(1, 1, init='fixedValue', value=0.0);
sWmr = Parameter(1, 1, init='fixedValue', value=0.0);
expsWxo = Exp(sWxo);
expsWxi = Exp(sWxi);
expsWxf = Exp(sWxf);
expsWxc = Exp(sWxc);
expsWhi = Exp(sWhi);
expsWci = Exp(sWci);
expsWhf = Exp(sWhf);
expsWcf = Exp(sWcf);
expsWho = Exp(sWho);
expsWco = Exp(sWco);
expsWhc = Exp(sWhc);
expsWmr = Exp(sWmr);
#end of scale values
dh = PastValue(outputDim, 1, output, timeStep=1);
dc = PastValue(cellDim, 1, ct, timeStep=1);
Wxix = Times(Wxi, Scale(expsWxi, inputx));
Whidh = Times(Whi, Scale(expsWhi, dh));
Wcidc = DiagTimes(Wci, Scale(expsWci, dc));
it = Sigmoid (Plus ( Plus (Plus (Wxix, bi), Whidh), Wcidc));
Wxcx = Times(Wxc, Scale(expsWxc, inputx));
Whcdh = Times(Whc, Scale(expsWhc, dh));
bit = ElementTimes(it, Tanh( Plus(Wxcx, Plus(Whcdh, bc))));
Wxfx = Times(Wxf, Scale(expsWxf,inputx));
Whfdh = Times(Whf, Scale(expsWhf, dh));
Wcfdc = DiagTimes(Wcf, Scale(expsWcf, dc));
ft = Sigmoid( Plus (Plus (Plus(Wxfx, bf), Whfdh), Wcfdc));
bft = ElementTimes(ft, dc);
ct = Plus(bft, bit);
Wxox = Times(Wxo, Scale(expsWxo, inputx));
Whodh = Times(Who, Scale(expsWho, dh));
Wcoct = DiagTimes(Wco, Scale(expsWco, ct));
ot = Sigmoid( Plus( Plus( Plus(Wxox, bo), Whodh), Wcoct));
mt = ElementTimes(ot, Tanh(ct));
output = Times(Wmr, Scale(expsWmr, mt));
]
#define basic i/o
baseFeatDim=33
RowSliceStart=330
FeatDim=363
labelDim=132
cellDim=1024
hiddenDim=256
features=Input(FeatDim, 1, tag='feature') # differences to NDL: needs the '1'; tag value must be quoted as a string
labels=Input(labelDim, 1, tag='label')
feashift=RowSlice(RowSliceStart, baseFeatDim, features); # shift 5 frames right (x_{t+5} -> x_{t} )
featNorm = MeanVarNorm(feashift)
# layer 1
LSTMoutput1 = LSTMPComponentWithSelfStab(baseFeatDim, hiddenDim, cellDim, featNorm);
# layer 2
LSTMoutput2 = LSTMPComponentWithSelfStab(hiddenDim, hiddenDim, cellDim, LSTMoutput1.output); # difference to NDL: LSTMoutput1 is a record, must select the output field explicitly
# layer 3
LSTMoutput3 = LSTMPComponentWithSelfStab(hiddenDim, hiddenDim, cellDim, LSTMoutput2.output);
W = Parameter(labelDim, hiddenDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
b = Parameter(labelDim, 1, init='fixedValue', value=0);
sW = Parameter(1, 1, init='fixedValue', value=0.0);
expsW = Exp(sW);
LSTMoutputW = Plus(Times(W, Scale(expsW, LSTMoutput3.output)), b);
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag='criteria'); # differences to NDL: string must be quoted; value is case-sensitive
Err = ErrorPrediction(labels,LSTMoutputW,tag='eval');
logPrior = LogPrior(labels)
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag='output')
]
# replicating the above with BrainScript --we will put stuff here
# define network using BrainScript
ExperimentalNetworkBuilder=[
void = 0 // (BUGBUG: we do not allow zero-argument macros; will be fixed. For now, pass void)