BrainScript: ComputationNetwork can now construct itself from BS (the special MakeRuntimeObject() function is gone);
new source file ComputationNetworkScripting.cpp; changed ComputationNetwork::SetDeviceId(). Now just takes the input and sets that as the device. Before it did something funky with m_deviceId, which looks like a bug; in LSTM test config, removed remnants of old NDL, now completely based on BS
This commit is contained in:
Родитель
7ab6151416
Коммит
2ca3f555a8
|
@ -195,6 +195,7 @@
|
|||
<ClCompile Include="ComputationNetworkBuilder.cpp" />
|
||||
<ClCompile Include="ComputationNetworkEditing.cpp" />
|
||||
<ClCompile Include="ComputationNetworkEvaluation.cpp" />
|
||||
<ClCompile Include="ComputationNetworkScripting.cpp" />
|
||||
<ClCompile Include="ComputationNode.cpp" />
|
||||
<ClCompile Include="NetworkBuilderFromConfig.cpp" />
|
||||
<ClCompile Include="stdafx.cpp" />
|
||||
|
|
|
@ -37,6 +37,9 @@
|
|||
<ClCompile Include="ComputationNetworkEditing.cpp">
|
||||
<Filter>Network</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="ComputationNetworkScripting.cpp">
|
||||
<Filter>Network</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\Common\Include\basetypes.h">
|
||||
|
|
|
@ -159,16 +159,21 @@ public:
|
|||
// construction
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
ComputationNetwork(DEVICEID_TYPE deviceId = AUTOPLACEMATRIX) :
|
||||
ComputationNetwork() :
|
||||
m_randomSeedOffset(0),
|
||||
m_deviceId(deviceId), m_pMBLayout(make_shared<MBLayout>())
|
||||
m_pMBLayout(make_shared<MBLayout>())
|
||||
{
|
||||
}
|
||||
ComputationNetwork(DEVICEID_TYPE deviceId) :
|
||||
ComputationNetwork()
|
||||
{
|
||||
SetDeviceId(deviceId);
|
||||
}
|
||||
ComputationNetwork(const ScriptableObjects::IConfigRecordPtr configp); // construct from config
|
||||
|
||||
virtual ~ComputationNetwork()
|
||||
{
|
||||
ClearNet();
|
||||
ClearNet(); // This will explicitly remove all nodes. This is needed to break circular references in loops.
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
@ -280,12 +285,11 @@ public:
|
|||
// construction
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
void SetDeviceId(const DEVICEID_TYPE deviceId = AUTOPLACEMATRIX)
|
||||
void SetDeviceId(DEVICEID_TYPE deviceId)
|
||||
{
|
||||
if (m_deviceId == AUTOPLACEMATRIX)
|
||||
m_deviceId = Matrix<float>::GetBestGPUDeviceId();
|
||||
else
|
||||
m_deviceId = deviceId;
|
||||
if (deviceId == AUTOPLACEMATRIX)
|
||||
deviceId = Matrix<float>::GetBestGPUDeviceId();
|
||||
m_deviceId = deviceId;
|
||||
m_deviceId = EnforceOneGPUOnly(m_deviceId); // see EnforceOneGPUOnly() for comment on what this is
|
||||
}
|
||||
|
||||
|
|
|
@ -371,7 +371,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (FeatureNodes().size() == 0 && !allowFragment)
|
||||
RuntimeError("No Feature nodes specified");
|
||||
|
||||
#if 1 // If it is not done here, it will causea crash. But it really only belongs into StartEvluationMinibatchLoop()
|
||||
#if 1 // If it is not done here, it will cause a crash. But it really only belongs into StartEvaluationMinibatchLoop()
|
||||
// TODO: allocation does not belong here. This is called e.g. after loading. Memory should be allocated only when actually evaluating.
|
||||
// TODO: move into StartEvaluateMinibatchLoop(), but that is called for output nodes individually--can the process handle that?
|
||||
AllocateAllEvalMatrices(EvaluationNodes(), OutputNodes(), FinalCriterionNodes());
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
//
|
||||
// <copyright file="ComputationNetworkScripting.cpp" company="Microsoft">
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// </copyright>
|
||||
//
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
|
||||
|
||||
#include "Basics.h"
|
||||
#include "ScriptableObjects.h"
|
||||
|
||||
#include "ComputationNode.h"
|
||||
#include "InputAndParamNodes.h"
|
||||
#include "RecurrentNodes.h"
|
||||
#include "NonlinearityNodes.h"
|
||||
#include "LinearAlgebraNodes.h"
|
||||
#include "ConvolutionalNodes.h"
|
||||
#include "ReshapingNodes.h"
|
||||
|
||||
#include "ComputationNetwork.h"
|
||||
#include "ComputationNetworkBuilder.h"
|
||||
|
||||
#include <memory>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
#ifndef let
|
||||
#define let const auto
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
using namespace Microsoft::MSR::ScriptableObjects;
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// construction from config
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// construct a ComputationNetwork from a ConfigRecord
// Steps, in order:
//  1. delegate to the default constructor, then set the device from the config member 'deviceId'
//  2. seed a work list with every config member that is a ComputationNodeBase
//  3. drain the work list front-to-back: register each node by name, resolve late-attached inputs,
//     sort the node into a node group by its tag, and enqueue its children
//  4. validate the network, print its ToString() to stderr, and run the post-processing steps
// A LogicError is raised if two different nodes share a NodeName(); a RuntimeError is raised for an unknown non-empty tag.
|
||||
ComputationNetwork::ComputationNetwork(const IConfigRecordPtr configp) :
|
||||
ComputationNetwork()
|
||||
{
|
||||
let & config = *configp;
|
||||
|
||||
DEVICEID_TYPE deviceId = (DEVICEID_TYPE)(int)config[L"deviceId"];
|
||||
SetDeviceId(deviceId);
|
||||
|
||||
deque<ComputationNodeBasePtr> workList;
|
||||
// flatten the set of all nodes
|
||||
// we collect all root ComputationNodes from the config record, and then expand into all their children by work-list processing
|
||||
// TODO: This currently only supports nodes of the same ElemType. We could allow conversion operators.
|
||||
for (let & id : config.GetMemberIds())
|
||||
{
|
||||
let & value = config[id];
|
||||
if (value.Is<ComputationNodeBase>())
|
||||
workList.push_back((const ComputationNodeBasePtr&)value);
|
||||
}
|
||||
// process work list
|
||||
// Also call LateAttachInputs() where we must (i.e. on nodes that implement ILateAttachingNode).
|
||||
while (!workList.empty())
|
||||
{
|
||||
let node = workList.front();
|
||||
workList.pop_front();
|
||||
|
||||
// add to the name-to-node map (m_nameToNodeMap), keyed by NodeName()
|
||||
let res = m_nameToNodeMap.insert(make_pair(node->NodeName(), node));
|
||||
if (!res.second) // not inserted: we already got this one
|
||||
if (res.first->second == node)
|
||||
continue; // the same
|
||||
else // oops, a different node with the same name (note: this 'else' binds to the inner 'if')
|
||||
LogicError("ComputationNetwork: multiple nodes with the same NodeName() '%ls'", node->NodeName().c_str());
|
||||
|
||||
// If node implements ILateAttachingNode then it has unresolved inputs. Resolve them now.
|
||||
// This may generate a whole new load of nodes, including nodes which in turn have late init.
|
||||
// TODO: think this through whether it may generate circular references nevertheless
|
||||
let lateAttachingNode = dynamic_pointer_cast<ILateAttachingNode>(node);
|
||||
if (lateAttachingNode)
|
||||
lateAttachingNode->LateAttachInputs();
|
||||
|
||||
// add it to the respective node group based on the tag
|
||||
let nodeWithTag = dynamic_pointer_cast<WithTag>(node);
|
||||
if (nodeWithTag)
|
||||
{
|
||||
wstring tag = nodeWithTag->GetTag();
|
||||
if (tag == L"feature") FeatureNodes().push_back(node);
|
||||
else if (tag == L"label") LabelNodes().push_back(node);
|
||||
else if (tag == L"criterion" || tag == L"criteria") FinalCriterionNodes().push_back(node); // 'criteria' is wrong (plural); we keep it for compat
|
||||
else if (!_wcsnicmp(tag.c_str(), L"eval", 4)) EvaluationNodes().push_back(node); // eval*
|
||||
else if (tag == L"output") OutputNodes().push_back(node);
|
||||
#if 0 // deprecated
|
||||
else if (tag == L"pair") PairNodes().push_back(node); // TODO: I made this up; the original code in SynchronousExecutionEngine did not have this
|
||||
#endif
|
||||
else if (!tag.empty())
|
||||
RuntimeError("ComputationNetwork: unknown tag '%ls'", tag.c_str());
|
||||
// TODO: are there nodes without tag? Where do they go?
|
||||
}
|
||||
|
||||
// traverse children: append them to the end of the work list
|
||||
let & children = node->GetChildren();
|
||||
for (auto & child : children)
|
||||
workList.push_back(child); // (we could check whether child is in m_nameToNodeMap already here to optimize, but this way it is cleaner)
|
||||
}
|
||||
|
||||
ValidateNetwork();
|
||||
// unconditionally print the result of ToString() to stderr
#if 1
|
||||
wstring args = ToString();
|
||||
fprintf(stderr, "%ls\n", args.c_str());
|
||||
#endif
|
||||
// these post-processing steps are done by the other network builders, but I don't know why they are necessary
|
||||
FixupInputMinibatchSize(); // make sure dimensions are set up correctly
|
||||
ResetEvalTimeStamp(); // (should not really be needed)
|
||||
}
|
||||
|
||||
}}}
|
|
@ -1378,6 +1378,31 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
std::vector<ComputationNodeBasePtr> m_nestedNodes; // nodes tucked away in this node, in evaluation order
|
||||
};
|
||||
|
||||
// =======================================================================
|
||||
// ILateAttachingNode -- helper wrapper class for ComputationNodes that must AttachInputs() late due to circular references
|
||||
// =======================================================================
|
||||
|
||||
// Instantiate with LateAttachingNode<node type>(deviceId, name, lambda, args for node constructor).
|
||||
// To resolve, call LateAttachInputs(). This must be done only once: after the first call, the stored callback is replaced by one that calls LogicError().
|
||||
// TODO: This is a bit indirect. Can it be done more nicely?
|
||||
// Interface through which the late attachment is triggered on a node.
struct ILateAttachingNode { virtual void LateAttachInputs() = 0; };
|
||||
// Wrapper that derives from node type N and additionally stores the callback that attaches N's inputs later.
template<class N>
|
||||
class LateAttachingNode : public N, public ILateAttachingNode
|
||||
{
|
||||
typedef typename N::OurElemType ElemType;
|
||||
function<void(ComputationNode<ElemType>*)> attachInputs;    // callback invoked by LateAttachInputs()
|
||||
public:
|
||||
// constructor: stores the callback and forwards deviceId, name, and all remaining arguments to N's constructor
|
||||
template<class... _Types>
|
||||
LateAttachingNode(DEVICEID_TYPE deviceId, const wstring & name, const function<void(ComputationNode<ElemType>*)> & attachInputs, _Types&&... _Args) : attachInputs(attachInputs), N(deviceId, name, forward<_Types>(_Args)...) {}
|
||||
// the one member that does the work
|
||||
void /*ILateAttachingNode::*/LateAttachInputs()
|
||||
{
|
||||
attachInputs(dynamic_cast<N*>(this));   // run the stored callback on this node
|
||||
attachInputs = [](ComputationNode<ElemType>*){ LogicError("LateAttachingNode::AttachInputs: must only be called once"); };  // any further call is reported as an error
|
||||
}
|
||||
};
|
||||
|
||||
// =======================================================================
|
||||
// helper macro to ease access to base members in presence of C++ two-phase name lookup
|
||||
// =======================================================================
|
||||
|
|
|
@ -45,28 +45,6 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
|
|||
// ComputationNode -- covers all standard nodes
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// helper wrapper class for ComputationNodes that must AttachInputs() late due to circular references
|
||||
// Instantiate with LateAttachingNode<node type>(lambda, args for node constructor).
|
||||
// To resolve, call AttachInputs()
|
||||
// TODO: This is a bit indirect. Can it be done more nicely?
|
||||
struct ILateAttachingNode { virtual void LateAttachInputs() = 0; };
|
||||
template<class N>
|
||||
class LateAttachingNode : public N, public ILateAttachingNode
|
||||
{
|
||||
typedef typename N::OurElemType ElemType;
|
||||
function<void(ComputationNode<ElemType>*)> attachInputs;
|
||||
public:
|
||||
// constructor
|
||||
template<class... _Types>
|
||||
LateAttachingNode(DEVICEID_TYPE deviceId, const wstring & name, const function<void(ComputationNode<ElemType>*)> & attachInputs, _Types&&... _Args) : attachInputs(attachInputs), N(deviceId, name, forward<_Types>(_Args)...) {}
|
||||
// the one member that does the work
|
||||
void /*ILateAttachingNode::*/LateAttachInputs()
|
||||
{
|
||||
attachInputs(dynamic_cast<N*>(this));
|
||||
attachInputs = [](ComputationNode<ElemType>*){ LogicError("LateAttachingNode::AttachInputs: must only be called once"); };
|
||||
}
|
||||
};
|
||||
|
||||
template<class ElemType>
|
||||
struct DualPrecisionHelpers<ElemType, ComputationNode<ElemType>>
|
||||
{
|
||||
|
@ -227,88 +205,6 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects {
|
|||
}
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// ComputationNetwork
|
||||
// -------------------------------------------------------------------
|
||||
|
||||
// initialize a ComputationNetwork from a ConfigRecord
|
||||
template<>
|
||||
/*static*/ shared_ptr<Object> MakeRuntimeObject<ComputationNetwork>(const IConfigRecordPtr configp)
|
||||
{
|
||||
let & config = *configp;
|
||||
|
||||
DEVICEID_TYPE deviceId = (DEVICEID_TYPE)(int)config[L"deviceId"];
|
||||
auto net = make_shared<ComputationNetwork>(deviceId);
|
||||
|
||||
auto & m_nameToNodeMap = net->GetNameToNodeMap();
|
||||
|
||||
deque<ComputationNodeBasePtr> workList;
|
||||
// flatten the set of all nodes
|
||||
// we collect all root ComputationNodes from the config record, and then expand into all their children by work-list processing
|
||||
// TODO: This currently only collects nodes of the same ElemType. We could allow conversion operators.
|
||||
// TODO: Can we even make the ComputationNetwork independent of ElemType?? As long as the nodes themselves are hooked up properly that should be OK!
|
||||
for (let & id : config.GetMemberIds())
|
||||
{
|
||||
let & value = config[id];
|
||||
if (value.Is<ComputationNodeBase>())
|
||||
workList.push_back((const ComputationNodeBasePtr&)value);
|
||||
}
|
||||
// process work list
|
||||
// Also call FinalizeInit where we must.
|
||||
while (!workList.empty())
|
||||
{
|
||||
let node = workList.front();
|
||||
workList.pop_front();
|
||||
|
||||
// add to set
|
||||
let res = m_nameToNodeMap.insert(make_pair(node->NodeName(), node));
|
||||
if (!res.second) // not inserted: we already got this one
|
||||
if (res.first->second == node)
|
||||
continue; // the same
|
||||
else // oops, a different node with the same name
|
||||
LogicError("ComputationNetwork: multiple nodes with the same NodeName() '%ls'", node->NodeName().c_str());
|
||||
|
||||
// If node derives from MustFinalizeInit() then it has unresolved inputs. Resolve them now.
|
||||
// This may generate a whole new load of nodes, including nodes which in turn have late init.
|
||||
// TODO: think this through whether it may generate circular references nevertheless
|
||||
let lateAttachingNode = dynamic_pointer_cast<ILateAttachingNode>(node);
|
||||
if (lateAttachingNode)
|
||||
lateAttachingNode->LateAttachInputs();
|
||||
|
||||
// add it to the respective node group based on the tag
|
||||
let nodeWithTag = dynamic_pointer_cast<WithTag>(node);
|
||||
if (nodeWithTag)
|
||||
{
|
||||
wstring tag = nodeWithTag->GetTag();
|
||||
if (tag == L"feature") net->FeatureNodes().push_back(node);
|
||||
else if (tag == L"label") net->LabelNodes().push_back(node);
|
||||
else if (tag == L"criterion" || tag == L"criteria") net->FinalCriterionNodes().push_back(node); // 'criteria' is wrong (plural); we keep it for compat
|
||||
else if (!_wcsnicmp(tag.c_str(), L"eval", 4)) net->EvaluationNodes().push_back(node); // eval*
|
||||
else if (tag == L"output") net->OutputNodes().push_back(node);
|
||||
#if 0 // deprecated
|
||||
else if (tag == L"pair") net->PairNodes().push_back(node); // TODO: I made this up; the original code in SynchronousExecutionEngine did not have this
|
||||
#endif
|
||||
else if (!tag.empty())
|
||||
RuntimeError("ComputationNetwork: unknown tag '%ls'", tag.c_str());
|
||||
// TODO: are there nodes without tag? Where do they go?
|
||||
}
|
||||
|
||||
// traverse children: append them to the end of the work list
|
||||
let & children = node->GetChildren();
|
||||
for (auto & child : children)
|
||||
workList.push_back(child); // (we could check whether c is in 'nodes' already here to optimize, but this way it is cleaner)
|
||||
}
|
||||
|
||||
net->ValidateNetwork();
|
||||
#if 1
|
||||
wstring args = net->ToString();
|
||||
fprintf(stderr, "%ls\n", args.c_str());
|
||||
#endif
|
||||
// these post-processing steps are done by the other network builders, but I don't know why they are necessary
|
||||
net->FixupInputMinibatchSize(); // make sure dimensions are set up correctly
|
||||
net->ResetEvalTimeStamp(); // (should not really be needed)
|
||||
return net;
|
||||
}
|
||||
|
||||
// creates the lambda for creating an object that can exist as 'float' or 'double'
|
||||
// Pass both types as the two template args.
|
||||
|
|
|
@ -534,11 +534,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
InferImageDimsFromInput(0, true);
|
||||
m_imageLayout = ImageLayoutWHC(m_imageLayout.GetWidth(), m_sliceHeight, m_imageLayout.GetNumChannels());
|
||||
|
||||
#if 1 // disabled for now since we now call Validate() inside the loop, and therefore get lots of these warnings. TODO: Bring it back once we no longer call Validate() from inside the loop.
|
||||
// warn that this node will destroy the image size information from the child
|
||||
if (m_inputImageLayout.GetWidth() * m_inputImageLayout.GetNumChannels() != 1)
|
||||
fprintf(stderr, "WARNING: RowSlice operation cannot inherit image size information from its child. Image size info is lost.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
1
Makefile
1
Makefile
|
@ -418,6 +418,7 @@ CNTK_SRC =\
|
|||
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkAnalysis.cpp \
|
||||
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkEditing.cpp \
|
||||
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkBuilder.cpp \
|
||||
MachineLearning/CNTKComputationNetworkLib/ComputationNetworkScripting.cpp \
|
||||
MachineLearning/CNTKComputationNetworkLib/NetworkBuilderFromConfig.cpp \
|
||||
MachineLearning/CNTKSGDLib/Profiler.cpp \
|
||||
MachineLearning/CNTKSGDLib/SGD.cpp \
|
||||
|
|
|
@ -13,10 +13,6 @@ speechTrain=[
|
|||
deviceId=$DeviceId$
|
||||
traceLevel=1
|
||||
|
||||
#NDLNetworkBuilder=[
|
||||
# networkDescription=$ConfigDir$/lstmp-3layer_WithSelfStab.ndl
|
||||
#]
|
||||
|
||||
SGD=[
|
||||
epochSize=20480
|
||||
minibatchSize=20
|
||||
|
@ -48,142 +44,7 @@ speechTrain=[
|
|||
]
|
||||
]
|
||||
|
||||
|
||||
# replicating the above with BrainScript --this is 100% converted from NDL
|
||||
originalExperimentalNetworkBuilder=[
|
||||
|
||||
LSTMPComponentWithSelfStab(inputDim, outputDim, cellDim, inputx) =
|
||||
[
|
||||
Wxo = Parameter(cellDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1); # difference to NDL: 'uniform' must be quoted as a string
|
||||
Wxi = Parameter(cellDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
Wxf = Parameter(cellDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
Wxc = Parameter(cellDim, inputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
|
||||
bo = Parameter(cellDim, 1, init='fixedValue', value=0.0); # difference to NDL: 'fixedValue' must be quoted as a string and is case-sensitive
|
||||
bc = Parameter(cellDim, 1, init='fixedValue', value=0.0);
|
||||
bi = Parameter(cellDim, 1, init='fixedValue', value=0.0);
|
||||
bf = Parameter(cellDim, 1, init='fixedValue', value=0.0);
|
||||
|
||||
Whi = Parameter(cellDim, outputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
Wci = Parameter(cellDim, 1, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
Whf = Parameter(cellDim, outputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
Wcf = Parameter(cellDim, 1, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
Who = Parameter(cellDim, outputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
Wco = Parameter(cellDim, 1, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
Whc = Parameter(cellDim, outputDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
|
||||
Wmr = Parameter(outputDim, cellDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
|
||||
#we provide a scale value for each weight
|
||||
|
||||
sWxo = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
sWxi = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
sWxf = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
sWxc = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
|
||||
sWhi = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
sWci = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
|
||||
sWhf = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
sWcf = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
sWho = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
sWco = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
sWhc = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
|
||||
sWmr = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
|
||||
expsWxo = Exp(sWxo);
|
||||
expsWxi = Exp(sWxi);
|
||||
expsWxf = Exp(sWxf);
|
||||
expsWxc = Exp(sWxc);
|
||||
|
||||
expsWhi = Exp(sWhi);
|
||||
expsWci = Exp(sWci);
|
||||
|
||||
expsWhf = Exp(sWhf);
|
||||
expsWcf = Exp(sWcf);
|
||||
expsWho = Exp(sWho);
|
||||
expsWco = Exp(sWco);
|
||||
expsWhc = Exp(sWhc);
|
||||
|
||||
expsWmr = Exp(sWmr);
|
||||
|
||||
#end of scale values
|
||||
|
||||
dh = PastValue(outputDim, 1, output, timeStep=1);
|
||||
dc = PastValue(cellDim, 1, ct, timeStep=1);
|
||||
|
||||
Wxix = Times(Wxi, Scale(expsWxi, inputx));
|
||||
Whidh = Times(Whi, Scale(expsWhi, dh));
|
||||
Wcidc = DiagTimes(Wci, Scale(expsWci, dc));
|
||||
|
||||
it = Sigmoid (Plus ( Plus (Plus (Wxix, bi), Whidh), Wcidc));
|
||||
|
||||
Wxcx = Times(Wxc, Scale(expsWxc, inputx));
|
||||
Whcdh = Times(Whc, Scale(expsWhc, dh));
|
||||
bit = ElementTimes(it, Tanh( Plus(Wxcx, Plus(Whcdh, bc))));
|
||||
|
||||
Wxfx = Times(Wxf, Scale(expsWxf,inputx));
|
||||
Whfdh = Times(Whf, Scale(expsWhf, dh));
|
||||
Wcfdc = DiagTimes(Wcf, Scale(expsWcf, dc));
|
||||
|
||||
ft = Sigmoid( Plus (Plus (Plus(Wxfx, bf), Whfdh), Wcfdc));
|
||||
|
||||
bft = ElementTimes(ft, dc);
|
||||
|
||||
ct = Plus(bft, bit);
|
||||
|
||||
Wxox = Times(Wxo, Scale(expsWxo, inputx));
|
||||
Whodh = Times(Who, Scale(expsWho, dh));
|
||||
Wcoct = DiagTimes(Wco, Scale(expsWco, ct));
|
||||
|
||||
ot = Sigmoid( Plus( Plus( Plus(Wxox, bo), Whodh), Wcoct));
|
||||
|
||||
mt = ElementTimes(ot, Tanh(ct));
|
||||
|
||||
output = Times(Wmr, Scale(expsWmr, mt));
|
||||
]
|
||||
|
||||
#define basic i/o
|
||||
baseFeatDim=33
|
||||
RowSliceStart=330
|
||||
FeatDim=363
|
||||
labelDim=132
|
||||
cellDim=1024
|
||||
hiddenDim=256
|
||||
|
||||
features=Input(FeatDim, 1, tag='feature') # differences to NDL: needs the '1'; tag value must be quoted as a string
|
||||
labels=Input(labelDim, 1, tag='label')
|
||||
feashift=RowSlice(RowSliceStart, baseFeatDim, features); # shift 5 frames right (x_{t+5} -> x_{t} )
|
||||
|
||||
|
||||
featNorm = MeanVarNorm(feashift)
|
||||
|
||||
|
||||
# layer 1
|
||||
LSTMoutput1 = LSTMPComponentWithSelfStab(baseFeatDim, hiddenDim, cellDim, featNorm);
|
||||
# layer 2
|
||||
LSTMoutput2 = LSTMPComponentWithSelfStab(hiddenDim, hiddenDim, cellDim, LSTMoutput1.output); # difference to NDL: LSTMoutput1 is a record, must select the output field explicitly
|
||||
# layer 3
|
||||
LSTMoutput3 = LSTMPComponentWithSelfStab(hiddenDim, hiddenDim, cellDim, LSTMoutput2.output);
|
||||
|
||||
W = Parameter(labelDim, hiddenDim, init='uniform', initValueScale=1, initOnCPUOnly=true, randomSeed=1);
|
||||
b = Parameter(labelDim, 1, init='fixedValue', value=0);
|
||||
|
||||
sW = Parameter(1, 1, init='fixedValue', value=0.0);
|
||||
expsW = Exp(sW);
|
||||
|
||||
LSTMoutputW = Plus(Times(W, Scale(expsW, LSTMoutput3.output)), b);
|
||||
|
||||
cr = CrossEntropyWithSoftmax(labels, LSTMoutputW,tag='criteria'); # differences to NDL: string must be quoted; value is case-sensitive
|
||||
Err = ErrorPrediction(labels,LSTMoutputW,tag='eval');
|
||||
|
||||
logPrior = LogPrior(labels)
|
||||
ScaledLogLikelihood=Minus(LSTMoutputW,logPrior,tag='output')
|
||||
]
|
||||
|
||||
|
||||
# replicating the above with BrainScript --we will put stuff here
|
||||
# define network using BrainScript
|
||||
ExperimentalNetworkBuilder=[
|
||||
|
||||
void = 0 // (BUGBUG: we do not allow zero-argument macros; will be fixed. For now, pass void)
|
||||
|
|
Загрузка…
Ссылка в новой задаче