Changed most uses of ComputationNetwork* and ComputationNetwork& to ComputationNetworkPtr, to eliminate ownership bugs and allow integration with BrainScript (BS). This also allowed for some minor code simplifications.
Made IComputationNetBuilder::LoadNetworkFromFile() 'protected' since it is no longer used; it will be deleted soon.
This commit is contained in:
Родитель
38ca2aa166
Коммит
04e0621d90
|
@ -136,9 +136,7 @@ void DoEvalBase(const ConfigParameters& config, IDataReader<ElemType>& reader)
|
|||
evalNodeNamesVector.push_back(evalNodeNames[i]);
|
||||
}
|
||||
|
||||
ComputationNetwork net(deviceId);
|
||||
net.LoadFromFile<ElemType>(modelPath);
|
||||
net.ResetEvalTimeStamp();
|
||||
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
|
||||
|
||||
SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
|
||||
eval.Evaluate(&reader, evalNodeNamesVector, mbSize[0], epochSize);
|
||||
|
@ -180,9 +178,7 @@ void DoEvalUnroll(const ConfigParameters& config)
|
|||
intargvector mbSize = minibatchSize;
|
||||
wstring path2EvalResults = config(L"path2EvalResults", L"");
|
||||
|
||||
ComputationNetwork net(deviceId);
|
||||
net.LoadFromFile<ElemType>(modelPath);
|
||||
net.ResetEvalTimeStamp();
|
||||
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
|
||||
|
||||
MultiNetworksEvaluator<ElemType> eval(net);
|
||||
double evalEntropy;
|
||||
|
@ -244,9 +240,7 @@ void DoCrossValidate(const ConfigParameters& config)
|
|||
}
|
||||
|
||||
cvModels.push_back(cvModelPath);
|
||||
ComputationNetwork net(deviceId);
|
||||
net.LoadFromFile<ElemType>(cvModelPath);
|
||||
net.ResetEvalTimeStamp();
|
||||
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, cvModelPath);
|
||||
|
||||
SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
|
||||
|
||||
|
@ -320,9 +314,7 @@ void DoWriteOutput(const ConfigParameters& config)
|
|||
outputNodeNamesVector.push_back(outputNodeNames[i]);
|
||||
}
|
||||
|
||||
ComputationNetwork net(deviceId);
|
||||
net.LoadFromFile<ElemType>(modelPath);
|
||||
net.ResetEvalTimeStamp();
|
||||
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
|
||||
|
||||
SimpleOutputWriter<ElemType> writer(net, 1);
|
||||
|
||||
|
@ -803,7 +795,7 @@ public:
|
|||
BrainScriptNetworkBuilder(const ConfigParameters & config) { NOT_IMPLEMENTED; }
|
||||
|
||||
// build a ComputationNetwork from description language
|
||||
virtual /*IComputationNetBuilder::*/ComputationNetwork* BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
|
||||
virtual /*IComputationNetBuilder::*/ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
|
||||
{
|
||||
vector<ScriptableObjects::ConfigValuePtr> args; // this lambda has no arguments
|
||||
ScriptableObjects::ConfigLambda::NamedParams namedArgs;
|
||||
|
@ -813,7 +805,7 @@ public:
|
|||
fprintf(stderr, "BrainScriptNetworkBuilder using CPU\n");
|
||||
else
|
||||
fprintf(stderr, "BrainScriptNetworkBuilder using GPU %d\n", (int)m_net->GetDeviceId());
|
||||
return m_net.get();
|
||||
return m_net;
|
||||
}
|
||||
|
||||
// load an existing file--this is the same code as for NDLNetworkBuilder.h (OK to copy it here because this is temporary code anyway)
|
||||
|
@ -876,7 +868,7 @@ void DoTrain(const ConfigRecordType & config)
|
|||
else if (config.Exists(L"SimpleNetworkBuilder"))
|
||||
{
|
||||
const ConfigRecordType & simpleNetworkBuilderConfig(config(L"SimpleNetworkBuilder", ConfigRecordType::Record()));
|
||||
shared_ptr<IComputationNetBuilder<ElemType>> netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(simpleNetworkBuilderConfig);
|
||||
auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(simpleNetworkBuilderConfig);
|
||||
createNetworkFn = [netBuilder](DEVICEID_TYPE deviceId)
|
||||
{
|
||||
return shared_ptr<ComputationNetwork>(netBuilder->BuildNetworkFromDescription());
|
||||
|
@ -886,7 +878,7 @@ void DoTrain(const ConfigRecordType & config)
|
|||
else if (config.Exists(L"NDLNetworkBuilder"))
|
||||
{
|
||||
const ConfigRecordType & ndlNetworkBuilderConfig(config(L"NDLNetworkBuilder", ConfigRecordType::Record()));
|
||||
shared_ptr<IComputationNetBuilder<ElemType>> netBuilder = make_shared<NDLBuilder<ElemType>>(ndlNetworkBuilderConfig);
|
||||
shared_ptr<NDLBuilder<ElemType>> netBuilder = make_shared<NDLBuilder<ElemType>>(ndlNetworkBuilderConfig);
|
||||
createNetworkFn = [netBuilder](DEVICEID_TYPE deviceId)
|
||||
{
|
||||
return shared_ptr<ComputationNetwork>(netBuilder->BuildNetworkFromDescription());
|
||||
|
@ -1063,7 +1055,7 @@ void DoEncoderDecoder(const ConfigParameters& config)
|
|||
validationDataReader.push_back(cvEncoderDataReader);
|
||||
validationDataReader.push_back(cvDecoderDataReader);
|
||||
|
||||
sgd.EncoderDecoder(netBuilders, trainDataReader, validationDataReader, makeMode);
|
||||
sgd.EncoderDecoder(netBuilders, (int)config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
|
||||
|
||||
delete encoderDataReader;
|
||||
delete decoderDataReader;
|
||||
|
@ -1149,7 +1141,7 @@ void DoBidirectionEncoderDecoder(const ConfigParameters& config)
|
|||
validationDataReader.push_back(cvDecoderDataReader);
|
||||
validationDataReader.push_back(cvBackwardDecoderDataReader);
|
||||
|
||||
sgd.EncoderDecoder(netBuilders, trainDataReader, validationDataReader, makeMode);
|
||||
sgd.EncoderDecoder(netBuilders, (int)config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
|
||||
|
||||
delete encoderDataReader;
|
||||
delete decoderDataReader;
|
||||
|
@ -1198,17 +1190,13 @@ void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config)
|
|||
int traceLevel = config(L"traceLevel", "0");
|
||||
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
|
||||
|
||||
vector<ComputationNetwork*> nets;
|
||||
ComputationNetwork encoderNet(deviceId);
|
||||
encoderNet.LoadFromFile<ElemType>(encoderModelPath, FileOptions::fileOptionsBinary, true);
|
||||
encoderNet.ResetEvalTimeStamp();
|
||||
vector<ComputationNetworkPtr> nets;
|
||||
auto encoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, encoderModelPath, FileOptions::fileOptionsBinary, true);
|
||||
|
||||
ComputationNetwork decoderNet(deviceId);
|
||||
decoderNet.LoadFromFile<ElemType>(decoderModelPath, FileOptions::fileOptionsBinary, false, &encoderNet);
|
||||
decoderNet.ResetEvalTimeStamp();
|
||||
auto decoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, decoderModelPath, FileOptions::fileOptionsBinary, false, encoderNet.get());
|
||||
|
||||
nets.push_back(&encoderNet);
|
||||
nets.push_back(&decoderNet);
|
||||
nets.push_back(encoderNet);
|
||||
nets.push_back(decoderNet);
|
||||
ConfigArray evalNodeNames = config(L"evalNodeNames");
|
||||
vector<wstring> evalNodeNamesVector;
|
||||
for (int i = 0; i < evalNodeNames.size(); ++i)
|
||||
|
@ -1273,9 +1261,7 @@ void DoEvalBeamSearch(const ConfigParameters& config, IDataReader<ElemType>& rea
|
|||
int traceLevel = config(L"traceLevel", "0");
|
||||
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
|
||||
|
||||
ComputationNetwork net(deviceId);
|
||||
net.LoadFromFile<ElemType>(modelPath);
|
||||
net.ResetEvalTimeStamp();
|
||||
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
|
||||
|
||||
ConfigArray evalNodeNames = config(L"evalNodeNames");
|
||||
vector<wstring> evalNodeNamesVector;
|
||||
|
@ -1365,15 +1351,12 @@ void DoEdit(const ConfigParameters& config)
|
|||
template <typename ElemType>
|
||||
void DoConvertFromDbn(const ConfigParameters& config)
|
||||
{
|
||||
//config.Insert("deviceId","-1"); //force using CPU
|
||||
|
||||
wstring modelPath = config(L"modelPath");
|
||||
wstring dbnModelPath = config(L"dbnModelPath");
|
||||
|
||||
IComputationNetBuilder<ElemType>* netBuilder = (IComputationNetBuilder<ElemType>*)new SimpleNetworkBuilder<ElemType>(config);
|
||||
ComputationNetwork* net = netBuilder->LoadNetworkFromFile(dbnModelPath);
|
||||
auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
|
||||
ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
|
||||
net->SaveToFile(modelPath);
|
||||
delete (netBuilder);
|
||||
}
|
||||
|
||||
// do topological plot of computation network
|
||||
|
|
|
@ -126,6 +126,7 @@
|
|||
//BinaryStandardNode(TransposeTimesNode)
|
||||
;
|
||||
|
||||
#if 0 // no longer needed
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
using namespace Microsoft::MSR;
|
||||
|
@ -137,7 +138,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// build a ComputationNetwork from BrainScript source code
|
||||
template<class ElemType>
|
||||
/*virtual*/ /*IComputationNetBuilder::*/ComputationNetwork* ExperimentalNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork*)
|
||||
/*virtual*/ /*IComputationNetBuilder::*/ComputationNetworkPtr ExperimentalNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork*)
|
||||
{
|
||||
if (!m_net || m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
{
|
||||
|
@ -160,10 +161,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// TODO: old CNTK code seems to be able to load the network in-place--is that important; is it OK to just replace the pointer?
|
||||
}
|
||||
m_net->ResetEvalTimeStamp();
|
||||
return m_net.get();
|
||||
return m_net;
|
||||
}
|
||||
|
||||
template class ExperimentalNetworkBuilder<float>;
|
||||
template class ExperimentalNetworkBuilder<double>;
|
||||
|
||||
}}}
|
||||
#endif
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#if 0 // no longer needed
|
||||
// ExperimentalNetworkBuilder.h -- interface to new version of NDL (and config) parser --fseide
|
||||
|
||||
#pragma once
|
||||
|
@ -29,12 +30,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// build a ComputationNetwork from description language
|
||||
// TODO: change return type of these interfaces to shared_ptrs
|
||||
virtual /*IComputationNetBuilder::*/ComputationNetwork* BuildNetworkFromDescription(ComputationNetwork* = nullptr);
|
||||
virtual /*IComputationNetBuilder::*/ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override;
|
||||
// TODO: that function argument is related to PairNetworkNode, which will go away (we don't support it here)
|
||||
|
||||
// load an existing file--this is the same code as for NDLNetworkBuilder.h (OK to copy it here because this is temporary code anyway)
|
||||
virtual /*IComputationNetBuilder::*/ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
|
||||
bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr)
|
||||
bool bAllowNoCriterionNode = false,
|
||||
ComputationNetwork* anotherNetwork = nullptr) override
|
||||
{
|
||||
if (!m_net || m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load
|
||||
{
|
||||
|
@ -48,3 +50,4 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
};
|
||||
|
||||
}}}
|
||||
#endif
|
||||
|
|
|
@ -14,7 +14,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
class IExecutionEngine
|
||||
{
|
||||
public:
|
||||
virtual ComputationNetwork & GetComputationNetwork() = 0;
|
||||
virtual ComputationNetworkPtr GetComputationNetwork() = 0;
|
||||
|
||||
virtual NDLNodeEvaluator<ElemType> & GetNodeEvaluator() = 0;
|
||||
|
||||
|
|
|
@ -104,7 +104,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams)
|
||||
RuntimeError("Invalid number of parameters. Valid parameters: CreateModel(). newly created model always becomes the new default.");
|
||||
|
||||
ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE);
|
||||
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
|
||||
OverrideModelNameAndSetDefaultModel(cn);
|
||||
}
|
||||
if (EqualInsensitive(name, "CreateModelWithName")) //create a blank model
|
||||
|
@ -113,7 +113,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams)
|
||||
RuntimeError("Invalid number of parameters. Valid parameters: CreateModelWithName(modelName). newly created model always becomes the new default.");
|
||||
|
||||
ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE);
|
||||
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
|
||||
OverrideModelNameAndSetDefaultModel(cn, params[0]);
|
||||
}
|
||||
else if (EqualInsensitive(name, "LoadModel"))
|
||||
|
@ -124,7 +124,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
|
||||
std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams);
|
||||
|
||||
ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE);
|
||||
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
|
||||
cn->LoadFromFile<ElemType>(params[0]);
|
||||
OverrideModelNameAndSetDefaultModel(cn);
|
||||
}
|
||||
|
@ -136,7 +136,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
|
||||
std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams);
|
||||
|
||||
ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE);
|
||||
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
|
||||
cn->LoadFromFile<ElemType>(params[1]);
|
||||
OverrideModelNameAndSetDefaultModel(cn, params[0]);
|
||||
}
|
||||
|
@ -148,7 +148,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
|
||||
string modelName = params[0];
|
||||
wstring ndlSnippetFileName = params[1];
|
||||
ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE);
|
||||
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
|
||||
NDLScript<ElemType> script;
|
||||
ConfigParameters ndlScript (script.ReadConfigFile(ndlSnippetFileName));
|
||||
|
||||
|
@ -181,7 +181,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
|
||||
std::wstring fileName = params[0];
|
||||
|
||||
ComputationNetwork* cn = m_netNdlDefault->cn;
|
||||
auto cn = m_netNdlDefault->cn;
|
||||
if (cn == NULL)
|
||||
RuntimeError("SaveDefaultModel can only be called after a default name exists (i.e., at least one model is loaded.)");
|
||||
|
||||
|
@ -440,7 +440,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
|
|||
// this probably won't do anything, but make sure all NDL has been created
|
||||
ProcessNDLScript(netNdl, ndlPassInitial, false);
|
||||
|
||||
ComputationNetwork* cn = netNdl->cn;
|
||||
auto cn = netNdl->cn;
|
||||
for (auto & node : nodes)
|
||||
{
|
||||
switch(prop)
|
||||
|
|
|
@ -147,7 +147,7 @@ public:
|
|||
search = symbol.substr(firstStart);
|
||||
}
|
||||
|
||||
ComputationNetwork* cn = netNdl->cn;
|
||||
ComputationNetworkPtr cn = netNdl->cn;
|
||||
wstring name = msra::strfun::utf16(search);
|
||||
vector<ComputationNodeBasePtr> nodes = cn->GetNodesFromName(name);
|
||||
// didn't find the name in the current symbols, try NDL
|
||||
|
@ -378,7 +378,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void OverrideModelNameAndSetDefaultModel(ComputationNetwork* cn, string modelName = "default")
|
||||
void OverrideModelNameAndSetDefaultModel(ComputationNetworkPtr cn, string modelName = "default")
|
||||
{
|
||||
auto found = m_mapNameToNetNdl.find(modelName);
|
||||
if (found != m_mapNameToNetNdl.end() && found->second.cn != cn)
|
||||
|
@ -583,7 +583,7 @@ public:
|
|||
// EvaluateNDLSnippet - evaluate the passed snippet of NDL into a computational network
|
||||
// script - [in] text of the NDL snippet
|
||||
// network - [in/out] computation network to insert NDL into
|
||||
void EvaluateNDLSnippet(const ConfigValue& script, ComputationNetwork* network)
|
||||
void EvaluateNDLSnippet(const ConfigValue& script, ComputationNetworkPtr network)
|
||||
{
|
||||
NDLUtil<ElemType> ndlUtil(network);
|
||||
ndlUtil.ProcessNDLConfig(script);
|
||||
|
@ -646,7 +646,7 @@ public:
|
|||
// model1=[...] - Embedded NDL script
|
||||
if (0 == foundBrace)
|
||||
{
|
||||
ComputationNetwork* cn = new ComputationNetwork();
|
||||
ComputationNetworkPtr cn = make_shared<ComputationNetwork>();
|
||||
EvaluateNDLSnippet(rightValue, cn);
|
||||
OverrideModelNameAndSetDefaultModel(cn, key);
|
||||
}
|
||||
|
|
|
@ -32,13 +32,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
const ConfigParameters* m_baseConfig; // NOTE: the lifetime of the parent MUST exist from the call to Init to the BuildNetworkFromDescription() call for stringize
|
||||
|
||||
public:
|
||||
NDLBuilder() : m_net(nullptr)
|
||||
NDLBuilder()
|
||||
{
|
||||
m_executionEngine = NULL;
|
||||
m_baseConfig = NULL;
|
||||
} // empty constructor, call Init immediately hereafter
|
||||
|
||||
NDLBuilder(const ConfigParameters& config) : m_net(nullptr)
|
||||
NDLBuilder(const ConfigParameters& config)
|
||||
{
|
||||
m_baseConfig = config.GetParent();
|
||||
Init(config);
|
||||
|
@ -57,7 +57,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_dumpFileName = dumpFileName;
|
||||
m_initialConfig = configParams;
|
||||
m_deviceId = deviceId;
|
||||
m_net = &(executionEngine->GetComputationNetwork());
|
||||
m_net = executionEngine->GetComputationNetwork();
|
||||
if (m_deviceId == AUTOPLACEMATRIX)
|
||||
m_deviceId = Matrix<ElemType>::GetBestGPUDeviceId();
|
||||
m_deviceId = EnforceOneGPUOnly(m_deviceId); // see EnforceOneGPUOnly() for comment on what this is
|
||||
|
@ -158,16 +158,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
virtual ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
|
||||
bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr)
|
||||
bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr) override
|
||||
{
|
||||
if (m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load
|
||||
m_net->LoadFromFile<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
|
||||
|
||||
m_net->ResetEvalTimeStamp();
|
||||
return m_net;
|
||||
return m_net.get();
|
||||
}
|
||||
|
||||
ComputationNetwork* LoadNetworkFromConfig(const wstring& configFilePaths, bool forceLoad = true)
|
||||
ComputationNetworkPtr LoadNetworkFromConfig(const wstring& configFilePaths, bool forceLoad = true)
|
||||
{
|
||||
if (m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load
|
||||
LoadFromConfig(configFilePaths);
|
||||
|
@ -214,7 +214,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ndlUtil.ProcessNDLConfig(config, true);
|
||||
}
|
||||
|
||||
virtual ComputationNetwork* BuildNetworkFromDescription(ComputationNetwork* = nullptr)
|
||||
virtual ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
|
||||
{
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
{
|
||||
|
@ -226,7 +226,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
private:
|
||||
ComputationNetwork* m_net;
|
||||
ComputationNetworkPtr m_net;
|
||||
IExecutionEngine<ElemType>* m_executionEngine;
|
||||
std::wstring m_networkConfig;
|
||||
std::wstring m_dumpFileName;
|
||||
|
|
|
@ -23,14 +23,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
|
||||
private:
|
||||
ComputationNetwork* m_net;
|
||||
ComputationNetworkPtr m_net;
|
||||
|
||||
public:
|
||||
NDLUtil(ComputationNetwork * net) : m_net(net)
|
||||
{
|
||||
}
|
||||
|
||||
~NDLUtil()
|
||||
NDLUtil(ComputationNetworkPtr net) : m_net(net)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -106,7 +102,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
m_net->DumpAllNodesToFile(false, dumpFileName, false);
|
||||
m_net->ValidateNetwork(!fullValidate);
|
||||
}
|
||||
SynchronousNodeEvaluator<ElemType> ndlEvaluator(*m_net);
|
||||
SynchronousNodeEvaluator<ElemType> ndlEvaluator(m_net);
|
||||
NDLNode<ElemType>* lastNode = script->Evaluate(ndlEvaluator, L"", ndlPass, skipThrough);
|
||||
if (ndlPass == ndlPassResolve)
|
||||
{
|
||||
|
|
|
@ -108,12 +108,12 @@ template <typename ElemType>
|
|||
class NetNdl // class to associate a network with an NDLScript
|
||||
{
|
||||
public:
|
||||
ComputationNetwork* cn;
|
||||
ComputationNetworkPtr cn;
|
||||
NDLScript<ElemType>* ndl; // NDLScript we are using for this network. NOTE: the actual script used
|
||||
NDLNode<ElemType>* lastNode[ndlPassMax]; // last node we evaluated for each pass
|
||||
NetNdl(): cn(nullptr), ndl(nullptr) {ClearLastNodes();}
|
||||
NetNdl(ComputationNetwork*p_cn): cn(p_cn), ndl(nullptr) {ClearLastNodes();}
|
||||
NetNdl(ComputationNetwork*p_cn, NDLScript<ElemType>* p_ndl): cn(p_cn), ndl(p_ndl) {ClearLastNodes();}
|
||||
NetNdl(ComputationNetworkPtr p_cn): cn(p_cn), ndl(nullptr) {ClearLastNodes();}
|
||||
NetNdl(ComputationNetworkPtr p_cn, NDLScript<ElemType>* p_ndl): cn(p_cn), ndl(p_ndl) {ClearLastNodes();}
|
||||
~NetNdl()
|
||||
{}
|
||||
|
||||
|
@ -130,9 +130,8 @@ public:
|
|||
// NOTE: this deletes the network and the NDLScript, use with care!
|
||||
void Clear()
|
||||
{
|
||||
delete cn;
|
||||
cn.reset();
|
||||
delete ndl;
|
||||
cn = nullptr;
|
||||
ndl = nullptr;
|
||||
ClearLastNodes();
|
||||
}
|
||||
|
@ -385,7 +384,7 @@ private:
|
|||
bool m_noDefinitions; // no definitions can be made in this script, interpret all macro/function names as calls
|
||||
static NDLScript<ElemType> s_global; //("global"); // global script for storing macros and global nodes
|
||||
std::vector<NDLNode<ElemType>*> m_children; // child nodes. Note that m_script nodes may not be children of this object, they include macro nodes
|
||||
ComputationNetwork* m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
|
||||
ComputationNetworkPtr m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
|
||||
bool m_definingMacro; // currently defining a macro, flag to determine if we are defining or interpretting a macro call
|
||||
|
||||
public:
|
||||
|
@ -518,7 +517,7 @@ public:
|
|||
}
|
||||
|
||||
// SetComputationNetwork - set the computation network this NDL is associated with
|
||||
void SetComputationNetwork(ComputationNetwork* cn)
|
||||
void SetComputationNetwork(ComputationNetworkPtr cn)
|
||||
{
|
||||
m_cn = cn;
|
||||
}
|
||||
|
|
|
@ -24,10 +24,10 @@
|
|||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork* encoderNet)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork* encoderNet)
|
||||
{
|
||||
size_t mbSize = 1;
|
||||
ComputationNetwork* net = nullptr;
|
||||
ComputationNetworkPtr net;
|
||||
|
||||
// TODO: this seems to call for a switch statement
|
||||
if (m_rnnType == SIMPLENET)
|
||||
|
@ -68,7 +68,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
|
||||
{
|
||||
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
|
@ -170,7 +170,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// Note: while ComputationNode and ComputationNetwork are (supposed to be) independent of ElemType, it is OK to keep this class dependent.
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildSimpleRNN(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleRNN(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
|
@ -279,7 +279,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
|
||||
|
@ -395,12 +395,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
m_net->ResetEvalTimeStamp();
|
||||
|
||||
return m_net;
|
||||
|
||||
return m_net;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetworkFromDescription(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetworkFromDescription(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
|
@ -507,12 +506,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
this builds an alignment based LM generator
|
||||
the alignment node takes a variable length input and relates each element to a variable length output
|
||||
*/
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet,
|
||||
size_t mbSize)
|
||||
{
|
||||
template<class ElemType>
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
{
|
||||
unsigned long randomSeed = 1;
|
||||
|
||||
|
@ -634,13 +632,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
m_net->ResetEvalTimeStamp();
|
||||
|
||||
return m_net;
|
||||
}
|
||||
return m_net;
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet,
|
||||
size_t mbSize)
|
||||
{
|
||||
template<class ElemType>
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
{
|
||||
|
@ -771,7 +768,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFromDescription(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFromDescription(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
|
@ -890,7 +887,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildNeuralProbNetworkFromDescription(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNeuralProbNetworkFromDescription(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
|
@ -1237,7 +1234,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
|
@ -1338,7 +1335,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromDescription(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromDescription(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
|
@ -1474,7 +1471,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescription(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescription(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
|
@ -1609,7 +1606,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion", submitted to Interspeech 2015
|
||||
*/
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildLSTMEncoderNetworkFromDescription(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMEncoderNetworkFromDescription(size_t mbSize)
|
||||
{
|
||||
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
|
@ -1700,7 +1697,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion" submitted to Interspeech 2015
|
||||
*/
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
|
@ -2020,7 +2017,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion", submitted to Interspeech 2015
|
||||
*/
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
|
@ -2170,7 +2167,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDescription(size_t mbSize)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDescription(size_t mbSize)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
|
||||
|
@ -2285,7 +2282,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName)
|
||||
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
const bool applyMeanVarNorm = false, bool needPrior = false, DEVICEID_TYPE deviceId = AUTOPLACEMATRIX)
|
||||
{
|
||||
m_deviceId = deviceId;
|
||||
m_net = new ComputationNetwork(m_deviceId);
|
||||
m_net = make_shared<ComputationNetwork>(m_deviceId);
|
||||
|
||||
m_outputLayerSize = outputLayerSize;
|
||||
m_layerSizes = layerSizes;
|
||||
|
@ -248,11 +248,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
}
|
||||
|
||||
virtual ~SimpleNetworkBuilder()
|
||||
{
|
||||
delete m_net;
|
||||
}
|
||||
|
||||
static bool CheckDbnTag(File &fstream, const std::string expectedTag)
|
||||
{
|
||||
char tag[5];
|
||||
|
@ -264,7 +259,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// this load function allows an alternative file format of an early internal predecessor of CNTK, internally called DBN.exe
|
||||
virtual ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
|
||||
bool bAllowNoCriterion = false, ComputationNetwork* anotherNetwork = nullptr)
|
||||
bool bAllowNoCriterion = false, ComputationNetwork* anotherNetwork = nullptr) override
|
||||
{
|
||||
if (m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load
|
||||
{
|
||||
|
@ -282,20 +277,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
m_net->ResetEvalTimeStamp();
|
||||
return m_net;
|
||||
return m_net.get();
|
||||
}
|
||||
|
||||
ComputationNetwork* BuildNetworkFromDescription(ComputationNetwork* encoderNet);
|
||||
ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* encoderNet = nullptr) override;
|
||||
|
||||
ComputationNetworkPtr BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName); // support for fseide's Microsoft-internal legacy tool "DBN.exe"
|
||||
|
||||
RNNTYPE RnnType(){ return m_rnnType; }
|
||||
|
||||
protected:
|
||||
|
||||
ComputationNetwork* BuildSimpleDNN();
|
||||
ComputationNetworkPtr BuildSimpleDNN();
|
||||
|
||||
ComputationNetwork* BuildSimpleRNN(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildSimpleRNN(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildClassEntropyNetwork(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildClassEntropyNetwork(size_t mbSize = 1);
|
||||
|
||||
ComputationNodePtr BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);
|
||||
|
||||
|
@ -305,31 +302,29 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
ComputationNodePtr BuildDirectConnect(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode);
|
||||
|
||||
ComputationNetwork* BuildLogBilinearNetworkFromDescription(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildLogBilinearNetworkFromDescription(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildNeuralProbNetworkFromDescription(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildNeuralProbNetworkFromDescription(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildLSTMNetworkFromDescription(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildLSTMNetworkFromDescription(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildLSTMEncoderNetworkFromDescription(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildCLASSLSTMNetworkFromDescription(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildCLASSLSTMNetworkFromDescription(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildConditionalLSTMNetworkFromDescription(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildConditionalLSTMNetworkFromDescription(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildNCELSTMNetworkFromDescription(size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildNCELSTMNetworkFromDescription(size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
|
||||
ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
|
||||
|
||||
ComputationNetwork* BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName);
|
||||
ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
|
||||
|
||||
//layer is 0 based
|
||||
ComputationNodePtr ApplyNonlinearFunction(ComputationNodePtr input, const size_t layer, const std::wstring nodeName = L"");
|
||||
|
@ -377,7 +372,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
protected:
|
||||
|
||||
ComputationNetwork* m_net;
|
||||
ComputationNetworkPtr m_net;
|
||||
|
||||
int m_outputLayerSize;
|
||||
intargvector m_layerSizes;
|
||||
|
|
|
@ -21,7 +21,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
void SynchronousNodeEvaluator<ElemType>::Evaluate(NDLNode<ElemType>* node, const wstring& baseName, const NDLPass pass)
|
||||
{
|
||||
ComputationNetworkBuilder<ElemType> builder(m_net);
|
||||
ComputationNetworkBuilder<ElemType> builder(*m_net);
|
||||
|
||||
// constants don't need to be evaluated, they just translate into numbers...
|
||||
if (node->GetType() == ndlTypeConstant
|
||||
|
@ -53,7 +53,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
nodePtr = ComputationNode<ElemType>::FromVoidPtr(node->GetEvalValue());
|
||||
if (!nodePtr)
|
||||
{
|
||||
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net.GetNodeFromName(name));
|
||||
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
|
||||
node->SetEvalValue(nodePtr.get());
|
||||
}
|
||||
}
|
||||
|
@ -71,8 +71,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
|
||||
|
||||
// first look for this node already existing in the network
|
||||
if (m_net.NodeNameExist(name))
|
||||
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net.GetNodeFromName(name));
|
||||
if (m_net->NodeNameExist(name))
|
||||
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
|
||||
else
|
||||
nodePtr = builder.CreateInputNode(name, rows, cols);
|
||||
}
|
||||
|
@ -90,8 +90,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
|
||||
|
||||
// first look for this node already existing in the network
|
||||
if (m_net.NodeNameExist(name))
|
||||
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net.GetNodeFromName(name));
|
||||
if (m_net->NodeNameExist(name))
|
||||
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
|
||||
else
|
||||
nodePtr = builder.CreateSparseInputNode(name, rows, cols);
|
||||
}
|
||||
|
@ -161,9 +161,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (initString == "fixedvalue")
|
||||
nodePtr->FunctionValues().SetValue(value);
|
||||
else if (initString == "uniform")
|
||||
m_net.InitLearnableParameters(nodePtr, true, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initValueScale, initOnCPUOnly);
|
||||
m_net->InitLearnableParameters(nodePtr, true, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initValueScale, initOnCPUOnly);
|
||||
else if (initString == "gaussian")
|
||||
m_net.InitLearnableParameters(nodePtr, false, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initValueScale, initOnCPUOnly);
|
||||
m_net->InitLearnableParameters(nodePtr, false, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initValueScale, initOnCPUOnly);
|
||||
else if (initString == "fromfile")
|
||||
{
|
||||
std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", "");
|
||||
|
@ -209,9 +209,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (initString == "fixedvalue")
|
||||
nodePtr->FunctionValues().SetValue(value);
|
||||
else if (initString == "uniform")
|
||||
m_net.InitLearnableParameters(nodePtr, true, randomSeed++, initValueScale);
|
||||
m_net->InitLearnableParameters(nodePtr, true, randomSeed++, initValueScale);
|
||||
else if (initString == "gaussian")
|
||||
m_net.InitLearnableParameters(nodePtr, false, randomSeed++, initValueScale);
|
||||
m_net->InitLearnableParameters(nodePtr, false, randomSeed++, initValueScale);
|
||||
else if (initString == "fromfile")
|
||||
{
|
||||
std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", "");
|
||||
|
|
|
@ -22,7 +22,7 @@ class SynchronousNodeEvaluator : public NDLNodeEvaluator<ElemType>
|
|||
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
|
||||
public:
|
||||
// Constructor - create evaluator
|
||||
SynchronousNodeEvaluator(ComputationNetwork& cn) : m_net(cn)
|
||||
SynchronousNodeEvaluator(ComputationNetworkPtr cn) : m_net(cn)
|
||||
{ }
|
||||
|
||||
// Evaluate - evaluate a node and translate into underlying
|
||||
|
@ -69,7 +69,7 @@ public:
|
|||
|
||||
// In this example, in the call D=Times(A.B,X.B), we need to resolve A.B and X.B appropriately.
|
||||
// Specifically, "A.B" must be resolved to the fully qualified name "C.A.B", whereas "X.B" must be resolved to the fully qualified name "P.B".
|
||||
// We then use this fully-qualified name to look up this node in the model (using "m_net.GetNodeFromName").
|
||||
// We then use this fully-qualified name to look up this node in the model (using "m_net->GetNodeFromName").
|
||||
|
||||
std::size_t firstDotPos = name.find_first_of(".");
|
||||
if (firstDotPos == std::string::npos)
|
||||
|
@ -105,9 +105,9 @@ public:
|
|||
}
|
||||
|
||||
// fully qualified names can be looked up in the model
|
||||
if (m_net.NodeNameExist(wname))
|
||||
if (m_net->NodeNameExist(wname))
|
||||
{
|
||||
void* np = (void*)m_net.GetNodeFromName(wname);
|
||||
void* np = (void*)m_net->GetNodeFromName(wname);
|
||||
nodeParam->SetEvalValue(np);
|
||||
}
|
||||
// NOTE: there is a bug here, we allow an abbreviated node reference (i.e. L1.BFF) based on return values in NDL
|
||||
|
@ -170,9 +170,9 @@ public:
|
|||
// check for the fully qualified name in the computation network
|
||||
// this is needed for MEL processing, since CN nodes names can be used as parameters in MEL
|
||||
std::wstring wname = msra::strfun::utf16(name);
|
||||
if (m_net.NodeNameExist(wname))
|
||||
if (m_net->NodeNameExist(wname))
|
||||
{
|
||||
void* np = (void*)m_net.GetNodeFromName(wname).get();
|
||||
void* np = (void*)m_net->GetNodeFromName(wname).get();
|
||||
// if we don't have a resolve node, it's because the name didn't exist in NDL
|
||||
if (!nodeResolve)
|
||||
nodeResolve = nodeParam;
|
||||
|
@ -276,15 +276,15 @@ public:
|
|||
std::string value = param->GetValue();
|
||||
if (!_stricmp(value.c_str(), "feature"))
|
||||
{
|
||||
SetOutputNode(m_net.FeatureNodes(), compNode);
|
||||
SetOutputNode(m_net->FeatureNodes(), compNode);
|
||||
}
|
||||
else if (!_stricmp(value.c_str(), "label"))
|
||||
{
|
||||
SetOutputNode(m_net.LabelNodes(), compNode);
|
||||
SetOutputNode(m_net->LabelNodes(), compNode);
|
||||
}
|
||||
else if (!_stricmp(value.c_str(), "criteria"))
|
||||
{
|
||||
SetOutputNode(m_net.FinalCriterionNodes(), compNode);
|
||||
SetOutputNode(m_net->FinalCriterionNodes(), compNode);
|
||||
}
|
||||
else if (!_stricmp(value.c_str(), "multiseq"))
|
||||
{
|
||||
|
@ -292,11 +292,11 @@ public:
|
|||
}
|
||||
else if (!_strnicmp(value.c_str(), "eval", 4)) // only compare the first 4 characters
|
||||
{
|
||||
SetOutputNode(m_net.EvaluationNodes(), compNode);
|
||||
SetOutputNode(m_net->EvaluationNodes(), compNode);
|
||||
}
|
||||
else if (!_stricmp(value.c_str(), "output"))
|
||||
{
|
||||
SetOutputNode(m_net.OutputNodes(), compNode);
|
||||
SetOutputNode(m_net->OutputNodes(), compNode);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -321,8 +321,8 @@ public:
|
|||
// returns - pointer to the matching EvalValue for that node, or NULL if not found
|
||||
virtual void* FindSymbol(const wstring& symbol)
|
||||
{
|
||||
if (m_net.NodeNameExist(symbol))
|
||||
return m_net.GetNodeFromName(symbol).get();
|
||||
if (m_net->NodeNameExist(symbol))
|
||||
return m_net->GetNodeFromName(symbol).get();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
@ -331,7 +331,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
ComputationNetwork& m_net;
|
||||
ComputationNetworkPtr m_net;
|
||||
void operator=(const SynchronousNodeEvaluator&);
|
||||
};
|
||||
|
||||
|
@ -343,29 +343,25 @@ class SynchronousExecutionEngine : public IExecutionEngine<ElemType>
|
|||
public:
|
||||
SynchronousExecutionEngine(DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, unsigned long randomSeedOffset=0)
|
||||
{
|
||||
m_computationNetwork = new ComputationNetwork(deviceId);
|
||||
m_computationNetwork = make_shared<ComputationNetwork>(deviceId);
|
||||
m_computationNetwork->SetRandomSeedOffset(randomSeedOffset);
|
||||
m_ownNetwork = true;
|
||||
m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(*m_computationNetwork);
|
||||
m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(m_computationNetwork);
|
||||
}
|
||||
|
||||
SynchronousExecutionEngine(ComputationNetwork* computationNetwork)
|
||||
SynchronousExecutionEngine(ComputationNetworkPtr computationNetwork)
|
||||
{
|
||||
m_computationNetwork = computationNetwork;
|
||||
m_ownNetwork = false;
|
||||
m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(*m_computationNetwork);
|
||||
m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(m_computationNetwork);
|
||||
}
|
||||
|
||||
virtual ~SynchronousExecutionEngine()
|
||||
{
|
||||
if (m_ownNetwork)
|
||||
delete m_computationNetwork;
|
||||
delete m_nodeEvaluator;
|
||||
}
|
||||
|
||||
ComputationNetwork& GetComputationNetwork()
|
||||
ComputationNetworkPtr GetComputationNetwork()
|
||||
{
|
||||
return *m_computationNetwork;
|
||||
return m_computationNetwork;
|
||||
}
|
||||
|
||||
NDLNodeEvaluator<ElemType>& GetNodeEvaluator()
|
||||
|
@ -374,8 +370,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
bool m_ownNetwork;
|
||||
ComputationNetwork* m_computationNetwork;
|
||||
ComputationNetworkPtr m_computationNetwork;
|
||||
SynchronousNodeEvaluator<ElemType>* m_nodeEvaluator;
|
||||
protected:
|
||||
// Copy constructor, should never be called.
|
||||
|
|
|
@ -212,7 +212,7 @@ template <typename ElemType>
|
|||
void TestMacros(const ConfigParameters& configBase)
|
||||
{
|
||||
NDLScript<ElemType> script = configBase("ndlFull");
|
||||
ComputationNetwork net;
|
||||
ComputationNetworkPtr net = make_shared<ComputationNetwork>();
|
||||
SynchronousNodeEvaluator<ElemType> nodeEvaluator(net);
|
||||
script.Evaluate(nodeEvaluator, L"", ndlPassInitial);
|
||||
}
|
||||
|
|
|
@ -606,12 +606,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
/*static*/void ComputationNetwork::SetDropoutRate(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed)
|
||||
/*static*/void ComputationNetwork::SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed)
|
||||
{
|
||||
if (dropoutRate != prevDropoutRate)
|
||||
{
|
||||
fprintf(stderr, "Switching dropout rate to %.8g.\n", dropoutRate);
|
||||
list<ComputationNodeBasePtr> dropoutNodes = net.GetNodesWithType(OperationNameOf(DropoutNode), criterionNode);
|
||||
list<ComputationNodeBasePtr> dropoutNodes = net->GetNodesWithType(OperationNameOf(DropoutNode), criterionNode);
|
||||
if (dropoutNodes.size() == 0 && dropoutRate > 0)
|
||||
fprintf(stderr, "WARNING: there is no dropout node.\n");
|
||||
else for (auto nodeIter = dropoutNodes.begin(); nodeIter != dropoutNodes.end(); nodeIter++)
|
||||
|
@ -627,10 +627,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
//set sequence training parameters, e.g. smoothing weight, frame drop threshold
|
||||
template<class ElemType>
|
||||
void ComputationNetwork::SetSeqParam(ComputationNetwork& net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign)
|
||||
void ComputationNetwork::SetSeqParam(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign)
|
||||
{
|
||||
fprintf(stderr, "Setting Hsmoothing weight to %.8g and frame-dropping threshhold to %.8g\n", hsmoothingWeight, frameDropThresh);
|
||||
list<ComputationNodeBasePtr> seqNodes = net.GetNodesWithType(OperationNameOf(SequenceWithSoftmaxNode), criterionNode);
|
||||
list<ComputationNodeBasePtr> seqNodes = net->GetNodesWithType(OperationNameOf(SequenceWithSoftmaxNode), criterionNode);
|
||||
if (seqNodes.size() == 0)
|
||||
{
|
||||
fprintf(stderr, "WARNING: there is no sequence node.\n");
|
||||
|
@ -647,10 +647,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
}
|
||||
|
||||
/*static*/void ComputationNetwork::SetMaxTempMemSizeForCNN(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const size_t maxTempMemSizeInSamples)
|
||||
/*static*/void ComputationNetwork::SetMaxTempMemSizeForCNN(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const size_t maxTempMemSizeInSamples)
|
||||
{
|
||||
fprintf(stderr, "Set Max Temp Mem Size For Convolution Nodes to %lu samples.\n", maxTempMemSizeInSamples);
|
||||
list<ComputationNodeBasePtr> convolutionNodes = net.GetNodesWithType(OperationNameOf(ConvolutionNode), criterionNode);
|
||||
list<ComputationNodeBasePtr> convolutionNodes = net->GetNodesWithType(OperationNameOf(ConvolutionNode), criterionNode);
|
||||
if (convolutionNodes.size() == 0 && maxTempMemSizeInSamples != 0)
|
||||
{
|
||||
fprintf(stderr, "WARNING: there is no convolution node.\n");
|
||||
|
@ -1116,14 +1116,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template void ComputationNetwork::InitLearnableParameters<float>(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const float initValueScale, bool initOnCPUOnly);
|
||||
template void ComputationNetwork::LoadFromFile<float>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
|
||||
template void ComputationNetwork::PerformSVDecomposition<float>(const map<wstring, float>& SVDConfig, size_t alignedsize);
|
||||
template /*static*/void ComputationNetwork::SetDropoutRate<float>(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
|
||||
template void ComputationNetwork::SetSeqParam<float>(ComputationNetwork& net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
|
||||
template /*static*/void ComputationNetwork::SetDropoutRate<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
|
||||
template void ComputationNetwork::SetSeqParam<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
|
||||
|
||||
template void ComputationNetwork::InitLearnableParameters<double>(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const double initValueScale, bool initOnCPUOnly);
|
||||
template void ComputationNetwork::LoadFromFile<double>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
|
||||
template void ComputationNetwork::PerformSVDecomposition<double>(const map<wstring, float>& SVDConfig, size_t alignedsize);
|
||||
template /*static*/void ComputationNetwork::SetDropoutRate<double>(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
|
||||
template void ComputationNetwork::SetSeqParam<double>(ComputationNetwork& net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
|
||||
template /*static*/void ComputationNetwork::SetDropoutRate<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
|
||||
template void ComputationNetwork::SetSeqParam<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
|
||||
|
||||
// register ComputationNetwork with the ScriptableObject system
|
||||
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<ComputationNetwork> registerComputationNetwork(L"ComputationNetwork");
|
||||
|
|
|
@ -67,6 +67,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
class ComputationNetwork : public ScriptableObjects::Object, public ScriptableObjects::HasToString, public ScriptableObjects::IConfigRecord
|
||||
{
|
||||
public:
|
||||
typedef shared_ptr<ComputationNetwork> ComputationNetworkPtr;
|
||||
protected:
|
||||
|
||||
// FlowControlNodes for internal use by this class:
|
||||
|
@ -359,6 +361,17 @@ public:
|
|||
void LoadFromFile(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary,
|
||||
const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr);
|
||||
|
||||
// static helper to instantiate a network from a file
|
||||
template<class ElemType>
|
||||
static ComputationNetworkPtr CreateFromFile(DEVICEID_TYPE deviceId, const std::wstring& fileName,
|
||||
const FileOptions fileFormat = FileOptions::fileOptionsBinary,
|
||||
const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr)
|
||||
{
|
||||
auto net = make_shared<ComputationNetwork>(deviceId);
|
||||
net->LoadFromFile<ElemType>(fileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
|
||||
return net;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// evaluation
|
||||
// -----------------------------------------------------------------------
|
||||
|
@ -547,10 +560,10 @@ public:
|
|||
// -----------------------------------------------------------------------
|
||||
|
||||
template<class ElemType> // TODO: dropoutRate change to double
|
||||
static void SetDropoutRate(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
|
||||
static void SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
|
||||
template<class ElemType>
|
||||
static void SetSeqParam(ComputationNetwork& net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
|
||||
static void SetMaxTempMemSizeForCNN(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const size_t maxTempMemSizeInSamples);
|
||||
static void SetSeqParam(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
|
||||
static void SetMaxTempMemSizeForCNN(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const size_t maxTempMemSizeInSamples);
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// evaluation
|
||||
|
@ -984,6 +997,6 @@ private: // TODO: make all private that can be made private
|
|||
// TODO: does this apply to anything else besides temporary node-internal intermediate results? What, for example?
|
||||
MatrixPool m_matrixPool;
|
||||
};
|
||||
typedef shared_ptr<ComputationNetwork> ComputationNetworkPtr;
|
||||
typedef ComputationNetwork::ComputationNetworkPtr ComputationNetworkPtr;
|
||||
|
||||
}}}
|
||||
|
|
|
@ -58,7 +58,7 @@ template<class ElemType>
|
|||
void CNTKEval<ElemType>::Destroy()
|
||||
{
|
||||
// cleanup everything
|
||||
delete m_net; // TODO: use shared_ptr
|
||||
m_net.reset();
|
||||
delete m_reader;
|
||||
delete m_writer;
|
||||
delete this;
|
||||
|
@ -71,11 +71,7 @@ void CNTKEval<ElemType>::LoadModel(const std::wstring& modelFileName)
|
|||
{
|
||||
DEVICEID_TYPE deviceId = DeviceFromConfig(m_config);
|
||||
fprintf(stderr, "DeviceID=%d\n", (int)deviceId);
|
||||
if (m_net != NULL)
|
||||
delete m_net;
|
||||
m_net = new ComputationNetwork(deviceId);
|
||||
m_net->LoadFromFile<ElemType>(modelFileName);
|
||||
m_net->ResetEvalTimeStamp();
|
||||
m_net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
|
||||
}
|
||||
|
||||
// GetNodeDimensions - Get the node dimensions of the specified nodes
|
||||
|
@ -169,7 +165,7 @@ void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>
|
|||
m_writer->SetData(&outputs, &m_dimensions);
|
||||
|
||||
// call the evaluator
|
||||
SimpleOutputWriter<ElemType> eval(*m_net);
|
||||
SimpleOutputWriter<ElemType> eval(m_net);
|
||||
eval.WriteOutput(*m_reader, minibatchSize, *m_writer, outNodeNames);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ class CNTKEval : public IEvaluateModel<ElemType>
|
|||
EvalReader<ElemType>* m_reader;
|
||||
EvalWriter<ElemType>* m_writer;
|
||||
ConfigParameters m_config;
|
||||
ComputationNetwork* m_net;
|
||||
ComputationNetworkPtr m_net;
|
||||
std::map<std::wstring, size_t> m_dimensions;
|
||||
size_t m_start;
|
||||
|
||||
|
|
|
@ -107,14 +107,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// TODO: callers of this often do ComputationNetwork::UpdateEvalTimeStamps(featureNodes) and also for labels; we should eliminate the need for this.
|
||||
template<class ElemType>
|
||||
static bool GetMinibatchIntoNetwork(IDataReader<ElemType>& trainSetDataReader,
|
||||
ComputationNetwork& net,
|
||||
ComputationNetworkPtr net,
|
||||
ComputationNodeBasePtr criterionNode,
|
||||
bool useDistributedMBReading,
|
||||
bool useParallelTrain,
|
||||
std::map<std::wstring, Matrix<ElemType>*> & inputMatrices,
|
||||
size_t & actualMBSize)
|
||||
{
|
||||
auto pMBLayout = net.GetMBLayoutPtr();
|
||||
auto pMBLayout = net->GetMBLayoutPtr();
|
||||
// Reading consists of a sequence of Reader API calls:
|
||||
// - GetMinibatch() --fills the inputMatrices
|
||||
// - SetActualMiniBatchSizeFromFeatures() --tells Network to resize the nodes' buffers
|
||||
|
@ -127,15 +127,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// no data is read. When it does, 'wasDataRead' can be removed
|
||||
bool wasDataRead = trainSetDataReader.GetMinibatch(inputMatrices); // fill in the minibatch data into the Input nodes' buffers directly
|
||||
// reader will have resized input node's m_functionValues directly. Nodes must be notified to do necessary internal state updates from that.
|
||||
net.NotifyInputNodesFunctionValuesMBSizeModified();
|
||||
size_t readMBSize = net.DetermineActualMBSizeFromFeatures();
|
||||
net->NotifyInputNodesFunctionValuesMBSizeModified();
|
||||
size_t readMBSize = net->DetermineActualMBSizeFromFeatures();
|
||||
if (readMBSize == 0)
|
||||
wasDataRead = false;
|
||||
|
||||
trainSetDataReader.CopyMBLayoutTo(pMBLayout); // and layout meta-data
|
||||
|
||||
// verify some DataReader calls that are redundant since the MBLayout refactoring (keep verifying for a while for cosy feeling)
|
||||
net.VerifyActualNumParallelSequences(trainSetDataReader.GetNumParallelSequences()); // info already contained in MBLayout
|
||||
net->VerifyActualNumParallelSequences(trainSetDataReader.GetNumParallelSequences()); // info already contained in MBLayout
|
||||
//assert(trainSetDataReader.RequireSentenceSeg() == pMBLayout->RequireSentenceSeg()); // this one is redundant, too
|
||||
|
||||
if ((criterionNode != nullptr) && (criterionNode->OperationName() == L"SequenceWithSoftmax"))
|
||||
|
@ -174,8 +174,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// decimate if needed. Decimation happens in-place.
|
||||
if (wasDataRead && !useDistributedMBReading && useParallelTrain)
|
||||
{
|
||||
DecimateMinibatch(inputMatrices, g_mpi->NumNodesInUse(), g_mpi->CurrentNodeRank(), net.GetMBLayoutPtr());
|
||||
net.NotifyInputNodesFunctionValuesMBSizeModified(); // need to tell'm again since we modified it again
|
||||
DecimateMinibatch(inputMatrices, g_mpi->NumNodesInUse(), g_mpi->CurrentNodeRank(), net->GetMBLayoutPtr());
|
||||
net->NotifyInputNodesFunctionValuesMBSizeModified(); // need to tell'm again since we modified it again
|
||||
}
|
||||
|
||||
// get MB size and tell Network to update its nodes' buffers based on what's in the input matrices
|
||||
|
@ -184,7 +184,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// TODO: This will go away, as we will do resizing inside EvaluateThisNode(FrameRange()).
|
||||
actualMBSize = 0;
|
||||
if (wasDataRead) // TODO: what if we call it always?
|
||||
actualMBSize = net.DetermineActualMBSizeFromFeatures(); // TODO: don't we know the size from reader? Should this be a check instead?
|
||||
actualMBSize = net->DetermineActualMBSizeFromFeatures(); // TODO: don't we know the size from reader? Should this be a check instead?
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -13,10 +13,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
class IComputationNetBuilder //Abstract Class that cannot be instantiated
|
||||
{
|
||||
public:
|
||||
protected:
|
||||
virtual ComputationNetwork* LoadNetworkFromFile(const std::wstring& modelFileName, bool forceLoad = true,
|
||||
bool bAllowNoCriterion = false, ComputationNetwork* = nullptr) = 0;
|
||||
virtual ComputationNetwork* BuildNetworkFromDescription(ComputationNetwork* = nullptr) = 0;
|
||||
public:
|
||||
virtual ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) = 0;
|
||||
virtual ~IComputationNetBuilder() {};
|
||||
};
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
|
||||
typedef ClassBasedCrossEntropyWithSoftmaxNode<ElemType>* ClassBasedCrossEntropyWithSoftmaxNodePtr;
|
||||
public:
|
||||
MultiNetworksEvaluator(ComputationNetwork& net, const size_t numMBsToShowResult = 100, const int traceLevel = 0) : Base(net, numMBsToShowResult, traceLevel) { }
|
||||
MultiNetworksEvaluator(ComputationNetworkPtr net, const size_t numMBsToShowResult = 100, const int traceLevel = 0) : Base(net, numMBsToShowResult, traceLevel) { }
|
||||
|
||||
//returns error rate
|
||||
// This was a special early implementation of RNNs by emulating them as a DNN.
|
||||
|
@ -63,10 +63,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// TODO: can probably be removed.
|
||||
double EvaluateUnroll(IDataReader<ElemType>* dataReader, const size_t mbSize, double &evalSetCrossEntropy, const wchar_t* output = nullptr, const size_t testSize = requestDataSize)
|
||||
{
|
||||
std::vector<ComputationNodeBasePtr> & featureNodes = m_net.FeatureNodes();
|
||||
std::vector<ComputationNodeBasePtr> & labelNodes = m_net.LabelNodes();
|
||||
std::vector<ComputationNodeBasePtr> & criterionNodes = m_net.FinalCriterionNodes();
|
||||
std::vector<ComputationNodeBasePtr> & evaluationNodes = m_net.EvaluationNodes();
|
||||
std::vector<ComputationNodeBasePtr> & featureNodes = m_net->FeatureNodes();
|
||||
std::vector<ComputationNodeBasePtr> & labelNodes = m_net->LabelNodes();
|
||||
std::vector<ComputationNodeBasePtr> & criterionNodes = m_net->FinalCriterionNodes();
|
||||
std::vector<ComputationNodeBasePtr> & evaluationNodes = m_net->EvaluationNodes();
|
||||
|
||||
if (criterionNodes.size() == 0)
|
||||
RuntimeError("No CrossEntropyWithSoftmax node found\n");
|
||||
|
@ -78,10 +78,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
inputMatrices[featureNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(featureNodes[i])->FunctionValues();
|
||||
for (size_t i = 0; i < labelNodes.size(); i++)
|
||||
inputMatrices[labelNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(labelNodes[i])->FunctionValues();
|
||||
inputMatrices[L"numberobs"] = new Matrix<ElemType>(1, 1, m_net.GetDeviceId());
|
||||
inputMatrices[L"numberobs"] = new Matrix<ElemType>(1, 1, m_net->GetDeviceId());
|
||||
|
||||
dataReader->StartMinibatchLoop(mbSize, 0, testSize);
|
||||
m_net.StartEvaluateMinibatchLoop(criterionNodes, evaluationNodes);
|
||||
m_net->StartEvaluateMinibatchLoop(criterionNodes, evaluationNodes);
|
||||
|
||||
double epochEvalError = 0;
|
||||
double epochCrossEntropy = 0;
|
||||
|
@ -117,9 +117,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
featureNodes[npos]->UpdateEvalTimeStamp();
|
||||
labelNodes[npos]->UpdateEvalTimeStamp();
|
||||
|
||||
m_net.Evaluate(criterionNodes[npos]); //use only the first criterion. Is there any possibility to use more?
|
||||
m_net->Evaluate(criterionNodes[npos]); //use only the first criterion. Is there any possibility to use more?
|
||||
|
||||
m_net.Evaluate(evaluationNodes[npos]);
|
||||
m_net->Evaluate(evaluationNodes[npos]);
|
||||
|
||||
double mbCrossEntropy = (double)criterionNodes[npos]->Get00Element(); // criterionNode should be a scalar
|
||||
epochCrossEntropy += mbCrossEntropy;
|
||||
|
@ -134,7 +134,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (outputStream.is_open())
|
||||
{
|
||||
//TODO: add support to dump multiple outputs
|
||||
ComputationNodePtr outputNode = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net.OutputNodes()[0]);
|
||||
ComputationNodePtr outputNode = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->OutputNodes()[0]);
|
||||
foreach_column(j, outputNode->FunctionValues())
|
||||
{
|
||||
foreach_row(i, outputNode->FunctionValues())
|
||||
|
@ -200,14 +200,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
only beam search decoding is applied to the last network
|
||||
*/
|
||||
double EvaluateEncoderDecoderWithHiddenStates(
|
||||
vector<ComputationNetwork*> nets,
|
||||
vector<ComputationNetworkPtr> nets,
|
||||
vector<IDataReader<ElemType>*> dataReaders,
|
||||
const size_t mbSize,
|
||||
const size_t testSize = requestDataSize)
|
||||
{
|
||||
size_t iNumNets = nets.size();
|
||||
|
||||
ComputationNetwork* decoderNet = nullptr;
|
||||
ComputationNetworkPtr decoderNet = nullptr;
|
||||
IDataReader<ElemType>* decoderDataReader = dataReaders[iNumNets - 1];
|
||||
decoderNet = nets[iNumNets - 1];
|
||||
|
||||
|
@ -396,7 +396,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
void EncodingEvaluateDecodingBeamSearch(
|
||||
vector<ComputationNetwork*> nets,
|
||||
vector<ComputationNetworkPtr> nets,
|
||||
vector<IDataReader<ElemType>*> readers,
|
||||
IDataWriter<ElemType>& dataWriter,
|
||||
const vector<wstring>& evalNodeNames,
|
||||
|
@ -409,7 +409,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
LogicError("Has to have at least two networks");
|
||||
}
|
||||
|
||||
ComputationNetwork* decoderNet = nets[iNumNets - 1];
|
||||
ComputationNetworkPtr decoderNet = nets[iNumNets - 1];
|
||||
IDataReader<ElemType>* encoderDataReader = readers[iNumNets - 2];
|
||||
IDataReader<ElemType>* decoderDataReader = readers[iNumNets - 1];
|
||||
vector<ComputationNodeBasePtr> & decoderFeatureNodes = decoderNet->FeatureNodes();
|
||||
|
@ -422,7 +422,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
//specify nodes to write to file
|
||||
std::vector<ComputationNodeBasePtr> writeNodes;
|
||||
for (int i = 0; i < writeNodeNames.size(); i++)
|
||||
writeNodes.push_back(m_net.GetNodeFromName(writeNodeNames[i]));
|
||||
writeNodes.push_back(m_net->GetNodeFromName(writeNodeNames[i]));
|
||||
|
||||
//prepare features and labels
|
||||
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
|
||||
|
@ -458,7 +458,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
(*ptr)->SetNumParallelSequences(1);
|
||||
}
|
||||
|
||||
Matrix<ElemType> historyMat(m_net.GetDeviceId());
|
||||
Matrix<ElemType> historyMat(m_net->GetDeviceId());
|
||||
|
||||
bool bDecoding = true;
|
||||
while (bDecoding)
|
||||
|
@ -640,16 +640,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
//specify output nodes and files
|
||||
std::vector<ComputationNodeBasePtr> outputNodes;
|
||||
for (int i = 0; i < outputNodeNames.size(); i++)
|
||||
outputNodes.push_back(m_net.GetNodeFromName(outputNodeNames[i]));
|
||||
outputNodes.push_back(m_net->GetNodeFromName(outputNodeNames[i]));
|
||||
|
||||
//specify nodes to write to file
|
||||
std::vector<ComputationNodeBasePtr> writeNodes;
|
||||
for (int i = 0; i < writeNodeNames.size(); i++)
|
||||
writeNodes.push_back(m_net.GetNodeFromName(writeNodeNames[i]));
|
||||
writeNodes.push_back(m_net->GetNodeFromName(writeNodeNames[i]));
|
||||
|
||||
//prepare features and labels
|
||||
/*const*/ auto & featureNodes = m_net.FeatureNodes();
|
||||
const auto & labelNodes = m_net.LabelNodes();
|
||||
/*const*/ auto & featureNodes = m_net->FeatureNodes();
|
||||
const auto & labelNodes = m_net->LabelNodes();
|
||||
|
||||
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
|
||||
for (size_t i = 0; i < featureNodes.size(); i++)
|
||||
|
@ -671,11 +671,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
// note: GetMinibatchIntoNetwork() will also fetch the MBLayout although we don't need it here. This should not hurt.
|
||||
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
|
||||
//actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures();
|
||||
//actualMBSize = m_net->SetActualMiniBatchSizeFromFeatures();
|
||||
|
||||
vector<size_t> best_path;
|
||||
|
||||
FindBestPath(&m_net, dataReader,
|
||||
FindBestPath(m_net, dataReader,
|
||||
dataWriter, outputNodes,
|
||||
writeNodes, featureNodes,
|
||||
beam, &inputMatrices, best_path);
|
||||
|
@ -704,7 +704,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fprintf(stderr, "done decoding\n");
|
||||
}
|
||||
|
||||
void FindBestPath(ComputationNetwork* evalnet,
|
||||
void FindBestPath(ComputationNetworkPtr evalnet,
|
||||
IDataReader<ElemType>* dataReader, IDataWriter<ElemType>& dataWriter,
|
||||
const std::vector<ComputationNodeBasePtr>& evalNodes,
|
||||
const std::vector<ComputationNodeBasePtr>& outputNodes,
|
||||
|
@ -865,7 +865,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
/**
|
||||
beam search decoder
|
||||
*/
|
||||
double FindBestPathWithVariableLength(ComputationNetwork* evalnet,
|
||||
double FindBestPathWithVariableLength(ComputationNetworkPtr evalnet,
|
||||
size_t inputLength,
|
||||
IDataReader<ElemType>* dataReader,
|
||||
IDataWriter<ElemType>& dataWriter,
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -382,7 +382,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
void SGD<ElemType>::Adapt(wstring origModelFileName, wstring refNodeName,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
IDataReader<ElemType>* validationSetDataReader,
|
||||
const DEVICEID_TYPE deviceID, const bool makeMode)
|
||||
const DEVICEID_TYPE deviceId, const bool makeMode)
|
||||
{
|
||||
if (origModelFileName == L"" || trainSetDataReader == nullptr)
|
||||
InvalidArgument("origModel and trainSetDataReader should not be null.");
|
||||
|
@ -394,27 +394,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
return;
|
||||
}
|
||||
|
||||
ComputationNetwork net(deviceID);
|
||||
ComputationNetworkPtr net;
|
||||
if (startEpoch >= 0)
|
||||
{
|
||||
wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1);
|
||||
fprintf(stderr, "Starting from checkpoint. Load Network From File %ls.\n", modelFileName.c_str());
|
||||
net.LoadFromFile<ElemType>(modelFileName);
|
||||
net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "Load Network From the original model file %ls.\n", origModelFileName.c_str());
|
||||
net.LoadFromFile<ElemType>(origModelFileName);
|
||||
net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, origModelFileName);
|
||||
}
|
||||
|
||||
startEpoch = max(startEpoch, 0);
|
||||
|
||||
ComputationNetwork refNet(deviceID);
|
||||
ComputationNetworkPtr refNet;
|
||||
m_needAdaptRegularization = m_adaptationRegType != AdaptationRegType::None && m_adaptationRegWeight > 0;
|
||||
if (m_needAdaptRegularization)
|
||||
{
|
||||
fprintf(stderr, "Load reference Network From the original model file %ls.\n", origModelFileName.c_str());
|
||||
refNet.LoadFromFile<ElemType>(origModelFileName);
|
||||
refNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, origModelFileName);
|
||||
}
|
||||
|
||||
ComputationNodeBasePtr refNode;
|
||||
|
@ -423,7 +423,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fprintf(stderr, "Checking refNodeName %ls.\n", origModelFileName.c_str());
|
||||
if (refNodeName == L"")
|
||||
InvalidArgument("refNodeName does not exist and is needed when adaptationRegType is KL.");
|
||||
refNode = refNet.GetNodeFromName(refNodeName);
|
||||
refNode = refNet->GetNodeFromName(refNodeName);
|
||||
}
|
||||
|
||||
TrainOrAdaptModel(startEpoch, net, refNet, refNode, trainSetDataReader, validationSetDataReader);
|
||||
|
@ -432,7 +432,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
void SGD<ElemType>::SequenceTrain(IComputationNetBuilder<ElemType>* netBuilder, wstring origModelFileName,
|
||||
IDataReader<ElemType>* trainSetDataReader, IDataReader<ElemType>* validationSetDataReader,
|
||||
const DEVICEID_TYPE deviceID, const bool makeMode)
|
||||
const DEVICEID_TYPE deviceId, const bool makeMode)
|
||||
{
|
||||
if (netBuilder == nullptr || origModelFileName == L"" || trainSetDataReader == nullptr)
|
||||
{
|
||||
|
@ -447,37 +447,38 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
// Initializes the model from original model.
|
||||
ComputationNetwork origNet(deviceID);
|
||||
ComputationNetwork* sequenceNet =
|
||||
(startEpoch < 0) ? netBuilder->BuildNetworkFromDescription() : &origNet;
|
||||
// TODO: Comment what this does!
|
||||
auto origNet = make_shared<ComputationNetwork>(deviceId);
|
||||
ComputationNetworkPtr sequenceNet =
|
||||
(startEpoch < 0) ? netBuilder->BuildNetworkFromDescription() : origNet;
|
||||
std::vector<ComputationNodeBasePtr> addedFeatureNodes;
|
||||
std::vector<ComputationNodeBasePtr> replacedCriterionNodes;
|
||||
if (startEpoch < 0)
|
||||
{
|
||||
// Loads models.
|
||||
origNet.LoadFromFile<ElemType>(origModelFileName);
|
||||
origNet->LoadFromFile<ElemType>(origModelFileName);
|
||||
|
||||
// Processes feature nodes.
|
||||
std::vector<ComputationNodeBasePtr> & sequenceFeatureNodes = sequenceNet->FeatureNodes();
|
||||
for (size_t i = 0; i < sequenceFeatureNodes.size(); ++i)
|
||||
{
|
||||
if (!origNet.NodeNameExist(sequenceFeatureNodes[i]->NodeName()))
|
||||
if (!origNet->NodeNameExist(sequenceFeatureNodes[i]->NodeName()))
|
||||
{
|
||||
addedFeatureNodes.push_back(sequenceFeatureNodes[i]);
|
||||
origNet.AddFeatureNode(sequenceFeatureNodes[i]);
|
||||
origNet->AddFeatureNode(sequenceFeatureNodes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Processes criterion nodes.
|
||||
auto & origCriterionNodes = GetTrainCriterionNodes(origNet);
|
||||
auto & sequenceCriterionNodes = GetTrainCriterionNodes(*sequenceNet);
|
||||
auto & sequenceCriterionNodes = GetTrainCriterionNodes(sequenceNet);
|
||||
if (origCriterionNodes.size() == 0 || sequenceCriterionNodes.size() == 0)
|
||||
{
|
||||
RuntimeError("Training criterion node does not exist.");
|
||||
}
|
||||
replacedCriterionNodes.push_back(origCriterionNodes[0]);
|
||||
origNet.ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), sequenceCriterionNodes[0]);
|
||||
origNet.ResetEvalTimeStamp();
|
||||
origNet->ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), sequenceCriterionNodes[0]);
|
||||
origNet->ResetEvalTimeStamp();
|
||||
}
|
||||
|
||||
wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1);
|
||||
|
@ -489,21 +490,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
fprintf(stderr, "Load Network From the original model file %ls.\n", origModelFileName.c_str());
|
||||
}
|
||||
ComputationNetwork *net = (startEpoch < 0) ? &origNet : netBuilder->LoadNetworkFromFile(modelFileName);
|
||||
ComputationNetworkPtr net = (startEpoch < 0) ? origNet : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
|
||||
|
||||
startEpoch = max(startEpoch, 0);
|
||||
|
||||
TrainOrAdaptModel(startEpoch, *net, *net, nullptr, trainSetDataReader, validationSetDataReader);
|
||||
TrainOrAdaptModel(startEpoch, net, net, nullptr, trainSetDataReader, validationSetDataReader);
|
||||
|
||||
// Handles deletions carefully here.
|
||||
// TODO: This is no longer needed since we own our networks and deal with shared_ptrs now.
|
||||
if (startEpoch < 0)
|
||||
{
|
||||
for (size_t i = 0; i < addedFeatureNodes.size(); ++i)
|
||||
{
|
||||
origNet.RemoveFeatureNode(addedFeatureNodes[i]);
|
||||
origNet->RemoveFeatureNode(addedFeatureNodes[i]);
|
||||
}
|
||||
auto & origCriterionNodes = GetTrainCriterionNodes(origNet);
|
||||
origNet.ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), replacedCriterionNodes[0]);
|
||||
origNet->ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), replacedCriterionNodes[0]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -533,15 +535,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (startEpoch >= 0)
|
||||
fprintf(stderr, "Starting from checkpoint. Load Network From File %ls.\n", modelFileName.c_str());
|
||||
|
||||
shared_ptr<ComputationNetwork> net;
|
||||
if (startEpoch < 0)
|
||||
net = createNetworkFn(deviceId);
|
||||
else
|
||||
{
|
||||
net = make_shared<ComputationNetwork>(deviceId);
|
||||
net->LoadFromFile<ElemType>(modelFileName, FileOptions::fileOptionsBinary, false/*bAllowNoCriterionNode*/, nullptr/*anotherNetwork*/);
|
||||
}
|
||||
// log the device
|
||||
// create or load from checkpoint
|
||||
shared_ptr<ComputationNetwork> net = startEpoch < 0 ? createNetworkFn(deviceId) : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
|
||||
|
||||
// log the device we are computing on
|
||||
if (net->GetDeviceId() < 0)
|
||||
fprintf(stderr, "SGD using CPU.\n");
|
||||
else
|
||||
|
@ -561,51 +558,51 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
startEpoch = max(startEpoch, 0);
|
||||
m_needAdaptRegularization = false;
|
||||
|
||||
TrainOrAdaptModel(startEpoch, *net, *net, nullptr, trainSetDataReader, validationSetDataReader);
|
||||
TrainOrAdaptModel(startEpoch, net, net, nullptr, trainSetDataReader, validationSetDataReader);
|
||||
}
|
||||
|
||||
// protected:
|
||||
|
||||
// Get{Train,Eval}CriterionNodes() return a reference that is, unfortunately, dependent on the network.
|
||||
// So we hold those inside here. Not very nice. Also not thread-safe. This may go away once we fix sequence-to-sequence models properly.
|
||||
static map<ComputationNetwork*, vector<ComputationNodeBasePtr>> tmpCriterionNodeSets;
|
||||
static map<ComputationNetworkPtr, vector<ComputationNodeBasePtr>> tmpCriterionNodeSets;
|
||||
// TODO: test this, then remove this comment
|
||||
|
||||
template<class ElemType>
|
||||
std::vector<ComputationNodeBasePtr> & SGD<ElemType>::GetTrainCriterionNodes(ComputationNetwork& net)
|
||||
std::vector<ComputationNodeBasePtr> & SGD<ElemType>::GetTrainCriterionNodes(ComputationNetworkPtr net)
|
||||
{
|
||||
fprintf(stderr, "GetTrainCriterionNodes %ls ...\n", m_trainCriterionNodeName.c_str());
|
||||
if (!m_trainCriterionNodeName.empty())
|
||||
{
|
||||
tmpCriterionNodeSets[&net] = net.CriterionNodesFrom(m_trainCriterionNodeName);
|
||||
return tmpCriterionNodeSets[&net];
|
||||
tmpCriterionNodeSets[net] = net->CriterionNodesFrom(m_trainCriterionNodeName);
|
||||
return tmpCriterionNodeSets[net];
|
||||
}
|
||||
else
|
||||
return net.FinalCriterionNodes();
|
||||
return net->FinalCriterionNodes();
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
std::vector<ComputationNodeBasePtr> & SGD<ElemType>::GetEvalCriterionNodes(ComputationNetwork& net)
|
||||
std::vector<ComputationNodeBasePtr> & SGD<ElemType>::GetEvalCriterionNodes(ComputationNetworkPtr net)
|
||||
{
|
||||
fprintf(stderr, "GetEvalCriterionNodes %ls ...\n", m_evalCriterionNodeName.c_str());
|
||||
if (!m_evalCriterionNodeName.empty())
|
||||
{
|
||||
tmpCriterionNodeSets[&net] = net.CriterionNodesFrom(m_evalCriterionNodeName);
|
||||
return tmpCriterionNodeSets[&net];
|
||||
tmpCriterionNodeSets[net] = net->CriterionNodesFrom(m_evalCriterionNodeName);
|
||||
return tmpCriterionNodeSets[net];
|
||||
}
|
||||
else
|
||||
return net.EvaluationNodes();
|
||||
return net->EvaluationNodes();
|
||||
}
|
||||
|
||||
template<class ElemType>
|
||||
void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
ComputationNodeBasePtr refNode,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
IDataReader<ElemType>* validationSetDataReader)
|
||||
{
|
||||
auto & featureNodes = net.FeatureNodes();
|
||||
auto & labelNodes = net.LabelNodes();
|
||||
auto & featureNodes = net->FeatureNodes();
|
||||
auto & labelNodes = net->LabelNodes();
|
||||
auto & criterionNodes = GetTrainCriterionNodes(net);
|
||||
auto & evaluationNodes = GetEvalCriterionNodes(net);
|
||||
|
||||
|
@ -613,7 +610,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
//we intentionally separate it from above loop to make sure forward computing gets the right matrices
|
||||
fprintf(stderr, "\n\nAllocating matrices for gradient computing\n");
|
||||
for (int i = 0; i < criterionNodes.size(); i++)
|
||||
net.AllocateGradientMatrices(criterionNodes[i]);
|
||||
net->AllocateGradientMatrices(criterionNodes[i]);
|
||||
// give the layout something to validate with (some code below validates the network before actually receiving data)
|
||||
// Note: yak!
|
||||
|
||||
|
@ -627,7 +624,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
auto & node = nodes[i];
|
||||
auto * functionValues = &dynamic_pointer_cast<ComputationNode<ElemType>>(node)->FunctionValues();
|
||||
assert(functionValues->GetNumCols() == net.GetMBLayoutPtr()->GetNumTimeSteps());
|
||||
assert(functionValues->GetNumCols() == net->GetMBLayoutPtr()->GetNumTimeSteps());
|
||||
(*inputMatrices)[node->NodeName()] = functionValues;
|
||||
}
|
||||
}
|
||||
|
@ -651,17 +648,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
for (size_t i = 0; i < featureNodes.size(); i++)
|
||||
{
|
||||
//we need to keep this info to handle deletion
|
||||
refFeatureNodes[i] = refNet.GetNodeFromName(featureNodes[i]->NodeName());
|
||||
refNet.ChangeNode(featureNodes[i]->NodeName(), featureNodes[i]);
|
||||
refFeatureNodes[i] = refNet->GetNodeFromName(featureNodes[i]->NodeName());
|
||||
refNet->ChangeNode(featureNodes[i]->NodeName(), featureNodes[i]);
|
||||
}
|
||||
|
||||
refNet.RebuildNetwork(refNode);
|
||||
refNet->RebuildNetwork(refNode);
|
||||
}
|
||||
|
||||
//initializing weights and gradient holder
|
||||
//only one criterion so far TODO: support multiple ones?
|
||||
// BUGBUG: fails here in validation--MBLayout not set yet
|
||||
auto & learnableNodes = net.LearnableNodes(criterionNodes[0]);
|
||||
auto & learnableNodes = net->LearnableNodes(criterionNodes[0]);
|
||||
std::list<Matrix<ElemType>> smoothedGradients;
|
||||
|
||||
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
|
||||
|
@ -669,7 +666,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ComputationNodePtr node = dynamic_pointer_cast<ComputationNode<ElemType>>(*nodeIter);
|
||||
smoothedGradients.push_back(Matrix<ElemType>(node->GetNumRows(),
|
||||
node->GetNumCols(),
|
||||
net.GetDeviceId()));
|
||||
net->GetDeviceId()));
|
||||
}
|
||||
|
||||
double epochCriterion, avgCriterion, prevCriterion, lrControlCriterion;
|
||||
|
@ -705,7 +702,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (g_mpi != nullptr)
|
||||
g_mpi->WaitAll();
|
||||
|
||||
net.SaveToFile(GetModelNameForEpoch(int(startEpoch) - 1));
|
||||
net->SaveToFile(GetModelNameForEpoch(int(startEpoch) - 1));
|
||||
}
|
||||
|
||||
// BUGBUG: This is where the trainSetDataReader->GetNumParallelSequences() is used to further normalize
|
||||
|
@ -806,7 +803,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fprintf(stderr, "Learn Rate Per Sample for Epoch[%d] = %.8g is less than minLearnRate %.8g. Training complete.\n",
|
||||
i + 1, learnRatePerSample, m_minLearnRate);
|
||||
if (m_autoLearnRateSearchType != LearningRateSearchAlgorithm::None)
|
||||
net.SaveToFile(m_modelPath);
|
||||
net->SaveToFile(m_modelPath);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -966,9 +963,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (m_loadBestModel)
|
||||
{
|
||||
fprintf(stderr, "Loaded the previous model which has better training criterion.\n");
|
||||
net.LoadPersistableParametersFromFile(GetModelNameForEpoch(i - 1),
|
||||
net->LoadPersistableParametersFromFile(GetModelNameForEpoch(i - 1),
|
||||
m_validateAfterModelReloading);
|
||||
net.ResetEvalTimeStamp();
|
||||
net->ResetEvalTimeStamp();
|
||||
LoadCheckPointInfo(i - 1,
|
||||
/*out*/ totalSamplesSeen,
|
||||
/*out*/ learnRatePerSample,
|
||||
|
@ -989,7 +986,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
learnRateReduced = true;
|
||||
else
|
||||
{
|
||||
net.SaveToFile(GetModelNameForEpoch(i, true));
|
||||
net->SaveToFile(GetModelNameForEpoch(i, true));
|
||||
|
||||
fprintf(stderr, "Finished training and saved final model\n\n");
|
||||
break;
|
||||
|
@ -1044,7 +1041,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// persist model and check-point info
|
||||
if ((g_mpi == nullptr) || g_mpi->IsMainNode())
|
||||
{
|
||||
net.SaveToFile(GetModelNameForEpoch(i));
|
||||
net->SaveToFile(GetModelNameForEpoch(i));
|
||||
SaveCheckPointInfo(i, totalSamplesSeen, learnRatePerSample, smoothedGradients, prevCriterion, chosenMinibatchSize);
|
||||
if (!m_keepCheckPointFiles)
|
||||
{
|
||||
|
@ -1078,7 +1075,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
for (size_t i = 0; i < refFeatureNodes.size(); i++)
|
||||
{
|
||||
// note we need to handle deletion carefully
|
||||
refNet.ChangeNode(refFeatureNodes[i]->NodeName(), refFeatureNodes[i]);
|
||||
refNet->ChangeNode(refFeatureNodes[i]->NodeName(), refFeatureNodes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1089,13 +1086,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// return true if precomputation is executed.
|
||||
template<class ElemType>
|
||||
bool SGD<ElemType>::PreCompute(ComputationNetwork& net,
|
||||
bool SGD<ElemType>::PreCompute(ComputationNetworkPtr net,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices)
|
||||
{
|
||||
std::list<ComputationNodeBasePtr> nodes = net.GetNodesRequiringPreComputation(); // this tests all HasComputed() flags
|
||||
std::list<ComputationNodeBasePtr> nodes = net->GetNodesRequiringPreComputation(); // this tests all HasComputed() flags
|
||||
|
||||
if (nodes.size() == 0)
|
||||
{
|
||||
|
@ -1119,7 +1116,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0);
|
||||
else // using only one epoch
|
||||
trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0, m_epochSize);
|
||||
net.StartEvaluateMinibatchLoop(nodes);
|
||||
net->StartEvaluateMinibatchLoop(nodes);
|
||||
|
||||
// initialize
|
||||
for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
|
||||
|
@ -1134,7 +1131,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
|
||||
ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
|
||||
|
||||
net.Evaluate(nodes);
|
||||
net->Evaluate(nodes);
|
||||
}
|
||||
// finalize
|
||||
for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
|
||||
|
@ -1149,20 +1146,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// return a reasonable initial learning rate based on the initial mbsize
|
||||
template<class ElemType>
|
||||
double SGD<ElemType>::SearchForBestLearnRate(ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
const ComputationNodeBasePtr& refNode, const int epochNumber,
|
||||
const double curLearnRate,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
std::list<Matrix<ElemType>>& smoothedGradients,
|
||||
const bool learnRateInitialized,
|
||||
const double largestPrevLearnRatePerSample)
|
||||
double SGD<ElemType>::SearchForBestLearnRate(ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
const ComputationNodeBasePtr& refNode, const int epochNumber,
|
||||
const double curLearnRate,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
std::list<Matrix<ElemType>>& smoothedGradients,
|
||||
const bool learnRateInitialized,
|
||||
const double largestPrevLearnRatePerSample)
|
||||
{
|
||||
double epochCriterion = std::numeric_limits<double>::infinity();
|
||||
double prevCriterion = std::numeric_limits<double>::infinity();
|
||||
|
@ -1190,8 +1187,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
int baseModelEpoch = epochNumber - 1;
|
||||
net.LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
|
||||
net.ResetEvalTimeStamp();
|
||||
net->LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
|
||||
net->ResetEvalTimeStamp();
|
||||
|
||||
double learnRate = learnRatePerSample;
|
||||
size_t dummyMinibatchSize = 0;
|
||||
|
@ -1311,23 +1308,23 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
void SGD<ElemType>::TrainOneMiniEpochAndReloadModel(ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
const ComputationNodeBasePtr& refNode, const int epochNumber,
|
||||
const size_t epochSize, IDataReader<ElemType>* trainSetDataReader,
|
||||
const double learnRatePerSample,
|
||||
const size_t minibatchSize,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
std::list<Matrix<ElemType>>& smoothedGradients,
|
||||
/*out*/ double& epochCriterion,
|
||||
/*out*/ std::vector<double>& epochEvalErrors,
|
||||
/*out*/ size_t& totalSamplesSeen,
|
||||
std::string prefixMsg)
|
||||
void SGD<ElemType>::TrainOneMiniEpochAndReloadModel(ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
const ComputationNodeBasePtr& refNode, const int epochNumber,
|
||||
const size_t epochSize, IDataReader<ElemType>* trainSetDataReader,
|
||||
const double learnRatePerSample,
|
||||
const size_t minibatchSize,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
std::list<Matrix<ElemType>>& smoothedGradients,
|
||||
/*out*/ double& epochCriterion,
|
||||
/*out*/ std::vector<double>& epochEvalErrors,
|
||||
/*out*/ size_t& totalSamplesSeen,
|
||||
std::string prefixMsg)
|
||||
{
|
||||
TrainOneEpoch(net, refNet, refNode, epochNumber, epochSize,
|
||||
trainSetDataReader, learnRatePerSample, minibatchSize, featureNodes,
|
||||
|
@ -1351,8 +1348,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
int baseModelEpoch = epochNumber - 1;
|
||||
net.LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
|
||||
net.ResetEvalTimeStamp();
|
||||
net->LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
|
||||
net->ResetEvalTimeStamp();
|
||||
|
||||
double dummyLearnRate;
|
||||
double dummtPrevCriterion;
|
||||
|
@ -1366,22 +1363,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
size_t SGD<ElemType>::AdaptiveMinibatchSizing(ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
const ComputationNodeBasePtr& refNode,
|
||||
const int epochNumber,
|
||||
const size_t numFramesToUseInSearch,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const double learnRatePerSample,
|
||||
const size_t initialMinibatchSize,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
std::list<Matrix<ElemType>>& smoothedGradients,
|
||||
const double learningRateAdjustmentFactor)
|
||||
size_t SGD<ElemType>::AdaptiveMinibatchSizing(ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
const ComputationNodeBasePtr& refNode,
|
||||
const int epochNumber,
|
||||
const size_t numFramesToUseInSearch,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const double learnRatePerSample,
|
||||
const size_t initialMinibatchSize,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
std::list<Matrix<ElemType>>& smoothedGradients,
|
||||
const double learningRateAdjustmentFactor)
|
||||
{
|
||||
size_t minMinibatchSize = initialMinibatchSize;
|
||||
size_t chosenMinibatchSize = initialMinibatchSize;
|
||||
|
@ -1470,21 +1467,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// uses a small percentage of training data of minibatch to
|
||||
// speculatively train with various MB sizes; then picks the best
|
||||
template<class ElemType>
|
||||
size_t SGD<ElemType>::SearchForBestMinibatchSize(ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
const ComputationNodeBasePtr& refNode,
|
||||
const int epochNumber,
|
||||
const size_t numFramesToUseInSearch,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const double learnRatePerSample,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
std::list<Matrix<ElemType>>& smoothedGradients,
|
||||
const size_t minMinibatchSize, const size_t maxMinibatchSize)
|
||||
size_t SGD<ElemType>::SearchForBestMinibatchSize(ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
const ComputationNodeBasePtr& refNode,
|
||||
const int epochNumber,
|
||||
const size_t numFramesToUseInSearch,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const double learnRatePerSample,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
std::list<Matrix<ElemType>>& smoothedGradients,
|
||||
const size_t minMinibatchSize, const size_t maxMinibatchSize)
|
||||
{
|
||||
// may happen for automatically reduced learning rates
|
||||
if (minMinibatchSize > maxMinibatchSize)
|
||||
|
@ -1574,10 +1571,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// processing more utterances at the same time. Only used in Kaldi2Reader.
|
||||
// TODO: move the two-forward-pass support out of the reader.
|
||||
template<class ElemType>
|
||||
void SGD<ElemType>::AttemptUtteranceDerivativeFeatures(ComputationNetwork& net,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices)
|
||||
void SGD<ElemType>::AttemptUtteranceDerivativeFeatures(ComputationNetworkPtr net,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices)
|
||||
{
|
||||
assert(trainSetDataReader != NULL);
|
||||
std::vector<std::vector<std::pair<wstring, size_t>>> uttInfo;
|
||||
|
@ -1587,14 +1584,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
|
||||
|
||||
auto & outputNodes = net.OutputNodes();
|
||||
auto & outputNodes = net->OutputNodes();
|
||||
if (outputNodes.empty())
|
||||
LogicError("no output node was found.");
|
||||
|
||||
//net.SetActualMiniBatchSizeFromFeatures();
|
||||
trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr());
|
||||
net.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences());
|
||||
net.Evaluate(outputNodes[0]); // Only evaluate the first output
|
||||
//net->SetActualMiniBatchSizeFromFeatures();
|
||||
trainSetDataReader->CopyMBLayoutTo(net->GetMBLayoutPtr());
|
||||
net->VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences());
|
||||
net->Evaluate(outputNodes[0]); // Only evaluate the first output
|
||||
trainSetDataReader->SetNetOutput(uttInfo,
|
||||
dynamic_pointer_cast<ComputationNode<ElemType>>(outputNodes[0])->FunctionValues(),
|
||||
pMBLayout);
|
||||
|
@ -1636,25 +1633,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
template<class ElemType>
|
||||
size_t SGD<ElemType>::TrainOneEpoch(ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
const ComputationNodeBasePtr& refNode,
|
||||
const int epochNumber,
|
||||
const size_t epochSize,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const double learnRatePerSample,
|
||||
size_t tunedMBSize,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices, // TODO: why is this a pointer?
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
std::list<Matrix<ElemType>>& smoothedGradients,
|
||||
/*out*/ double& epochCriterion,
|
||||
/*out*/ std::vector<double>& epochEvalErrors,
|
||||
/*out*/ size_t& totalSamplesSeen,
|
||||
std::string prefixMsg)
|
||||
size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
const ComputationNodeBasePtr& refNode,
|
||||
const int epochNumber,
|
||||
const size_t epochSize,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const double learnRatePerSample,
|
||||
size_t tunedMBSize,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices, // TODO: why is this a pointer?
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
std::list<Matrix<ElemType>>& smoothedGradients,
|
||||
/*out*/ double& epochCriterion,
|
||||
/*out*/ std::vector<double>& epochEvalErrors,
|
||||
/*out*/ size_t& totalSamplesSeen,
|
||||
std::string prefixMsg)
|
||||
{
|
||||
double totalTimeInMBs = 0; // use double since timer has sub-microsecond time resolution
|
||||
double epochCriterionLastMBs = 0;
|
||||
|
@ -1670,8 +1667,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// NOTE: the following two local matrices are not used in distGradAgg path
|
||||
// assume only one training criterion node for each epoch.
|
||||
// The criterion values are accumulated here over the minibatches (without having to pull them off the GPU).
|
||||
Matrix<ElemType> localEpochCriterion(1, 1, net.GetDeviceId());
|
||||
Matrix<ElemType> localEpochEvalErrors(1, epochEvalErrors.size(), net.GetDeviceId());
|
||||
Matrix<ElemType> localEpochCriterion(1, 1, net->GetDeviceId());
|
||||
Matrix<ElemType> localEpochEvalErrors(1, epochEvalErrors.size(), net->GetDeviceId());
|
||||
|
||||
localEpochCriterion.SetValue(0);
|
||||
localEpochEvalErrors.SetValue(0);
|
||||
|
@ -1713,11 +1710,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
trainSetDataReader->StartMinibatchLoop(tunedMBSize, epochNumber, epochSize);
|
||||
}
|
||||
|
||||
net.StartEvaluateMinibatchLoop(evaluationNodes);
|
||||
net.StartEvaluateMinibatchLoop(criterionNodes);
|
||||
net->StartEvaluateMinibatchLoop(evaluationNodes);
|
||||
net->StartEvaluateMinibatchLoop(criterionNodes);
|
||||
if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode)
|
||||
{
|
||||
refNet.StartEvaluateMinibatchLoop(refNode);
|
||||
refNet->StartEvaluateMinibatchLoop(refNode);
|
||||
}
|
||||
|
||||
// Attemps to compute the error signal for the whole utterance, which will
|
||||
|
@ -1776,17 +1773,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode)
|
||||
{
|
||||
#if 0 // TODO: where does refNet get its features from?
|
||||
refNet.ResizeAllFeatureNodes(actualMBSize);
|
||||
refNet->ResizeAllFeatureNodes(actualMBSize);
|
||||
#endif
|
||||
//size_t actualMBSize2 = refNet.SetActualMiniBatchSizeFromFeatures();
|
||||
size_t actualMBSize2 = refNet.DetermineActualMBSizeFromFeatures();
|
||||
refNet.GetMBLayoutPtr()->CopyFrom(net.GetMBLayoutPtr()); // TODO: This is UNTESTED (before this was missing, seemingly inconsistently)
|
||||
refNet.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences());
|
||||
//size_t actualMBSize2 = refNet->SetActualMiniBatchSizeFromFeatures();
|
||||
size_t actualMBSize2 = refNet->DetermineActualMBSizeFromFeatures();
|
||||
refNet->GetMBLayoutPtr()->CopyFrom(net->GetMBLayoutPtr()); // TODO: This is UNTESTED (before this was missing, seemingly inconsistently)
|
||||
refNet->VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences());
|
||||
|
||||
if (actualMBSize2 != actualMBSize)
|
||||
LogicError("TrainOneEpoch: refNet has different MB size than main net??");
|
||||
|
||||
refNet.Evaluate(refNode);
|
||||
refNet->Evaluate(refNode);
|
||||
Matrix<ElemType>::ScaleAndAdd((ElemType)m_adaptationRegWeight,
|
||||
dynamic_pointer_cast<ComputationNode<ElemType>>(refNode)->FunctionValues(),
|
||||
(ElemType)(1.0 - m_adaptationRegWeight),
|
||||
|
@ -1795,7 +1792,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
//compute eval node first since when gradient is computed the forward function values
|
||||
//may be changed and need to be recomputed when gradient and function value share the same matrix
|
||||
net.Evaluate(evaluationNodes);
|
||||
net->Evaluate(evaluationNodes);
|
||||
|
||||
// only compute gradient when learning rate is large enough
|
||||
if (learnRatePerSample > m_minLearnRate * 0.01)
|
||||
|
@ -1804,7 +1801,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// ==============================
|
||||
// forward prop, back-prop --this is where the magic happens baby, what we have all be waiting for!
|
||||
// ==============================
|
||||
net.ComputeGradient<ElemType>(criterionNodes[0]);
|
||||
net->ComputeGradient<ElemType>(criterionNodes[0]);
|
||||
// TODO: we should split Evaluate() out from ComputeGradient(), then call them ForwardProp() and BackProp(), for clarity
|
||||
}
|
||||
else
|
||||
|
@ -1813,7 +1810,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// ==============================
|
||||
// forward prop
|
||||
// ==============================
|
||||
net.Evaluate(criterionNodes[0]);
|
||||
net->Evaluate(criterionNodes[0]);
|
||||
}
|
||||
} // if (actualMBSize > 0)
|
||||
|
||||
|
@ -1821,7 +1818,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
//for now since we share the same label masking flag we call this on the network.
|
||||
//Later, when we apply different labels on different nodes
|
||||
//we need to add code to call this function multiple times, one for each criteria node
|
||||
size_t numSamplesWithLabel = net.GetNumSamplesWithLabel(actualMBSize);
|
||||
size_t numSamplesWithLabel = net->GetNumSamplesWithLabel(actualMBSize);
|
||||
|
||||
totalSamplesProcessed += numSamplesWithLabel;
|
||||
|
||||
|
@ -1902,7 +1899,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
LogicError("%ls %ls operation has NaNs in smoothedGradient.", node->NodeName().c_str(), node->OperationName().c_str());
|
||||
#endif
|
||||
UpdateWeights(node, smoothedGradient, learnRatePerSample,
|
||||
GetMomentumPerSample(epochNumber/*BUGBUG workaround:*/, net.GetMBLayoutPtr()->GetNumParallelSequences()), aggregateNumSamples,
|
||||
GetMomentumPerSample(epochNumber/*BUGBUG workaround:*/, net->GetMBLayoutPtr()->GetNumParallelSequences()), aggregateNumSamples,
|
||||
m_L2RegWeight, m_L1RegWeight,
|
||||
m_needAveMultiplier);
|
||||
#ifdef _DEBUG
|
||||
|
@ -2514,14 +2511,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
// this probes the automatic gradient computation with random inputs
|
||||
template<class ElemType>
|
||||
bool SGD<ElemType>::GradientCheck(ComputationNetwork& net,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
int npos)
|
||||
bool SGD<ElemType>::GradientCheck(ComputationNetworkPtr net,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
int npos)
|
||||
{
|
||||
vector<string> errMsgs;
|
||||
|
||||
net.StartEvaluateMinibatchLoop(criterionNodes[npos]);
|
||||
net->StartEvaluateMinibatchLoop(criterionNodes[npos]);
|
||||
|
||||
// gradient checking
|
||||
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
|
||||
|
@ -2540,11 +2537,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
fprintf(stderr, "\n###### d%ls######\n", node->NodeName().c_str());
|
||||
|
||||
double eOrg = node->FunctionValues()(irow, icol);
|
||||
node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
|
||||
node->FunctionValues().TransferToDeviceIfNotThere(net->GetDeviceId(), true);
|
||||
|
||||
node->UpdateEvalTimeStamp();
|
||||
|
||||
net.ComputeGradient<ElemType>(criterionNodes[npos]);
|
||||
net->ComputeGradient<ElemType>(criterionNodes[npos]);
|
||||
|
||||
if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE)
|
||||
{
|
||||
|
@ -2556,32 +2553,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// TODO: why is this value not used?
|
||||
criterionNodes[npos]->Get00Element();
|
||||
double eGradErr = node->GradientValues()(irow, icol);
|
||||
node->GradientValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
|
||||
node->GradientValues().TransferToDeviceIfNotThere(net->GetDeviceId(), true);
|
||||
|
||||
double ePos = eOrg + EPSILON;
|
||||
double eNeg = eOrg - EPSILON;
|
||||
|
||||
node->FunctionValues()(irow, icol) = (ElemType)ePos;
|
||||
node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
|
||||
node->FunctionValues().TransferToDeviceIfNotThere(net->GetDeviceId(), true);
|
||||
|
||||
node->UpdateEvalTimeStamp();
|
||||
net.Evaluate(criterionNodes[npos]);
|
||||
net->Evaluate(criterionNodes[npos]);
|
||||
//criterionNode should be a scalar
|
||||
|
||||
double mbEvalCriPos = criterionNodes[npos]->Get00Element(); // TODO: make Get00Element() a function of ComputationNodeBase
|
||||
|
||||
node->FunctionValues()(irow, icol) = (ElemType)eNeg;
|
||||
node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
|
||||
node->FunctionValues().TransferToDeviceIfNotThere(net->GetDeviceId(), true);
|
||||
|
||||
node->UpdateEvalTimeStamp();
|
||||
net.Evaluate(criterionNodes[npos]);
|
||||
net->Evaluate(criterionNodes[npos]);
|
||||
|
||||
// criterionNode should be a scalar
|
||||
double mbEvalCriNeg = criterionNodes[npos]->Get00Element();
|
||||
|
||||
// back to its orginal parameter value
|
||||
node->FunctionValues()(irow, icol) = (ElemType)eOrg;
|
||||
node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
|
||||
node->FunctionValues().TransferToDeviceIfNotThere(net->GetDeviceId(), true);
|
||||
|
||||
// check if they are consistent
|
||||
double eGradNum = ((mbEvalCriPos - mbEvalCriNeg) / (ePos - eNeg));
|
||||
|
|
|
@ -282,26 +282,26 @@ public:
|
|||
const DEVICEID_TYPE deviceID, const bool makeMode = true);
|
||||
|
||||
protected:
|
||||
std::vector<ComputationNodeBasePtr> & GetTrainCriterionNodes(ComputationNetwork& net);
|
||||
std::vector<ComputationNodeBasePtr> & GetEvalCriterionNodes(ComputationNetwork& net);
|
||||
std::vector<ComputationNodeBasePtr> & GetTrainCriterionNodes(ComputationNetworkPtr net);
|
||||
std::vector<ComputationNodeBasePtr> & GetEvalCriterionNodes(ComputationNetworkPtr net);
|
||||
|
||||
void TrainOrAdaptModel(int startEpoch, ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
void TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
ComputationNodeBasePtr refNode,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
IDataReader<ElemType>* validationSetDataReader);
|
||||
|
||||
protected:
|
||||
// return true if precomputation is executed.
|
||||
bool PreCompute(ComputationNetwork& net,
|
||||
bool PreCompute(ComputationNetworkPtr net,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
std::vector<ComputationNodeBasePtr> & labelNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices);
|
||||
|
||||
// return a reasonable initial learning rate based on the initial mbsize
|
||||
double SearchForBestLearnRate(ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
double SearchForBestLearnRate(ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
const ComputationNodeBasePtr& refNode, const int epochNumber,
|
||||
const double curLearnRate,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
|
@ -315,8 +315,8 @@ protected:
|
|||
const bool learnRateInitialized,
|
||||
const double largestPrevLearnRatePerSample);
|
||||
|
||||
void TrainOneMiniEpochAndReloadModel(ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
void TrainOneMiniEpochAndReloadModel(ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
const ComputationNodeBasePtr& refNode, const int epochNumber,
|
||||
const size_t epochSize, IDataReader<ElemType>* trainSetDataReader,
|
||||
const double learnRatePerSample,
|
||||
|
@ -333,8 +333,8 @@ protected:
|
|||
/*out*/ size_t& totalSamplesSeen,
|
||||
std::string prefixMsg = "");
|
||||
|
||||
size_t AdaptiveMinibatchSizing(ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
size_t AdaptiveMinibatchSizing(ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
const ComputationNodeBasePtr& refNode,
|
||||
const int epochNumber,
|
||||
const size_t numFramesToUseInSearch,
|
||||
|
@ -352,8 +352,8 @@ protected:
|
|||
|
||||
// uses a small percentage of training data of minibatch to
|
||||
// speculatively train with various MB sizes; then picks the best
|
||||
size_t SearchForBestMinibatchSize(ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
size_t SearchForBestMinibatchSize(ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
const ComputationNodeBasePtr& refNode,
|
||||
const int epochNumber,
|
||||
const size_t numFramesToUseInSearch,
|
||||
|
@ -373,13 +373,13 @@ protected:
|
|||
// for the two-forward-pass sequence and ctc training, which allows
|
||||
// processing more utterances at the same time. Only used in Kaldi2Reader.
|
||||
// TODO: move the two-forward-pass support out of the reader.
|
||||
void AttemptUtteranceDerivativeFeatures(ComputationNetwork& net,
|
||||
void AttemptUtteranceDerivativeFeatures(ComputationNetworkPtr net,
|
||||
IDataReader<ElemType>* trainSetDataReader,
|
||||
const std::vector<ComputationNodeBasePtr> & featureNodes,
|
||||
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices);
|
||||
|
||||
size_t TrainOneEpoch(ComputationNetwork& net,
|
||||
ComputationNetwork& refNet,
|
||||
size_t TrainOneEpoch(ComputationNetworkPtr net,
|
||||
ComputationNetworkPtr refNet,
|
||||
const ComputationNodeBasePtr& refNode,
|
||||
const int epochNumber,
|
||||
const size_t epochSize,
|
||||
|
@ -455,7 +455,7 @@ public:
|
|||
|
||||
#define EPSILON 1e-5
|
||||
|
||||
bool GradientCheck(ComputationNetwork& net,
|
||||
bool GradientCheck(ComputationNetworkPtr net,
|
||||
const std::vector<ComputationNodeBasePtr> & criterionNodes,
|
||||
const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
int npos);
|
||||
|
|
|
@ -33,7 +33,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
public:
|
||||
|
||||
SimpleEvaluator(ComputationNetwork& net, const size_t numMBsToShowResult = 100, const int traceLevel = 0)
|
||||
SimpleEvaluator(ComputationNetworkPtr net, const size_t numMBsToShowResult = 100, const int traceLevel = 0)
|
||||
: m_net(net), m_numMBsToShowResult(numMBsToShowResult), m_traceLevel(traceLevel)
|
||||
{
|
||||
}
|
||||
|
@ -47,21 +47,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (evalNodeNames.size() == 0)
|
||||
{
|
||||
fprintf(stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n");
|
||||
if (m_net.EvaluationNodes().size() == 0 && m_net.FinalCriterionNodes().size() == 0)
|
||||
if (m_net->EvaluationNodes().size() == 0 && m_net->FinalCriterionNodes().size() == 0)
|
||||
LogicError("There is no default evalnodes or training criterion node specified in the network.");
|
||||
|
||||
for (int i = 0; i < m_net.EvaluationNodes().size(); i++)
|
||||
evalNodes.push_back(m_net.EvaluationNodes()[i]);
|
||||
for (int i = 0; i < m_net->EvaluationNodes().size(); i++)
|
||||
evalNodes.push_back(m_net->EvaluationNodes()[i]);
|
||||
|
||||
for (int i = 0; i < m_net.FinalCriterionNodes().size(); i++)
|
||||
evalNodes.push_back(m_net.FinalCriterionNodes()[i]);
|
||||
for (int i = 0; i < m_net->FinalCriterionNodes().size(); i++)
|
||||
evalNodes.push_back(m_net->FinalCriterionNodes()[i]);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < evalNodeNames.size(); i++)
|
||||
{
|
||||
const auto & node = m_net.GetNodeFromName(evalNodeNames[i]);
|
||||
m_net.BuildAndValidateSubNetwork(node);
|
||||
const auto & node = m_net->GetNodeFromName(evalNodeNames[i]);
|
||||
m_net->BuildAndValidateSubNetwork(node);
|
||||
if (node->GetNumRows() != 1 || node->GetNumCols() != 1)
|
||||
LogicError("The nodes passed to SimpleEvaluator::Evaluate function must be either eval or training criterion nodes (which evalues to 1x1 value).");
|
||||
evalNodes.push_back(node);
|
||||
|
@ -74,8 +74,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
evalResults.push_back((double)0);
|
||||
|
||||
//prepare features and labels
|
||||
auto & featureNodes = m_net.FeatureNodes();
|
||||
auto & labelNodes = m_net.LabelNodes();
|
||||
auto & featureNodes = m_net->FeatureNodes();
|
||||
auto & labelNodes = m_net->LabelNodes();
|
||||
|
||||
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
|
||||
for (size_t i = 0; i < featureNodes.size(); i++)
|
||||
|
@ -95,24 +95,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
evalResultsLastMBs.push_back((ElemType)0);
|
||||
|
||||
dataReader->StartMinibatchLoop(mbSize, 0, testSize);
|
||||
m_net.StartEvaluateMinibatchLoop(evalNodes);
|
||||
m_net->StartEvaluateMinibatchLoop(evalNodes);
|
||||
|
||||
while (DataReaderHelpers::GetMinibatchIntoNetwork(*dataReader, m_net, nullptr, false, false, inputMatrices, actualMBSize))
|
||||
{
|
||||
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
|
||||
ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
|
||||
|
||||
//actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures();
|
||||
//dataReader->CopyMBLayoutTo(m_net.GetMBLayoutPtr());
|
||||
//m_net.VerifyActualNumParallelSequences(dataReader->GetNumParallelSequences());
|
||||
|
||||
//for now since we share the same label masking flag we call this on one node only
|
||||
//Later, when we apply different labels on different nodes
|
||||
//we need to add code to call this function multiple times, one for each criteria node
|
||||
size_t numSamplesWithLabel = m_net.GetNumSamplesWithLabel(actualMBSize);
|
||||
size_t numSamplesWithLabel = m_net->GetNumSamplesWithLabel(actualMBSize);
|
||||
for (int i = 0; i < evalNodes.size(); i++)
|
||||
{
|
||||
m_net.Evaluate(evalNodes[i]);
|
||||
m_net->Evaluate(evalNodes[i]);
|
||||
evalResults[i] += (double)evalNodes[i]->Get00Element(); //criterionNode should be a scalar
|
||||
}
|
||||
|
||||
|
@ -203,7 +199,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
}
|
||||
|
||||
protected:
|
||||
ComputationNetwork& m_net;
|
||||
ComputationNetworkPtr m_net;
|
||||
size_t m_numMBsToShowResult;
|
||||
int m_traceLevel;
|
||||
void operator=(const SimpleEvaluator&); // (not assignable)
|
||||
|
|
|
@ -26,7 +26,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
public:
|
||||
|
||||
SimpleOutputWriter(ComputationNetwork & net, int verbosity = 0) :
|
||||
SimpleOutputWriter(ComputationNetworkPtr net, int verbosity = 0) :
|
||||
m_net(net), m_verbosity(verbosity)
|
||||
{ }
|
||||
|
||||
|
@ -39,20 +39,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
if (m_verbosity > 0)
|
||||
fprintf (stderr, "OutputNodeNames are not specified, using the default outputnodes.\n");
|
||||
if (m_net.OutputNodes().size() == 0)
|
||||
if (m_net->OutputNodes().size() == 0)
|
||||
LogicError("There is no default output node specified in the network.");
|
||||
|
||||
outputNodes = m_net.OutputNodes();
|
||||
outputNodes = m_net->OutputNodes();
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i=0; i<outputNodeNames.size(); i++)
|
||||
outputNodes.push_back(m_net.GetNodeFromName(outputNodeNames[i]));
|
||||
outputNodes.push_back(m_net->GetNodeFromName(outputNodeNames[i]));
|
||||
}
|
||||
|
||||
//specify feature value nodes
|
||||
std::vector<ComputationNodeBasePtr>& featureNodes = m_net.FeatureNodes();
|
||||
std::vector<ComputationNodeBasePtr>& labelNodes = m_net.LabelNodes();
|
||||
std::vector<ComputationNodeBasePtr>& featureNodes = m_net->FeatureNodes();
|
||||
std::vector<ComputationNodeBasePtr>& labelNodes = m_net->LabelNodes();
|
||||
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
|
||||
for (size_t i=0; i<featureNodes.size(); i++)
|
||||
inputMatrices[featureNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(featureNodes[i])->FunctionValues();
|
||||
|
@ -65,7 +65,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples);
|
||||
dataReader.SetNumParallelSequences(1);
|
||||
|
||||
m_net.StartEvaluateMinibatchLoop(outputNodes);
|
||||
m_net->StartEvaluateMinibatchLoop(outputNodes);
|
||||
|
||||
size_t totalEpochSamples = 0;
|
||||
std::map<std::wstring, void *, nocase_compare> outputMatrices;
|
||||
|
@ -76,13 +76,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
|
||||
ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
|
||||
|
||||
//size_t actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures();
|
||||
//dataReader.CopyMBLayoutTo(m_net.GetMBLayoutPtr());
|
||||
//m_net.VerifyActualNumParallelSequences(dataReader.GetNumParallelSequences());
|
||||
//size_t actualMBSize = m_net->SetActualMiniBatchSizeFromFeatures();
|
||||
//dataReader.CopyMBLayoutTo(m_net->GetMBLayoutPtr());
|
||||
//m_net->VerifyActualNumParallelSequences(dataReader.GetNumParallelSequences());
|
||||
|
||||
for (int i=0; i<outputNodes.size(); i++)
|
||||
{
|
||||
m_net.Evaluate(outputNodes[i]);
|
||||
m_net->Evaluate(outputNodes[i]);
|
||||
outputMatrices[outputNodes[i]->NodeName()] = (void *)(&dynamic_pointer_cast<ComputationNode<ElemType>>(outputNodes[i])->FunctionValues());
|
||||
}
|
||||
|
||||
|
@ -119,15 +119,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
if (outputNodeNames.size() == 0)
|
||||
{
|
||||
fprintf (stderr, "OutputNodeNames are not specified, using the default outputnodes.\n");
|
||||
if (m_net.OutputNodes().size() == 0)
|
||||
if (m_net->OutputNodes().size() == 0)
|
||||
LogicError("There is no default output node specified in the network.");
|
||||
|
||||
outputNodes = m_net.OutputNodes();
|
||||
outputNodes = m_net->OutputNodes();
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i=0; i<outputNodeNames.size(); i++)
|
||||
outputNodes.push_back(m_net.GetNodeFromName(outputNodeNames[i]));
|
||||
outputNodes.push_back(m_net->GetNodeFromName(outputNodeNames[i]));
|
||||
}
|
||||
|
||||
std::vector<ofstream *> outputStreams;
|
||||
|
@ -139,7 +139,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
#endif
|
||||
|
||||
//specify feature value nodes
|
||||
auto & featureNodes = m_net.FeatureNodes();
|
||||
auto & featureNodes = m_net->FeatureNodes();
|
||||
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
|
||||
for (size_t i=0; i<featureNodes.size(); i++)
|
||||
inputMatrices[featureNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(featureNodes[i])->FunctionValues();
|
||||
|
@ -147,7 +147,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// evaluate with minibatches
|
||||
dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples);
|
||||
|
||||
m_net.StartEvaluateMinibatchLoop(outputNodes);
|
||||
m_net->StartEvaluateMinibatchLoop(outputNodes);
|
||||
|
||||
size_t totalEpochSamples = 0;
|
||||
size_t numMBsRun = 0;
|
||||
|
@ -159,13 +159,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
{
|
||||
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
|
||||
|
||||
//size_t actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures();
|
||||
//dataReader.CopyMBLayoutTo(m_net.GetMBLayoutPtr());
|
||||
//m_net.VerifyActualNumParallelSequences(dataReader.GetNumParallelSequences()); // TODO: This was added by my (fseide) but UNTESTED. If this fails, comment out and let me know.
|
||||
//size_t actualMBSize = m_net->SetActualMiniBatchSizeFromFeatures();
|
||||
//dataReader.CopyMBLayoutTo(m_net->GetMBLayoutPtr());
|
||||
//m_net->VerifyActualNumParallelSequences(dataReader.GetNumParallelSequences()); // TODO: This was added by my (fseide) but UNTESTED. If this fails, comment out and let me know.
|
||||
|
||||
for (int i=0; i<outputNodes.size(); i++)
|
||||
{
|
||||
m_net.Evaluate(outputNodes[i]);
|
||||
m_net->Evaluate(outputNodes[i]);
|
||||
|
||||
Matrix<ElemType> & outputValues = dynamic_pointer_cast<ComputationNode<ElemType>>(outputNodes[i])->FunctionValues();
|
||||
ofstream & outputStream = *outputStreams[i];
|
||||
|
@ -198,7 +198,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
delete [] tempArray;
|
||||
}
|
||||
private:
|
||||
ComputationNetwork& m_net;
|
||||
ComputationNetworkPtr m_net;
|
||||
int m_verbosity;
|
||||
void operator=(const SimpleOutputWriter&); // (not assignable)
|
||||
};
|
||||
|
|
Загрузка…
Ссылка в новой задаче