Changed most uses of ComputationNetwork* and ComputationNetwork& to ComputationNetworkPtr, to eliminate ownership bugs and to allow integration with BS (BrainScript); this also enabled some minor code simplifications.

Made IComputationNetBuilder::LoadNetworkFromFile() 'protected' since it is no longer used; it will be deleted soon.
This commit is contained in:
Frank Seide 2015-11-21 14:23:59 -08:00
Parent 38ca2aa166
Commit 04e0621d90
26 changed files with 1685 additions and 1712 deletions

Просмотреть файл

@ -136,9 +136,7 @@ void DoEvalBase(const ConfigParameters& config, IDataReader<ElemType>& reader)
evalNodeNamesVector.push_back(evalNodeNames[i]);
}
ComputationNetwork net(deviceId);
net.LoadFromFile<ElemType>(modelPath);
net.ResetEvalTimeStamp();
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
eval.Evaluate(&reader, evalNodeNamesVector, mbSize[0], epochSize);
@ -180,9 +178,7 @@ void DoEvalUnroll(const ConfigParameters& config)
intargvector mbSize = minibatchSize;
wstring path2EvalResults = config(L"path2EvalResults", L"");
ComputationNetwork net(deviceId);
net.LoadFromFile<ElemType>(modelPath);
net.ResetEvalTimeStamp();
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
MultiNetworksEvaluator<ElemType> eval(net);
double evalEntropy;
@ -244,9 +240,7 @@ void DoCrossValidate(const ConfigParameters& config)
}
cvModels.push_back(cvModelPath);
ComputationNetwork net(deviceId);
net.LoadFromFile<ElemType>(cvModelPath);
net.ResetEvalTimeStamp();
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, cvModelPath);
SimpleEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
@ -320,9 +314,7 @@ void DoWriteOutput(const ConfigParameters& config)
outputNodeNamesVector.push_back(outputNodeNames[i]);
}
ComputationNetwork net(deviceId);
net.LoadFromFile<ElemType>(modelPath);
net.ResetEvalTimeStamp();
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
SimpleOutputWriter<ElemType> writer(net, 1);
@ -803,7 +795,7 @@ public:
BrainScriptNetworkBuilder(const ConfigParameters & config) { NOT_IMPLEMENTED; }
// build a ComputationNetwork from description language
virtual /*IComputationNetBuilder::*/ComputationNetwork* BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
virtual /*IComputationNetBuilder::*/ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
{
vector<ScriptableObjects::ConfigValuePtr> args; // this lambda has no arguments
ScriptableObjects::ConfigLambda::NamedParams namedArgs;
@ -813,7 +805,7 @@ public:
fprintf(stderr, "BrainScriptNetworkBuilder using CPU\n");
else
fprintf(stderr, "BrainScriptNetworkBuilder using GPU %d\n", (int)m_net->GetDeviceId());
return m_net.get();
return m_net;
}
// load an existing file--this is the same code as for NDLNetworkBuilder.h (OK to copy it here because this is temporary code anyway)
@ -876,7 +868,7 @@ void DoTrain(const ConfigRecordType & config)
else if (config.Exists(L"SimpleNetworkBuilder"))
{
const ConfigRecordType & simpleNetworkBuilderConfig(config(L"SimpleNetworkBuilder", ConfigRecordType::Record()));
shared_ptr<IComputationNetBuilder<ElemType>> netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(simpleNetworkBuilderConfig);
auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(simpleNetworkBuilderConfig);
createNetworkFn = [netBuilder](DEVICEID_TYPE deviceId)
{
return shared_ptr<ComputationNetwork>(netBuilder->BuildNetworkFromDescription());
@ -886,7 +878,7 @@ void DoTrain(const ConfigRecordType & config)
else if (config.Exists(L"NDLNetworkBuilder"))
{
const ConfigRecordType & ndlNetworkBuilderConfig(config(L"NDLNetworkBuilder", ConfigRecordType::Record()));
shared_ptr<IComputationNetBuilder<ElemType>> netBuilder = make_shared<NDLBuilder<ElemType>>(ndlNetworkBuilderConfig);
shared_ptr<NDLBuilder<ElemType>> netBuilder = make_shared<NDLBuilder<ElemType>>(ndlNetworkBuilderConfig);
createNetworkFn = [netBuilder](DEVICEID_TYPE deviceId)
{
return shared_ptr<ComputationNetwork>(netBuilder->BuildNetworkFromDescription());
@ -1063,7 +1055,7 @@ void DoEncoderDecoder(const ConfigParameters& config)
validationDataReader.push_back(cvEncoderDataReader);
validationDataReader.push_back(cvDecoderDataReader);
sgd.EncoderDecoder(netBuilders, trainDataReader, validationDataReader, makeMode);
sgd.EncoderDecoder(netBuilders, (int)config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
delete encoderDataReader;
delete decoderDataReader;
@ -1149,7 +1141,7 @@ void DoBidirectionEncoderDecoder(const ConfigParameters& config)
validationDataReader.push_back(cvDecoderDataReader);
validationDataReader.push_back(cvBackwardDecoderDataReader);
sgd.EncoderDecoder(netBuilders, trainDataReader, validationDataReader, makeMode);
sgd.EncoderDecoder(netBuilders, (int)config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
delete encoderDataReader;
delete decoderDataReader;
@ -1198,17 +1190,13 @@ void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config)
int traceLevel = config(L"traceLevel", "0");
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
vector<ComputationNetwork*> nets;
ComputationNetwork encoderNet(deviceId);
encoderNet.LoadFromFile<ElemType>(encoderModelPath, FileOptions::fileOptionsBinary, true);
encoderNet.ResetEvalTimeStamp();
vector<ComputationNetworkPtr> nets;
auto encoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, encoderModelPath, FileOptions::fileOptionsBinary, true);
ComputationNetwork decoderNet(deviceId);
decoderNet.LoadFromFile<ElemType>(decoderModelPath, FileOptions::fileOptionsBinary, false, &encoderNet);
decoderNet.ResetEvalTimeStamp();
auto decoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, decoderModelPath, FileOptions::fileOptionsBinary, false, encoderNet.get());
nets.push_back(&encoderNet);
nets.push_back(&decoderNet);
nets.push_back(encoderNet);
nets.push_back(decoderNet);
ConfigArray evalNodeNames = config(L"evalNodeNames");
vector<wstring> evalNodeNamesVector;
for (int i = 0; i < evalNodeNames.size(); ++i)
@ -1273,9 +1261,7 @@ void DoEvalBeamSearch(const ConfigParameters& config, IDataReader<ElemType>& rea
int traceLevel = config(L"traceLevel", "0");
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
ComputationNetwork net(deviceId);
net.LoadFromFile<ElemType>(modelPath);
net.ResetEvalTimeStamp();
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
ConfigArray evalNodeNames = config(L"evalNodeNames");
vector<wstring> evalNodeNamesVector;
@ -1365,15 +1351,12 @@ void DoEdit(const ConfigParameters& config)
template <typename ElemType>
void DoConvertFromDbn(const ConfigParameters& config)
{
//config.Insert("deviceId","-1"); //force using CPU
wstring modelPath = config(L"modelPath");
wstring dbnModelPath = config(L"dbnModelPath");
IComputationNetBuilder<ElemType>* netBuilder = (IComputationNetBuilder<ElemType>*)new SimpleNetworkBuilder<ElemType>(config);
ComputationNetwork* net = netBuilder->LoadNetworkFromFile(dbnModelPath);
auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
net->SaveToFile(modelPath);
delete (netBuilder);
}
// do topological plot of computation network

Просмотреть файл

@ -126,6 +126,7 @@
//BinaryStandardNode(TransposeTimesNode)
;
#if 0 // no longer needed
namespace Microsoft { namespace MSR { namespace CNTK {
using namespace Microsoft::MSR;
@ -137,7 +138,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// build a ComputationNetwork from BrainScript source code
template<class ElemType>
/*virtual*/ /*IComputationNetBuilder::*/ComputationNetwork* ExperimentalNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork*)
/*virtual*/ /*IComputationNetBuilder::*/ComputationNetworkPtr ExperimentalNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork*)
{
if (!m_net || m_net->GetTotalNumberOfNodes() < 1) //not built yet
{
@ -160,10 +161,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: old CNTK code seems to be able to load the network in-place--is that important; is it OK to just replace the pointer?
}
m_net->ResetEvalTimeStamp();
return m_net.get();
return m_net;
}
template class ExperimentalNetworkBuilder<float>;
template class ExperimentalNetworkBuilder<double>;
}}}
#endif

Просмотреть файл

@ -1,3 +1,4 @@
#if 0 // no longer needed
// ExperimentalNetworkBuilder.h -- interface to new version of NDL (and config) parser --fseide
#pragma once
@ -29,12 +30,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// build a ComputationNetwork from description language
// TODO: change return type of these interfaces to shared_ptrs
virtual /*IComputationNetBuilder::*/ComputationNetwork* BuildNetworkFromDescription(ComputationNetwork* = nullptr);
virtual /*IComputationNetBuilder::*/ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override;
// TODO: that function argument is related to PairNetworkNode, which will go away (we don't support it here)
// load an existing file--this is the same code as for NDLNetworkBuilder.h (OK to copy it here because this is temporary code anyway)
virtual /*IComputationNetBuilder::*/ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr)
bool bAllowNoCriterionNode = false,
ComputationNetwork* anotherNetwork = nullptr) override
{
if (!m_net || m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load
{
@ -48,3 +50,4 @@ namespace Microsoft { namespace MSR { namespace CNTK {
};
}}}
#endif

Просмотреть файл

@ -14,7 +14,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
class IExecutionEngine
{
public:
virtual ComputationNetwork & GetComputationNetwork() = 0;
virtual ComputationNetworkPtr GetComputationNetwork() = 0;
virtual NDLNodeEvaluator<ElemType> & GetNodeEvaluator() = 0;

Просмотреть файл

@ -104,7 +104,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams)
RuntimeError("Invalid number of parameters. Valid parameters: CreateModel(). newly created model always becomes the new default.");
ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE);
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
OverrideModelNameAndSetDefaultModel(cn);
}
if (EqualInsensitive(name, "CreateModelWithName")) //create a blank model
@ -113,7 +113,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
if (params.size() > numFixedParams + numOptionalParams || params.size() < numFixedParams)
RuntimeError("Invalid number of parameters. Valid parameters: CreateModelWithName(modelName). newly created model always becomes the new default.");
ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE);
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
OverrideModelNameAndSetDefaultModel(cn, params[0]);
}
else if (EqualInsensitive(name, "LoadModel"))
@ -124,7 +124,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams);
ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE);
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
cn->LoadFromFile<ElemType>(params[0]);
OverrideModelNameAndSetDefaultModel(cn);
}
@ -136,7 +136,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
std::wstring modelFormat = GetOptionalModelFormat(params, numFixedParams);
ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE);
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
cn->LoadFromFile<ElemType>(params[1]);
OverrideModelNameAndSetDefaultModel(cn, params[0]);
}
@ -148,7 +148,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
string modelName = params[0];
wstring ndlSnippetFileName = params[1];
ComputationNetwork* cn = new ComputationNetwork(CPUDEVICE);
auto cn = make_shared<ComputationNetwork>(CPUDEVICE);
NDLScript<ElemType> script;
ConfigParameters ndlScript (script.ReadConfigFile(ndlSnippetFileName));
@ -181,7 +181,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
std::wstring fileName = params[0];
ComputationNetwork* cn = m_netNdlDefault->cn;
auto cn = m_netNdlDefault->cn;
if (cn == NULL)
RuntimeError("SaveDefaultModel can only be called after a default name exists (i.e., at least one model is loaded.)");
@ -440,7 +440,7 @@ void MELScript<ElemType>::CallFunction(const std::string& p_name, const ConfigPa
// this probabably won't do anything, but make sure all NDL has been created
ProcessNDLScript(netNdl, ndlPassInitial, false);
ComputationNetwork* cn = netNdl->cn;
auto cn = netNdl->cn;
for (auto & node : nodes)
{
switch(prop)

Просмотреть файл

@ -147,7 +147,7 @@ public:
search = symbol.substr(firstStart);
}
ComputationNetwork* cn = netNdl->cn;
ComputationNetworkPtr cn = netNdl->cn;
wstring name = msra::strfun::utf16(search);
vector<ComputationNodeBasePtr> nodes = cn->GetNodesFromName(name);
// didn't find the name in the current symbols, try NDL
@ -378,7 +378,7 @@ public:
}
}
void OverrideModelNameAndSetDefaultModel(ComputationNetwork* cn, string modelName = "default")
void OverrideModelNameAndSetDefaultModel(ComputationNetworkPtr cn, string modelName = "default")
{
auto found = m_mapNameToNetNdl.find(modelName);
if (found != m_mapNameToNetNdl.end() && found->second.cn != cn)
@ -583,7 +583,7 @@ public:
// EvaluateNDLSnippet - evaluate the passed snippet of NDL into a computational network
// script - [in] text of the NDL snippet
// network - [in/out] computation network to insert NDL into
void EvaluateNDLSnippet(const ConfigValue& script, ComputationNetwork* network)
void EvaluateNDLSnippet(const ConfigValue& script, ComputationNetworkPtr network)
{
NDLUtil<ElemType> ndlUtil(network);
ndlUtil.ProcessNDLConfig(script);
@ -646,7 +646,7 @@ public:
// model1=[...] - Embedded NDL script
if (0 == foundBrace)
{
ComputationNetwork* cn = new ComputationNetwork();
ComputationNetworkPtr cn = make_shared<ComputationNetwork>();
EvaluateNDLSnippet(rightValue, cn);
OverrideModelNameAndSetDefaultModel(cn, key);
}

Просмотреть файл

@ -32,13 +32,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const ConfigParameters* m_baseConfig; // NOTE: the lifetime of the parent MUST exist from the call to Init to the BuildNetworkFromDescription() call for stringize
public:
NDLBuilder() : m_net(nullptr)
NDLBuilder()
{
m_executionEngine = NULL;
m_baseConfig = NULL;
} // empty constructor, call Init immediately hereafter
NDLBuilder(const ConfigParameters& config) : m_net(nullptr)
NDLBuilder(const ConfigParameters& config)
{
m_baseConfig = config.GetParent();
Init(config);
@ -57,7 +57,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_dumpFileName = dumpFileName;
m_initialConfig = configParams;
m_deviceId = deviceId;
m_net = &(executionEngine->GetComputationNetwork());
m_net = executionEngine->GetComputationNetwork();
if (m_deviceId == AUTOPLACEMATRIX)
m_deviceId = Matrix<ElemType>::GetBestGPUDeviceId();
m_deviceId = EnforceOneGPUOnly(m_deviceId); // see EnforceOneGPUOnly() for comment on what this is
@ -158,16 +158,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
virtual ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr)
bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr) override
{
if (m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load
m_net->LoadFromFile<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
m_net->ResetEvalTimeStamp();
return m_net;
return m_net.get();
}
ComputationNetwork* LoadNetworkFromConfig(const wstring& configFilePaths, bool forceLoad = true)
ComputationNetworkPtr LoadNetworkFromConfig(const wstring& configFilePaths, bool forceLoad = true)
{
if (m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load
LoadFromConfig(configFilePaths);
@ -214,7 +214,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ndlUtil.ProcessNDLConfig(config, true);
}
virtual ComputationNetwork* BuildNetworkFromDescription(ComputationNetwork* = nullptr)
virtual ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
{
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
{
@ -226,7 +226,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
private:
ComputationNetwork* m_net;
ComputationNetworkPtr m_net;
IExecutionEngine<ElemType>* m_executionEngine;
std::wstring m_networkConfig;
std::wstring m_dumpFileName;

Просмотреть файл

@ -23,14 +23,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
private:
ComputationNetwork* m_net;
ComputationNetworkPtr m_net;
public:
NDLUtil(ComputationNetwork * net) : m_net(net)
{
}
~NDLUtil()
NDLUtil(ComputationNetworkPtr net) : m_net(net)
{
}
@ -106,7 +102,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_net->DumpAllNodesToFile(false, dumpFileName, false);
m_net->ValidateNetwork(!fullValidate);
}
SynchronousNodeEvaluator<ElemType> ndlEvaluator(*m_net);
SynchronousNodeEvaluator<ElemType> ndlEvaluator(m_net);
NDLNode<ElemType>* lastNode = script->Evaluate(ndlEvaluator, L"", ndlPass, skipThrough);
if (ndlPass == ndlPassResolve)
{

Просмотреть файл

@ -108,12 +108,12 @@ template <typename ElemType>
class NetNdl // class to associate a network with an NDLScript
{
public:
ComputationNetwork* cn;
ComputationNetworkPtr cn;
NDLScript<ElemType>* ndl; // NDLScript we are using for this network. NOTE: the actual script used
NDLNode<ElemType>* lastNode[ndlPassMax]; // last node we evaluated for each pass
NetNdl(): cn(nullptr), ndl(nullptr) {ClearLastNodes();}
NetNdl(ComputationNetwork*p_cn): cn(p_cn), ndl(nullptr) {ClearLastNodes();}
NetNdl(ComputationNetwork*p_cn, NDLScript<ElemType>* p_ndl): cn(p_cn), ndl(p_ndl) {ClearLastNodes();}
NetNdl(ComputationNetworkPtr p_cn): cn(p_cn), ndl(nullptr) {ClearLastNodes();}
NetNdl(ComputationNetworkPtr p_cn, NDLScript<ElemType>* p_ndl): cn(p_cn), ndl(p_ndl) {ClearLastNodes();}
~NetNdl()
{}
@ -130,9 +130,8 @@ public:
// NOTE: this deletes the network and the NDLScript, use with care!
void Clear()
{
delete cn;
cn.reset();
delete ndl;
cn = nullptr;
ndl = nullptr;
ClearLastNodes();
}
@ -385,7 +384,7 @@ private:
bool m_noDefinitions; // no definitions can be made in this script, interpret all macro/function names as calls
static NDLScript<ElemType> s_global; //("global"); // global script for storing macros and global nodes
std::vector<NDLNode<ElemType>*> m_children; // child nodes. Note that m_script nodes may not be children of this object, they include macro nodes
ComputationNetwork* m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
ComputationNetworkPtr m_cn; // computation network to use for backup symbol lookup. Used for MEL where NDL and network nodes are mixed
bool m_definingMacro; // currently defining a macro, flag to determine if we are defining or interpretting a macro call
public:
@ -518,7 +517,7 @@ public:
}
// SetComputationNetwork - set the computation network this NDL is associated with
void SetComputationNetwork(ComputationNetwork* cn)
void SetComputationNetwork(ComputationNetworkPtr cn)
{
m_cn = cn;
}

Просмотреть файл

@ -24,10 +24,10 @@
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork* encoderNet)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork* encoderNet)
{
size_t mbSize = 1;
ComputationNetwork* net = nullptr;
ComputationNetworkPtr net;
// TODO: this seems to call for a switch statement
if (m_rnnType == SIMPLENET)
@ -68,7 +68,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
@ -170,7 +170,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Note: while ComputationNode and CompuationNetwork are (supposed to be) independent of ElemType, it is OK to keep this class dependent.
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildSimpleRNN(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleRNN(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
@ -279,7 +279,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildClassEntropyNetwork(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
@ -395,12 +395,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_net->ResetEvalTimeStamp();
return m_net;
return m_net;
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetworkFromDescription(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetworkFromDescription(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
@ -507,12 +506,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
this builds an alignment based LM generator
the aligment node takes a variable length input and relates each element to a variable length output
*/
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet,
size_t mbSize)
{
template<class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
{
unsigned long randomSeed = 1;
@ -634,13 +632,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_net->ResetEvalTimeStamp();
return m_net;
}
return m_net;
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet,
size_t mbSize)
{
template<class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
{
@ -771,7 +768,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFromDescription(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFromDescription(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
@ -890,7 +887,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildNeuralProbNetworkFromDescription(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNeuralProbNetworkFromDescription(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
@ -1237,7 +1234,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
@ -1338,7 +1335,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromDescription(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildCLASSLSTMNetworkFromDescription(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
@ -1474,7 +1471,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescription(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescription(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
@ -1609,7 +1606,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion, submitted to Interspeech 2015
*/
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildLSTMEncoderNetworkFromDescription(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMEncoderNetworkFromDescription(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
@ -1700,7 +1697,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion" submitted to Interspeech 2015
*/
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
@ -2020,7 +2017,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion, submitted to Interspeech 2015
*/
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
@ -2170,7 +2167,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDescription(size_t mbSize)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNCELSTMNetworkFromDescription(size_t mbSize)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
@ -2285,7 +2282,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
ComputationNetwork* SimpleNetworkBuilder<ElemType>::BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName)
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);

Просмотреть файл

@ -100,7 +100,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const bool applyMeanVarNorm = false, bool needPrior = false, DEVICEID_TYPE deviceId = AUTOPLACEMATRIX)
{
m_deviceId = deviceId;
m_net = new ComputationNetwork(m_deviceId);
m_net = make_shared<ComputationNetwork>(m_deviceId);
m_outputLayerSize = outputLayerSize;
m_layerSizes = layerSizes;
@ -248,11 +248,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
virtual ~SimpleNetworkBuilder()
{
delete m_net;
}
static bool CheckDbnTag(File &fstream, const std::string expectedTag)
{
char tag[5];
@ -264,7 +259,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// this load function allows an alternative file format of an early internal predecessor of CNTK, internally called DBN.exe
virtual ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
bool bAllowNoCriterion = false, ComputationNetwork* anotherNetwork = nullptr)
bool bAllowNoCriterion = false, ComputationNetwork* anotherNetwork = nullptr) override
{
if (m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load
{
@ -282,20 +277,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
m_net->ResetEvalTimeStamp();
return m_net;
return m_net.get();
}
ComputationNetwork* BuildNetworkFromDescription(ComputationNetwork* encoderNet);
ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* encoderNet = nullptr) override;
ComputationNetworkPtr BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName); // support for fseide's Microsoft-internal legacy tool "DBN.exe"
RNNTYPE RnnType(){ return m_rnnType; }
protected:
ComputationNetwork* BuildSimpleDNN();
ComputationNetworkPtr BuildSimpleDNN();
ComputationNetwork* BuildSimpleRNN(size_t mbSize = 1);
ComputationNetworkPtr BuildSimpleRNN(size_t mbSize = 1);
ComputationNetwork* BuildClassEntropyNetwork(size_t mbSize = 1);
ComputationNetworkPtr BuildClassEntropyNetwork(size_t mbSize = 1);
ComputationNodePtr BuildLSTMComponent(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input);
@ -305,31 +302,29 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNodePtr BuildDirectConnect(unsigned long &randomSeed, size_t mbSize, size_t iLayer, size_t inputDim, size_t outputDim, ComputationNodePtr input, ComputationNodePtr toNode);
ComputationNetwork* BuildLogBilinearNetworkFromDescription(size_t mbSize = 1);
ComputationNetworkPtr BuildLogBilinearNetworkFromDescription(size_t mbSize = 1);
ComputationNetwork* BuildNeuralProbNetworkFromDescription(size_t mbSize = 1);
ComputationNetworkPtr BuildNeuralProbNetworkFromDescription(size_t mbSize = 1);
ComputationNetwork* BuildLSTMNetworkFromDescription(size_t mbSize = 1);
ComputationNetworkPtr BuildLSTMNetworkFromDescription(size_t mbSize = 1);
ComputationNetwork* BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize = 1);
ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription(size_t mbSize = 1);
ComputationNetwork* BuildLSTMEncoderNetworkFromDescription(size_t mbSize = 1);
ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription(size_t mbSize = 1);
ComputationNetwork* BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
ComputationNetwork* BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription(size_t mbSize = 1);
ComputationNetwork* BuildCLASSLSTMNetworkFromDescription(size_t mbSize = 1);
ComputationNetworkPtr BuildCLASSLSTMNetworkFromDescription(size_t mbSize = 1);
ComputationNetwork* BuildConditionalLSTMNetworkFromDescription(size_t mbSize = 1);
ComputationNetworkPtr BuildConditionalLSTMNetworkFromDescription(size_t mbSize = 1);
ComputationNetwork* BuildNCELSTMNetworkFromDescription(size_t mbSize = 1);
ComputationNetworkPtr BuildNCELSTMNetworkFromDescription(size_t mbSize = 1);
ComputationNetwork* BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
ComputationNetwork* BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
ComputationNetwork* BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName);
ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet, size_t mbSize = 1);
//layer is 0 based
ComputationNodePtr ApplyNonlinearFunction(ComputationNodePtr input, const size_t layer, const std::wstring nodeName = L"");
@ -377,7 +372,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
protected:
ComputationNetwork* m_net;
ComputationNetworkPtr m_net;
int m_outputLayerSize;
intargvector m_layerSizes;

Просмотреть файл

@ -21,7 +21,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void SynchronousNodeEvaluator<ElemType>::Evaluate(NDLNode<ElemType>* node, const wstring& baseName, const NDLPass pass)
{
ComputationNetworkBuilder<ElemType> builder(m_net);
ComputationNetworkBuilder<ElemType> builder(*m_net);
// constants don't need to be evaluated, they just translate into numbers...
if (node->GetType() == ndlTypeConstant
@ -53,7 +53,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
nodePtr = ComputationNode<ElemType>::FromVoidPtr(node->GetEvalValue());
if (!nodePtr)
{
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net.GetNodeFromName(name));
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
node->SetEvalValue(nodePtr.get());
}
}
@ -71,8 +71,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
// first look for this node already existing in the network
if (m_net.NodeNameExist(name))
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net.GetNodeFromName(name));
if (m_net->NodeNameExist(name))
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
else
nodePtr = builder.CreateInputNode(name, rows, cols);
}
@ -90,8 +90,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t cols = params.size() > 1 ? ((NDLNode<ElemType>*)params[1])->GetScalar() : 1;
// first look for this node already existing in the network
if (m_net.NodeNameExist(name))
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net.GetNodeFromName(name));
if (m_net->NodeNameExist(name))
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
else
nodePtr = builder.CreateSparseInputNode(name, rows, cols);
}
@ -161,9 +161,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (initString == "fixedvalue")
nodePtr->FunctionValues().SetValue(value);
else if (initString == "uniform")
m_net.InitLearnableParameters(nodePtr, true, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initValueScale, initOnCPUOnly);
m_net->InitLearnableParameters(nodePtr, true, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initValueScale, initOnCPUOnly);
else if (initString == "gaussian")
m_net.InitLearnableParameters(nodePtr, false, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initValueScale, initOnCPUOnly);
m_net->InitLearnableParameters(nodePtr, false, forcedRandomSeed < 0 ? randomSeed++ : (unsigned long)forcedRandomSeed, initValueScale, initOnCPUOnly);
else if (initString == "fromfile")
{
std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", "");
@ -209,9 +209,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (initString == "fixedvalue")
nodePtr->FunctionValues().SetValue(value);
else if (initString == "uniform")
m_net.InitLearnableParameters(nodePtr, true, randomSeed++, initValueScale);
m_net->InitLearnableParameters(nodePtr, true, randomSeed++, initValueScale);
else if (initString == "gaussian")
m_net.InitLearnableParameters(nodePtr, false, randomSeed++, initValueScale);
m_net->InitLearnableParameters(nodePtr, false, randomSeed++, initValueScale);
else if (initString == "fromfile")
{
std::string initFromFilePath = node->GetOptionalParameter("initFromFilePath", "");

Просмотреть файл

@ -22,7 +22,7 @@ class SynchronousNodeEvaluator : public NDLNodeEvaluator<ElemType>
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
public:
// Constructor - create evaluator
SynchronousNodeEvaluator(ComputationNetwork& cn) : m_net(cn)
SynchronousNodeEvaluator(ComputationNetworkPtr cn) : m_net(cn)
{ }
// Evaluate - evaluate a node and translate into underlying
@ -69,7 +69,7 @@ public:
// In this example, in the call D=Times(A.B,X.B), we need to resolve A.B and X.B appropriately.
// Specifically, "A.B" must be resolved to the fully qualified name "C.A.B", whereas "X.B" must be resolved to the fully qualified name "P.B".
// We then use this fully-qualified name to look up this node in the model (using "m_net.GetNodeFromName").
// We then use this fully-qualified name to look up this node in the model (using "m_net->GetNodeFromName").
std::size_t firstDotPos = name.find_first_of(".");
if (firstDotPos == std::string::npos)
@ -105,9 +105,9 @@ public:
}
// fully qualified names can be looked up in the model
if (m_net.NodeNameExist(wname))
if (m_net->NodeNameExist(wname))
{
void* np = (void*)m_net.GetNodeFromName(wname);
void* np = (void*)m_net->GetNodeFromName(wname);
nodeParam->SetEvalValue(np);
}
// NOTE: there is a bug here, we allow an abbreviated node reference (i.e. L1.BFF) based on return values in NDL
@ -170,9 +170,9 @@ public:
// check for the fully quantified name in the computation network
// this is needed for MEL processing, since CN nodes names can be used as parameters in MEL
std::wstring wname = msra::strfun::utf16(name);
if (m_net.NodeNameExist(wname))
if (m_net->NodeNameExist(wname))
{
void* np = (void*)m_net.GetNodeFromName(wname).get();
void* np = (void*)m_net->GetNodeFromName(wname).get();
// if we don't have a resolve node, it's because the name didn't exist in NDL
if (!nodeResolve)
nodeResolve = nodeParam;
@ -276,15 +276,15 @@ public:
std::string value = param->GetValue();
if (!_stricmp(value.c_str(), "feature"))
{
SetOutputNode(m_net.FeatureNodes(), compNode);
SetOutputNode(m_net->FeatureNodes(), compNode);
}
else if (!_stricmp(value.c_str(), "label"))
{
SetOutputNode(m_net.LabelNodes(), compNode);
SetOutputNode(m_net->LabelNodes(), compNode);
}
else if (!_stricmp(value.c_str(), "criteria"))
{
SetOutputNode(m_net.FinalCriterionNodes(), compNode);
SetOutputNode(m_net->FinalCriterionNodes(), compNode);
}
else if (!_stricmp(value.c_str(), "multiseq"))
{
@ -292,11 +292,11 @@ public:
}
else if (!_strnicmp(value.c_str(), "eval", 4)) // only compare the first 4 characters
{
SetOutputNode(m_net.EvaluationNodes(), compNode);
SetOutputNode(m_net->EvaluationNodes(), compNode);
}
else if (!_stricmp(value.c_str(), "output"))
{
SetOutputNode(m_net.OutputNodes(), compNode);
SetOutputNode(m_net->OutputNodes(), compNode);
}
}
@ -321,8 +321,8 @@ public:
// returns - pointer to the matching EvalValue for that node, of NULL if not found
virtual void* FindSymbol(const wstring& symbol)
{
if (m_net.NodeNameExist(symbol))
return m_net.GetNodeFromName(symbol).get();
if (m_net->NodeNameExist(symbol))
return m_net->GetNodeFromName(symbol).get();
return nullptr;
}
@ -331,7 +331,7 @@ public:
}
private:
ComputationNetwork& m_net;
ComputationNetworkPtr m_net;
void operator=(const SynchronousNodeEvaluator&);
};
@ -343,29 +343,25 @@ class SynchronousExecutionEngine : public IExecutionEngine<ElemType>
public:
SynchronousExecutionEngine(DEVICEID_TYPE deviceId=AUTOPLACEMATRIX, unsigned long randomSeedOffset=0)
{
m_computationNetwork = new ComputationNetwork(deviceId);
m_computationNetwork = make_shared<ComputationNetwork>(deviceId);
m_computationNetwork->SetRandomSeedOffset(randomSeedOffset);
m_ownNetwork = true;
m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(*m_computationNetwork);
m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(m_computationNetwork);
}
SynchronousExecutionEngine(ComputationNetwork* computationNetwork)
SynchronousExecutionEngine(ComputationNetworkPtr computationNetwork)
{
m_computationNetwork = computationNetwork;
m_ownNetwork = false;
m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(*m_computationNetwork);
m_nodeEvaluator = new SynchronousNodeEvaluator<ElemType>(m_computationNetwork);
}
virtual ~SynchronousExecutionEngine()
{
if (m_ownNetwork)
delete m_computationNetwork;
delete m_nodeEvaluator;
}
ComputationNetwork& GetComputationNetwork()
ComputationNetworkPtr GetComputationNetwork()
{
return *m_computationNetwork;
return m_computationNetwork;
}
NDLNodeEvaluator<ElemType>& GetNodeEvaluator()
@ -374,8 +370,7 @@ public:
}
private:
bool m_ownNetwork;
ComputationNetwork* m_computationNetwork;
ComputationNetworkPtr m_computationNetwork;
SynchronousNodeEvaluator<ElemType>* m_nodeEvaluator;
protected:
// Copy constructor, should never be called.

Просмотреть файл

@ -212,7 +212,7 @@ template <typename ElemType>
void TestMacros(const ConfigParameters& configBase)
{
NDLScript<ElemType> script = configBase("ndlFull");
ComputationNetwork net;
ComputationNetworkPtr net = make_shared<ComputationNetwork>();
SynchronousNodeEvaluator<ElemType> nodeEvaluator(net);
script.Evaluate(nodeEvaluator, L"", ndlPassInitial);
}

Просмотреть файл

@ -606,12 +606,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
/*static*/void ComputationNetwork::SetDropoutRate(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed)
/*static*/void ComputationNetwork::SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed)
{
if (dropoutRate != prevDropoutRate)
{
fprintf(stderr, "Switching dropout rate to %.8g.\n", dropoutRate);
list<ComputationNodeBasePtr> dropoutNodes = net.GetNodesWithType(OperationNameOf(DropoutNode), criterionNode);
list<ComputationNodeBasePtr> dropoutNodes = net->GetNodesWithType(OperationNameOf(DropoutNode), criterionNode);
if (dropoutNodes.size() == 0 && dropoutRate > 0)
fprintf(stderr, "WARNING: there is no dropout node.\n");
else for (auto nodeIter = dropoutNodes.begin(); nodeIter != dropoutNodes.end(); nodeIter++)
@ -627,10 +627,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// set sequence training parameters, e.g. smoothing weight, frame drop threshold
template<class ElemType>
void ComputationNetwork::SetSeqParam(ComputationNetwork& net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign)
void ComputationNetwork::SetSeqParam(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign)
{
fprintf(stderr, "Setting Hsmoothing weight to %.8g and frame-dropping threshhold to %.8g\n", hsmoothingWeight, frameDropThresh);
list<ComputationNodeBasePtr> seqNodes = net.GetNodesWithType(OperationNameOf(SequenceWithSoftmaxNode), criterionNode);
list<ComputationNodeBasePtr> seqNodes = net->GetNodesWithType(OperationNameOf(SequenceWithSoftmaxNode), criterionNode);
if (seqNodes.size() == 0)
{
fprintf(stderr, "WARNING: there is no sequence node.\n");
@ -647,10 +647,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
/*static*/void ComputationNetwork::SetMaxTempMemSizeForCNN(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const size_t maxTempMemSizeInSamples)
/*static*/void ComputationNetwork::SetMaxTempMemSizeForCNN(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const size_t maxTempMemSizeInSamples)
{
fprintf(stderr, "Set Max Temp Mem Size For Convolution Nodes to %lu samples.\n", maxTempMemSizeInSamples);
list<ComputationNodeBasePtr> convolutionNodes = net.GetNodesWithType(OperationNameOf(ConvolutionNode), criterionNode);
list<ComputationNodeBasePtr> convolutionNodes = net->GetNodesWithType(OperationNameOf(ConvolutionNode), criterionNode);
if (convolutionNodes.size() == 0 && maxTempMemSizeInSamples != 0)
{
fprintf(stderr, "WARNING: there is no convolution node.\n");
@ -1116,14 +1116,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template void ComputationNetwork::InitLearnableParameters<float>(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const float initValueScale, bool initOnCPUOnly);
template void ComputationNetwork::LoadFromFile<float>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
template void ComputationNetwork::PerformSVDecomposition<float>(const map<wstring, float>& SVDConfig, size_t alignedsize);
template /*static*/void ComputationNetwork::SetDropoutRate<float>(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
template void ComputationNetwork::SetSeqParam<float>(ComputationNetwork& net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
template /*static*/void ComputationNetwork::SetDropoutRate<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
template void ComputationNetwork::SetSeqParam<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
template void ComputationNetwork::InitLearnableParameters<double>(const ComputationNodeBasePtr& node, const bool uniformInit, const unsigned long randomSeed, const double initValueScale, bool initOnCPUOnly);
template void ComputationNetwork::LoadFromFile<double>(const wstring& fileName, const FileOptions fileFormat, const bool bAllowNoCriterionNode, ComputationNetwork* anotherNetwork);
template void ComputationNetwork::PerformSVDecomposition<double>(const map<wstring, float>& SVDConfig, size_t alignedsize);
template /*static*/void ComputationNetwork::SetDropoutRate<double>(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
template void ComputationNetwork::SetSeqParam<double>(ComputationNetwork& net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
template /*static*/void ComputationNetwork::SetDropoutRate<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
template void ComputationNetwork::SetSeqParam<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
// register ComputationNetwork with the ScriptableObject system
ScriptableObjects::ConfigurableRuntimeTypeRegister::Add<ComputationNetwork> registerComputationNetwork(L"ComputationNetwork");

Просмотреть файл

@ -67,6 +67,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
class ComputationNetwork : public ScriptableObjects::Object, public ScriptableObjects::HasToString, public ScriptableObjects::IConfigRecord
{
public:
typedef shared_ptr<ComputationNetwork> ComputationNetworkPtr;
protected:
// FlowControlNodes for internal use by this class:
@ -359,6 +361,17 @@ public:
void LoadFromFile(const std::wstring& fileName, const FileOptions fileFormat = FileOptions::fileOptionsBinary,
const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr);
// static helper to instantiate a network from a file
template<class ElemType>
static ComputationNetworkPtr CreateFromFile(DEVICEID_TYPE deviceId, const std::wstring& fileName,
                                            const FileOptions fileFormat = FileOptions::fileOptionsBinary,
                                            const bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr)
{
    // Construct a network on the given device and populate it from 'fileName'.
    // Returns a shared_ptr so ownership is unambiguous (see commit rationale in HEAD).
    auto net = make_shared<ComputationNetwork>(deviceId);
    // BUGFIX: forward the caller's 'fileFormat' instead of hard-coding
    // FileOptions::fileOptionsBinary, which silently ignored the parameter.
    net->LoadFromFile<ElemType>(fileName, fileFormat, bAllowNoCriterionNode, anotherNetwork);
    return net;
}
// -----------------------------------------------------------------------
// evaluation
// -----------------------------------------------------------------------
@ -547,10 +560,10 @@ public:
// -----------------------------------------------------------------------
template<class ElemType> // TODO: dropoutRate change to double
static void SetDropoutRate(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
static void SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double & prevDropoutRate, unsigned long & dropOutSeed);
template<class ElemType>
static void SetSeqParam(ComputationNetwork& net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
static void SetMaxTempMemSizeForCNN(ComputationNetwork& net, const ComputationNodeBasePtr& criterionNode, const size_t maxTempMemSizeInSamples);
static void SetSeqParam(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, double hsmoothingWeight, double frameDropThresh, const bool doreferencealign);
static void SetMaxTempMemSizeForCNN(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const size_t maxTempMemSizeInSamples);
// -----------------------------------------------------------------------
// evaluation
@ -984,6 +997,6 @@ private: // TODO: make all private that can be made private
// TODO: does this apply to anything else besides temporary node-internal intermediate results? What, for example?
MatrixPool m_matrixPool;
};
typedef shared_ptr<ComputationNetwork> ComputationNetworkPtr;
typedef ComputationNetwork::ComputationNetworkPtr ComputationNetworkPtr;
}}}

Просмотреть файл

@ -58,7 +58,7 @@ template<class ElemType>
void CNTKEval<ElemType>::Destroy()
{
// cleanup everything
delete m_net; // TODO: use shared_ptr
m_net.reset();
delete m_reader;
delete m_writer;
delete this;
@ -71,11 +71,7 @@ void CNTKEval<ElemType>::LoadModel(const std::wstring& modelFileName)
{
DEVICEID_TYPE deviceId = DeviceFromConfig(m_config);
fprintf(stderr, "DeviceID=%d\n", (int)deviceId);
if (m_net != NULL)
delete m_net;
m_net = new ComputationNetwork(deviceId);
m_net->LoadFromFile<ElemType>(modelFileName);
m_net->ResetEvalTimeStamp();
m_net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
}
// GetNodeDimensions - Get the node dimensions of the specified nodes
@ -169,7 +165,7 @@ void CNTKEval<ElemType>::Evaluate(std::map<std::wstring, std::vector<ElemType>*>
m_writer->SetData(&outputs, &m_dimensions);
// call the evaluator
SimpleOutputWriter<ElemType> eval(*m_net);
SimpleOutputWriter<ElemType> eval(m_net);
eval.WriteOutput(*m_reader, minibatchSize, *m_writer, outNodeNames);
}

Просмотреть файл

@ -25,7 +25,7 @@ class CNTKEval : public IEvaluateModel<ElemType>
EvalReader<ElemType>* m_reader;
EvalWriter<ElemType>* m_writer;
ConfigParameters m_config;
ComputationNetwork* m_net;
ComputationNetworkPtr m_net;
std::map<std::wstring, size_t> m_dimensions;
size_t m_start;

Просмотреть файл

@ -107,14 +107,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: callers of this often do ComputationNetwork::UpdateEvalTimeStamps(featureNodes) and also for labels; we should eliminate the need for this.
template<class ElemType>
static bool GetMinibatchIntoNetwork(IDataReader<ElemType>& trainSetDataReader,
ComputationNetwork& net,
ComputationNetworkPtr net,
ComputationNodeBasePtr criterionNode,
bool useDistributedMBReading,
bool useParallelTrain,
std::map<std::wstring, Matrix<ElemType>*> & inputMatrices,
size_t & actualMBSize)
{
auto pMBLayout = net.GetMBLayoutPtr();
auto pMBLayout = net->GetMBLayoutPtr();
// Reading consists of a sequence of Reader API calls:
// - GetMinibatch() --fills the inputMatrices
// - SetActualMiniBatchSizeFromFeatures() --tells Network to resize the nodes' buffers
@ -127,15 +127,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// no data is read. When it does, 'wasDataRead' can be removed
bool wasDataRead = trainSetDataReader.GetMinibatch(inputMatrices); // fill in the minibatch data into the Input nodes' buffers directly
// reader will have resized input node's m_functionValues directly. Nodes must be notified to do necessary internal state updates from that.
net.NotifyInputNodesFunctionValuesMBSizeModified();
size_t readMBSize = net.DetermineActualMBSizeFromFeatures();
net->NotifyInputNodesFunctionValuesMBSizeModified();
size_t readMBSize = net->DetermineActualMBSizeFromFeatures();
if (readMBSize == 0)
wasDataRead = false;
trainSetDataReader.CopyMBLayoutTo(pMBLayout); // and layout meta-data
// verify some DataReader calls that are redundant since the MBLayout refactoring (keep verifying for a while for cosy feeling)
net.VerifyActualNumParallelSequences(trainSetDataReader.GetNumParallelSequences()); // info already contained in MBLayout
net->VerifyActualNumParallelSequences(trainSetDataReader.GetNumParallelSequences()); // info already contained in MBLayout
//assert(trainSetDataReader.RequireSentenceSeg() == pMBLayout->RequireSentenceSeg()); // this one is redundant, too
if ((criterionNode != nullptr) && (criterionNode->OperationName() == L"SequenceWithSoftmax"))
@ -174,8 +174,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// decimate if needed. Decimation happens in-place.
if (wasDataRead && !useDistributedMBReading && useParallelTrain)
{
DecimateMinibatch(inputMatrices, g_mpi->NumNodesInUse(), g_mpi->CurrentNodeRank(), net.GetMBLayoutPtr());
net.NotifyInputNodesFunctionValuesMBSizeModified(); // need to tell'm again since we modified it again
DecimateMinibatch(inputMatrices, g_mpi->NumNodesInUse(), g_mpi->CurrentNodeRank(), net->GetMBLayoutPtr());
net->NotifyInputNodesFunctionValuesMBSizeModified(); // need to tell'm again since we modified it again
}
// get MB size and tell Network to update its nodes' buffers based on what's in the input matrices
@ -184,7 +184,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: This will go away, as we will do resizing inside EvaluateThisNode(FrameRange()).
actualMBSize = 0;
if (wasDataRead) // TODO: what if we call it always?
actualMBSize = net.DetermineActualMBSizeFromFeatures(); // TODO: don't we know the size from reader? Should this be a check instead?
actualMBSize = net->DetermineActualMBSizeFromFeatures(); // TODO: don't we know the size from reader? Should this be a check instead?
return true;
}

Просмотреть файл

@ -13,10 +13,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
class IComputationNetBuilder //Abstract Class that cannot be instantiated
{
public:
protected:
virtual ComputationNetwork* LoadNetworkFromFile(const std::wstring& modelFileName, bool forceLoad = true,
bool bAllowNoCriterion = false, ComputationNetwork* = nullptr) = 0;
virtual ComputationNetwork* BuildNetworkFromDescription(ComputationNetwork* = nullptr) = 0;
public:
virtual ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) = 0;
virtual ~IComputationNetBuilder() {};
};

Просмотреть файл

@ -54,7 +54,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
typedef ClassBasedCrossEntropyWithSoftmaxNode<ElemType>* ClassBasedCrossEntropyWithSoftmaxNodePtr;
public:
MultiNetworksEvaluator(ComputationNetwork& net, const size_t numMBsToShowResult = 100, const int traceLevel = 0) : Base(net, numMBsToShowResult, traceLevel) { }
MultiNetworksEvaluator(ComputationNetworkPtr net, const size_t numMBsToShowResult = 100, const int traceLevel = 0) : Base(net, numMBsToShowResult, traceLevel) { }
//returns error rate
// This was a special early implementation of RNNs by emulating them as a DNN.
@ -63,10 +63,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: can probably be removed.
double EvaluateUnroll(IDataReader<ElemType>* dataReader, const size_t mbSize, double &evalSetCrossEntropy, const wchar_t* output = nullptr, const size_t testSize = requestDataSize)
{
std::vector<ComputationNodeBasePtr> & featureNodes = m_net.FeatureNodes();
std::vector<ComputationNodeBasePtr> & labelNodes = m_net.LabelNodes();
std::vector<ComputationNodeBasePtr> & criterionNodes = m_net.FinalCriterionNodes();
std::vector<ComputationNodeBasePtr> & evaluationNodes = m_net.EvaluationNodes();
std::vector<ComputationNodeBasePtr> & featureNodes = m_net->FeatureNodes();
std::vector<ComputationNodeBasePtr> & labelNodes = m_net->LabelNodes();
std::vector<ComputationNodeBasePtr> & criterionNodes = m_net->FinalCriterionNodes();
std::vector<ComputationNodeBasePtr> & evaluationNodes = m_net->EvaluationNodes();
if (criterionNodes.size() == 0)
RuntimeError("No CrossEntropyWithSoftmax node found\n");
@ -78,10 +78,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
inputMatrices[featureNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(featureNodes[i])->FunctionValues();
for (size_t i = 0; i < labelNodes.size(); i++)
inputMatrices[labelNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(labelNodes[i])->FunctionValues();
inputMatrices[L"numberobs"] = new Matrix<ElemType>(1, 1, m_net.GetDeviceId());
inputMatrices[L"numberobs"] = new Matrix<ElemType>(1, 1, m_net->GetDeviceId());
dataReader->StartMinibatchLoop(mbSize, 0, testSize);
m_net.StartEvaluateMinibatchLoop(criterionNodes, evaluationNodes);
m_net->StartEvaluateMinibatchLoop(criterionNodes, evaluationNodes);
double epochEvalError = 0;
double epochCrossEntropy = 0;
@ -117,9 +117,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
featureNodes[npos]->UpdateEvalTimeStamp();
labelNodes[npos]->UpdateEvalTimeStamp();
m_net.Evaluate(criterionNodes[npos]); //use only the first criterion. Is there any possibility to use more?
m_net->Evaluate(criterionNodes[npos]); //use only the first criterion. Is there any possibility to use more?
m_net.Evaluate(evaluationNodes[npos]);
m_net->Evaluate(evaluationNodes[npos]);
double mbCrossEntropy = (double)criterionNodes[npos]->Get00Element(); // criterionNode should be a scalar
epochCrossEntropy += mbCrossEntropy;
@ -134,7 +134,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (outputStream.is_open())
{
//TODO: add support to dump multiple outputs
ComputationNodePtr outputNode = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net.OutputNodes()[0]);
ComputationNodePtr outputNode = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->OutputNodes()[0]);
foreach_column(j, outputNode->FunctionValues())
{
foreach_row(i, outputNode->FunctionValues())
@ -200,14 +200,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
only beam search decoding is applied to the last network
*/
double EvaluateEncoderDecoderWithHiddenStates(
vector<ComputationNetwork*> nets,
vector<ComputationNetworkPtr> nets,
vector<IDataReader<ElemType>*> dataReaders,
const size_t mbSize,
const size_t testSize = requestDataSize)
{
size_t iNumNets = nets.size();
ComputationNetwork* decoderNet = nullptr;
ComputationNetworkPtr decoderNet = nullptr;
IDataReader<ElemType>* decoderDataReader = dataReaders[iNumNets - 1];
decoderNet = nets[iNumNets - 1];
@ -396,7 +396,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
void EncodingEvaluateDecodingBeamSearch(
vector<ComputationNetwork*> nets,
vector<ComputationNetworkPtr> nets,
vector<IDataReader<ElemType>*> readers,
IDataWriter<ElemType>& dataWriter,
const vector<wstring>& evalNodeNames,
@ -409,7 +409,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
LogicError("Has to have at least two networks");
}
ComputationNetwork* decoderNet = nets[iNumNets - 1];
ComputationNetworkPtr decoderNet = nets[iNumNets - 1];
IDataReader<ElemType>* encoderDataReader = readers[iNumNets - 2];
IDataReader<ElemType>* decoderDataReader = readers[iNumNets - 1];
vector<ComputationNodeBasePtr> & decoderFeatureNodes = decoderNet->FeatureNodes();
@ -422,7 +422,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//specify nodes to write to file
std::vector<ComputationNodeBasePtr> writeNodes;
for (int i = 0; i < writeNodeNames.size(); i++)
writeNodes.push_back(m_net.GetNodeFromName(writeNodeNames[i]));
writeNodes.push_back(m_net->GetNodeFromName(writeNodeNames[i]));
//prepare features and labels
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
@ -458,7 +458,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
(*ptr)->SetNumParallelSequences(1);
}
Matrix<ElemType> historyMat(m_net.GetDeviceId());
Matrix<ElemType> historyMat(m_net->GetDeviceId());
bool bDecoding = true;
while (bDecoding)
@ -640,16 +640,16 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//specify output nodes and files
std::vector<ComputationNodeBasePtr> outputNodes;
for (int i = 0; i < outputNodeNames.size(); i++)
outputNodes.push_back(m_net.GetNodeFromName(outputNodeNames[i]));
outputNodes.push_back(m_net->GetNodeFromName(outputNodeNames[i]));
//specify nodes to write to file
std::vector<ComputationNodeBasePtr> writeNodes;
for (int i = 0; i < writeNodeNames.size(); i++)
writeNodes.push_back(m_net.GetNodeFromName(writeNodeNames[i]));
writeNodes.push_back(m_net->GetNodeFromName(writeNodeNames[i]));
//prepare features and labels
/*const*/ auto & featureNodes = m_net.FeatureNodes();
const auto & labelNodes = m_net.LabelNodes();
/*const*/ auto & featureNodes = m_net->FeatureNodes();
const auto & labelNodes = m_net->LabelNodes();
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
for (size_t i = 0; i < featureNodes.size(); i++)
@ -671,11 +671,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
// note: GetMinibatchIntoNetwork() will also fetch the MBLayout although we don't need it here. This should not hurt.
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
//actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures();
//actualMBSize = m_net->SetActualMiniBatchSizeFromFeatures();
vector<size_t> best_path;
FindBestPath(&m_net, dataReader,
FindBestPath(m_net, dataReader,
dataWriter, outputNodes,
writeNodes, featureNodes,
beam, &inputMatrices, best_path);
@ -704,7 +704,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fprintf(stderr, "done decoding\n");
}
void FindBestPath(ComputationNetwork* evalnet,
void FindBestPath(ComputationNetworkPtr evalnet,
IDataReader<ElemType>* dataReader, IDataWriter<ElemType>& dataWriter,
const std::vector<ComputationNodeBasePtr>& evalNodes,
const std::vector<ComputationNodeBasePtr>& outputNodes,
@ -865,7 +865,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
/**
beam search decoder
*/
double FindBestPathWithVariableLength(ComputationNetwork* evalnet,
double FindBestPathWithVariableLength(ComputationNetworkPtr evalnet,
size_t inputLength,
IDataReader<ElemType>* dataReader,
IDataWriter<ElemType>& dataWriter,

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -382,7 +382,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void SGD<ElemType>::Adapt(wstring origModelFileName, wstring refNodeName,
IDataReader<ElemType>* trainSetDataReader,
IDataReader<ElemType>* validationSetDataReader,
const DEVICEID_TYPE deviceID, const bool makeMode)
const DEVICEID_TYPE deviceId, const bool makeMode)
{
if (origModelFileName == L"" || trainSetDataReader == nullptr)
InvalidArgument("origModel and trainSetDataReader should not be null.");
@ -394,27 +394,27 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return;
}
ComputationNetwork net(deviceID);
ComputationNetworkPtr net;
if (startEpoch >= 0)
{
wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1);
fprintf(stderr, "Starting from checkpoint. Load Network From File %ls.\n", modelFileName.c_str());
net.LoadFromFile<ElemType>(modelFileName);
net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
}
else
{
fprintf(stderr, "Load Network From the original model file %ls.\n", origModelFileName.c_str());
net.LoadFromFile<ElemType>(origModelFileName);
net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, origModelFileName);
}
startEpoch = max(startEpoch, 0);
ComputationNetwork refNet(deviceID);
ComputationNetworkPtr refNet;
m_needAdaptRegularization = m_adaptationRegType != AdaptationRegType::None && m_adaptationRegWeight > 0;
if (m_needAdaptRegularization)
{
fprintf(stderr, "Load reference Network From the original model file %ls.\n", origModelFileName.c_str());
refNet.LoadFromFile<ElemType>(origModelFileName);
refNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, origModelFileName);
}
ComputationNodeBasePtr refNode;
@ -423,7 +423,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fprintf(stderr, "Checking refNodeName %ls.\n", origModelFileName.c_str());
if (refNodeName == L"")
InvalidArgument("refNodeName does not exist and is needed when adaptationRegType is KL.");
refNode = refNet.GetNodeFromName(refNodeName);
refNode = refNet->GetNodeFromName(refNodeName);
}
TrainOrAdaptModel(startEpoch, net, refNet, refNode, trainSetDataReader, validationSetDataReader);
@ -432,7 +432,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void SGD<ElemType>::SequenceTrain(IComputationNetBuilder<ElemType>* netBuilder, wstring origModelFileName,
IDataReader<ElemType>* trainSetDataReader, IDataReader<ElemType>* validationSetDataReader,
const DEVICEID_TYPE deviceID, const bool makeMode)
const DEVICEID_TYPE deviceId, const bool makeMode)
{
if (netBuilder == nullptr || origModelFileName == L"" || trainSetDataReader == nullptr)
{
@ -447,37 +447,38 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
// Initializes the model from original model.
ComputationNetwork origNet(deviceID);
ComputationNetwork* sequenceNet =
(startEpoch < 0) ? netBuilder->BuildNetworkFromDescription() : &origNet;
// TODO: Comment what this does!
auto origNet = make_shared<ComputationNetwork>(deviceId);
ComputationNetworkPtr sequenceNet =
(startEpoch < 0) ? netBuilder->BuildNetworkFromDescription() : origNet;
std::vector<ComputationNodeBasePtr> addedFeatureNodes;
std::vector<ComputationNodeBasePtr> replacedCriterionNodes;
if (startEpoch < 0)
{
// Loads models.
origNet.LoadFromFile<ElemType>(origModelFileName);
origNet->LoadFromFile<ElemType>(origModelFileName);
// Processes feature nodes.
std::vector<ComputationNodeBasePtr> & sequenceFeatureNodes = sequenceNet->FeatureNodes();
for (size_t i = 0; i < sequenceFeatureNodes.size(); ++i)
{
if (!origNet.NodeNameExist(sequenceFeatureNodes[i]->NodeName()))
if (!origNet->NodeNameExist(sequenceFeatureNodes[i]->NodeName()))
{
addedFeatureNodes.push_back(sequenceFeatureNodes[i]);
origNet.AddFeatureNode(sequenceFeatureNodes[i]);
origNet->AddFeatureNode(sequenceFeatureNodes[i]);
}
}
// Processes criterion nodes.
auto & origCriterionNodes = GetTrainCriterionNodes(origNet);
auto & sequenceCriterionNodes = GetTrainCriterionNodes(*sequenceNet);
auto & sequenceCriterionNodes = GetTrainCriterionNodes(sequenceNet);
if (origCriterionNodes.size() == 0 || sequenceCriterionNodes.size() == 0)
{
RuntimeError("Training criterion node does not exist.");
}
replacedCriterionNodes.push_back(origCriterionNodes[0]);
origNet.ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), sequenceCriterionNodes[0]);
origNet.ResetEvalTimeStamp();
origNet->ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), sequenceCriterionNodes[0]);
origNet->ResetEvalTimeStamp();
}
wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1);
@ -489,21 +490,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
fprintf(stderr, "Load Network From the original model file %ls.\n", origModelFileName.c_str());
}
ComputationNetwork *net = (startEpoch < 0) ? &origNet : netBuilder->LoadNetworkFromFile(modelFileName);
ComputationNetworkPtr net = (startEpoch < 0) ? origNet : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
startEpoch = max(startEpoch, 0);
TrainOrAdaptModel(startEpoch, *net, *net, nullptr, trainSetDataReader, validationSetDataReader);
TrainOrAdaptModel(startEpoch, net, net, nullptr, trainSetDataReader, validationSetDataReader);
// Handles deletions carefully here.
// TODO: This is no longer needed since we own our networks and deal with shared_ptrs now.
if (startEpoch < 0)
{
for (size_t i = 0; i < addedFeatureNodes.size(); ++i)
{
origNet.RemoveFeatureNode(addedFeatureNodes[i]);
origNet->RemoveFeatureNode(addedFeatureNodes[i]);
}
auto & origCriterionNodes = GetTrainCriterionNodes(origNet);
origNet.ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), replacedCriterionNodes[0]);
origNet->ReplaceFinalCriterionNode(origCriterionNodes[0]->NodeName(), replacedCriterionNodes[0]);
}
}
@ -533,15 +535,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (startEpoch >= 0)
fprintf(stderr, "Starting from checkpoint. Load Network From File %ls.\n", modelFileName.c_str());
shared_ptr<ComputationNetwork> net;
if (startEpoch < 0)
net = createNetworkFn(deviceId);
else
{
net = make_shared<ComputationNetwork>(deviceId);
net->LoadFromFile<ElemType>(modelFileName, FileOptions::fileOptionsBinary, false/*bAllowNoCriterionNode*/, nullptr/*anotherNetwork*/);
}
// log the device
// create or load from checkpoint
shared_ptr<ComputationNetwork> net = startEpoch < 0 ? createNetworkFn(deviceId) : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
// log the device we are computing on
if (net->GetDeviceId() < 0)
fprintf(stderr, "SGD using CPU.\n");
else
@ -561,51 +558,51 @@ namespace Microsoft { namespace MSR { namespace CNTK {
startEpoch = max(startEpoch, 0);
m_needAdaptRegularization = false;
TrainOrAdaptModel(startEpoch, *net, *net, nullptr, trainSetDataReader, validationSetDataReader);
TrainOrAdaptModel(startEpoch, net, net, nullptr, trainSetDataReader, validationSetDataReader);
}
// protected:
// Get{Train,Eval}CriterionNodes() return a reference that is, unfortunately, dependent on the network.
// So we hold those inside here. Not very nice. Also not thread-safe. This may go away once we fix sequence-to-sequence models properly.
static map<ComputationNetwork*, vector<ComputationNodeBasePtr>> tmpCriterionNodeSets;
static map<ComputationNetworkPtr, vector<ComputationNodeBasePtr>> tmpCriterionNodeSets;
// TODO: test this, then remove this comment
template<class ElemType>
std::vector<ComputationNodeBasePtr> & SGD<ElemType>::GetTrainCriterionNodes(ComputationNetwork& net)
std::vector<ComputationNodeBasePtr> & SGD<ElemType>::GetTrainCriterionNodes(ComputationNetworkPtr net)
{
fprintf(stderr, "GetTrainCriterionNodes %ls ...\n", m_trainCriterionNodeName.c_str());
if (!m_trainCriterionNodeName.empty())
{
tmpCriterionNodeSets[&net] = net.CriterionNodesFrom(m_trainCriterionNodeName);
return tmpCriterionNodeSets[&net];
tmpCriterionNodeSets[net] = net->CriterionNodesFrom(m_trainCriterionNodeName);
return tmpCriterionNodeSets[net];
}
else
return net.FinalCriterionNodes();
return net->FinalCriterionNodes();
}
template<class ElemType>
std::vector<ComputationNodeBasePtr> & SGD<ElemType>::GetEvalCriterionNodes(ComputationNetwork& net)
std::vector<ComputationNodeBasePtr> & SGD<ElemType>::GetEvalCriterionNodes(ComputationNetworkPtr net)
{
fprintf(stderr, "GetEvalCriterionNodes %ls ...\n", m_evalCriterionNodeName.c_str());
if (!m_evalCriterionNodeName.empty())
{
tmpCriterionNodeSets[&net] = net.CriterionNodesFrom(m_evalCriterionNodeName);
return tmpCriterionNodeSets[&net];
tmpCriterionNodeSets[net] = net->CriterionNodesFrom(m_evalCriterionNodeName);
return tmpCriterionNodeSets[net];
}
else
return net.EvaluationNodes();
return net->EvaluationNodes();
}
template<class ElemType>
void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetwork& net,
ComputationNetwork& refNet,
void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
ComputationNodeBasePtr refNode,
IDataReader<ElemType>* trainSetDataReader,
IDataReader<ElemType>* validationSetDataReader)
{
auto & featureNodes = net.FeatureNodes();
auto & labelNodes = net.LabelNodes();
auto & featureNodes = net->FeatureNodes();
auto & labelNodes = net->LabelNodes();
auto & criterionNodes = GetTrainCriterionNodes(net);
auto & evaluationNodes = GetEvalCriterionNodes(net);
@ -613,7 +610,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//we intentionally separate it from above loop to make sure forward computing gets the right matrices
fprintf(stderr, "\n\nAllocating matrices for gradient computing\n");
for (int i = 0; i < criterionNodes.size(); i++)
net.AllocateGradientMatrices(criterionNodes[i]);
net->AllocateGradientMatrices(criterionNodes[i]);
// give the layout something to validate with (some code below validates the network before actually receiving data)
// Note: yak!
@ -627,7 +624,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
auto & node = nodes[i];
auto * functionValues = &dynamic_pointer_cast<ComputationNode<ElemType>>(node)->FunctionValues();
assert(functionValues->GetNumCols() == net.GetMBLayoutPtr()->GetNumTimeSteps());
assert(functionValues->GetNumCols() == net->GetMBLayoutPtr()->GetNumTimeSteps());
(*inputMatrices)[node->NodeName()] = functionValues;
}
}
@ -651,17 +648,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (size_t i = 0; i < featureNodes.size(); i++)
{
//we need to keep this info to handle deletion
refFeatureNodes[i] = refNet.GetNodeFromName(featureNodes[i]->NodeName());
refNet.ChangeNode(featureNodes[i]->NodeName(), featureNodes[i]);
refFeatureNodes[i] = refNet->GetNodeFromName(featureNodes[i]->NodeName());
refNet->ChangeNode(featureNodes[i]->NodeName(), featureNodes[i]);
}
refNet.RebuildNetwork(refNode);
refNet->RebuildNetwork(refNode);
}
//initializing weights and gradient holder
//only one criterion so far TODO: support multiple ones?
// BUGBUG: fails here in validation--MBLayout not set yet
auto & learnableNodes = net.LearnableNodes(criterionNodes[0]);
auto & learnableNodes = net->LearnableNodes(criterionNodes[0]);
std::list<Matrix<ElemType>> smoothedGradients;
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
@ -669,7 +666,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNodePtr node = dynamic_pointer_cast<ComputationNode<ElemType>>(*nodeIter);
smoothedGradients.push_back(Matrix<ElemType>(node->GetNumRows(),
node->GetNumCols(),
net.GetDeviceId()));
net->GetDeviceId()));
}
double epochCriterion, avgCriterion, prevCriterion, lrControlCriterion;
@ -705,7 +702,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (g_mpi != nullptr)
g_mpi->WaitAll();
net.SaveToFile(GetModelNameForEpoch(int(startEpoch) - 1));
net->SaveToFile(GetModelNameForEpoch(int(startEpoch) - 1));
}
// BUGBUG: This is where the trainSetDataReader->GetNumParallelSequences() is used to further normalize
@ -806,7 +803,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fprintf(stderr, "Learn Rate Per Sample for Epoch[%d] = %.8g is less than minLearnRate %.8g. Training complete.\n",
i + 1, learnRatePerSample, m_minLearnRate);
if (m_autoLearnRateSearchType != LearningRateSearchAlgorithm::None)
net.SaveToFile(m_modelPath);
net->SaveToFile(m_modelPath);
break;
}
@ -966,9 +963,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (m_loadBestModel)
{
fprintf(stderr, "Loaded the previous model which has better training criterion.\n");
net.LoadPersistableParametersFromFile(GetModelNameForEpoch(i - 1),
net->LoadPersistableParametersFromFile(GetModelNameForEpoch(i - 1),
m_validateAfterModelReloading);
net.ResetEvalTimeStamp();
net->ResetEvalTimeStamp();
LoadCheckPointInfo(i - 1,
/*out*/ totalSamplesSeen,
/*out*/ learnRatePerSample,
@ -989,7 +986,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
learnRateReduced = true;
else
{
net.SaveToFile(GetModelNameForEpoch(i, true));
net->SaveToFile(GetModelNameForEpoch(i, true));
fprintf(stderr, "Finished training and saved final model\n\n");
break;
@ -1044,7 +1041,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// persist model and check-point info
if ((g_mpi == nullptr) || g_mpi->IsMainNode())
{
net.SaveToFile(GetModelNameForEpoch(i));
net->SaveToFile(GetModelNameForEpoch(i));
SaveCheckPointInfo(i, totalSamplesSeen, learnRatePerSample, smoothedGradients, prevCriterion, chosenMinibatchSize);
if (!m_keepCheckPointFiles)
{
@ -1078,7 +1075,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (size_t i = 0; i < refFeatureNodes.size(); i++)
{
// note we need to handle deletion carefully
refNet.ChangeNode(refFeatureNodes[i]->NodeName(), refFeatureNodes[i]);
refNet->ChangeNode(refFeatureNodes[i]->NodeName(), refFeatureNodes[i]);
}
}
@ -1089,13 +1086,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// return true if precomputation is executed.
template<class ElemType>
bool SGD<ElemType>::PreCompute(ComputationNetwork& net,
bool SGD<ElemType>::PreCompute(ComputationNetworkPtr net,
IDataReader<ElemType>* trainSetDataReader,
std::vector<ComputationNodeBasePtr> & featureNodes,
std::vector<ComputationNodeBasePtr> & labelNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices)
{
std::list<ComputationNodeBasePtr> nodes = net.GetNodesRequiringPreComputation(); // this tests all HasComputed() flags
std::list<ComputationNodeBasePtr> nodes = net->GetNodesRequiringPreComputation(); // this tests all HasComputed() flags
if (nodes.size() == 0)
{
@ -1119,7 +1116,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0);
else // using only one epoch
trainSetDataReader->StartMinibatchLoop(m_mbSize[0], 0, m_epochSize);
net.StartEvaluateMinibatchLoop(nodes);
net->StartEvaluateMinibatchLoop(nodes);
// initialize
for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
@ -1134,7 +1131,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
net.Evaluate(nodes);
net->Evaluate(nodes);
}
// finalize
for (auto nodeIter = nodes.begin(); nodeIter != nodes.end(); nodeIter++)
@ -1149,20 +1146,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// return a reasonable initial learning rate based on the initial mbsize
template<class ElemType>
double SGD<ElemType>::SearchForBestLearnRate(ComputationNetwork& net,
ComputationNetwork& refNet,
const ComputationNodeBasePtr& refNode, const int epochNumber,
const double curLearnRate,
IDataReader<ElemType>* trainSetDataReader,
const std::vector<ComputationNodeBasePtr> & featureNodes,
const std::vector<ComputationNodeBasePtr> & labelNodes,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
const std::list<ComputationNodeBasePtr> & learnableNodes,
std::list<Matrix<ElemType>>& smoothedGradients,
const bool learnRateInitialized,
const double largestPrevLearnRatePerSample)
double SGD<ElemType>::SearchForBestLearnRate(ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
const ComputationNodeBasePtr& refNode, const int epochNumber,
const double curLearnRate,
IDataReader<ElemType>* trainSetDataReader,
const std::vector<ComputationNodeBasePtr> & featureNodes,
const std::vector<ComputationNodeBasePtr> & labelNodes,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
const std::list<ComputationNodeBasePtr> & learnableNodes,
std::list<Matrix<ElemType>>& smoothedGradients,
const bool learnRateInitialized,
const double largestPrevLearnRatePerSample)
{
double epochCriterion = std::numeric_limits<double>::infinity();
double prevCriterion = std::numeric_limits<double>::infinity();
@ -1190,8 +1187,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
int baseModelEpoch = epochNumber - 1;
net.LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
net.ResetEvalTimeStamp();
net->LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
net->ResetEvalTimeStamp();
double learnRate = learnRatePerSample;
size_t dummyMinibatchSize = 0;
@ -1311,23 +1308,23 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
void SGD<ElemType>::TrainOneMiniEpochAndReloadModel(ComputationNetwork& net,
ComputationNetwork& refNet,
const ComputationNodeBasePtr& refNode, const int epochNumber,
const size_t epochSize, IDataReader<ElemType>* trainSetDataReader,
const double learnRatePerSample,
const size_t minibatchSize,
const std::vector<ComputationNodeBasePtr> & featureNodes,
const std::vector<ComputationNodeBasePtr> & labelNodes,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
const std::list<ComputationNodeBasePtr> & learnableNodes,
std::list<Matrix<ElemType>>& smoothedGradients,
/*out*/ double& epochCriterion,
/*out*/ std::vector<double>& epochEvalErrors,
/*out*/ size_t& totalSamplesSeen,
std::string prefixMsg)
void SGD<ElemType>::TrainOneMiniEpochAndReloadModel(ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
const ComputationNodeBasePtr& refNode, const int epochNumber,
const size_t epochSize, IDataReader<ElemType>* trainSetDataReader,
const double learnRatePerSample,
const size_t minibatchSize,
const std::vector<ComputationNodeBasePtr> & featureNodes,
const std::vector<ComputationNodeBasePtr> & labelNodes,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
const std::list<ComputationNodeBasePtr> & learnableNodes,
std::list<Matrix<ElemType>>& smoothedGradients,
/*out*/ double& epochCriterion,
/*out*/ std::vector<double>& epochEvalErrors,
/*out*/ size_t& totalSamplesSeen,
std::string prefixMsg)
{
TrainOneEpoch(net, refNet, refNode, epochNumber, epochSize,
trainSetDataReader, learnRatePerSample, minibatchSize, featureNodes,
@ -1351,8 +1348,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
int baseModelEpoch = epochNumber - 1;
net.LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
net.ResetEvalTimeStamp();
net->LoadPersistableParametersFromFile(GetModelNameForEpoch(baseModelEpoch), m_validateAfterModelReloading);
net->ResetEvalTimeStamp();
double dummyLearnRate;
double dummtPrevCriterion;
@ -1366,22 +1363,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
size_t SGD<ElemType>::AdaptiveMinibatchSizing(ComputationNetwork& net,
ComputationNetwork& refNet,
const ComputationNodeBasePtr& refNode,
const int epochNumber,
const size_t numFramesToUseInSearch,
IDataReader<ElemType>* trainSetDataReader,
const double learnRatePerSample,
const size_t initialMinibatchSize,
const std::vector<ComputationNodeBasePtr> & featureNodes,
const std::vector<ComputationNodeBasePtr> & labelNodes,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
const std::list<ComputationNodeBasePtr> & learnableNodes,
std::list<Matrix<ElemType>>& smoothedGradients,
const double learningRateAdjustmentFactor)
size_t SGD<ElemType>::AdaptiveMinibatchSizing(ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
const ComputationNodeBasePtr& refNode,
const int epochNumber,
const size_t numFramesToUseInSearch,
IDataReader<ElemType>* trainSetDataReader,
const double learnRatePerSample,
const size_t initialMinibatchSize,
const std::vector<ComputationNodeBasePtr> & featureNodes,
const std::vector<ComputationNodeBasePtr> & labelNodes,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
const std::list<ComputationNodeBasePtr> & learnableNodes,
std::list<Matrix<ElemType>>& smoothedGradients,
const double learningRateAdjustmentFactor)
{
size_t minMinibatchSize = initialMinibatchSize;
size_t chosenMinibatchSize = initialMinibatchSize;
@ -1470,21 +1467,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// uses a small percentage of training data of minibatch to
// speculatively train with various MB sizes; then picks the best
template<class ElemType>
size_t SGD<ElemType>::SearchForBestMinibatchSize(ComputationNetwork& net,
ComputationNetwork& refNet,
const ComputationNodeBasePtr& refNode,
const int epochNumber,
const size_t numFramesToUseInSearch,
IDataReader<ElemType>* trainSetDataReader,
const double learnRatePerSample,
const std::vector<ComputationNodeBasePtr> & featureNodes,
const std::vector<ComputationNodeBasePtr> & labelNodes,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
const std::list<ComputationNodeBasePtr> & learnableNodes,
std::list<Matrix<ElemType>>& smoothedGradients,
const size_t minMinibatchSize, const size_t maxMinibatchSize)
size_t SGD<ElemType>::SearchForBestMinibatchSize(ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
const ComputationNodeBasePtr& refNode,
const int epochNumber,
const size_t numFramesToUseInSearch,
IDataReader<ElemType>* trainSetDataReader,
const double learnRatePerSample,
const std::vector<ComputationNodeBasePtr> & featureNodes,
const std::vector<ComputationNodeBasePtr> & labelNodes,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices,
const std::list<ComputationNodeBasePtr> & learnableNodes,
std::list<Matrix<ElemType>>& smoothedGradients,
const size_t minMinibatchSize, const size_t maxMinibatchSize)
{
// may happen for automatically reduced learning rates
if (minMinibatchSize > maxMinibatchSize)
@ -1574,10 +1571,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// processing more utterances at the same time. Only used in Kaldi2Reader.
// TODO: move the two-forward-pass support out of the reader.
template<class ElemType>
void SGD<ElemType>::AttemptUtteranceDerivativeFeatures(ComputationNetwork& net,
IDataReader<ElemType>* trainSetDataReader,
const std::vector<ComputationNodeBasePtr> & featureNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices)
void SGD<ElemType>::AttemptUtteranceDerivativeFeatures(ComputationNetworkPtr net,
IDataReader<ElemType>* trainSetDataReader,
const std::vector<ComputationNodeBasePtr> & featureNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices)
{
assert(trainSetDataReader != NULL);
std::vector<std::vector<std::pair<wstring, size_t>>> uttInfo;
@ -1587,14 +1584,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
auto & outputNodes = net.OutputNodes();
auto & outputNodes = net->OutputNodes();
if (outputNodes.empty())
LogicError("no output node was found.");
//net.SetActualMiniBatchSizeFromFeatures();
trainSetDataReader->CopyMBLayoutTo(net.GetMBLayoutPtr());
net.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences());
net.Evaluate(outputNodes[0]); // Only evaluate the first output
//net->SetActualMiniBatchSizeFromFeatures();
trainSetDataReader->CopyMBLayoutTo(net->GetMBLayoutPtr());
net->VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences());
net->Evaluate(outputNodes[0]); // Only evaluate the first output
trainSetDataReader->SetNetOutput(uttInfo,
dynamic_pointer_cast<ComputationNode<ElemType>>(outputNodes[0])->FunctionValues(),
pMBLayout);
@ -1636,25 +1633,25 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
size_t SGD<ElemType>::TrainOneEpoch(ComputationNetwork& net,
ComputationNetwork& refNet,
const ComputationNodeBasePtr& refNode,
const int epochNumber,
const size_t epochSize,
IDataReader<ElemType>* trainSetDataReader,
const double learnRatePerSample,
size_t tunedMBSize,
const std::vector<ComputationNodeBasePtr> & featureNodes,
const std::vector<ComputationNodeBasePtr> & labelNodes,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices, // TODO: why is this a pointer?
const std::list<ComputationNodeBasePtr> & learnableNodes,
std::list<Matrix<ElemType>>& smoothedGradients,
/*out*/ double& epochCriterion,
/*out*/ std::vector<double>& epochEvalErrors,
/*out*/ size_t& totalSamplesSeen,
std::string prefixMsg)
size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
const ComputationNodeBasePtr& refNode,
const int epochNumber,
const size_t epochSize,
IDataReader<ElemType>* trainSetDataReader,
const double learnRatePerSample,
size_t tunedMBSize,
const std::vector<ComputationNodeBasePtr> & featureNodes,
const std::vector<ComputationNodeBasePtr> & labelNodes,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::vector<ComputationNodeBasePtr> & evaluationNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices, // TODO: why is this a pointer?
const std::list<ComputationNodeBasePtr> & learnableNodes,
std::list<Matrix<ElemType>>& smoothedGradients,
/*out*/ double& epochCriterion,
/*out*/ std::vector<double>& epochEvalErrors,
/*out*/ size_t& totalSamplesSeen,
std::string prefixMsg)
{
double totalTimeInMBs = 0; // use double since timer has sub-microsecond time resolution
double epochCriterionLastMBs = 0;
@ -1670,8 +1667,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// NOTE: the following two local matrices are not used in distGradAgg path
// assume only one training criterion node for each epoch.
// The criterion values are accumulated here over the minibatches (without having to pull them off the GPU).
Matrix<ElemType> localEpochCriterion(1, 1, net.GetDeviceId());
Matrix<ElemType> localEpochEvalErrors(1, epochEvalErrors.size(), net.GetDeviceId());
Matrix<ElemType> localEpochCriterion(1, 1, net->GetDeviceId());
Matrix<ElemType> localEpochEvalErrors(1, epochEvalErrors.size(), net->GetDeviceId());
localEpochCriterion.SetValue(0);
localEpochEvalErrors.SetValue(0);
@ -1713,11 +1710,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
trainSetDataReader->StartMinibatchLoop(tunedMBSize, epochNumber, epochSize);
}
net.StartEvaluateMinibatchLoop(evaluationNodes);
net.StartEvaluateMinibatchLoop(criterionNodes);
net->StartEvaluateMinibatchLoop(evaluationNodes);
net->StartEvaluateMinibatchLoop(criterionNodes);
if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode)
{
refNet.StartEvaluateMinibatchLoop(refNode);
refNet->StartEvaluateMinibatchLoop(refNode);
}
// Attemps to compute the error signal for the whole utterance, which will
@ -1776,17 +1773,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (m_needAdaptRegularization && m_adaptationRegType == AdaptationRegType::KL && refNode)
{
#if 0 // TODO: where does refNet get its features from?
refNet.ResizeAllFeatureNodes(actualMBSize);
refNet->ResizeAllFeatureNodes(actualMBSize);
#endif
//size_t actualMBSize2 = refNet.SetActualMiniBatchSizeFromFeatures();
size_t actualMBSize2 = refNet.DetermineActualMBSizeFromFeatures();
refNet.GetMBLayoutPtr()->CopyFrom(net.GetMBLayoutPtr()); // TODO: This is UNTESTED (before this was missing, seemingly inconsistently)
refNet.VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences());
//size_t actualMBSize2 = refNet->SetActualMiniBatchSizeFromFeatures();
size_t actualMBSize2 = refNet->DetermineActualMBSizeFromFeatures();
refNet->GetMBLayoutPtr()->CopyFrom(net->GetMBLayoutPtr()); // TODO: This is UNTESTED (before this was missing, seemingly inconsistently)
refNet->VerifyActualNumParallelSequences(trainSetDataReader->GetNumParallelSequences());
if (actualMBSize2 != actualMBSize)
LogicError("TrainOneEpoch: refNet has different MB size than main net??");
refNet.Evaluate(refNode);
refNet->Evaluate(refNode);
Matrix<ElemType>::ScaleAndAdd((ElemType)m_adaptationRegWeight,
dynamic_pointer_cast<ComputationNode<ElemType>>(refNode)->FunctionValues(),
(ElemType)(1.0 - m_adaptationRegWeight),
@ -1795,7 +1792,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//compute eval node first since when gradient is computed the forward function values
//may be changed and need to be recomputed when gradient and function value share the same matrix
net.Evaluate(evaluationNodes);
net->Evaluate(evaluationNodes);
// only compute gradient when learning rate is large enough
if (learnRatePerSample > m_minLearnRate * 0.01)
@ -1804,7 +1801,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// ==============================
// forward prop, back-prop --this is where the magic happens baby, what we have all be waiting for!
// ==============================
net.ComputeGradient<ElemType>(criterionNodes[0]);
net->ComputeGradient<ElemType>(criterionNodes[0]);
// TODO: we should split Evaluate() out from ComputeGradient(), then call them ForwardProp() and BackProp(), for clarity
}
else
@ -1813,7 +1810,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// ==============================
// forward prop
// ==============================
net.Evaluate(criterionNodes[0]);
net->Evaluate(criterionNodes[0]);
}
} // if (actualMBSize > 0)
@ -1821,7 +1818,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//for now since we share the same label masking flag we call this on the network.
//Later, when we apply different labels on different nodes
//we need to add code to call this function multiple times, one for each criteria node
size_t numSamplesWithLabel = net.GetNumSamplesWithLabel(actualMBSize);
size_t numSamplesWithLabel = net->GetNumSamplesWithLabel(actualMBSize);
totalSamplesProcessed += numSamplesWithLabel;
@ -1902,7 +1899,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
LogicError("%ls %ls operation has NaNs in smoothedGradient.", node->NodeName().c_str(), node->OperationName().c_str());
#endif
UpdateWeights(node, smoothedGradient, learnRatePerSample,
GetMomentumPerSample(epochNumber/*BUGBUG workaround:*/, net.GetMBLayoutPtr()->GetNumParallelSequences()), aggregateNumSamples,
GetMomentumPerSample(epochNumber/*BUGBUG workaround:*/, net->GetMBLayoutPtr()->GetNumParallelSequences()), aggregateNumSamples,
m_L2RegWeight, m_L1RegWeight,
m_needAveMultiplier);
#ifdef _DEBUG
@ -2514,14 +2511,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// this probes the automatic gradient computation with random inputs
template<class ElemType>
bool SGD<ElemType>::GradientCheck(ComputationNetwork& net,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::list<ComputationNodeBasePtr> & learnableNodes,
int npos)
bool SGD<ElemType>::GradientCheck(ComputationNetworkPtr net,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::list<ComputationNodeBasePtr> & learnableNodes,
int npos)
{
vector<string> errMsgs;
net.StartEvaluateMinibatchLoop(criterionNodes[npos]);
net->StartEvaluateMinibatchLoop(criterionNodes[npos]);
// gradient checking
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
@ -2540,11 +2537,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
fprintf(stderr, "\n###### d%ls######\n", node->NodeName().c_str());
double eOrg = node->FunctionValues()(irow, icol);
node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
node->FunctionValues().TransferToDeviceIfNotThere(net->GetDeviceId(), true);
node->UpdateEvalTimeStamp();
net.ComputeGradient<ElemType>(criterionNodes[npos]);
net->ComputeGradient<ElemType>(criterionNodes[npos]);
if (node->GradientValues().GetMatrixType() == MatrixType::SPARSE)
{
@ -2556,32 +2553,32 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// TODO: why is this value not used?
criterionNodes[npos]->Get00Element();
double eGradErr = node->GradientValues()(irow, icol);
node->GradientValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
node->GradientValues().TransferToDeviceIfNotThere(net->GetDeviceId(), true);
double ePos = eOrg + EPSILON;
double eNeg = eOrg - EPSILON;
node->FunctionValues()(irow, icol) = (ElemType)ePos;
node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
node->FunctionValues().TransferToDeviceIfNotThere(net->GetDeviceId(), true);
node->UpdateEvalTimeStamp();
net.Evaluate(criterionNodes[npos]);
net->Evaluate(criterionNodes[npos]);
//criterionNode should be a scalar
double mbEvalCriPos = criterionNodes[npos]->Get00Element(); // TODO: make Get00Element() a function of ComputationNodeBase
node->FunctionValues()(irow, icol) = (ElemType)eNeg;
node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
node->FunctionValues().TransferToDeviceIfNotThere(net->GetDeviceId(), true);
node->UpdateEvalTimeStamp();
net.Evaluate(criterionNodes[npos]);
net->Evaluate(criterionNodes[npos]);
// criterionNode should be a scalar
double mbEvalCriNeg = criterionNodes[npos]->Get00Element();
// back to its orginal parameter value
node->FunctionValues()(irow, icol) = (ElemType)eOrg;
node->FunctionValues().TransferToDeviceIfNotThere(net.GetDeviceId(), true);
node->FunctionValues().TransferToDeviceIfNotThere(net->GetDeviceId(), true);
// check if they are consistent
double eGradNum = ((mbEvalCriPos - mbEvalCriNeg) / (ePos - eNeg));

Просмотреть файл

@ -282,26 +282,26 @@ public:
const DEVICEID_TYPE deviceID, const bool makeMode = true);
protected:
std::vector<ComputationNodeBasePtr> & GetTrainCriterionNodes(ComputationNetwork& net);
std::vector<ComputationNodeBasePtr> & GetEvalCriterionNodes(ComputationNetwork& net);
std::vector<ComputationNodeBasePtr> & GetTrainCriterionNodes(ComputationNetworkPtr net);
std::vector<ComputationNodeBasePtr> & GetEvalCriterionNodes(ComputationNetworkPtr net);
void TrainOrAdaptModel(int startEpoch, ComputationNetwork& net,
ComputationNetwork& refNet,
void TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
ComputationNodeBasePtr refNode,
IDataReader<ElemType>* trainSetDataReader,
IDataReader<ElemType>* validationSetDataReader);
protected:
// return true if precomputation is executed.
bool PreCompute(ComputationNetwork& net,
bool PreCompute(ComputationNetworkPtr net,
IDataReader<ElemType>* trainSetDataReader,
std::vector<ComputationNodeBasePtr> & featureNodes,
std::vector<ComputationNodeBasePtr> & labelNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices);
// return a reasonable initial learning rate based on the initial mbsize
double SearchForBestLearnRate(ComputationNetwork& net,
ComputationNetwork& refNet,
double SearchForBestLearnRate(ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
const ComputationNodeBasePtr& refNode, const int epochNumber,
const double curLearnRate,
IDataReader<ElemType>* trainSetDataReader,
@ -315,8 +315,8 @@ protected:
const bool learnRateInitialized,
const double largestPrevLearnRatePerSample);
void TrainOneMiniEpochAndReloadModel(ComputationNetwork& net,
ComputationNetwork& refNet,
void TrainOneMiniEpochAndReloadModel(ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
const ComputationNodeBasePtr& refNode, const int epochNumber,
const size_t epochSize, IDataReader<ElemType>* trainSetDataReader,
const double learnRatePerSample,
@ -333,8 +333,8 @@ protected:
/*out*/ size_t& totalSamplesSeen,
std::string prefixMsg = "");
size_t AdaptiveMinibatchSizing(ComputationNetwork& net,
ComputationNetwork& refNet,
size_t AdaptiveMinibatchSizing(ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
const ComputationNodeBasePtr& refNode,
const int epochNumber,
const size_t numFramesToUseInSearch,
@ -352,8 +352,8 @@ protected:
// uses a small percentage of training data of minibatch to
// speculatively train with various MB sizes; then picks the best
size_t SearchForBestMinibatchSize(ComputationNetwork& net,
ComputationNetwork& refNet,
size_t SearchForBestMinibatchSize(ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
const ComputationNodeBasePtr& refNode,
const int epochNumber,
const size_t numFramesToUseInSearch,
@ -373,13 +373,13 @@ protected:
// for the two-forward-pass sequence and ctc training, which allows
// processing more utterances at the same time. Only used in Kaldi2Reader.
// TODO: move the two-forward-pass support out of the reader.
void AttemptUtteranceDerivativeFeatures(ComputationNetwork& net,
void AttemptUtteranceDerivativeFeatures(ComputationNetworkPtr net,
IDataReader<ElemType>* trainSetDataReader,
const std::vector<ComputationNodeBasePtr> & featureNodes,
std::map<std::wstring, Matrix<ElemType>*>* inputMatrices);
size_t TrainOneEpoch(ComputationNetwork& net,
ComputationNetwork& refNet,
size_t TrainOneEpoch(ComputationNetworkPtr net,
ComputationNetworkPtr refNet,
const ComputationNodeBasePtr& refNode,
const int epochNumber,
const size_t epochSize,
@ -455,7 +455,7 @@ public:
#define EPSILON 1e-5
bool GradientCheck(ComputationNetwork& net,
bool GradientCheck(ComputationNetworkPtr net,
const std::vector<ComputationNodeBasePtr> & criterionNodes,
const std::list<ComputationNodeBasePtr> & learnableNodes,
int npos);

Просмотреть файл

@ -33,7 +33,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
public:
SimpleEvaluator(ComputationNetwork& net, const size_t numMBsToShowResult = 100, const int traceLevel = 0)
SimpleEvaluator(ComputationNetworkPtr net, const size_t numMBsToShowResult = 100, const int traceLevel = 0)
: m_net(net), m_numMBsToShowResult(numMBsToShowResult), m_traceLevel(traceLevel)
{
}
@ -47,21 +47,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (evalNodeNames.size() == 0)
{
fprintf(stderr, "evalNodeNames are not specified, using all the default evalnodes and training criterion nodes.\n");
if (m_net.EvaluationNodes().size() == 0 && m_net.FinalCriterionNodes().size() == 0)
if (m_net->EvaluationNodes().size() == 0 && m_net->FinalCriterionNodes().size() == 0)
LogicError("There is no default evalnodes or training criterion node specified in the network.");
for (int i = 0; i < m_net.EvaluationNodes().size(); i++)
evalNodes.push_back(m_net.EvaluationNodes()[i]);
for (int i = 0; i < m_net->EvaluationNodes().size(); i++)
evalNodes.push_back(m_net->EvaluationNodes()[i]);
for (int i = 0; i < m_net.FinalCriterionNodes().size(); i++)
evalNodes.push_back(m_net.FinalCriterionNodes()[i]);
for (int i = 0; i < m_net->FinalCriterionNodes().size(); i++)
evalNodes.push_back(m_net->FinalCriterionNodes()[i]);
}
else
{
for (int i = 0; i < evalNodeNames.size(); i++)
{
const auto & node = m_net.GetNodeFromName(evalNodeNames[i]);
m_net.BuildAndValidateSubNetwork(node);
const auto & node = m_net->GetNodeFromName(evalNodeNames[i]);
m_net->BuildAndValidateSubNetwork(node);
if (node->GetNumRows() != 1 || node->GetNumCols() != 1)
LogicError("The nodes passed to SimpleEvaluator::Evaluate function must be either eval or training criterion nodes (which evalues to 1x1 value).");
evalNodes.push_back(node);
@ -74,8 +74,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
evalResults.push_back((double)0);
//prepare features and labels
auto & featureNodes = m_net.FeatureNodes();
auto & labelNodes = m_net.LabelNodes();
auto & featureNodes = m_net->FeatureNodes();
auto & labelNodes = m_net->LabelNodes();
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
for (size_t i = 0; i < featureNodes.size(); i++)
@ -95,24 +95,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
evalResultsLastMBs.push_back((ElemType)0);
dataReader->StartMinibatchLoop(mbSize, 0, testSize);
m_net.StartEvaluateMinibatchLoop(evalNodes);
m_net->StartEvaluateMinibatchLoop(evalNodes);
while (DataReaderHelpers::GetMinibatchIntoNetwork(*dataReader, m_net, nullptr, false, false, inputMatrices, actualMBSize))
{
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
//actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures();
//dataReader->CopyMBLayoutTo(m_net.GetMBLayoutPtr());
//m_net.VerifyActualNumParallelSequences(dataReader->GetNumParallelSequences());
//for now since we share the same label masking flag we call this on one node only
//Later, when we apply different labels on different nodes
//we need to add code to call this function multiple times, one for each criteria node
size_t numSamplesWithLabel = m_net.GetNumSamplesWithLabel(actualMBSize);
size_t numSamplesWithLabel = m_net->GetNumSamplesWithLabel(actualMBSize);
for (int i = 0; i < evalNodes.size(); i++)
{
m_net.Evaluate(evalNodes[i]);
m_net->Evaluate(evalNodes[i]);
evalResults[i] += (double)evalNodes[i]->Get00Element(); //criterionNode should be a scalar
}
@ -203,7 +199,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
protected:
ComputationNetwork& m_net;
ComputationNetworkPtr m_net;
size_t m_numMBsToShowResult;
int m_traceLevel;
void operator=(const SimpleEvaluator&); // (not assignable)

Просмотреть файл

@ -26,7 +26,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
public:
SimpleOutputWriter(ComputationNetwork & net, int verbosity = 0) :
SimpleOutputWriter(ComputationNetworkPtr net, int verbosity = 0) :
m_net(net), m_verbosity(verbosity)
{ }
@ -39,20 +39,20 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
if (m_verbosity > 0)
fprintf (stderr, "OutputNodeNames are not specified, using the default outputnodes.\n");
if (m_net.OutputNodes().size() == 0)
if (m_net->OutputNodes().size() == 0)
LogicError("There is no default output node specified in the network.");
outputNodes = m_net.OutputNodes();
outputNodes = m_net->OutputNodes();
}
else
{
for (int i=0; i<outputNodeNames.size(); i++)
outputNodes.push_back(m_net.GetNodeFromName(outputNodeNames[i]));
outputNodes.push_back(m_net->GetNodeFromName(outputNodeNames[i]));
}
//specify feature value nodes
std::vector<ComputationNodeBasePtr>& featureNodes = m_net.FeatureNodes();
std::vector<ComputationNodeBasePtr>& labelNodes = m_net.LabelNodes();
std::vector<ComputationNodeBasePtr>& featureNodes = m_net->FeatureNodes();
std::vector<ComputationNodeBasePtr>& labelNodes = m_net->LabelNodes();
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
for (size_t i=0; i<featureNodes.size(); i++)
inputMatrices[featureNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(featureNodes[i])->FunctionValues();
@ -65,7 +65,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples);
dataReader.SetNumParallelSequences(1);
m_net.StartEvaluateMinibatchLoop(outputNodes);
m_net->StartEvaluateMinibatchLoop(outputNodes);
size_t totalEpochSamples = 0;
std::map<std::wstring, void *, nocase_compare> outputMatrices;
@ -76,13 +76,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
ComputationNetwork::UpdateEvalTimeStamps(labelNodes);
//size_t actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures();
//dataReader.CopyMBLayoutTo(m_net.GetMBLayoutPtr());
//m_net.VerifyActualNumParallelSequences(dataReader.GetNumParallelSequences());
//size_t actualMBSize = m_net->SetActualMiniBatchSizeFromFeatures();
//dataReader.CopyMBLayoutTo(m_net->GetMBLayoutPtr());
//m_net->VerifyActualNumParallelSequences(dataReader.GetNumParallelSequences());
for (int i=0; i<outputNodes.size(); i++)
{
m_net.Evaluate(outputNodes[i]);
m_net->Evaluate(outputNodes[i]);
outputMatrices[outputNodes[i]->NodeName()] = (void *)(&dynamic_pointer_cast<ComputationNode<ElemType>>(outputNodes[i])->FunctionValues());
}
@ -119,15 +119,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (outputNodeNames.size() == 0)
{
fprintf (stderr, "OutputNodeNames are not specified, using the default outputnodes.\n");
if (m_net.OutputNodes().size() == 0)
if (m_net->OutputNodes().size() == 0)
LogicError("There is no default output node specified in the network.");
outputNodes = m_net.OutputNodes();
outputNodes = m_net->OutputNodes();
}
else
{
for (int i=0; i<outputNodeNames.size(); i++)
outputNodes.push_back(m_net.GetNodeFromName(outputNodeNames[i]));
outputNodes.push_back(m_net->GetNodeFromName(outputNodeNames[i]));
}
std::vector<ofstream *> outputStreams;
@ -139,7 +139,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#endif
//specify feature value nodes
auto & featureNodes = m_net.FeatureNodes();
auto & featureNodes = m_net->FeatureNodes();
std::map<std::wstring, Matrix<ElemType>*> inputMatrices;
for (size_t i=0; i<featureNodes.size(); i++)
inputMatrices[featureNodes[i]->NodeName()] = &dynamic_pointer_cast<ComputationNode<ElemType>>(featureNodes[i])->FunctionValues();
@ -147,7 +147,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// evaluate with minibatches
dataReader.StartMinibatchLoop(mbSize, 0, numOutputSamples);
m_net.StartEvaluateMinibatchLoop(outputNodes);
m_net->StartEvaluateMinibatchLoop(outputNodes);
size_t totalEpochSamples = 0;
size_t numMBsRun = 0;
@ -159,13 +159,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
ComputationNetwork::UpdateEvalTimeStamps(featureNodes);
//size_t actualMBSize = m_net.SetActualMiniBatchSizeFromFeatures();
//dataReader.CopyMBLayoutTo(m_net.GetMBLayoutPtr());
//m_net.VerifyActualNumParallelSequences(dataReader.GetNumParallelSequences()); // TODO: This was added by my (fseide) but UNTESTED. If this fails, comment out and let me know.
//size_t actualMBSize = m_net->SetActualMiniBatchSizeFromFeatures();
//dataReader.CopyMBLayoutTo(m_net->GetMBLayoutPtr());
//m_net->VerifyActualNumParallelSequences(dataReader.GetNumParallelSequences()); // TODO: This was added by my (fseide) but UNTESTED. If this fails, comment out and let me know.
for (int i=0; i<outputNodes.size(); i++)
{
m_net.Evaluate(outputNodes[i]);
m_net->Evaluate(outputNodes[i]);
Matrix<ElemType> & outputValues = dynamic_pointer_cast<ComputationNode<ElemType>>(outputNodes[i])->FunctionValues();
ofstream & outputStream = *outputStreams[i];
@ -198,7 +198,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
delete [] tempArray;
}
private:
ComputationNetwork& m_net;
ComputationNetworkPtr m_net;
int m_verbosity;
void operator=(const SimpleOutputWriter&); // (not assignable)
};