made everything build again after deleting the unused MultiNetworks functions, incl. deleting related entries in SimpleNetworkBuilder;

deleted PairNetworkNode;
renamed EsotericActions.cpp to SpecialPurposeActions.cpp
This commit is contained in:
Frank Seide 2016-01-22 09:13:52 -08:00
Родитель 18b7b36cca
Коммит a4a20183aa
20 изменённых файлов: 91 добавлений и 1084 удалений

Просмотреть файл

@ -43,16 +43,6 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config);
template <typename ElemType>
void DoTopologyPlot(const ConfigParameters& config);
// deprecated (EsotericActions.cp)
// special purpose (SpecialPurposeActions.cpp)
template <typename ElemType>
void DoConvertFromDbn(const ConfigParameters& config);
template <typename ElemType>
void DoEvalUnroll(const ConfigParameters& config);
template <typename ElemType>
void DoEncoderDecoder(const ConfigParameters& config);
template <typename ElemType>
void DoBidirectionEncoderDecoder(const ConfigParameters& config);
template <typename ElemType>
void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config);
template <typename ElemType>
void DoBeamSearchDecoding(const ConfigParameters& config);

Просмотреть файл

@ -170,7 +170,7 @@
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
</ClCompile>
<ClCompile Include="..\Common\TimerUtility.cpp" />
<ClCompile Include="EsotericActions.cpp" />
<ClCompile Include="SpecialPurposeActions.cpp" />
<ClCompile Include="EvalActions.cpp" />
<ClCompile Include="OtherActions.cpp" />
<ClCompile Include="TrainActions.cpp" />

Просмотреть файл

@ -19,7 +19,7 @@
<ClCompile Include="OtherActions.cpp">
<Filter>Actions</Filter>
</ClCompile>
<ClCompile Include="EsotericActions.cpp">
<ClCompile Include="SpecialPurposeActions.cpp">
<Filter>Actions</Filter>
</ClCompile>
</ItemGroup>

Просмотреть файл

@ -1,424 +0,0 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// EsotericActions.cpp -- CNTK actions that are deprecated
//
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#include "stdafx.h"
#include "Basics.h"
#include "Actions.h"
#include "ComputationNetwork.h"
#include "ComputationNode.h"
#include "DataReader.h"
#include "DataWriter.h"
#include "SimpleNetworkBuilder.h"
#include "NDLNetworkBuilder.h"
#include "SynchronousExecutionEngine.h"
#include "ModelEditLanguage.h"
#include "SGD.h"
#include "Config.h"
#include "MultiNetworksSGD.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
#include "MultiNetworksEvaluator.h"
#include "BestGpu.h"
#include "ScriptableObjects.h"
#include "BrainScriptEvaluator.h"
#include "BrainScriptParser.h"
#include <string>
#include <chrono>
#include <algorithm>
#include <vector>
#include <iostream>
#include <queue>
#include <set>
#include <memory>
#ifndef let
#define let const auto
#endif
using namespace std;
using namespace Microsoft::MSR;
using namespace Microsoft::MSR::CNTK;
// ===========================================================================
// DoConvertFromDbn() - implements CNTK "convertdbn" command
// ===========================================================================
template <typename ElemType>
void DoConvertFromDbn(const ConfigParameters& config)
{
wstring modelPath = config(L"modelPath");
wstring dbnModelPath = config(L"dbnModelPath");
auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
net->Save(modelPath);
}
template void DoConvertFromDbn<float>(const ConfigParameters& config);
template void DoConvertFromDbn<double>(const ConfigParameters& config);
// ===========================================================================
// DoEvalUnroll() - implements CNTK "testunroll" command
// ===========================================================================
// Special early implementation of RNNs by emulating them as a DNN.
// The code is very restricted to simple RNNs.
// The idea can be used for more complicated network but need to know which nodes are stateful or time-dependent so that unroll is done in a correct way to represent recurrent networks.
// TODO: can probably be removed.
template <typename ElemType>
void DoEvalUnroll(const ConfigParameters& config)
{
//test
ConfigParameters readerConfig(config(L"reader"));
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType> testDataReader(readerConfig);
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
ConfigArray minibatchSize = config(L"minibatchSize", "40960");
size_t epochSize = config(L"epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
wstring modelPath = config(L"modelPath");
intargvector mbSize = minibatchSize;
wstring path2EvalResults = config(L"path2EvalResults", L"");
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
MultiNetworksEvaluator<ElemType> eval(net);
double evalEntropy;
eval.EvaluateUnroll(&testDataReader, mbSize[0], evalEntropy, path2EvalResults == L"" ? nullptr : path2EvalResults.c_str(), epochSize);
}
template void DoEvalUnroll<float>(const ConfigParameters& config);
template void DoEvalUnroll<double>(const ConfigParameters& config);
// ===========================================================================
// DoEncoderDecoder() - implements CNTK "trainEncoderDecoder" command
// ===========================================================================
/**
Implements the sequence-to-sequence translation method of
http://arxiv.org/pdf/1409.3215.pdf
*/
template <typename ElemType>
void DoEncoderDecoder(const ConfigParameters& config)
{
vector<IComputationNetBuilder<ElemType>*> netBuilders;
vector<IDataReader<ElemType>*> trainDataReader;
vector<IDataReader<ElemType>*> validationDataReader;
ConfigParameters configSGD = config(L"SGD");
bool makeMode = config(L"makeMode", "true");
IComputationNetBuilder<ElemType>* encoderNetBuilder = NULL;
IComputationNetBuilder<ElemType>* decoderNetBuilder = NULL;
ConfigParameters readerConfig = config(L"encoderReader");
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType>* encoderDataReader = new DataReader<ElemType>(readerConfig);
ConfigParameters decoderReaderConfig = config(L"decoderReader");
DataReader<ElemType>* decoderDataReader = new DataReader<ElemType>(decoderReaderConfig);
ConfigParameters cvEncoderReaderConfig = config(L"encoderCVReader");
DataReader<ElemType>* cvEncoderDataReader = new DataReader<ElemType>(cvEncoderReaderConfig);
ConfigParameters cvDecoderReaderConfig = config(L"decoderCVReader");
DataReader<ElemType>* cvDecoderDataReader = new DataReader<ElemType>(cvDecoderReaderConfig);
if (config.Exists("EncoderNetworkBuilder"))
{
ConfigParameters configSNB = config(L"EncoderNetworkBuilder");
encoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
{
LogicError("Need encoder network");
}
if (config.Exists("DecoderNetworkBuilder"))
{
ConfigParameters configSNB = config(L"DecoderNetworkBuilder");
decoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
{
LogicError("Need decoder networks");
}
MultiNetworksSGD<ElemType> sgd(configSGD);
sgd.InitTrainEncoderDecoderWithHiddenStates(configSGD);
netBuilders.push_back(encoderNetBuilder);
netBuilders.push_back(decoderNetBuilder);
trainDataReader.push_back(encoderDataReader);
trainDataReader.push_back(decoderDataReader);
validationDataReader.push_back(cvEncoderDataReader);
validationDataReader.push_back(cvDecoderDataReader);
sgd.EncoderDecoder(netBuilders, (int) config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
delete encoderDataReader;
delete decoderDataReader;
delete cvEncoderDataReader;
delete cvDecoderDataReader;
}
template void DoEncoderDecoder<float>(const ConfigParameters& config);
template void DoEncoderDecoder<double>(const ConfigParameters& config);
// ===========================================================================
// DoBidirectionEncoderDecoder() - implements CNTK "trainBidirectionEncoderDecoder" command
// ===========================================================================
/**
DoBidirectionEncoderDecoder
*/
template <typename ElemType>
void DoBidirectionEncoderDecoder(const ConfigParameters& config)
{
ConfigParameters configSGD = config(L"SGD");
bool makeMode = config(L"makeMode", "true");
IComputationNetBuilder<ElemType>* encoderNetBuilder = NULL;
IComputationNetBuilder<ElemType>* forwardDecoderNetBuilder = NULL;
IComputationNetBuilder<ElemType>* backwardDecoderNetBuilder = NULL;
vector<IComputationNetBuilder<ElemType>*> netBuilders;
vector<IDataReader<ElemType>*> trainDataReader;
vector<IDataReader<ElemType>*> validationDataReader;
ConfigParameters readerConfig = config(L"encoderReader");
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType>* encoderDataReader = new DataReader<ElemType>(readerConfig);
ConfigParameters decoderReaderConfig = config(L"decoderReader");
DataReader<ElemType>* decoderDataReader = new DataReader<ElemType>(decoderReaderConfig);
ConfigParameters backwardDecoderReaderConfig = config(L"backwardDecoderReader");
DataReader<ElemType>* backwardDecoderDataReader = new DataReader<ElemType>(backwardDecoderReaderConfig);
ConfigParameters cvEncoderReaderConfig = config(L"encoderCVReader");
DataReader<ElemType>* cvEncoderDataReader = new DataReader<ElemType>(cvEncoderReaderConfig);
ConfigParameters cvDecoderReaderConfig = config(L"decoderCVReader");
DataReader<ElemType>* cvDecoderDataReader = new DataReader<ElemType>(cvDecoderReaderConfig);
ConfigParameters cvBackwardDecoderReaderConfig = config(L"BackwardDecoderCVReader");
DataReader<ElemType>* cvBackwardDecoderDataReader = new DataReader<ElemType>(cvBackwardDecoderReaderConfig);
if (config.Exists("EncoderNetworkBuilder"))
{
ConfigParameters configSNB = config(L"EncoderNetworkBuilder");
encoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
LogicError("Need encoder network");
if (config.Exists("DecoderNetworkBuilder"))
{
ConfigParameters configSNB = config(L"DecoderNetworkBuilder");
forwardDecoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
{
LogicError("Need decoder networks");
}
if (config.Exists("BackwardDecoderNetworkBuilder"))
{
ConfigParameters configSNB = config(L"BackwardDecoderNetworkBuilder");
backwardDecoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
{
LogicError("Need decoder networks");
}
MultiNetworksSGD<ElemType> sgd(configSGD);
sgd.InitTrainEncoderDecoderWithHiddenStates(configSGD);
netBuilders.push_back(encoderNetBuilder);
netBuilders.push_back(forwardDecoderNetBuilder);
netBuilders.push_back(backwardDecoderNetBuilder);
trainDataReader.push_back(encoderDataReader);
trainDataReader.push_back(decoderDataReader);
trainDataReader.push_back(backwardDecoderDataReader);
validationDataReader.push_back(cvEncoderDataReader);
validationDataReader.push_back(cvDecoderDataReader);
validationDataReader.push_back(cvBackwardDecoderDataReader);
sgd.EncoderDecoder(netBuilders, (int) config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
delete encoderDataReader;
delete decoderDataReader;
delete cvEncoderDataReader;
delete cvDecoderDataReader;
delete backwardDecoderDataReader;
delete cvBackwardDecoderDataReader;
}
template void DoBidirectionEncoderDecoder<float>(const ConfigParameters& config);
template void DoBidirectionEncoderDecoder<double>(const ConfigParameters& config);
// ===========================================================================
// DoEvalEncodingBeamSearchDecoding() - implements CNTK "testEncoderDecoder" command
// ===========================================================================
/**
Originally, this is for testing models trained using the sequence to sequence translation method below
http://arxiv.org/pdf/1409.3215.pdf
Later on, it is extended to be more general to include a sequence of network operations.
*/
template <typename ElemType>
void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config)
{
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
vector<IDataReader<ElemType>*> readers;
ConfigParameters readerConfig = config(L"encoderReader");
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType> encoderReader(readerConfig);
ConfigParameters decoderReaderConfig = config(L"decoderReader");
decoderReaderConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType> decoderReader(decoderReaderConfig);
readers.push_back(&encoderReader);
readers.push_back(&decoderReader);
ConfigArray minibatchSize = config(L"minibatchSize", "40960");
size_t epochSize = config(L"epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
wstring encoderModelPath = config(L"encoderModelPath");
wstring decoderModelPath = config(L"decoderModelPath");
intargvector mbSize = minibatchSize;
int traceLevel = config(L"traceLevel", "0");
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
vector<ComputationNetworkPtr> nets;
auto encoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, encoderModelPath, FileOptions::fileOptionsBinary, true);
auto decoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, decoderModelPath, FileOptions::fileOptionsBinary, false, encoderNet.get());
nets.push_back(encoderNet);
nets.push_back(decoderNet);
ConfigArray evalNodeNames = config(L"evalNodeNames");
vector<wstring> evalNodeNamesVector;
for (int i = 0; i < evalNodeNames.size(); ++i)
{
evalNodeNamesVector.push_back(evalNodeNames[i]);
}
ConfigArray outputNodeNames = config(L"outputNodeNames");
vector<wstring> outputNodeNamesVector;
for (int i = 0; i < outputNodeNames.size(); ++i)
{
outputNodeNamesVector.push_back(outputNodeNames[i]);
}
ElemType beamWidth = config(L"beamWidth", "1");
ConfigParameters writerConfig = config(L"writer");
DataWriter<ElemType> testDataWriter(writerConfig);
MultiNetworksEvaluator<ElemType> eval(decoderNet, numMBsToShowResult, traceLevel);
eval.InitTrainEncoderDecoderWithHiddenStates(config);
eval.EncodingEvaluateDecodingBeamSearch(nets, readers,
testDataWriter, evalNodeNamesVector,
outputNodeNamesVector,
mbSize[0], beamWidth, epochSize);
}
template void DoEvalEncodingBeamSearchDecoding<float>(const ConfigParameters& config);
template void DoEvalEncodingBeamSearchDecoding<double>(const ConfigParameters& config);
// ===========================================================================
// DoBeamSearchDecoding() - implements CNTK "beamSearch" command
// ===========================================================================
template <typename ElemType>
static void DoEvalBeamSearch(const ConfigParameters& config, IDataReader<ElemType>& reader)
{
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
ConfigArray minibatchSize = config(L"minibatchSize", "40960");
size_t epochSize = config(L"epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
wstring modelPath = config(L"modelPath");
intargvector mbSize = minibatchSize;
int traceLevel = config(L"traceLevel", "0");
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
ConfigArray evalNodeNames = config(L"evalNodeNames");
vector<wstring> evalNodeNamesVector;
for (int i = 0; i < evalNodeNames.size(); ++i)
{
evalNodeNamesVector.push_back(evalNodeNames[i]);
}
ConfigArray outputNodeNames = config(L"outputNodeNames");
vector<wstring> outputNodeNamesVector;
for (int i = 0; i < outputNodeNames.size(); ++i)
{
outputNodeNamesVector.push_back(outputNodeNames[i]);
}
ElemType beamWidth = config(L"beamWidth", "1");
ConfigParameters writerConfig = config(L"writer");
DataWriter<ElemType> testDataWriter(writerConfig);
MultiNetworksEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
eval.BeamSearch(&reader, testDataWriter, evalNodeNamesVector, outputNodeNamesVector, mbSize[0], beamWidth, epochSize);
}
/**
This is beam search decoder.
Developed by Kaisheng Yao.
It is used in the following work:
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion" in Interspeech 2015
*/
template <typename ElemType>
void DoBeamSearchDecoding(const ConfigParameters& config)
{
//test
ConfigParameters readerConfig = config(L"reader");
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType> testDataReader(readerConfig);
DoEvalBeamSearch(config, testDataReader);
}
template void DoBeamSearchDecoding<float>(const ConfigParameters& config);
template void DoBeamSearchDecoding<double>(const ConfigParameters& config);

Просмотреть файл

@ -14,20 +14,12 @@
#include "ComputationNode.h"
#include "DataReader.h"
#include "DataWriter.h"
#include "SimpleNetworkBuilder.h"
#include "NDLNetworkBuilder.h"
#include "SynchronousExecutionEngine.h"
#include "ModelEditLanguage.h"
#include "SGD.h"
#include "Config.h"
#include "MultiNetworksSGD.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
#include "MultiNetworksEvaluator.h"
#include "BestGpu.h"
#include "ScriptableObjects.h"
#include "BrainScriptEvaluator.h"
#include "BrainScriptParser.h"
#include <string>
#include <chrono>

Просмотреть файл

@ -12,22 +12,9 @@
#include "Actions.h"
#include "ComputationNetwork.h"
#include "ComputationNode.h"
#include "DataReader.h"
#include "DataWriter.h"
#include "SimpleNetworkBuilder.h"
#include "NDLNetworkBuilder.h"
#include "SynchronousExecutionEngine.h"
#include "ModelEditLanguage.h"
#include "SGD.h"
#include "Config.h"
#include "MultiNetworksSGD.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
#include "MultiNetworksEvaluator.h"
#include "BestGpu.h"
#include "ScriptableObjects.h"
#include "BrainScriptEvaluator.h"
#include "BrainScriptParser.h"
#include <string>
#include <chrono>

Просмотреть файл

@ -0,0 +1,54 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// SpecialPurposeActions.cpp -- CNTK actions that serve special purposes (e.g. legacy model conversion)
//
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#include "stdafx.h"
#include "Basics.h"
#include "Actions.h"
#include "ComputationNetwork.h"
#include "ComputationNode.h"
#include "DataReader.h"
#include "DataWriter.h"
#include "SimpleNetworkBuilder.h"
#include "Config.h"
#include "ScriptableObjects.h"
#include <string>
#include <chrono>
#include <algorithm>
#include <vector>
#include <iostream>
#include <queue>
#include <set>
#include <memory>
#ifndef let
#define let const auto
#endif
using namespace std;
using namespace Microsoft::MSR;
using namespace Microsoft::MSR::CNTK;
// ===========================================================================
// DoConvertFromDbn() - implements CNTK "convertdbn" command
// ===========================================================================
// Converts a model file written by the old DBN tool into CNTK's native format.
// config parameters:
//   modelPath    -- output path for the converted CNTK model
//   dbnModelPath -- input path of the DBN-tool model file
template <typename ElemType>
void DoConvertFromDbn(const ConfigParameters& config)
{
wstring modelPath = config(L"modelPath");
wstring dbnModelPath = config(L"dbnModelPath");
// SimpleNetworkBuilder provides the DBN-file parser.
auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
net->Save(modelPath);
}
// explicit instantiations for both precisions
template void DoConvertFromDbn<float>(const ConfigParameters& config);
template void DoConvertFromDbn<double>(const ConfigParameters& config);

Просмотреть файл

@ -20,10 +20,8 @@
#include "ModelEditLanguage.h"
#include "SGD.h"
#include "Config.h"
#include "MultiNetworksSGD.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
#include "MultiNetworksEvaluator.h"
#include "BestGpu.h"
#include "ScriptableObjects.h"
#include "BrainScriptEvaluator.h"
@ -50,76 +48,15 @@ using namespace Microsoft::MSR::CNTK;
// DoTrain() - implements CNTK "train" command
// ===========================================================================
// BrainScriptNetworkBuilder -- adapter that exposes a BrainScript "createNetwork"
// lambda through the legacy IComputationNetBuilder interface, so BrainScript
// network definitions can be used by code paths that expect the old builder API.
template <class ElemType>
class BrainScriptNetworkBuilder : public IComputationNetBuilder<ElemType>
{
typedef shared_ptr<ComputationNetwork> ComputationNetworkPtr;
ComputationNetworkPtr m_net; // last network built or loaded; owned (shared) by this builder
ScriptableObjects::ConfigLambdaPtr m_createNetworkFn; // BrainScript lambda that constructs the network
DEVICEID_TYPE m_deviceId; // device id read from config; used by LoadNetworkFromFile()
public:
// the constructor remembers the config lambda
// TODO: Really this should just take the lambda itself, or rather, this class should just be replaced by a lambda. But we need the IConfigRecord for templates to be compile-compatible with old CNTK config.
BrainScriptNetworkBuilder(const ScriptableObjects::IConfigRecord& config)
{
m_deviceId = config[L"deviceId"]; // TODO: only needed for LoadNetworkFromFile() which should go away anyway
m_createNetworkFn = config[L"createNetwork"].AsPtr<ScriptableObjects::ConfigLambda>();
}
// not supported for old CNTK config style -- this builder only accepts IConfigRecord (BrainScript)
BrainScriptNetworkBuilder(const ConfigParameters& config)
{
NOT_IMPLEMENTED;
}
// build a ComputationNetwork from description language
// Invokes the stored BrainScript lambda with no arguments and caches the
// resulting network in m_net. The interface's optional encoder-network
// argument is unnamed and ignored here.
virtual /*IComputationNetBuilder::*/ ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
{
vector<ScriptableObjects::ConfigValuePtr> args; // this lambda has no arguments
ScriptableObjects::ConfigLambda::NamedParams namedArgs;
let netValue = m_createNetworkFn->Apply(move(args), move(namedArgs), L"BuildNetworkFromDescription");
m_net = netValue.AsPtr<ComputationNetwork>();
// a negative device id denotes the CPU
if (m_net->GetDeviceId() < 0)
fprintf(stderr, "BrainScriptNetworkBuilder using CPU\n");
else
fprintf(stderr, "BrainScriptNetworkBuilder using GPU %d\n", (int) m_net->GetDeviceId());
return m_net;
}
// load an existing file--this is the same code as for NDLNetworkBuilder.h (OK to copy it here because this is temporary code anyway)
// TODO: This does not belong into NetworkBuilder, since the code is the same for all. Just create the network and load the darn thing.
// Returns a raw pointer; ownership stays with this builder via m_net.
virtual /*IComputationNetBuilder::*/ ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr) override
{
if (!m_net || m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load --TODO: why all these options?
{
auto net = make_shared<ComputationNetwork>(m_deviceId);
net->Load<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
m_net = net;
}
m_net->ResetEvalTimeStamps();
return m_net.get();
}
};
// TODO: decide where these should go. Also, do we need three variables?
extern wstring standardFunctions;
extern wstring commonMacros;
extern wstring computationNodes;
// helper that returns 'float' or 'double' depending on ElemType
template <class ElemType>
static const wchar_t* ElemTypeName();
template <>
/*static*/ const wchar_t* ElemTypeName<float>()
{
return L"float";
}
template <>
/*static*/ const wchar_t* ElemTypeName<double>()
{
return L"double";
}
// Maps the ElemType template argument to its BrainScript type-name string.
template <class ElemType>
static const wchar_t* ElemTypeName();
template <>
/*static*/ const wchar_t* ElemTypeName<float>()
{
    return L"float";
}
template <>
/*static*/ const wchar_t* ElemTypeName<double>()
{
    return L"double";
}
function<ComputationNetworkPtr(DEVICEID_TYPE)> GetCreateNetworkFn(const ScriptableObjects::IConfigRecord& config)
{

Просмотреть файл

@ -75,51 +75,50 @@ wstring computationNodes = // TODO: use actual TypeName() here? would first need
QuaternaryStandardNode(ClassBasedCrossEntropyWithSoftmax, labelClassDescriptorVectorSequence, mainInputInfo, mainWeight, classLogProbsBeforeSoftmax)
// BUGBUG: the commented-out ones are not mentioned in the CNTK book, nor are their parameters documented in the source code
BinaryStandardNode(ColumnElementTimes, aVectorSequence, anotherVectorSequence)
BinaryStandardNode(CosDistance, aVectorSequence, anotherVectorSequence)
QuaternaryStandardNode(CosDistanceWithNegativeSamples, aVectorSequence, anotherVectorSequence, numShifts, numNegSamples)
BinaryStandardNode(CosDistance, aVectorSequence, anotherVectorSequence)
QuaternaryStandardNode(CosDistanceWithNegativeSamples, aVectorSequence, anotherVectorSequence, numShifts, numNegSamples)
//BinaryStandardNode(CosDistanceWithNegativeSamplesNode)
UnaryStandardNode(Cosine, x)
BinaryStandardNode(CrossEntropy, refProbVectorSequence, outProbVectorSequence)
BinaryStandardNode(CrossEntropyWithSoftmax, labelVectorSequence, outProbVectorSequence)
BinaryStandardNode(DiagTimes, diagonalMatrixAsColumnVector, matrix)
UnaryStandardNode(Dropout, activationVectorSequence)
BinaryStandardNode(CrossEntropy, refProbVectorSequence, outProbVectorSequence)
BinaryStandardNode(CrossEntropyWithSoftmax, labelVectorSequence, outProbVectorSequence)
BinaryStandardNode(DiagTimes, diagonalMatrixAsColumnVector, matrix)
UnaryStandardNode(Dropout, activationVectorSequence)
//BinaryStandardNode(DummyCriterionNode)
BinaryStandardNode(ElementTimes, aMatrix, anotherMatrix)
BinaryStandardNode(ErrorPrediction, labelVectorSequence, outVectorSequence) // CNTKBook: ClassificationError?
BinaryStandardNode(ErrorPrediction, labelVectorSequence, outVectorSequence) // CNTKBook: ClassificationError?
UnaryStandardNode(Exp, x)
QuaternaryStandardNode(GMMLogLikelihood, unnormalizedPriorVector, meansAsRows, logStdDevAsRows, dataVectorSequence)
UnaryStandardNode(InvStdDev, dataVectorSequence)
BinaryStandardNode(KhatriRaoProduct, leftMatrix, rightMatrix)
QuaternaryStandardNode(GMMLogLikelihood, unnormalizedPriorVector, meansAsRows, logStdDevAsRows, dataVectorSequence)
UnaryStandardNode(InvStdDev, dataVectorSequence)
BinaryStandardNode(KhatriRaoProduct, leftMatrix, rightMatrix)
//BinaryStandardNode(LSTMNode)
UnaryStandardNode(Log, x)
UnaryStandardNode(LogSoftmax, z)
UnaryStandardNode(LogSoftmax, z)
//BinaryStandardNode(LookupTableNode)
UnaryStandardNode(MatrixL1Reg, matrix)
UnaryStandardNode(MatrixL2Reg, matrix)
UnaryStandardNode(MatrixL2Reg, matrix)
// BUGBUG: CNTKBook also mentions L1Norm and L2Norm
UnaryStandardNode(Mean, dataVectorSequence)
BinaryStandardNode(Minus, leftMatrix, rightMatrix)
UnaryStandardNode(Negate, input)
BinaryStandardNode(Minus, leftMatrix, rightMatrix)
UnaryStandardNode(Negate, input)
//BinaryStandardNode(NoiseContrastiveEstimationNode)
//BinaryStandardNode(PairNetworkNode)
//BinaryStandardNode(ParallelNode)
TernaryStandardNode(PerDimMeanVarDeNormalization, dataVectorSequence, meanVector, invStdDevVector) // TODO: correct?
TernaryStandardNode(PerDimMeanVarNormalization, dataVectorSequence, meanVector, invStdDevVector)
BinaryStandardNode(Plus, leftMatrix, rightMatrix)
UnaryStandardNode(RectifiedLinear, z)
BinaryStandardNode(Plus, leftMatrix, rightMatrix)
UnaryStandardNode(RectifiedLinear, z)
//BinaryStandardNode(RowElementTimesNode)
BinaryStandardNode(Scale, scalarScalingFactor, matrix)
//BinaryStandardNode(SequenceDecoderNode)
UnaryStandardNode(Sigmoid, z)
UnaryStandardNode(Softmax, z)
UnaryStandardNode(Hardmax, z)
BinaryStandardNode(SquareError, aMatrix, anotherMatrix)
UnaryStandardNode(Softmax, z)
UnaryStandardNode(Hardmax, z)
BinaryStandardNode(SquareError, aMatrix, anotherMatrix)
//BinaryStandardNode(StrideTimesNode)
//BinaryStandardNode(SumColumnElementsNode)
UnaryStandardNode(SumElements, matrix)
UnaryStandardNode(Tanh, z)
UnaryStandardNode(TimeReverse, vectorSequence)
BinaryStandardNode(Times, leftMatrix, rightMatrix)
UnaryStandardNode(Transpose, matrix)
UnaryStandardNode(Tanh, z)
UnaryStandardNode(TimeReverse, vectorSequence)
BinaryStandardNode(Times, leftMatrix, rightMatrix)
UnaryStandardNode(Transpose, matrix)
//BinaryStandardNode(TransposeTimesNode)
;

Просмотреть файл

@ -23,10 +23,8 @@
#include "SGD.h"
#include "MPIWrapper.h"
#include "Config.h"
#include "MultiNetworksSGD.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
#include "MultiNetworksEvaluator.h"
#include "BestGpu.h"
#include "ProgressTracing.h"
#include "fileutil.h"
@ -240,10 +238,6 @@ void DoCommands(const ConfigParameters& config)
{
DoEval<ElemType>(commandParams);
}
else if (action[j] == "testunroll")
{
DoEvalUnroll<ElemType>(commandParams);
}
else if (action[j] == "edit")
{
DoEdit<ElemType>(commandParams);
@ -284,22 +278,6 @@ void DoCommands(const ConfigParameters& config)
{
DoParameterSVD<ElemType>(commandParams);
}
else if (action[j] == "trainEncoderDecoder")
{
DoEncoderDecoder<ElemType>(commandParams);
}
else if (action[j] == "testEncoderDecoder")
{
DoEvalEncodingBeamSearchDecoding<ElemType>(commandParams);
}
else if (action[j] == "trainBidirectionEncoderDecoder")
{
DoBidirectionEncoderDecoder<ElemType>(commandParams);
}
else if (action[j] == "beamSearch")
{
DoBeamSearchDecoding<ElemType>(commandParams);
}
else
{
RuntimeError("unknown action: %s in command set: %s", action[j].c_str(), command[i].c_str());

Просмотреть файл

@ -280,8 +280,6 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(LSTMNode), L"LSTM"))
ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(PairNetworkNode), L"PairNetwork"))
ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(StrideTimesNode), L"StrideTimes"))
ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(BatchNormalizationNode)))

Просмотреть файл

@ -59,9 +59,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescriptio
case RCRF:
net = BuildSeqTrnLSTMNetworkFromDescription();
break;
case LSTMENCODER:
net = BuildLSTMEncoderNetworkFromDescription();
break;
case UNIDIRECTIONALLSTM:
net = BuildUnidirectionalLSTMNetworksFromDescription();
break;
@ -72,35 +69,12 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescriptio
LogicError("BuildNetworkFromDescription: invalid m_rnnType %d", (int) m_rnnType);
}
// post-process the network
#if 1
// post-process the network
net->CompileNetwork();
#else
net->ValidateNetwork(false /*allowFragment*/, true /*bAllowNoCriterion*/); // no criterion possible because ...TODO: what's the reason?
#endif
return net;
}
// special version for a deprecated implementation of sequence-to-sequence models
template <class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork* encoderNet)
{
    // Only the two alignment-decoder topologies consume the encoder network;
    // every other m_rnnType falls back to the parameterless overload.
    if (m_rnnType == ALIGNMENTSIMILARITYGENERATOR)
    {
        ComputationNetworkPtr net = BuildAlignmentDecoderNetworkFromDescription(encoderNet);
        net->CompileNetwork();
        return net;
    }
    if (m_rnnType == ALIGNMENTSIMILARITYGFORWARDDECODER)
    {
        ComputationNetworkPtr net = BuildAlignmentForwardDecoderNetworkFromDescription(encoderNet);
        net->CompileNetwork();
        return net;
    }
    return BuildNetworkFromDescription();
}
template <class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
{
@ -530,265 +504,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
return m_net;
}
/**
this builds an alignment based LM generator
the aligment node takes a variable length input and relates each element to a variable length output
*/
/// Builds the decoder network of an alignment-based LM generator (forward/generation
/// variant: no training/eval criterion nodes are added, only output nodes).
/// The decoder bridges into the encoder via a PairNetwork node and computes a soft
/// alignment: a softmax over similarities between the previous decoder hidden state
/// and all encoder states produces weights whose weighted sum of encoder states is
/// fed, together with the current input, into the first decoder layer.
/// NOTE(review): this is nearly identical to BuildAlignmentDecoderNetworkFromDescription,
/// which additionally adds class-based cross-entropy criterion nodes; keep the two in sync.
/// encoderNet: previously built encoder; must expose exactly one PairNode and exactly
///             one OutputNode whose dimension matches the first decoder layer.
/// Returns m_net; the network is only constructed on the first call (when still empty).
template <class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
{
unsigned long randomSeed = 1;
size_t numHiddenLayers = m_layerSizes.size() - 2;
size_t numRecurrentLayers = m_recurrentLayers.size(); // NOTE(review): unused in this builder
ComputationNodePtr input, encoderOutput, e,
b, w, u, pastValue, output, label, alignoutput;
ComputationNodePtr clslogpostprob;
ComputationNodePtr clsweight;
ComputationNodePtr columnStride, rowStride;
// sparse one-hot word input; must be projected through a LookupTable below
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
m_net->FeatureNodes().push_back(input);
if (m_lookupTableOrder > 0)
{
// word embedding matrix: [layerSizes[1] x (vocabSize / lookupTableOrder)]
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
output = builder.LookupTable(e, input, L"LookupTable");
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
}
else
{
LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
}
int recur_idx = 0; // NOTE(review): unused in this builder
int offset = m_lookupTableOrder > 0 ? 1 : 0;
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
std::vector<ComputationNodeBasePtr>& encoderPairNodes = encoderNet->PairNodes();
if (encoderPairNodes.size() != 1)
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one pairoutput node as source node for the decoder network: ");
// bridge into the encoder network: the PairNetwork node mirrors the encoder's pair node
encoderOutput = builder.PairNetwork(dynamic_pointer_cast<ComputationNode<ElemType>>(encoderPairNodes[0]), L"pairNetwork");
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
std::vector<ComputationNodeBasePtr>& encoderEvaluationNodes = encoderNet->OutputNodes();
if (encoderEvaluationNodes.size() != 1)
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one output node as source node for the decoder network: ");
if (numHiddenLayers > 0)
{
int i = 1 + offset;
u = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i], m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1));
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
// delay node holding the previous decoder hidden state; its input is attached
// further down, once 'output' exists (closing the recurrent loop)
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i], 1);
// output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
// output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
/// alignment node to get weights from source to target
/// this alignment node computes weights of the current hidden state after special encoder ending symbol to all
/// states before the special encoder ending symbol. The weights are used to summarize all encoder inputs.
/// the weighted sum of inputs are then used as the additional input to the LSTM input in the next layer
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"MatForSimilarity%d", i), m_layerSizes[i], m_layerSizes[i]);
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
// stride constants consumed by StrideTimes; fixed, not learned
columnStride = builder.CreateLearnableParameter(L"columnStride", 1, 1);
columnStride->Value().SetValue(1);
columnStride->SetParameterUpdateRequired(false);
rowStride = builder.CreateLearnableParameter(L"rowStride", 1, 1);
rowStride->Value().SetValue(0);
rowStride->SetParameterUpdateRequired(false);
// soft attention: Softmax(encoderOutput^T * e * pastValue) weights the encoder states
alignoutput = builder.StrideTimes(encoderOutput, builder.Softmax(builder.StrideTimes(builder.Times(builder.Transpose(encoderOutput), e), pastValue, rowStride)), columnStride);
// alignoutput = builder.Times(encoderOutput, builder.Softmax(builder.Times(builder.Times(builder.Transpose(encoderOutput), e), pastValue)));
output = ApplyNonlinearFunction(
builder.Plus(
builder.Times(u, input), builder.Times(w, alignoutput)),
0);
pastValue->AttachInputs(output); // close the recurrent loop through the delay node
input = output;
for (; i < numHiddenLayers; i++)
{
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
}
}
/// need to have [input_dim x output_dim] matrix
/// e.g., [200 x 10000], where 10000 is the vocabulary size
/// this is for speed-up issue as per word matrix can be simply obtained using column slice
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"OW%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
/// the label is a dense matrix. each element is the word index
label = builder.CreateInputNode(L"labels", 4);
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
// class posterior projection; NOTE(review): in this forward variant no criterion
// node consumes clslogpostprob -- it only adds the node to the graph
clslogpostprob = builder.Times(clsweight, input, L"ClassPostProb");
output = builder.Times(builder.Transpose(w), input, L"outputs");
// expose the last hidden layer as a pair node so a further network can attach to it
m_net->PairNodes().push_back(input);
m_net->OutputNodes().push_back(output);
//add softmax layer (if prob is needed or KL reg adaptation is needed)
output = builder.Softmax(output, L"PosteriorProb");
}
return m_net;
}
/// Builds the decoder network of an alignment-based LM generator for training:
/// identical graph construction to BuildAlignmentForwardDecoderNetworkFromDescription
/// except that it additionally wires class-based cross-entropy train/eval criterion
/// nodes (via AddTrainAndEvalCriterionNodes) on top of the class posterior projection.
/// NOTE(review): the two functions are near-duplicates; keep them in sync.
/// encoderNet: previously built encoder; must expose exactly one PairNode and exactly
///             one OutputNode whose dimension matches the first decoder layer.
/// Returns m_net; the network is only constructed on the first call (when still empty).
template <class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
{
unsigned long randomSeed = 1;
size_t numHiddenLayers = m_layerSizes.size() - 2;
size_t numRecurrentLayers = m_recurrentLayers.size(); // NOTE(review): unused in this builder
ComputationNodePtr input, encoderOutput, e,
b, w, u, pastValue, output, label, alignoutput;
ComputationNodePtr clslogpostprob;
ComputationNodePtr clsweight;
ComputationNodePtr columnStride, rowStride;
// sparse one-hot word input; must be projected through a LookupTable below
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
m_net->FeatureNodes().push_back(input);
if (m_lookupTableOrder > 0)
{
// word embedding matrix: [layerSizes[1] x (vocabSize / lookupTableOrder)]
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
output = builder.LookupTable(e, input, L"LookupTable");
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
}
else
{
LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
}
int recur_idx = 0; // NOTE(review): unused in this builder
int offset = m_lookupTableOrder > 0 ? 1 : 0;
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
std::vector<ComputationNodeBasePtr>& encoderPairNodes = encoderNet->PairNodes();
if (encoderPairNodes.size() != 1)
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one pairoutput node as source node for the decoder network: ");
// bridge into the encoder network: the PairNetwork node mirrors the encoder's pair node
encoderOutput = builder.PairNetwork(dynamic_pointer_cast<ComputationNode<ElemType>>(encoderPairNodes[0]), L"pairNetwork");
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
std::vector<ComputationNodeBasePtr>& encoderEvaluationNodes = encoderNet->OutputNodes();
if (encoderEvaluationNodes.size() != 1)
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one output node as source node for the decoder network: ");
if (numHiddenLayers > 0)
{
int i = 1 + offset;
u = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i], m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1));
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
// delay node holding the previous decoder hidden state; its input is attached
// further down, once 'output' exists (closing the recurrent loop)
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i], 1);
// output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
// output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
/// alignment node to get weights from source to target
/// this alignment node computes weights of the current hidden state after special encoder ending symbol to all
/// states before the special encoder ending symbol. The weights are used to summarize all encoder inputs.
/// the weighted sum of inputs are then used as the additional input to the LSTM input in the next layer
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"MatForSimilarity%d", i), m_layerSizes[i], m_layerSizes[i]);
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
// stride constants consumed by StrideTimes; fixed, not learned
columnStride = builder.CreateLearnableParameter(L"columnStride", 1, 1);
columnStride->Value().SetValue(1);
columnStride->SetParameterUpdateRequired(false);
rowStride = builder.CreateLearnableParameter(L"rowStride", 1, 1);
rowStride->Value().SetValue(0);
rowStride->SetParameterUpdateRequired(false);
// soft attention: Softmax(encoderOutput^T * e * pastValue) weights the encoder states
alignoutput = builder.StrideTimes(encoderOutput, builder.Softmax(builder.StrideTimes(builder.Times(builder.Transpose(encoderOutput), e), pastValue, rowStride)), columnStride);
// alignoutput = builder.Times(encoderOutput, builder.Softmax(builder.Times(builder.Times(builder.Transpose(encoderOutput), e), pastValue)));
output = ApplyNonlinearFunction(
builder.Plus(
builder.Times(u, input), builder.Times(w, alignoutput)),
0);
pastValue->AttachInputs(output); // close the recurrent loop through the delay node
input = output;
for (; i < numHiddenLayers; i++)
{
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
}
}
/// need to have [input_dim x output_dim] matrix
/// e.g., [200 x 10000], where 10000 is the vocabulary size
/// this is for speed-up issue as per word matrix can be simply obtained using column slice
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"OW%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
/// the label is a dense matrix. each element is the word index
label = builder.CreateInputNode(L"labels", 4);
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
clslogpostprob = builder.Times(clsweight, input, L"ClassPostProb");
// attach class-based cross-entropy training and evaluation criterion nodes
// (node-name spelling "CrossEntrpy" is historical; do not change -- models reference it)
output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy",
clslogpostprob);
output = builder.Times(builder.Transpose(w), input, L"outputs");
// expose the last hidden layer as a pair node so a further network can attach to it
m_net->PairNodes().push_back(input);
m_net->OutputNodes().push_back(output);
//add softmax layer (if prob is needed or KL reg adaptation is needed)
output = builder.Softmax(output, L"PosteriorProb");
}
return m_net;
}
template <class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFromDescription()
{
@ -1608,95 +1323,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescri
return m_net;
}
/**
This is the encoder LSTM described in the following paper:
I. Sutskever, O. Vinyals and Q. V. Le, "Sequence to sequence learning with neural networks", http://arxiv.org/abs/1409.3215
The following code constructs the encoder; to construct the decoder, use BuildLSTMNetworkFromDescription.
Developed by Kaisheng Yao.
This is used in the following work:
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion", submitted to Interspeech 2015
*/
/// Builds the encoder LSTM of the sequence-to-sequence setup: optional per-dim
/// mean/variance normalization, optional lookup-table word embedding, then a stack
/// of LSTM layers. The last layer's output is pushed to OutputNodes, PairNodes
/// (so a decoder network can connect to it) and EvaluationNodes.
/// Returns m_net; the network is only constructed on the first call (when still empty).
template <class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMEncoderNetworkFromDescription()
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
{
ULONG randomSeed = 1;
size_t i = 0; // index of the next layer to build
size_t numHiddenLayers = m_layerSizes.size() - 1;
size_t numRecurrentLayers = m_recurrentLayers.size(); // NOTE(review): unused in this builder
ComputationNodePtr input, w, b, u, e, pastValue, output, label, prior;
if (m_sparse_input)
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
else
input = builder.CreateInputNode(L"features", m_layerSizes[0]);
m_net->FeatureNodes().push_back(input);
if (m_applyMeanVarNorm)
{
// per-dimension mean/variance normalization of the input features
w = builder.Mean(input);
b = builder.InvStdDev(input);
output = builder.PerDimMeanVarNormalization(input, w, b);
input = output;
}
if (m_lookupTableOrder > 0)
{
// word embedding lookup for (sparse) word-index input
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"EncoderE%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
output = builder.LookupTable(e, input, L"EncoderLookupTable");
#ifdef DEBUG_DECODER
e->Value().SetValue((ElemType) 0.01);
#endif
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
i++;
}
/// direct connect from input node to output node
int recur_idx = 0; // NOTE(review): unused in this builder
int offset = m_lookupTableOrder > 0 ? 1 : 0;
if (numHiddenLayers > 0)
{
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
// first LSTM layer; input width accounts for the lookup-table context window
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
input = output;
i++;
// remaining LSTM layers, optionally separated by dropout
for (; i < numHiddenLayers; i++)
{
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
}
}
m_net->OutputNodes().push_back(output);
m_net->PairNodes().push_back(output); /// need to provide pairnodes so that the next layer of network can connect to this network
m_net->EvaluationNodes().push_back(output);
}
return m_net;
}
/**
Build unidirectional LSTM p(y_t | y_t-1, x_1^t)

Просмотреть файл

@ -38,16 +38,13 @@ enum RNNTYPE
DEEPRNN = 4,
CLASSLM = 8,
LBLM = 16,
LSTMENCODER = 18,
NPLM = 32,
CLASSLSTM = 64,
NCELSTM = 128,
CLSTM = 256,
RCRF = 512,
UNIDIRECTIONALLSTM = 19,
BIDIRECTIONALLSTM = 20,
ALIGNMENTSIMILARITYGENERATOR = 21,
ALIGNMENTSIMILARITYGFORWARDDECODER = 22
BIDIRECTIONALLSTM = 20
};
enum class TrainingCriterion : int // TODO: camel-case these
@ -191,18 +188,12 @@ public:
m_rnnType = CLSTM;
else if (std::find(strType.begin(), strType.end(), L"CRF") != strType.end())
m_rnnType = RCRF;
else if (std::find(strType.begin(), strType.end(), L"LSTMENCODER") != strType.end())
m_rnnType = LSTMENCODER;
else if (std::find(strType.begin(), strType.end(), L"TRANSDUCER") != strType.end() ||
std::find(strType.begin(), strType.end(), L"UNIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
m_rnnType = UNIDIRECTIONALLSTM;
else if (std::find(strType.begin(), strType.end(), L"JOINTCONDITIONALBILSTMSTREAMS") != strType.end() ||
std::find(strType.begin(), strType.end(), L"BIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
m_rnnType = BIDIRECTIONALLSTM;
else if (std::find(strType.begin(), strType.end(), L"ALIGNMENTSIMILARITYGENERATOR") != strType.end())
m_rnnType = ALIGNMENTSIMILARITYGENERATOR;
else if (std::find(strType.begin(), strType.end(), L"ALIGNMENTSIMILARITYGFORWARDDECODER") != strType.end())
m_rnnType = ALIGNMENTSIMILARITYGFORWARDDECODER;
else
InvalidArgument("InitRecurrentConfig: unknown value for rnnType parameter '%ls'", strType[0].c_str());
}
@ -255,7 +246,6 @@ public:
}
ComputationNetworkPtr BuildNetworkFromDescription();
ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* encoderNet); // legacy support of deprecated sequence-to-sequence implementation
ComputationNetworkPtr BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName); // legacy support for fseide's Microsoft-internal tool "DBN.exe"
@ -287,8 +277,6 @@ protected:
ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();
ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription();
ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription();
ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription();
@ -299,10 +287,6 @@ protected:
ComputationNetworkPtr BuildNCELSTMNetworkFromDescription();
ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
//layer is 0 based
ComputationNodePtr ApplyNonlinearFunction(ComputationNodePtr input, const size_t layer, const std::wstring nodeName = L"");
ComputationNodePtr AddTrainAndEvalCriterionNodes(ComputationNodePtr input, ComputationNodePtr label, ComputationNodePtr matrix = nullptr, const std::wstring trainNodeName = L"", const std::wstring evalNodeName = L"", ComputationNodePtr clspostprob = nullptr, ComputationNodePtr trans = nullptr);

Просмотреть файл

@ -183,7 +183,6 @@ static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr>& nes
// This sets index, lowLink, m_visited, and m_inStack.
void ComputationNetwork::DetermineSCCs(const ComputationNodeBasePtr& rootNode)
{
// notice that this graph including graphs from a parent networks if two or more networks are connected via PairNetworkNode
list<ComputationNodeBasePtr> sccStack;
size_t index = 0;
size_t loopId = 0; // BUGBUG: I think this is currently buggy in an edge case, and not needed (use m_allSEQNodes.size() instead).

Просмотреть файл

@ -100,8 +100,6 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
return New<NegateNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(NoiseContrastiveEstimationNode))
return New<NoiseContrastiveEstimationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(PairNetworkNode))
return New<PairNetworkNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ParallelNode))
return New<ParallelNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(PastValueNode))
@ -293,12 +291,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat
return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout));
}
// Creates a free-standing PairNetworkNode of the given dimensions, registers it
// with this network, and returns it (no inputs are attached here; see PairNetwork
// for the variant that pairs an existing node from another network).
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreatePairNetworkNode(const std::wstring& inputName, const size_t rows, const size_t cols)
{
    auto pairNode = New<PairNetworkNode<ElemType>>(net.GetDeviceId(), inputName, rows, cols);
    return net.AddNodeToNetWithElemType(pairNode);
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateConvolutionNode(const std::wstring& nodeName,
const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,
@ -342,17 +334,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat
// The following functions create nodes and link them to the network and their inputs.
// TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
// Pairs node 'a' (owned by another network) into this network: creates a
// PairNetworkNode named 'nodeName', attaches 'a' as its single input, adds it to
// the network, and returns it. Aborts if this network already contains a node
// with a's name, since the pairing would then be ambiguous.
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::PairNetwork(const ComputationNodePtr& a, const std::wstring nodeName)
{
    const bool nameAlreadyInThisNet = (net.GetNodeFromName(a->NodeName(), nullptr, false) != nullptr);
    if (nameAlreadyInThisNet)
    {
        fprintf(stderr, "PairNetwork: asked to pair a node with name %ls in another network. However, this network has already a node with the same name. Should avoid this case.\n", a->NodeName().c_str());
        RuntimeError("PairNetwork: asked to pair a node with name in another network. However, this network has already a node with the same name. Should avoid this case.\n");
    }
    auto pairNode = New<PairNetworkNode<ElemType>>(net.GetDeviceId(), nodeName);
    return net.AddNodeToNetAndAttachInputs(pairNode, a);
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Convolution(const ComputationNodePtr weight,
const ComputationNodePtr inputValues,

Просмотреть файл

@ -51,7 +51,6 @@ public:
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const size_t rows);
ComputationNodePtr CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
ComputationNodePtr CreatePairNetworkNode(const std::wstring& inputName, const size_t rows, const size_t cols);
ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0);
ComputationNodePtr CreateMaxPoolingNode(const std::wstring& nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind);
ComputationNodePtr CreateAveragePoolingNode(const std::wstring& nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind);
@ -60,7 +59,6 @@ public:
ComputationNodePtr CreateComputationNode(const std::wstring& nodeType, const std::wstring& nodeName);
// The following functions create nodes and link them to the network and their inputs.
// TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
ComputationNodePtr PairNetwork(const ComputationNodePtr& a, const std::wstring nodeName = L"");
ComputationNodePtr Convolution(const ComputationNodePtr weight,
const ComputationNodePtr inputValues,
const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,

Просмотреть файл

@ -1662,98 +1662,6 @@ public:
template class StrideTimesNode<float>;
template class StrideTimesNode<double>;
// -----------------------------------------------------------------------
// PairNetworkNode (input)
// -----------------------------------------------------------------------
/**
Pairs this node to a node in another network.
This node provides an interface from this network; the next-layer network can then use this interface to know which node to connect to.
*/
// PairNetworkNode: identity pass-through node used to bridge two networks --
// forward copies the paired input's value, backprop adds this node's gradient
// to the input's gradient.
template <class ElemType>
class PairNetworkNode : public ComputationNode<ElemType>, public NumInputs<1>
{
typedef ComputationNode<ElemType> Base;
UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName()
{
return L"PairNetwork";
}
// allocates the value matrix and sets the row dimension; the column dimension
// is determined by the MBLayout, so col_size is ignored here
void Init(size_t row_size, size_t /*col_size*/)
{
CreateMatrixIfNull(m_value);
SetDims(TensorShape(row_size), HasMBLayout());
UpdateFunctionValuesSize();
}
public:
DeclareConstructorFromConfigWithNumInputs(PairNetworkNode);
PairNetworkNode(DEVICEID_TYPE deviceId, const wstring& name, size_t row_size = 1, size_t col_size = 1)
: Base(deviceId, name)
{
Init(row_size, col_size);
// pre-allocate and zero the gradient so cross-network accumulation starts clean
CreateMatrixIfNull(m_gradient);
m_gradient->Resize(row_size, col_size);
m_gradient->SetValue(0.0f);
}
virtual void Load(File& fstream, size_t modelVersion) override
{
Init(1, 1); // TODO: this looks wrong; should the dimension not come from the loaded model data?
Base::Load(fstream, modelVersion);
}
/// to-do: need to change to the new way of resetting state
// legacy whole-minibatch gradient path: accumulate this node's gradient into the input's
void BackpropToMap(const size_t inputIndex)
{
if (inputIndex > 0)
InvalidArgument("PairNetwork operation only takes one input.");
Matrix<ElemType>::ScaleAndAdd(1.0, Gradient(), Input(inputIndex)->Gradient());
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
if (fr.IsAllFrames())
{
BackpropToMap(inputIndex);
return;
} // TODO: remove these one by one
assert(GetSampleMatrixNumRows() == Gradient().GetNumRows()); // original used m_value->GetNumRows() for loop dimension
assert(m_pMBLayout);
// identity backprop: add this node's gradient slice to the input's gradient slice
Matrix<ElemType> mTmp = Input(inputIndex)->GradientFor(fr);
Matrix<ElemType>::ScaleAndAdd(1.0, GradientFor(fr), mTmp);
}
// gradient computation needs neither the node's output value...
virtual bool OutputUsedInComputingInputNodesGradients() const override
{
return false;
}
// ...nor its input value (pure identity), so both can be released early
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
{
return false;
}
// forward is the identity: copy the paired node's value for this frame range
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
{
Matrix<ElemType> mTmp = ValueFor(fr);
mTmp.SetValue(Input(0)->ValueFor(fr));
}
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
{
Base::Validate(isFinalValidationPass);
InferMBLayoutFromInputsForStandardCase();
SetDims(Input(0)); // inherit dimensions from the paired input
}
};
template class PairNetworkNode<float>;
template class PairNetworkNode<double>;
// -----------------------------------------------------------------------
// ParallelNode (input0, input1)
// TODO: How is this different from RowStack?

Просмотреть файл

@ -2,12 +2,12 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
//helpful macros
// helpful macros
// TODO: the file's name is too general to be included from outside; MathHelpers.h?
//iterators
//
#pragma once
//#pragma once
// iterators
#undef foreach_row
#undef foreach_column
#undef foreach_coord
@ -19,5 +19,5 @@
for (long _i = 0; _i < (_m).GetNumRows(); _i++)
#define foreach_row_in_submat(_i, _istart, _iend, _m) for (long _i = _istart; _i < min(_iend, (_m).GetNumRows()); _i++)
//this functions returns the index of the first column element in the columnwise array representing matrix with _numRows rows
// this functions returns the index of the first column element in the columnwise array representing matrix with _numRows rows
#define column_s_ind_colwisem(_colNum, _numRows) ((_numRows) * (_colNum))

Просмотреть файл

@ -10,7 +10,6 @@
#include "CompositeComputationNodes.h" // for PrecomputeNode
#include "SimpleEvaluator.h"
#include "DataReader.h"
#include "IComputationNetBuilder.h"
#include "ScriptableObjects.h"
#include <vector>
#include <string>

Просмотреть файл

@ -8,6 +8,7 @@
#include "DataReader.h"
#include "ComputationNetwork.h"
#include "DataReaderHelpers.h"
#include "Helpers.h"
#include "fileutil.h"
#include <vector>
#include <string>