made everything build again after deleting the unused MultiNetworks functions, incl. deleting related entries in SimpleNetworkBuilder;

deleted PairNetworkNode;
renamed EsotericActions.cpp to SpecialPurposeActions.cpp
This commit is contained in:
Frank Seide 2016-01-22 09:13:52 -08:00
Parent 18b7b36cca
Commit a4a20183aa
20 changed files with 91 additions and 1084 deletions

View file

@@ -43,16 +43,6 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config);
 template <typename ElemType>
 void DoTopologyPlot(const ConfigParameters& config);
-// deprecated (EsotericActions.cp)
+// special purpose (SpecialPurposeActions.cpp)
 template <typename ElemType>
 void DoConvertFromDbn(const ConfigParameters& config);
-template <typename ElemType>
-void DoEvalUnroll(const ConfigParameters& config);
-template <typename ElemType>
-void DoEncoderDecoder(const ConfigParameters& config);
-template <typename ElemType>
-void DoBidirectionEncoderDecoder(const ConfigParameters& config);
-template <typename ElemType>
-void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config);
-template <typename ElemType>
-void DoBeamSearchDecoding(const ConfigParameters& config);

View file

@@ -170,7 +170,7 @@
 <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
 </ClCompile>
 <ClCompile Include="..\Common\TimerUtility.cpp" />
-<ClCompile Include="EsotericActions.cpp" />
+<ClCompile Include="SpecialPurposeActions.cpp" />
 <ClCompile Include="EvalActions.cpp" />
 <ClCompile Include="OtherActions.cpp" />
 <ClCompile Include="TrainActions.cpp" />

View file

@@ -19,7 +19,7 @@
 <ClCompile Include="OtherActions.cpp">
 <Filter>Actions</Filter>
 </ClCompile>
-<ClCompile Include="EsotericActions.cpp">
+<ClCompile Include="SpecialPurposeActions.cpp">
 <Filter>Actions</Filter>
 </ClCompile>
 </ItemGroup>

View file

@@ -1,424 +0,0 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// EsotericActions.cpp -- CNTK actions that are deprecated
//
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#include "stdafx.h"
#include "Basics.h"
#include "Actions.h"
#include "ComputationNetwork.h"
#include "ComputationNode.h"
#include "DataReader.h"
#include "DataWriter.h"
#include "SimpleNetworkBuilder.h"
#include "NDLNetworkBuilder.h"
#include "SynchronousExecutionEngine.h"
#include "ModelEditLanguage.h"
#include "SGD.h"
#include "Config.h"
#include "MultiNetworksSGD.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
#include "MultiNetworksEvaluator.h"
#include "BestGpu.h"
#include "ScriptableObjects.h"
#include "BrainScriptEvaluator.h"
#include "BrainScriptParser.h"
#include <string>
#include <chrono>
#include <algorithm>
#include <vector>
#include <iostream>
#include <queue>
#include <set>
#include <memory>
#ifndef let
#define let const auto
#endif
using namespace std;
using namespace Microsoft::MSR;
using namespace Microsoft::MSR::CNTK;
// ===========================================================================
// DoConvertFromDbn() - implements CNTK "convertdbn" command
// ===========================================================================
template <typename ElemType>
void DoConvertFromDbn(const ConfigParameters& config)
{
wstring modelPath = config(L"modelPath");
wstring dbnModelPath = config(L"dbnModelPath");
auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
net->Save(modelPath);
}
template void DoConvertFromDbn<float>(const ConfigParameters& config);
template void DoConvertFromDbn<double>(const ConfigParameters& config);
// ===========================================================================
// DoEvalUnroll() - implements CNTK "testunroll" command
// ===========================================================================
// Special early implementation of RNNs by emulating them as a DNN.
// The code is restricted to simple RNNs.
// The idea could be applied to more complicated networks, but one would need to know which nodes are stateful
// or time-dependent so that the unrolling correctly represents the recurrent network.
// TODO: can probably be removed.
template <typename ElemType>
void DoEvalUnroll(const ConfigParameters& config)
{
//test
ConfigParameters readerConfig(config(L"reader"));
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType> testDataReader(readerConfig);
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
ConfigArray minibatchSize = config(L"minibatchSize", "40960");
size_t epochSize = config(L"epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
wstring modelPath = config(L"modelPath");
intargvector mbSize = minibatchSize;
wstring path2EvalResults = config(L"path2EvalResults", L"");
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
MultiNetworksEvaluator<ElemType> eval(net);
double evalEntropy;
eval.EvaluateUnroll(&testDataReader, mbSize[0], evalEntropy, path2EvalResults == L"" ? nullptr : path2EvalResults.c_str(), epochSize);
}
template void DoEvalUnroll<float>(const ConfigParameters& config);
template void DoEvalUnroll<double>(const ConfigParameters& config);
// ===========================================================================
// DoEncoderDecoder() - implements CNTK "trainEncoderDecoder" command
// ===========================================================================
/**
This implements the sequence-to-sequence translation approach described in
http://arxiv.org/pdf/1409.3215.pdf
*/
template <typename ElemType>
void DoEncoderDecoder(const ConfigParameters& config)
{
vector<IComputationNetBuilder<ElemType>*> netBuilders;
vector<IDataReader<ElemType>*> trainDataReader;
vector<IDataReader<ElemType>*> validationDataReader;
ConfigParameters configSGD = config(L"SGD");
bool makeMode = config(L"makeMode", "true");
IComputationNetBuilder<ElemType>* encoderNetBuilder = NULL;
IComputationNetBuilder<ElemType>* decoderNetBuilder = NULL;
ConfigParameters readerConfig = config(L"encoderReader");
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType>* encoderDataReader = new DataReader<ElemType>(readerConfig);
ConfigParameters decoderReaderConfig = config(L"decoderReader");
DataReader<ElemType>* decoderDataReader = new DataReader<ElemType>(decoderReaderConfig);
ConfigParameters cvEncoderReaderConfig = config(L"encoderCVReader");
DataReader<ElemType>* cvEncoderDataReader = new DataReader<ElemType>(cvEncoderReaderConfig);
ConfigParameters cvDecoderReaderConfig = config(L"decoderCVReader");
DataReader<ElemType>* cvDecoderDataReader = new DataReader<ElemType>(cvDecoderReaderConfig);
if (config.Exists("EncoderNetworkBuilder"))
{
ConfigParameters configSNB = config(L"EncoderNetworkBuilder");
encoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
{
LogicError("Need encoder network");
}
if (config.Exists("DecoderNetworkBuilder"))
{
ConfigParameters configSNB = config(L"DecoderNetworkBuilder");
decoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
{
LogicError("Need decoder networks");
}
MultiNetworksSGD<ElemType> sgd(configSGD);
sgd.InitTrainEncoderDecoderWithHiddenStates(configSGD);
netBuilders.push_back(encoderNetBuilder);
netBuilders.push_back(decoderNetBuilder);
trainDataReader.push_back(encoderDataReader);
trainDataReader.push_back(decoderDataReader);
validationDataReader.push_back(cvEncoderDataReader);
validationDataReader.push_back(cvDecoderDataReader);
sgd.EncoderDecoder(netBuilders, (int) config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
delete encoderDataReader;
delete decoderDataReader;
delete cvEncoderDataReader;
delete cvDecoderDataReader;
}
template void DoEncoderDecoder<float>(const ConfigParameters& config);
template void DoEncoderDecoder<double>(const ConfigParameters& config);
// ===========================================================================
// DoBidirectionEncoderDecoder() - implements CNTK "trainBidirectionEncoderDecoder" command
// ===========================================================================
/**
DoBidirectionEncoderDecoder
*/
template <typename ElemType>
void DoBidirectionEncoderDecoder(const ConfigParameters& config)
{
ConfigParameters configSGD = config(L"SGD");
bool makeMode = config(L"makeMode", "true");
IComputationNetBuilder<ElemType>* encoderNetBuilder = NULL;
IComputationNetBuilder<ElemType>* forwardDecoderNetBuilder = NULL;
IComputationNetBuilder<ElemType>* backwardDecoderNetBuilder = NULL;
vector<IComputationNetBuilder<ElemType>*> netBuilders;
vector<IDataReader<ElemType>*> trainDataReader;
vector<IDataReader<ElemType>*> validationDataReader;
ConfigParameters readerConfig = config(L"encoderReader");
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType>* encoderDataReader = new DataReader<ElemType>(readerConfig);
ConfigParameters decoderReaderConfig = config(L"decoderReader");
DataReader<ElemType>* decoderDataReader = new DataReader<ElemType>(decoderReaderConfig);
ConfigParameters backwardDecoderReaderConfig = config(L"backwardDecoderReader");
DataReader<ElemType>* backwardDecoderDataReader = new DataReader<ElemType>(backwardDecoderReaderConfig);
ConfigParameters cvEncoderReaderConfig = config(L"encoderCVReader");
DataReader<ElemType>* cvEncoderDataReader = new DataReader<ElemType>(cvEncoderReaderConfig);
ConfigParameters cvDecoderReaderConfig = config(L"decoderCVReader");
DataReader<ElemType>* cvDecoderDataReader = new DataReader<ElemType>(cvDecoderReaderConfig);
ConfigParameters cvBackwardDecoderReaderConfig = config(L"BackwardDecoderCVReader");
DataReader<ElemType>* cvBackwardDecoderDataReader = new DataReader<ElemType>(cvBackwardDecoderReaderConfig);
if (config.Exists("EncoderNetworkBuilder"))
{
ConfigParameters configSNB = config(L"EncoderNetworkBuilder");
encoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
LogicError("Need encoder network");
if (config.Exists("DecoderNetworkBuilder"))
{
ConfigParameters configSNB = config(L"DecoderNetworkBuilder");
forwardDecoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
{
LogicError("Need decoder networks");
}
if (config.Exists("BackwardDecoderNetworkBuilder"))
{
ConfigParameters configSNB = config(L"BackwardDecoderNetworkBuilder");
backwardDecoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
}
else
{
LogicError("Need decoder networks");
}
MultiNetworksSGD<ElemType> sgd(configSGD);
sgd.InitTrainEncoderDecoderWithHiddenStates(configSGD);
netBuilders.push_back(encoderNetBuilder);
netBuilders.push_back(forwardDecoderNetBuilder);
netBuilders.push_back(backwardDecoderNetBuilder);
trainDataReader.push_back(encoderDataReader);
trainDataReader.push_back(decoderDataReader);
trainDataReader.push_back(backwardDecoderDataReader);
validationDataReader.push_back(cvEncoderDataReader);
validationDataReader.push_back(cvDecoderDataReader);
validationDataReader.push_back(cvBackwardDecoderDataReader);
sgd.EncoderDecoder(netBuilders, (int) config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
delete encoderDataReader;
delete decoderDataReader;
delete cvEncoderDataReader;
delete cvDecoderDataReader;
delete backwardDecoderDataReader;
delete cvBackwardDecoderDataReader;
}
template void DoBidirectionEncoderDecoder<float>(const ConfigParameters& config);
template void DoBidirectionEncoderDecoder<double>(const ConfigParameters& config);
// ===========================================================================
// DoEvalEncodingBeamSearchDecoding() - implements CNTK "testEncoderDecoder" command
// ===========================================================================
/**
Originally, this was for testing models trained using the sequence-to-sequence translation method of
http://arxiv.org/pdf/1409.3215.pdf
Later, it was extended to be more general and to include a sequence of network operations.
*/
template <typename ElemType>
void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config)
{
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
vector<IDataReader<ElemType>*> readers;
ConfigParameters readerConfig = config(L"encoderReader");
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType> encoderReader(readerConfig);
ConfigParameters decoderReaderConfig = config(L"decoderReader");
decoderReaderConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType> decoderReader(decoderReaderConfig);
readers.push_back(&encoderReader);
readers.push_back(&decoderReader);
ConfigArray minibatchSize = config(L"minibatchSize", "40960");
size_t epochSize = config(L"epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
wstring encoderModelPath = config(L"encoderModelPath");
wstring decoderModelPath = config(L"decoderModelPath");
intargvector mbSize = minibatchSize;
int traceLevel = config(L"traceLevel", "0");
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
vector<ComputationNetworkPtr> nets;
auto encoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, encoderModelPath, FileOptions::fileOptionsBinary, true);
auto decoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, decoderModelPath, FileOptions::fileOptionsBinary, false, encoderNet.get());
nets.push_back(encoderNet);
nets.push_back(decoderNet);
ConfigArray evalNodeNames = config(L"evalNodeNames");
vector<wstring> evalNodeNamesVector;
for (int i = 0; i < evalNodeNames.size(); ++i)
{
evalNodeNamesVector.push_back(evalNodeNames[i]);
}
ConfigArray outputNodeNames = config(L"outputNodeNames");
vector<wstring> outputNodeNamesVector;
for (int i = 0; i < outputNodeNames.size(); ++i)
{
outputNodeNamesVector.push_back(outputNodeNames[i]);
}
ElemType beamWidth = config(L"beamWidth", "1");
ConfigParameters writerConfig = config(L"writer");
DataWriter<ElemType> testDataWriter(writerConfig);
MultiNetworksEvaluator<ElemType> eval(decoderNet, numMBsToShowResult, traceLevel);
eval.InitTrainEncoderDecoderWithHiddenStates(config);
eval.EncodingEvaluateDecodingBeamSearch(nets, readers,
testDataWriter, evalNodeNamesVector,
outputNodeNamesVector,
mbSize[0], beamWidth, epochSize);
}
template void DoEvalEncodingBeamSearchDecoding<float>(const ConfigParameters& config);
template void DoEvalEncodingBeamSearchDecoding<double>(const ConfigParameters& config);
// ===========================================================================
// DoBeamSearchDecoding() - implements CNTK "beamSearch" command
// ===========================================================================
template <typename ElemType>
static void DoEvalBeamSearch(const ConfigParameters& config, IDataReader<ElemType>& reader)
{
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
ConfigArray minibatchSize = config(L"minibatchSize", "40960");
size_t epochSize = config(L"epochSize", "0");
if (epochSize == 0)
{
epochSize = requestDataSize;
}
wstring modelPath = config(L"modelPath");
intargvector mbSize = minibatchSize;
int traceLevel = config(L"traceLevel", "0");
size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
ConfigArray evalNodeNames = config(L"evalNodeNames");
vector<wstring> evalNodeNamesVector;
for (int i = 0; i < evalNodeNames.size(); ++i)
{
evalNodeNamesVector.push_back(evalNodeNames[i]);
}
ConfigArray outputNodeNames = config(L"outputNodeNames");
vector<wstring> outputNodeNamesVector;
for (int i = 0; i < outputNodeNames.size(); ++i)
{
outputNodeNamesVector.push_back(outputNodeNames[i]);
}
ElemType beamWidth = config(L"beamWidth", "1");
ConfigParameters writerConfig = config(L"writer");
DataWriter<ElemType> testDataWriter(writerConfig);
MultiNetworksEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
eval.BeamSearch(&reader, testDataWriter, evalNodeNamesVector, outputNodeNamesVector, mbSize[0], beamWidth, epochSize);
}
/**
This is the beam search decoder, developed by Kaisheng Yao.
It is used in the following work:
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion", Interspeech 2015
*/
template <typename ElemType>
void DoBeamSearchDecoding(const ConfigParameters& config)
{
//test
ConfigParameters readerConfig = config(L"reader");
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
DataReader<ElemType> testDataReader(readerConfig);
DoEvalBeamSearch(config, testDataReader);
}
template void DoBeamSearchDecoding<float>(const ConfigParameters& config);
template void DoBeamSearchDecoding<double>(const ConfigParameters& config);
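Aside, for readers of the deleted code above: DoEvalBeamSearch implements the standard beam-search idea, keeping the beamWidth best-scoring partial hypotheses, extending each by every candidate token at each step, then pruning back to beamWidth. A minimal, self-contained C++ sketch of that general idea follows; the Hypothesis type and the toy StepLogProbs scorer are illustrative assumptions, not the deleted MultiNetworksEvaluator implementation.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Hypothesis
{
    std::vector<int> tokens; // partial output sequence
    double logProb = 0.0;    // cumulative log-probability of the sequence
};

// Stand-in for the model: log-probabilities over a 3-token vocabulary at step t.
// A real decoder would query the network here (assumption for the demo).
static std::vector<double> StepLogProbs(const Hypothesis& /*h*/, int t)
{
    return (t % 2 == 0) ? std::vector<double>{ -0.2, -1.8, -3.0 }
                        : std::vector<double>{ -2.5, -0.3, -2.0 };
}

static std::vector<Hypothesis> BeamSearch(size_t beamWidth, int maxLen)
{
    std::vector<Hypothesis> beam(1); // start from a single empty hypothesis
    for (int t = 0; t < maxLen; t++)
    {
        std::vector<Hypothesis> expanded;
        for (const auto& h : beam)                                // extend every hypothesis...
        {
            std::vector<double> logProbs = StepLogProbs(h, t);
            for (int tok = 0; tok < (int) logProbs.size(); tok++) // ...by every candidate token
            {
                Hypothesis ext = h;
                ext.tokens.push_back(tok);
                ext.logProb += logProbs[tok];
                expanded.push_back(std::move(ext));
            }
        }
        // prune back to the beamWidth highest-scoring hypotheses
        std::sort(expanded.begin(), expanded.end(),
                  [](const Hypothesis& a, const Hypothesis& b) { return a.logProb > b.logProb; });
        if (expanded.size() > beamWidth)
            expanded.resize(beamWidth);
        beam = std::move(expanded);
    }
    return beam;
}

int main()
{
    for (const auto& h : BeamSearch(/*beamWidth=*/2, /*maxLen=*/4))
    {
        printf("logProb = %7.3f  tokens:", h.logProb);
        for (int tok : h.tokens)
            printf(" %d", tok);
        printf("\n");
    }
    return 0;
}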

View file

@@ -14,20 +14,12 @@
 #include "ComputationNode.h"
 #include "DataReader.h"
 #include "DataWriter.h"
-#include "SimpleNetworkBuilder.h"
-#include "NDLNetworkBuilder.h"
-#include "SynchronousExecutionEngine.h"
-#include "ModelEditLanguage.h"
-#include "SGD.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"
-#include "BrainScriptParser.h"
 #include <string>
 #include <chrono>

View file

@@ -12,22 +12,9 @@
 #include "Actions.h"
 #include "ComputationNetwork.h"
 #include "ComputationNode.h"
-#include "DataReader.h"
-#include "DataWriter.h"
-#include "SimpleNetworkBuilder.h"
-#include "NDLNetworkBuilder.h"
-#include "SynchronousExecutionEngine.h"
-#include "ModelEditLanguage.h"
-#include "SGD.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
-#include "SimpleEvaluator.h"
-#include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
-#include "BestGpu.h"
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"
-#include "BrainScriptParser.h"
 #include <string>
 #include <chrono>

View file

@@ -0,0 +1,54 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
// SpecialPurposeActions.cpp -- special-purpose CNTK actions
//
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#include "stdafx.h"
#include "Basics.h"
#include "Actions.h"
#include "ComputationNetwork.h"
#include "ComputationNode.h"
#include "DataReader.h"
#include "DataWriter.h"
#include "SimpleNetworkBuilder.h"
#include "Config.h"
#include "ScriptableObjects.h"
#include <string>
#include <chrono>
#include <algorithm>
#include <vector>
#include <iostream>
#include <queue>
#include <set>
#include <memory>
#ifndef let
#define let const auto
#endif
using namespace std;
using namespace Microsoft::MSR;
using namespace Microsoft::MSR::CNTK;
// ===========================================================================
// DoConvertFromDbn() - implements CNTK "convertdbn" command
// ===========================================================================
template <typename ElemType>
void DoConvertFromDbn(const ConfigParameters& config)
{
wstring modelPath = config(L"modelPath");
wstring dbnModelPath = config(L"dbnModelPath");
auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
net->Save(modelPath);
}
template void DoConvertFromDbn<float>(const ConfigParameters& config);
template void DoConvertFromDbn<double>(const ConfigParameters& config);
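A note on the trailing explicit instantiations: DoConvertFromDbn is declared in Actions.h (see the first hunk) but defined in this .cpp file, so the file must explicitly instantiate the float and double versions for the linker to resolve calls from other translation units. A minimal sketch of the idiom, with hypothetical names:

// actions-sketch.h (hypothetical): declaration only; no definition is
// visible to other translation units.
template <typename ElemType>
void DoSomeAction(int arg);

// actions-sketch.cpp (hypothetical): the definition lives here...
template <typename ElemType>
void DoSomeAction(int arg)
{
    (void) arg; // the real work would go here
}

// ...so this file must explicitly instantiate every ElemType the program uses;
// without these lines, callers in other .cpp files get unresolved-symbol errors.
template void DoSomeAction<float>(int arg);
template void DoSomeAction<double>(int arg);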

View file

@@ -20,10 +20,8 @@
 #include "ModelEditLanguage.h"
 #include "SGD.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"
@@ -50,76 +48,15 @@ using namespace Microsoft::MSR::CNTK;
 // DoTrain() - implements CNTK "train" command
 // ===========================================================================
-template <class ElemType>
-class BrainScriptNetworkBuilder : public IComputationNetBuilder<ElemType>
-{
-typedef shared_ptr<ComputationNetwork> ComputationNetworkPtr;
-ComputationNetworkPtr m_net;
-ScriptableObjects::ConfigLambdaPtr m_createNetworkFn;
-DEVICEID_TYPE m_deviceId;
-public:
-// the constructor remembers the config lambda
-// TODO: Really this should just take the lambda itself, or rather, this class should just be replaced by a lambda. But we need the IConfigRecord for templates to be compile-compatible with old CNTK config.
-BrainScriptNetworkBuilder(const ScriptableObjects::IConfigRecord& config)
-{
-m_deviceId = config[L"deviceId"]; // TODO: only needed for LoadNetworkFromFile() which should go away anyway
-m_createNetworkFn = config[L"createNetwork"].AsPtr<ScriptableObjects::ConfigLambda>();
-}
-// not supported for old CNTK
-BrainScriptNetworkBuilder(const ConfigParameters& config)
-{
-NOT_IMPLEMENTED;
-}
-// build a ComputationNetwork from description language
-virtual /*IComputationNetBuilder::*/ ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
-{
-vector<ScriptableObjects::ConfigValuePtr> args; // this lambda has no arguments
-ScriptableObjects::ConfigLambda::NamedParams namedArgs;
-let netValue = m_createNetworkFn->Apply(move(args), move(namedArgs), L"BuildNetworkFromDescription");
-m_net = netValue.AsPtr<ComputationNetwork>();
-if (m_net->GetDeviceId() < 0)
-fprintf(stderr, "BrainScriptNetworkBuilder using CPU\n");
-else
-fprintf(stderr, "BrainScriptNetworkBuilder using GPU %d\n", (int) m_net->GetDeviceId());
-return m_net;
-}
-// load an existing file--this is the same code as for NDLNetworkBuilder.h (OK to copy it here because this is temporary code anyway)
-// TODO: This does not belong into NetworkBuilder, since the code is the same for all. Just create the network and load the darn thing.
-virtual /*IComputationNetBuilder::*/ ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
-bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr) override
-{
-if (!m_net || m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load --TODO: why all these options?
-{
-auto net = make_shared<ComputationNetwork>(m_deviceId);
-net->Load<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
-m_net = net;
-}
-m_net->ResetEvalTimeStamps();
-return m_net.get();
-}
-};
 // TODO: decide where these should go. Also, do we need three variables?
 extern wstring standardFunctions;
 extern wstring commonMacros;
 extern wstring computationNodes;
 // helper that returns 'float' or 'double' depending on ElemType
-template <class ElemType>
-static const wchar_t* ElemTypeName();
-template <>
-/*static*/ const wchar_t* ElemTypeName<float>()
-{
-return L"float";
-}
-template <>
-/*static*/ const wchar_t* ElemTypeName<double>()
-{
-return L"double";
-}
+template <class ElemType> static const wchar_t* ElemTypeName();
+template <> /*static*/ const wchar_t* ElemTypeName<float>() { return L"float"; }
+template <> /*static*/ const wchar_t* ElemTypeName<double>() { return L"double"; }
 function<ComputationNetworkPtr(DEVICEID_TYPE)> GetCreateNetworkFn(const ScriptableObjects::IConfigRecord& config)
 {

View file

@@ -75,51 +75,50 @@ wstring computationNodes = // TODO: use actual TypeName() here? would first need
 QuaternaryStandardNode(ClassBasedCrossEntropyWithSoftmax, labelClassDescriptorVectorSequence, mainInputInfo, mainWeight, classLogProbsBeforeSoftmax)
 // BUGBUG: the commented-out ones are not mentioned in the CNTK book, nor are their parameters documented in the source code
 BinaryStandardNode(ColumnElementTimes, aVectorSequence, anotherVectorSequence)
 BinaryStandardNode(CosDistance, aVectorSequence, anotherVectorSequence)
 QuaternaryStandardNode(CosDistanceWithNegativeSamples, aVectorSequence, anotherVectorSequence, numShifts, numNegSamples)
 //BinaryStandardNode(CosDistanceWithNegativeSamplesNode)
 UnaryStandardNode(Cosine, x)
 BinaryStandardNode(CrossEntropy, refProbVectorSequence, outProbVectorSequence)
 BinaryStandardNode(CrossEntropyWithSoftmax, labelVectorSequence, outProbVectorSequence)
 BinaryStandardNode(DiagTimes, diagonalMatrixAsColumnVector, matrix)
 UnaryStandardNode(Dropout, activationVectorSequence)
 //BinaryStandardNode(DummyCriterionNode)
 BinaryStandardNode(ElementTimes, aMatrix, anotherMatrix)
 BinaryStandardNode(ErrorPrediction, labelVectorSequence, outVectorSequence) // CNTKBook: ClassificationError?
 UnaryStandardNode(Exp, x)
 QuaternaryStandardNode(GMMLogLikelihood, unnormalizedPriorVector, meansAsRows, logStdDevAsRows, dataVectorSequence)
 UnaryStandardNode(InvStdDev, dataVectorSequence)
 BinaryStandardNode(KhatriRaoProduct, leftMatrix, rightMatrix)
 //BinaryStandardNode(LSTMNode)
 UnaryStandardNode(Log, x)
 UnaryStandardNode(LogSoftmax, z)
 //BinaryStandardNode(LookupTableNode)
 UnaryStandardNode(MatrixL1Reg, matrix)
 UnaryStandardNode(MatrixL2Reg, matrix)
 // BUGBUG: CNTKBook also mentions L1Norm and L2Norm
 UnaryStandardNode(Mean, dataVectorSequence)
 BinaryStandardNode(Minus, leftMatrix, rightMatrix)
 UnaryStandardNode(Negate, input)
 //BinaryStandardNode(NoiseContrastiveEstimationNode)
-//BinaryStandardNode(PairNetworkNode)
 //BinaryStandardNode(ParallelNode)
 TernaryStandardNode(PerDimMeanVarDeNormalization, dataVectorSequence, meanVector, invStdDevVector) // TODO: correct?
 TernaryStandardNode(PerDimMeanVarNormalization, dataVectorSequence, meanVector, invStdDevVector)
 BinaryStandardNode(Plus, leftMatrix, rightMatrix)
 UnaryStandardNode(RectifiedLinear, z)
 //BinaryStandardNode(RowElementTimesNode)
 BinaryStandardNode(Scale, scalarScalingFactor, matrix)
 //BinaryStandardNode(SequenceDecoderNode)
 UnaryStandardNode(Sigmoid, z)
 UnaryStandardNode(Softmax, z)
 UnaryStandardNode(Hardmax, z)
 BinaryStandardNode(SquareError, aMatrix, anotherMatrix)
 //BinaryStandardNode(StrideTimesNode)
 //BinaryStandardNode(SumColumnElementsNode)
 UnaryStandardNode(SumElements, matrix)
 UnaryStandardNode(Tanh, z)
 UnaryStandardNode(TimeReverse, vectorSequence)
 BinaryStandardNode(Times, leftMatrix, rightMatrix)
 UnaryStandardNode(Transpose, matrix)
 //BinaryStandardNode(TransposeTimesNode)
 ;

View file

@@ -23,10 +23,8 @@
 #include "SGD.h"
 #include "MPIWrapper.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ProgressTracing.h"
 #include "fileutil.h"
@@ -240,10 +238,6 @@ void DoCommands(const ConfigParameters& config)
 {
 DoEval<ElemType>(commandParams);
 }
-else if (action[j] == "testunroll")
-{
-DoEvalUnroll<ElemType>(commandParams);
-}
 else if (action[j] == "edit")
 {
 DoEdit<ElemType>(commandParams);
@@ -284,22 +278,6 @@ void DoCommands(const ConfigParameters& config)
 {
 DoParameterSVD<ElemType>(commandParams);
 }
-else if (action[j] == "trainEncoderDecoder")
-{
-DoEncoderDecoder<ElemType>(commandParams);
-}
-else if (action[j] == "testEncoderDecoder")
-{
-DoEvalEncodingBeamSearchDecoding<ElemType>(commandParams);
-}
-else if (action[j] == "trainBidirectionEncoderDecoder")
-{
-DoBidirectionEncoderDecoder<ElemType>(commandParams);
-}
-else if (action[j] == "beamSearch")
-{
-DoBeamSearchDecoding<ElemType>(commandParams);
-}
 else
 {
 RuntimeError("unknown action: %s in command set: %s", action[j].c_str(), command[i].c_str());

View file

@@ -280,8 +280,6 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
 ret = true;
 else if (EqualInsensitive(nodeType, OperationNameOf(LSTMNode), L"LSTM"))
 ret = true;
-else if (EqualInsensitive(nodeType, OperationNameOf(PairNetworkNode), L"PairNetwork"))
-ret = true;
 else if (EqualInsensitive(nodeType, OperationNameOf(StrideTimesNode), L"StrideTimes"))
 ret = true;
 else if (EqualInsensitive(nodeType, OperationNameOf(BatchNormalizationNode)))

View file

@@ -59,9 +59,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription
 case RCRF:
 net = BuildSeqTrnLSTMNetworkFromDescription();
 break;
-case LSTMENCODER:
-net = BuildLSTMEncoderNetworkFromDescription();
-break;
 case UNIDIRECTIONALLSTM:
 net = BuildUnidirectionalLSTMNetworksFromDescription();
 break;
@@ -72,35 +69,12 @@
 LogicError("BuildNetworkFromDescription: invalid m_rnnType %d", (int) m_rnnType);
 }
 // post-process the network
-#if 1
 net->CompileNetwork();
-#else
-net->ValidateNetwork(false /*allowFragment*/, true /*bAllowNoCriterion*/); // no criterion possible because ...TODO: what's the reason?
-#endif
 return net;
 }
-// special version for a deprecated implementation of sequence-to-sequence models
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork* encoderNet)
-{
-ComputationNetworkPtr net;
-switch (m_rnnType)
-{
-case ALIGNMENTSIMILARITYGENERATOR:
-net = BuildAlignmentDecoderNetworkFromDescription(encoderNet);
-net->CompileNetwork();
-return net;
-case ALIGNMENTSIMILARITYGFORWARDDECODER:
-net = BuildAlignmentForwardDecoderNetworkFromDescription(encoderNet);
-net->CompileNetwork();
-return net;
-}
-return BuildNetworkFromDescription();
-}
 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
 {
@@ -530,265 +504,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetworkFromDescription
 return m_net;
 }
/**
This builds an alignment-based LM generator.
The alignment node takes a variable-length input and relates each of its elements to a variable-length output.
*/
template <class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
{
unsigned long randomSeed = 1;
size_t numHiddenLayers = m_layerSizes.size() - 2;
size_t numRecurrentLayers = m_recurrentLayers.size();
ComputationNodePtr input, encoderOutput, e,
b, w, u, pastValue, output, label, alignoutput;
ComputationNodePtr clslogpostprob;
ComputationNodePtr clsweight;
ComputationNodePtr columnStride, rowStride;
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
m_net->FeatureNodes().push_back(input);
if (m_lookupTableOrder > 0)
{
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
output = builder.LookupTable(e, input, L"LookupTable");
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
}
else
{
LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
}
int recur_idx = 0;
int offset = m_lookupTableOrder > 0 ? 1 : 0;
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
std::vector<ComputationNodeBasePtr>& encoderPairNodes = encoderNet->PairNodes();
if (encoderPairNodes.size() != 1)
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one pairoutput node as source node for the decoder network: ");
encoderOutput = builder.PairNetwork(dynamic_pointer_cast<ComputationNode<ElemType>>(encoderPairNodes[0]), L"pairNetwork");
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
std::vector<ComputationNodeBasePtr>& encoderEvaluationNodes = encoderNet->OutputNodes();
if (encoderEvaluationNodes.size() != 1)
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one output node as source node for the decoder network: ");
if (numHiddenLayers > 0)
{
int i = 1 + offset;
u = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i], m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1));
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i], 1);
// output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
// output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
/// alignment node to get weights from source to target
/// this alignment node computes weights from the current hidden state (after the special encoder ending symbol) to all
/// states before the special encoder ending symbol. The weights are used to summarize all encoder inputs.
/// The weighted sum of the inputs is then used as the additional input to the LSTM in the next layer.
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"MatForSimilarity%d", i), m_layerSizes[i], m_layerSizes[i]);
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
columnStride = builder.CreateLearnableParameter(L"columnStride", 1, 1);
columnStride->Value().SetValue(1);
columnStride->SetParameterUpdateRequired(false);
rowStride = builder.CreateLearnableParameter(L"rowStride", 1, 1);
rowStride->Value().SetValue(0);
rowStride->SetParameterUpdateRequired(false);
alignoutput = builder.StrideTimes(encoderOutput, builder.Softmax(builder.StrideTimes(builder.Times(builder.Transpose(encoderOutput), e), pastValue, rowStride)), columnStride);
// alignoutput = builder.Times(encoderOutput, builder.Softmax(builder.Times(builder.Times(builder.Transpose(encoderOutput), e), pastValue)));
output = ApplyNonlinearFunction(
builder.Plus(
builder.Times(u, input), builder.Times(w, alignoutput)),
0);
pastValue->AttachInputs(output);
input = output;
for (; i < numHiddenLayers; i++)
{
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
}
}
/// need to have [input_dim x output_dim] matrix
/// e.g., [200 x 10000], where 10000 is the vocabulary size
/// this is for speed-up issue as per word matrix can be simply obtained using column slice
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"OW%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
/// the label is a dense matrix. each element is the word index
label = builder.CreateInputNode(L"labels", 4);
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
clslogpostprob = builder.Times(clsweight, input, L"ClassPostProb");
output = builder.Times(builder.Transpose(w), input, L"outputs");
m_net->PairNodes().push_back(input);
m_net->OutputNodes().push_back(output);
//add softmax layer (if prob is needed or KL reg adaptation is needed)
output = builder.Softmax(output, L"PosteriorProb");
}
return m_net;
}
template <class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet)
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
{
unsigned long randomSeed = 1;
size_t numHiddenLayers = m_layerSizes.size() - 2;
size_t numRecurrentLayers = m_recurrentLayers.size();
ComputationNodePtr input, encoderOutput, e,
b, w, u, pastValue, output, label, alignoutput;
ComputationNodePtr clslogpostprob;
ComputationNodePtr clsweight;
ComputationNodePtr columnStride, rowStride;
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
m_net->FeatureNodes().push_back(input);
if (m_lookupTableOrder > 0)
{
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
output = builder.LookupTable(e, input, L"LookupTable");
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
}
else
{
LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
}
int recur_idx = 0;
int offset = m_lookupTableOrder > 0 ? 1 : 0;
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
std::vector<ComputationNodeBasePtr>& encoderPairNodes = encoderNet->PairNodes();
if (encoderPairNodes.size() != 1)
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one pairoutput node as source node for the decoder network: ");
encoderOutput = builder.PairNetwork(dynamic_pointer_cast<ComputationNode<ElemType>>(encoderPairNodes[0]), L"pairNetwork");
/// the source network side output dimension needs to match the 1st layer dimension in the decoder network
std::vector<ComputationNodeBasePtr>& encoderEvaluationNodes = encoderNet->OutputNodes();
if (encoderEvaluationNodes.size() != 1)
LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one output node as source node for the decoder network: ");
if (numHiddenLayers > 0)
{
int i = 1 + offset;
u = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i], m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1));
m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i], 1);
// output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
// output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
/// alignment node to get weights from source to target
/// this alignment node computes weights from the current hidden state (after the special encoder ending symbol) to all
/// states before the special encoder ending symbol. The weights are used to summarize all encoder inputs.
/// The weighted sum of the inputs is then used as the additional input to the LSTM in the next layer.
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"MatForSimilarity%d", i), m_layerSizes[i], m_layerSizes[i]);
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
columnStride = builder.CreateLearnableParameter(L"columnStride", 1, 1);
columnStride->Value().SetValue(1);
columnStride->SetParameterUpdateRequired(false);
rowStride = builder.CreateLearnableParameter(L"rowStride", 1, 1);
rowStride->Value().SetValue(0);
rowStride->SetParameterUpdateRequired(false);
alignoutput = builder.StrideTimes(encoderOutput, builder.Softmax(builder.StrideTimes(builder.Times(builder.Transpose(encoderOutput), e), pastValue, rowStride)), columnStride);
// alignoutput = builder.Times(encoderOutput, builder.Softmax(builder.Times(builder.Times(builder.Transpose(encoderOutput), e), pastValue)));
output = ApplyNonlinearFunction(
builder.Plus(
builder.Times(u, input), builder.Times(w, alignoutput)),
0);
pastValue->AttachInputs(output);
input = output;
for (; i < numHiddenLayers; i++)
{
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
}
}
/// need to have [input_dim x output_dim] matrix
/// e.g., [200 x 10000], where 10000 is the vocabulary size
/// this is for speed-up issue as per word matrix can be simply obtained using column slice
w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"OW%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
/// the label is a dense matrix. each element is the word index
label = builder.CreateInputNode(L"labels", 4);
clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
clslogpostprob = builder.Times(clsweight, input, L"ClassPostProb");
output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy",
clslogpostprob);
output = builder.Times(builder.Transpose(w), input, L"outputs");
m_net->PairNodes().push_back(input);
m_net->OutputNodes().push_back(output);
//add softmax layer (if prob is needed or KL reg adaptation is needed)
output = builder.Softmax(output, L"PosteriorProb");
}
return m_net;
}
 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFromDescription()
 {
@@ -1608,95 +1323,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescription
 return m_net;
 }
/**
This is the encoder LSTM described in the following paper:
I. Sutskever, O. Vinyals and Q. V. Le, "Sequence to sequence learning with neural networks", http://arxiv.org/abs/1409.3215
The following code constructs the encoder; to construct the decoder, use BuildLSTMNetworkFromDescription.
Developed by Kaisheng Yao.
This is used in the following work:
K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion", submitted to Interspeech 2015
*/
template <class ElemType>
ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMEncoderNetworkFromDescription()
{
ComputationNetworkBuilder<ElemType> builder(*m_net);
if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
{
ULONG randomSeed = 1;
size_t i = 0;
size_t numHiddenLayers = m_layerSizes.size() - 1;
size_t numRecurrentLayers = m_recurrentLayers.size();
ComputationNodePtr input, w, b, u, e, pastValue, output, label, prior;
if (m_sparse_input)
input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
else
input = builder.CreateInputNode(L"features", m_layerSizes[0]);
m_net->FeatureNodes().push_back(input);
if (m_applyMeanVarNorm)
{
w = builder.Mean(input);
b = builder.InvStdDev(input);
output = builder.PerDimMeanVarNormalization(input, w, b);
input = output;
}
if (m_lookupTableOrder > 0)
{
e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"EncoderE%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
output = builder.LookupTable(e, input, L"EncoderLookupTable");
#ifdef DEBUG_DECODER
e->Value().SetValue((ElemType) 0.01);
#endif
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
i++;
}
/// direct connect from input node to output node
int recur_idx = 0;
int offset = m_lookupTableOrder > 0 ? 1 : 0;
if (numHiddenLayers > 0)
{
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
input = output;
i++;
for (; i < numHiddenLayers; i++)
{
//output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
if (m_addDropoutNodes)
input = builder.Dropout(output);
else
input = output;
}
}
m_net->OutputNodes().push_back(output);
m_net->PairNodes().push_back(output); /// need to provide pairnodes so that the next layer of network can connect to this network
m_net->EvaluationNodes().push_back(output);
}
return m_net;
}
 /**
 Build unidirectional LSTM p(y_t | y_t-1, x_1^t)
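For reference, the conditional in this comment is the per-step factor of the autoregressive factorization $p(y_1^T \mid x_1^T) = \prod_{t=1}^{T} p(y_t \mid y_{t-1}, x_1^t)$, i.e. each output token is predicted from the previous output token and the input prefix (a standard formulation, not text from the source).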

View file

@@ -38,16 +38,13 @@ enum RNNTYPE
 DEEPRNN = 4,
 CLASSLM = 8,
 LBLM = 16,
-LSTMENCODER = 18,
 NPLM = 32,
 CLASSLSTM = 64,
 NCELSTM = 128,
 CLSTM = 256,
 RCRF = 512,
 UNIDIRECTIONALLSTM = 19,
-BIDIRECTIONALLSTM = 20,
-ALIGNMENTSIMILARITYGENERATOR = 21,
-ALIGNMENTSIMILARITYGFORWARDDECODER = 22
+BIDIRECTIONALLSTM = 20
 };
 enum class TrainingCriterion : int // TODO: camel-case these
@@ -191,18 +188,12 @@ public:
 m_rnnType = CLSTM;
 else if (std::find(strType.begin(), strType.end(), L"CRF") != strType.end())
 m_rnnType = RCRF;
-else if (std::find(strType.begin(), strType.end(), L"LSTMENCODER") != strType.end())
-m_rnnType = LSTMENCODER;
 else if (std::find(strType.begin(), strType.end(), L"TRANSDUCER") != strType.end() ||
 std::find(strType.begin(), strType.end(), L"UNIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
 m_rnnType = UNIDIRECTIONALLSTM;
 else if (std::find(strType.begin(), strType.end(), L"JOINTCONDITIONALBILSTMSTREAMS") != strType.end() ||
 std::find(strType.begin(), strType.end(), L"BIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
 m_rnnType = BIDIRECTIONALLSTM;
-else if (std::find(strType.begin(), strType.end(), L"ALIGNMENTSIMILARITYGENERATOR") != strType.end())
-m_rnnType = ALIGNMENTSIMILARITYGENERATOR;
-else if (std::find(strType.begin(), strType.end(), L"ALIGNMENTSIMILARITYGFORWARDDECODER") != strType.end())
-m_rnnType = ALIGNMENTSIMILARITYGFORWARDDECODER;
 else
 InvalidArgument("InitRecurrentConfig: unknown value for rnnType parameter '%ls'", strType[0].c_str());
 }
@@ -255,7 +246,6 @@ public:
 }
 ComputationNetworkPtr BuildNetworkFromDescription();
-ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* encoderNet); // legacy support of deprecated sequence-to-sequence implementation
 ComputationNetworkPtr BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName); // legacy support for fseide's Microsoft-internal tool "DBN.exe"
@@ -287,8 +277,6 @@ protected:
 ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();
-ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription();
 ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription();
 ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription();
@@ -299,10 +287,6 @@ protected:
 ComputationNetworkPtr BuildNCELSTMNetworkFromDescription();
-ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
-ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
 //layer is 0 based
 ComputationNodePtr ApplyNonlinearFunction(ComputationNodePtr input, const size_t layer, const std::wstring nodeName = L"");
 ComputationNodePtr AddTrainAndEvalCriterionNodes(ComputationNodePtr input, ComputationNodePtr label, ComputationNodePtr matrix = nullptr, const std::wstring trainNodeName = L"", const std::wstring evalNodeName = L"", ComputationNodePtr clspostprob = nullptr, ComputationNodePtr trans = nullptr);

View file

@@ -183,7 +183,6 @@ static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr>& nes
 // This sets index, lowLink, m_visited, and m_inStack.
 void ComputationNetwork::DetermineSCCs(const ComputationNodeBasePtr& rootNode)
 {
-// notice that this graph including graphs from a parent networks if two or more networks are connected via PairNetworkNode
 list<ComputationNodeBasePtr> sccStack;
 size_t index = 0;
 size_t loopId = 0; // BUGBUG: I think this is currently buggy in an edge case, and not needed (use m_allSEQNodes.size() instead).
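The index/lowLink/stack bookkeeping named in the comment above is that of Tarjan's strongly-connected-components algorithm, which DetermineSCCs uses to find recurrent loops in the graph. A generic, self-contained sketch of the algorithm (illustrative only, not the CNTK implementation):

#include <algorithm>
#include <cstdio>
#include <vector>

// Tarjan's SCC algorithm on an adjacency-list graph (recursive sketch).
struct Tarjan
{
    const std::vector<std::vector<int>>& adj;
    std::vector<int> index, lowLink;   // DFS discovery index and lowest reachable index
    std::vector<bool> inStack;
    std::vector<int> stack;            // nodes of the SCC currently being built
    std::vector<std::vector<int>> sccs;
    int nextIndex = 0;

    explicit Tarjan(const std::vector<std::vector<int>>& a)
        : adj(a), index(a.size(), -1), lowLink(a.size(), 0), inStack(a.size(), false)
    {
        for (int v = 0; v < (int) adj.size(); v++)
            if (index[v] < 0)
                Visit(v);
    }

    void Visit(int v)
    {
        index[v] = lowLink[v] = nextIndex++;
        stack.push_back(v);
        inStack[v] = true;
        for (int w : adj[v])
        {
            if (index[w] < 0) // tree edge: recurse, then propagate lowLink upward
            {
                Visit(w);
                lowLink[v] = std::min(lowLink[v], lowLink[w]);
            }
            else if (inStack[w]) // back/cross edge into the SCC under construction
                lowLink[v] = std::min(lowLink[v], index[w]);
        }
        if (lowLink[v] == index[v]) // v is the root of an SCC: pop its members
        {
            std::vector<int> scc;
            int w;
            do
            {
                w = stack.back();
                stack.pop_back();
                inStack[w] = false;
                scc.push_back(w);
            } while (w != v);
            sccs.push_back(scc);
        }
    }
};

int main()
{
    // 0 -> 1 -> 2 -> 0 forms a loop; node 3 is acyclic
    std::vector<std::vector<int>> adj = { {1}, {2}, {0, 3}, {} };
    Tarjan t(adj);
    for (const auto& scc : t.sccs)
    {
        printf("SCC:");
        for (int v : scc)
            printf(" %d", v);
        printf("\n");
    }
    return 0;
}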

View file

@ -100,8 +100,6 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
return New<NegateNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(NoiseContrastiveEstimationNode))
return New<NoiseContrastiveEstimationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(PairNetworkNode))
return New<PairNetworkNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ParallelNode))
return New<ParallelNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(PastValueNode))
@ -293,12 +291,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat
return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout));
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreatePairNetworkNode(const std::wstring& inputName, const size_t rows, const size_t cols)
{
return net.AddNodeToNetWithElemType(New<PairNetworkNode<ElemType>>(net.GetDeviceId(), inputName, rows, cols));
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateConvolutionNode(const std::wstring& nodeName,
const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,
@ -342,17 +334,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat
// The following functions create nodes and link them to the network and their inputs.
// TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::PairNetwork(const ComputationNodePtr& a, const std::wstring nodeName)
{
if (net.GetNodeFromName(a->NodeName(), nullptr, false) != nullptr)
{
fprintf(stderr, "PairNetwork: asked to pair a node with name %ls in another network. However, this network has already a node with the same name. Should avoid this case.\n", a->NodeName().c_str());
RuntimeError("PairNetwork: asked to pair a node with name in another network. However, this network has already a node with the same name. Should avoid this case.\n");
}
return net.AddNodeToNetAndAttachInputs(New<PairNetworkNode<ElemType>>(net.GetDeviceId(), nodeName), a);
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Convolution(const ComputationNodePtr weight,
const ComputationNodePtr inputValues,
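The TODO repeated above concerns the two overlapping factory families in the builder: the Create*Node functions construct and register a node only, while functions like Convolution (and the removed PairNetwork) also attach inputs. A toy sketch of that split, with hypothetical types standing in for the real builder:

#include <memory>
#include <string>
#include <vector>

// Hypothetical miniature of the two factory families: one that only creates
// and registers a node, and one that also wires up its inputs.
struct Node
{
    std::string name;
    std::vector<std::shared_ptr<Node>> inputs;
};
using NodePtr = std::shared_ptr<Node>;

struct MiniBuilder
{
    std::vector<NodePtr> net; // stands in for the network's node table

    NodePtr CreateNode(const std::string& name) // create + register only
    {
        auto n = std::make_shared<Node>();
        n->name = name;
        net.push_back(n);
        return n;
    }

    NodePtr Times(const NodePtr& a, const NodePtr& b) // create + attach inputs
    {
        auto n = CreateNode("Times");
        n->inputs = {a, b}; // the "AddNodeToNetAndAttachInputs" half of the split
        return n;
    }
};

int main()
{
    MiniBuilder b;
    auto w = b.CreateNode("W");
    auto x = b.CreateNode("x");
    auto y = b.Times(w, x); // y has inputs {W, x}; all three nodes are registered
    return y->inputs.size() == 2 ? 0 : 1;
}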

View file

@ -51,7 +51,6 @@ public:
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const size_t rows);
ComputationNodePtr CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
ComputationNodePtr CreatePairNetworkNode(const std::wstring& inputName, const size_t rows, const size_t cols);
ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0);
ComputationNodePtr CreateMaxPoolingNode(const std::wstring& nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind);
ComputationNodePtr CreateAveragePoolingNode(const std::wstring& nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind);
@ -60,7 +59,6 @@ public:
ComputationNodePtr CreateComputationNode(const std::wstring& nodeType, const std::wstring& nodeName);
// The following functions create nodes and link them to the network and their inputs.
// TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
ComputationNodePtr PairNetwork(const ComputationNodePtr& a, const std::wstring nodeName = L"");
ComputationNodePtr Convolution(const ComputationNodePtr weight,
const ComputationNodePtr inputValues,
const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,

View file

@ -1662,98 +1662,6 @@ public:
template class StrideTimesNode<float>;
template class StrideTimesNode<double>;
// -----------------------------------------------------------------------
// PairNetworkNode (input)
// -----------------------------------------------------------------------
/**
pair this node to a node in another network
this node provide an interface from this network. The next layer network then can use this interface to know which node to connect to.
*/
template <class ElemType>
class PairNetworkNode : public ComputationNode<ElemType>, public NumInputs<1>
{
typedef ComputationNode<ElemType> Base;
UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName()
{
return L"PairNetwork";
}
void Init(size_t row_size, size_t /*col_size*/)
{
CreateMatrixIfNull(m_value);
SetDims(TensorShape(row_size), HasMBLayout());
UpdateFunctionValuesSize();
}
public:
DeclareConstructorFromConfigWithNumInputs(PairNetworkNode);
PairNetworkNode(DEVICEID_TYPE deviceId, const wstring& name, size_t row_size = 1, size_t col_size = 1)
: Base(deviceId, name)
{
Init(row_size, col_size);
CreateMatrixIfNull(m_gradient);
m_gradient->Resize(row_size, col_size);
m_gradient->SetValue(0.0f);
}
virtual void Load(File& fstream, size_t modelVersion) override
{
Init(1, 1); // TODO: this looks wrong; should the dimension not come from the loaded model data?
Base::Load(fstream, modelVersion);
}
/// to-do: need to change to the new way of resetting state
void BackpropToMap(const size_t inputIndex)
{
if (inputIndex > 0)
InvalidArgument("PairNetwork operation only takes one input.");
Matrix<ElemType>::ScaleAndAdd(1.0, Gradient(), Input(inputIndex)->Gradient());
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
if (fr.IsAllFrames())
{
BackpropToMap(inputIndex);
return;
} // TODO: remove these one by one
assert(GetSampleMatrixNumRows() == Gradient().GetNumRows()); // original used m_value->GetNumRows() for loop dimension
assert(m_pMBLayout);
Matrix<ElemType> mTmp = Input(inputIndex)->GradientFor(fr);
Matrix<ElemType>::ScaleAndAdd(1.0, GradientFor(fr), mTmp);
}
virtual bool OutputUsedInComputingInputNodesGradients() const override
{
return false;
}
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
{
return false;
}
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
{
Matrix<ElemType> mTmp = ValueFor(fr);
mTmp.SetValue(Input(0)->ValueFor(fr));
}
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
{
Base::Validate(isFinalValidationPass);
InferMBLayoutFromInputsForStandardCase();
SetDims(Input(0));
}
};
template class PairNetworkNode<float>;
template class PairNetworkNode<double>;
// -----------------------------------------------------------------------
// ParallelNode (input0, input1)
// TODO: How is this different from RowStack?
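From the deleted body above, the removed node's contract is simple: ForwardProp copies the single input's value, and BackpropTo adds the node's gradient onto the input's gradient via ScaleAndAdd; the actual pairing across networks lived outside the node. A stripped-down illustration of that contract with plain vectors — illustrative code, not CNTK API:

#include <cstdio>
#include <vector>

// Illustrative stand-in for the deleted PairNetworkNode's math: forward is an
// identity copy of the single input; backward accumulates the node's incoming
// gradient into the input's gradient (the ScaleAndAdd with factor 1.0 above).
struct PairLikeNode
{
    std::vector<float> value, gradient;

    void Forward(const std::vector<float>& inputValue)
    {
        value = inputValue; // mirrors ValueFor(fr).SetValue(Input(0)->ValueFor(fr))
    }

    void Backward(std::vector<float>& inputGradient) const
    {
        // mirrors Matrix<ElemType>::ScaleAndAdd(1.0, GradientFor(fr), inputGradient);
        // assumes inputGradient has the same size as gradient
        for (size_t i = 0; i < gradient.size(); i++)
            inputGradient[i] += gradient[i];
    }
};

int main()
{
    PairLikeNode node;
    node.Forward({1.0f, 2.0f});   // value becomes {1, 2}
    node.gradient = {0.5f, 0.5f}; // pretend the paired network sent this back
    std::vector<float> inputGrad(2, 0.0f);
    node.Backward(inputGrad);     // inputGrad becomes {0.5, 0.5}
    printf("%g %g\n", inputGrad[0], inputGrad[1]);
}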

View file

@ -2,12 +2,12 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
//helpful macros
// helpful macros
// TODO: the file's name is too general to be included from outside; MathHelpers.h?
//iterators
//
#pragma once
// iterators
#undef foreach_row
#undef foreach_column
#undef foreach_coord
@ -19,5 +19,5 @@
for (long _i = 0; _i < (_m).GetNumRows(); _i++)
#define foreach_row_in_submat(_i, _istart, _iend, _m) for (long _i = _istart; _i < min(_iend, (_m).GetNumRows()); _i++)
//this functions returns the index of the first column element in the columnwise array representing matrix with _numRows rows
// this functions returns the index of the first column element in the columnwise array representing matrix with _numRows rows
#define column_s_ind_colwisem(_colNum, _numRows) ((_numRows) * (_colNum))
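column_s_ind_colwisem spells out column-major addressing: the first element of column _colNum sits at flat offset _numRows * _colNum, so element (i, j) lives at that offset plus i. A quick self-check using just the macro (standalone, no other CNTK headers assumed):

#include <cassert>

#define column_s_ind_colwisem(_colNum, _numRows) ((_numRows) * (_colNum))

int main()
{
    // 3x2 matrix stored column-wise: {a00, a10, a20, a01, a11, a21}, aij = 10*j + i
    const long numRows = 3;
    float m[] = {0, 1, 2, 10, 11, 12};
    // first element of column 1 starts at flat offset 3 * 1 = 3
    assert(m[column_s_ind_colwisem(1, numRows)] == 10);
    // element (i = 2, j = 1) is the column start plus the row index
    assert(m[column_s_ind_colwisem(1, numRows) + 2] == 12);
    return 0;
}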

View file

@ -10,7 +10,6 @@
#include "CompositeComputationNodes.h" // for PrecomputeNode #include "CompositeComputationNodes.h" // for PrecomputeNode
#include "SimpleEvaluator.h" #include "SimpleEvaluator.h"
#include "DataReader.h" #include "DataReader.h"
#include "IComputationNetBuilder.h"
#include "ScriptableObjects.h" #include "ScriptableObjects.h"
#include <vector> #include <vector>
#include <string> #include <string>

View file

@ -8,6 +8,7 @@
#include "DataReader.h" #include "DataReader.h"
#include "ComputationNetwork.h" #include "ComputationNetwork.h"
#include "DataReaderHelpers.h" #include "DataReaderHelpers.h"
#include "Helpers.h"
#include "fileutil.h" #include "fileutil.h"
#include <vector> #include <vector>
#include <string> #include <string>