made everything build again after deleting the unused MultiNetworks functions, including deleting the related entries in SimpleNetworkBuilder;
deleted PairNetworkNode; renamed EsotericActions.cpp to SpecialPurposeActions.cpp
Parent: 18b7b36cca
Commit: a4a20183aa
@@ -43,16 +43,6 @@ void DoWriteWordAndClassInfo(const ConfigParameters& config);
 template <typename ElemType>
 void DoTopologyPlot(const ConfigParameters& config);
 
-// deprecated (EsotericActions.cp)
+// special purpose (SpecialPurposeActions.cpp)
 template <typename ElemType>
 void DoConvertFromDbn(const ConfigParameters& config);
-template <typename ElemType>
-void DoEvalUnroll(const ConfigParameters& config);
-template <typename ElemType>
-void DoEncoderDecoder(const ConfigParameters& config);
-template <typename ElemType>
-void DoBidirectionEncoderDecoder(const ConfigParameters& config);
-template <typename ElemType>
-void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config);
-template <typename ElemType>
-void DoBeamSearchDecoding(const ConfigParameters& config);
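For context, the actions above follow a declare-in-the-header, define-and-explicitly-instantiate-in-a-.cpp pattern, as the "template void DoConvertFromDbn<float>(...)" lines in the files further down show. A minimal self-contained sketch of that pattern, with a hypothetical DoSomething standing in for the real actions (which take a ConfigParameters&):

#include <iostream>
#include <string>

// header side (Actions.h style): declaration only
template <typename ElemType>
void DoSomething(const std::string& config);

// implementation side: out-of-line definition plus explicit instantiations,
// so the float and double versions get compiled even though callers only
// ever see the declaration
template <typename ElemType>
void DoSomething(const std::string& config)
{
    std::cout << sizeof(ElemType) * 8 << "-bit elements: " << config << "\n";
}

template void DoSomething<float>(const std::string& config);
template void DoSomething<double>(const std::string& config);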
@@ -170,7 +170,7 @@
 <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
 </ClCompile>
 <ClCompile Include="..\Common\TimerUtility.cpp" />
-<ClCompile Include="EsotericActions.cpp" />
+<ClCompile Include="SpecialPurposeActions.cpp" />
 <ClCompile Include="EvalActions.cpp" />
 <ClCompile Include="OtherActions.cpp" />
 <ClCompile Include="TrainActions.cpp" />
@@ -19,7 +19,7 @@
 <ClCompile Include="OtherActions.cpp">
 <Filter>Actions</Filter>
 </ClCompile>
-<ClCompile Include="EsotericActions.cpp">
+<ClCompile Include="SpecialPurposeActions.cpp">
 <Filter>Actions</Filter>
 </ClCompile>
 </ItemGroup>
@@ -1,424 +0,0 @@
-//
-// Copyright (c) Microsoft. All rights reserved.
-// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
-//
-// EsotericActions.cpp -- CNTK actions that are deprecated
-//
-
-#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
-
-#include "stdafx.h"
-#include "Basics.h"
-#include "Actions.h"
-#include "ComputationNetwork.h"
-#include "ComputationNode.h"
-#include "DataReader.h"
-#include "DataWriter.h"
-#include "SimpleNetworkBuilder.h"
-#include "NDLNetworkBuilder.h"
-#include "SynchronousExecutionEngine.h"
-#include "ModelEditLanguage.h"
-#include "SGD.h"
-#include "Config.h"
-#include "MultiNetworksSGD.h"
-#include "SimpleEvaluator.h"
-#include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
-#include "BestGpu.h"
-#include "ScriptableObjects.h"
-#include "BrainScriptEvaluator.h"
-#include "BrainScriptParser.h"
-
-#include <string>
-#include <chrono>
-#include <algorithm>
-#include <vector>
-#include <iostream>
-#include <queue>
-#include <set>
-#include <memory>
-
-#ifndef let
-#define let const auto
-#endif
-
-using namespace std;
-using namespace Microsoft::MSR;
-using namespace Microsoft::MSR::CNTK;
-
-// ===========================================================================
-// DoConvertFromDbn() - implements CNTK "convertdbn" command
-// ===========================================================================
-
-template <typename ElemType>
-void DoConvertFromDbn(const ConfigParameters& config)
-{
-    wstring modelPath = config(L"modelPath");
-    wstring dbnModelPath = config(L"dbnModelPath");
-
-    auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
-    ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
-    net->Save(modelPath);
-}
-
-template void DoConvertFromDbn<float>(const ConfigParameters& config);
-template void DoConvertFromDbn<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoEvalUnroll() - implements CNTK "testunroll" command
-// ===========================================================================
-
-// Special early implementation of RNNs by emulating them as a DNN.
-// The code is very restricted to simple RNNs.
-// The idea can be used for more complicated network but need to know which nodes are stateful or time-dependent so that unroll is done in a correct way to represent recurrent networks.
-// TODO: can probably be removed.
-template <typename ElemType>
-void DoEvalUnroll(const ConfigParameters& config)
-{
-    //test
-    ConfigParameters readerConfig(config(L"reader"));
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> testDataReader(readerConfig);
-
-    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
-    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
-    size_t epochSize = config(L"epochSize", "0");
-    if (epochSize == 0)
-    {
-        epochSize = requestDataSize;
-    }
-    wstring modelPath = config(L"modelPath");
-    intargvector mbSize = minibatchSize;
-    wstring path2EvalResults = config(L"path2EvalResults", L"");
-
-    auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
-
-    MultiNetworksEvaluator<ElemType> eval(net);
-    double evalEntropy;
-    eval.EvaluateUnroll(&testDataReader, mbSize[0], evalEntropy, path2EvalResults == L"" ? nullptr : path2EvalResults.c_str(), epochSize);
-}
-
-template void DoEvalUnroll<float>(const ConfigParameters& config);
-template void DoEvalUnroll<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoEncoderDecoder() - implements CNTK "trainEncoderDecoder" command
-// ===========================================================================
-
-/**
-This implements sequence to sequence translation paper in
-http://arxiv.org/pdf/1409.3215.pdf
-
-*/
-template <typename ElemType>
-void DoEncoderDecoder(const ConfigParameters& config)
-{
-    vector<IComputationNetBuilder<ElemType>*> netBuilders;
-    vector<IDataReader<ElemType>*> trainDataReader;
-    vector<IDataReader<ElemType>*> validationDataReader;
-
-    ConfigParameters configSGD = config(L"SGD");
-    bool makeMode = config(L"makeMode", "true");
-    IComputationNetBuilder<ElemType>* encoderNetBuilder = NULL;
-    IComputationNetBuilder<ElemType>* decoderNetBuilder = NULL;
-
-    ConfigParameters readerConfig = config(L"encoderReader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType>* encoderDataReader = new DataReader<ElemType>(readerConfig);
-
-    ConfigParameters decoderReaderConfig = config(L"decoderReader");
-    DataReader<ElemType>* decoderDataReader = new DataReader<ElemType>(decoderReaderConfig);
-
-    ConfigParameters cvEncoderReaderConfig = config(L"encoderCVReader");
-    DataReader<ElemType>* cvEncoderDataReader = new DataReader<ElemType>(cvEncoderReaderConfig);
-
-    ConfigParameters cvDecoderReaderConfig = config(L"decoderCVReader");
-    DataReader<ElemType>* cvDecoderDataReader = new DataReader<ElemType>(cvDecoderReaderConfig);
-
-    if (config.Exists("EncoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"EncoderNetworkBuilder");
-        encoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need encoder network");
-    }
-
-    if (config.Exists("DecoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"DecoderNetworkBuilder");
-        decoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need decoder networks");
-    }
-
-    MultiNetworksSGD<ElemType> sgd(configSGD);
-
-    sgd.InitTrainEncoderDecoderWithHiddenStates(configSGD);
-
-    netBuilders.push_back(encoderNetBuilder);
-    netBuilders.push_back(decoderNetBuilder);
-    trainDataReader.push_back(encoderDataReader);
-    trainDataReader.push_back(decoderDataReader);
-    validationDataReader.push_back(cvEncoderDataReader);
-    validationDataReader.push_back(cvDecoderDataReader);
-
-    sgd.EncoderDecoder(netBuilders, (int) config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
-
-    delete encoderDataReader;
-    delete decoderDataReader;
-    delete cvEncoderDataReader;
-    delete cvDecoderDataReader;
-}
-
-template void DoEncoderDecoder<float>(const ConfigParameters& config);
-template void DoEncoderDecoder<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoBidirectionEncoderDecoder() - implements CNTK "trainBidirectionEncoderDecoder" command
-// ===========================================================================
-
-/**
-DoBidirecionEncoderDecoder
-*/
-template <typename ElemType>
-void DoBidirectionEncoderDecoder(const ConfigParameters& config)
-{
-
-    ConfigParameters configSGD = config(L"SGD");
-    bool makeMode = config(L"makeMode", "true");
-    IComputationNetBuilder<ElemType>* encoderNetBuilder = NULL;
-    IComputationNetBuilder<ElemType>* forwardDecoderNetBuilder = NULL;
-    IComputationNetBuilder<ElemType>* backwardDecoderNetBuilder = NULL;
-    vector<IComputationNetBuilder<ElemType>*> netBuilders;
-    vector<IDataReader<ElemType>*> trainDataReader;
-    vector<IDataReader<ElemType>*> validationDataReader;
-
-    ConfigParameters readerConfig = config(L"encoderReader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType>* encoderDataReader = new DataReader<ElemType>(readerConfig);
-
-    ConfigParameters decoderReaderConfig = config(L"decoderReader");
-    DataReader<ElemType>* decoderDataReader = new DataReader<ElemType>(decoderReaderConfig);
-
-    ConfigParameters backwardDecoderReaderConfig = config(L"backwardDecoderReader");
-    DataReader<ElemType>* backwardDecoderDataReader = new DataReader<ElemType>(backwardDecoderReaderConfig);
-
-    ConfigParameters cvEncoderReaderConfig = config(L"encoderCVReader");
-    DataReader<ElemType>* cvEncoderDataReader = new DataReader<ElemType>(cvEncoderReaderConfig);
-
-    ConfigParameters cvDecoderReaderConfig = config(L"decoderCVReader");
-    DataReader<ElemType>* cvDecoderDataReader = new DataReader<ElemType>(cvDecoderReaderConfig);
-
-    ConfigParameters cvBackwardDecoderReaderConfig = config(L"BackwardDecoderCVReader");
-    DataReader<ElemType>* cvBackwardDecoderDataReader = new DataReader<ElemType>(cvBackwardDecoderReaderConfig);
-
-    if (config.Exists("EncoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"EncoderNetworkBuilder");
-        encoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-        LogicError("Need encoder network");
-
-    if (config.Exists("DecoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"DecoderNetworkBuilder");
-        forwardDecoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need decoder networks");
-    }
-
-    if (config.Exists("BackwardDecoderNetworkBuilder"))
-    {
-        ConfigParameters configSNB = config(L"BackwardDecoderNetworkBuilder");
-        backwardDecoderNetBuilder = (IComputationNetBuilder<ElemType>*) new SimpleNetworkBuilder<ElemType>(configSNB);
-    }
-    else
-    {
-        LogicError("Need decoder networks");
-    }
-
-    MultiNetworksSGD<ElemType> sgd(configSGD);
-
-    sgd.InitTrainEncoderDecoderWithHiddenStates(configSGD);
-
-    netBuilders.push_back(encoderNetBuilder);
-    netBuilders.push_back(forwardDecoderNetBuilder);
-    netBuilders.push_back(backwardDecoderNetBuilder);
-    trainDataReader.push_back(encoderDataReader);
-    trainDataReader.push_back(decoderDataReader);
-    trainDataReader.push_back(backwardDecoderDataReader);
-    validationDataReader.push_back(cvEncoderDataReader);
-    validationDataReader.push_back(cvDecoderDataReader);
-    validationDataReader.push_back(cvBackwardDecoderDataReader);
-
-    sgd.EncoderDecoder(netBuilders, (int) config(L"deviceId"), trainDataReader, validationDataReader, makeMode);
-
-    delete encoderDataReader;
-    delete decoderDataReader;
-    delete cvEncoderDataReader;
-    delete cvDecoderDataReader;
-    delete backwardDecoderDataReader;
-    delete cvBackwardDecoderDataReader;
-}
-
-template void DoBidirectionEncoderDecoder<float>(const ConfigParameters& config);
-template void DoBidirectionEncoderDecoder<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoEvalEncodingBeamSearchDecoding() - implements CNTK "testEncoderDecoder" command
-// ===========================================================================
-
-/**
-Originally, this is for testing models trained using the sequence to sequence translation method below
-http://arxiv.org/pdf/1409.3215.pdf
-Later on, it is extended to be more general to include a sequence of network operations.
-*/
-template <typename ElemType>
-void DoEvalEncodingBeamSearchDecoding(const ConfigParameters& config)
-{
-    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
-
-    vector<IDataReader<ElemType>*> readers;
-    ConfigParameters readerConfig = config(L"encoderReader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> encoderReader(readerConfig);
-
-    ConfigParameters decoderReaderConfig = config(L"decoderReader");
-    decoderReaderConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> decoderReader(decoderReaderConfig);
-
-    readers.push_back(&encoderReader);
-    readers.push_back(&decoderReader);
-
-    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
-    size_t epochSize = config(L"epochSize", "0");
-    if (epochSize == 0)
-    {
-        epochSize = requestDataSize;
-    }
-
-    wstring encoderModelPath = config(L"encoderModelPath");
-    wstring decoderModelPath = config(L"decoderModelPath");
-
-    intargvector mbSize = minibatchSize;
-
-    int traceLevel = config(L"traceLevel", "0");
-    size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
-
-    vector<ComputationNetworkPtr> nets;
-    auto encoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, encoderModelPath, FileOptions::fileOptionsBinary, true);
-
-    auto decoderNet = ComputationNetwork::CreateFromFile<ElemType>(deviceId, decoderModelPath, FileOptions::fileOptionsBinary, false, encoderNet.get());
-
-    nets.push_back(encoderNet);
-    nets.push_back(decoderNet);
-    ConfigArray evalNodeNames = config(L"evalNodeNames");
-    vector<wstring> evalNodeNamesVector;
-    for (int i = 0; i < evalNodeNames.size(); ++i)
-    {
-        evalNodeNamesVector.push_back(evalNodeNames[i]);
-    }
-
-    ConfigArray outputNodeNames = config(L"outputNodeNames");
-    vector<wstring> outputNodeNamesVector;
-    for (int i = 0; i < outputNodeNames.size(); ++i)
-    {
-        outputNodeNamesVector.push_back(outputNodeNames[i]);
-    }
-
-    ElemType beamWidth = config(L"beamWidth", "1");
-
-    ConfigParameters writerConfig = config(L"writer");
-    DataWriter<ElemType> testDataWriter(writerConfig);
-
-    MultiNetworksEvaluator<ElemType> eval(decoderNet, numMBsToShowResult, traceLevel);
-    eval.InitTrainEncoderDecoderWithHiddenStates(config);
-
-    eval.EncodingEvaluateDecodingBeamSearch(nets, readers,
-                                            testDataWriter, evalNodeNamesVector,
-                                            outputNodeNamesVector,
-                                            mbSize[0], beamWidth, epochSize);
-}
-
-template void DoEvalEncodingBeamSearchDecoding<float>(const ConfigParameters& config);
-template void DoEvalEncodingBeamSearchDecoding<double>(const ConfigParameters& config);
-
-// ===========================================================================
-// DoBeamSearchDecoding() - implements CNTK "beamSearch" command
-// ===========================================================================
-
-template <typename ElemType>
-static void DoEvalBeamSearch(const ConfigParameters& config, IDataReader<ElemType>& reader)
-{
-    DEVICEID_TYPE deviceId = DeviceFromConfig(config);
-    ConfigArray minibatchSize = config(L"minibatchSize", "40960");
-    size_t epochSize = config(L"epochSize", "0");
-    if (epochSize == 0)
-    {
-        epochSize = requestDataSize;
-    }
-    wstring modelPath = config(L"modelPath");
-    intargvector mbSize = minibatchSize;
-
-    int traceLevel = config(L"traceLevel", "0");
-    size_t numMBsToShowResult = config(L"numMBsToShowResult", "100");
-
-    auto net = ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelPath);
-
-    ConfigArray evalNodeNames = config(L"evalNodeNames");
-    vector<wstring> evalNodeNamesVector;
-    for (int i = 0; i < evalNodeNames.size(); ++i)
-    {
-        evalNodeNamesVector.push_back(evalNodeNames[i]);
-    }
-
-    ConfigArray outputNodeNames = config(L"outputNodeNames");
-    vector<wstring> outputNodeNamesVector;
-    for (int i = 0; i < outputNodeNames.size(); ++i)
-    {
-        outputNodeNamesVector.push_back(outputNodeNames[i]);
-    }
-
-    ElemType beamWidth = config(L"beamWidth", "1");
-
-    ConfigParameters writerConfig = config(L"writer");
-    DataWriter<ElemType> testDataWriter(writerConfig);
-
-    MultiNetworksEvaluator<ElemType> eval(net, numMBsToShowResult, traceLevel);
-    eval.BeamSearch(&reader, testDataWriter, evalNodeNamesVector, outputNodeNamesVector, mbSize[0], beamWidth, epochSize);
-}
-
-/**
-This is beam search decoder.
-
-Developed by Kaisheng Yao.
-
-It is used in the following work:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion" in Interspeech 2015
-*/
-template <typename ElemType>
-void DoBeamSearchDecoding(const ConfigParameters& config)
-{
-    //test
-    ConfigParameters readerConfig = config(L"reader");
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
-
-    DataReader<ElemType> testDataReader(readerConfig);
-
-    DoEvalBeamSearch(config, testDataReader);
-}
-
-template void DoBeamSearchDecoding<float>(const ConfigParameters& config);
-template void DoBeamSearchDecoding<double>(const ConfigParameters& config);
@@ -14,20 +14,12 @@
#include "ComputationNode.h"
#include "DataReader.h"
#include "DataWriter.h"
#include "SimpleNetworkBuilder.h"
#include "NDLNetworkBuilder.h"
#include "SynchronousExecutionEngine.h"
#include "ModelEditLanguage.h"
#include "SGD.h"
#include "Config.h"
#include "MultiNetworksSGD.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
#include "MultiNetworksEvaluator.h"
#include "BestGpu.h"
#include "ScriptableObjects.h"
#include "BrainScriptEvaluator.h"
#include "BrainScriptParser.h"

#include <string>
#include <chrono>
@@ -12,22 +12,9 @@
#include "Actions.h"
#include "ComputationNetwork.h"
#include "ComputationNode.h"
#include "DataReader.h"
#include "DataWriter.h"
#include "SimpleNetworkBuilder.h"
#include "NDLNetworkBuilder.h"
#include "SynchronousExecutionEngine.h"
#include "ModelEditLanguage.h"
#include "SGD.h"
#include "Config.h"
#include "MultiNetworksSGD.h"
#include "SimpleEvaluator.h"
#include "SimpleOutputWriter.h"
#include "MultiNetworksEvaluator.h"
#include "BestGpu.h"
#include "ScriptableObjects.h"
#include "BrainScriptEvaluator.h"
#include "BrainScriptParser.h"

#include <string>
#include <chrono>
@@ -0,0 +1,54 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
+//
+// SpecialPurposeActions.cpp -- CNTK actions that serve special purposes
+//
+
+#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
+
+#include "stdafx.h"
+#include "Basics.h"
+#include "Actions.h"
+#include "ComputationNetwork.h"
+#include "ComputationNode.h"
+#include "DataReader.h"
+#include "DataWriter.h"
+#include "SimpleNetworkBuilder.h"
+#include "Config.h"
+#include "ScriptableObjects.h"
+
+#include <string>
+#include <chrono>
+#include <algorithm>
+#include <vector>
+#include <iostream>
+#include <queue>
+#include <set>
+#include <memory>
+
+#ifndef let
+#define let const auto
+#endif
+
+using namespace std;
+using namespace Microsoft::MSR;
+using namespace Microsoft::MSR::CNTK;
+
+// ===========================================================================
+// DoConvertFromDbn() - implements CNTK "convertdbn" command
+// ===========================================================================
+
+template <typename ElemType>
+void DoConvertFromDbn(const ConfigParameters& config)
+{
+    wstring modelPath = config(L"modelPath");
+    wstring dbnModelPath = config(L"dbnModelPath");
+
+    auto netBuilder = make_shared<SimpleNetworkBuilder<ElemType>>(config);
+    ComputationNetworkPtr net = netBuilder->BuildNetworkFromDbnFile(dbnModelPath);
+    net->Save(modelPath);
+}
+
+template void DoConvertFromDbn<float>(const ConfigParameters& config);
+template void DoConvertFromDbn<double>(const ConfigParameters& config);
@@ -20,10 +20,8 @@
 #include "ModelEditLanguage.h"
 #include "SGD.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ScriptableObjects.h"
 #include "BrainScriptEvaluator.h"
@@ -50,76 +48,15 @@ using namespace Microsoft::MSR::CNTK;
 // DoTrain() - implements CNTK "train" command
 // ===========================================================================
 
-template <class ElemType>
-class BrainScriptNetworkBuilder : public IComputationNetBuilder<ElemType>
-{
-    typedef shared_ptr<ComputationNetwork> ComputationNetworkPtr;
-    ComputationNetworkPtr m_net;
-    ScriptableObjects::ConfigLambdaPtr m_createNetworkFn;
-    DEVICEID_TYPE m_deviceId;
-
-public:
-    // the constructor remembers the config lambda
-    // TODO: Really this should just take the lambda itself, or rather, this class should just be replaced by a lambda. But we need the IConfigRecord for templates to be compile-compatible with old CNTK config.
-    BrainScriptNetworkBuilder(const ScriptableObjects::IConfigRecord& config)
-    {
-        m_deviceId = config[L"deviceId"]; // TODO: only needed for LoadNetworkFromFile() which should go away anyway
-        m_createNetworkFn = config[L"createNetwork"].AsPtr<ScriptableObjects::ConfigLambda>();
-    }
-    // not supported for old CNTK
-    BrainScriptNetworkBuilder(const ConfigParameters& config)
-    {
-        NOT_IMPLEMENTED;
-    }
-
-    // build a ComputationNetwork from description language
-    virtual /*IComputationNetBuilder::*/ ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* = nullptr) override
-    {
-        vector<ScriptableObjects::ConfigValuePtr> args; // this lambda has no arguments
-        ScriptableObjects::ConfigLambda::NamedParams namedArgs;
-        let netValue = m_createNetworkFn->Apply(move(args), move(namedArgs), L"BuildNetworkFromDescription");
-        m_net = netValue.AsPtr<ComputationNetwork>();
-        if (m_net->GetDeviceId() < 0)
-            fprintf(stderr, "BrainScriptNetworkBuilder using CPU\n");
-        else
-            fprintf(stderr, "BrainScriptNetworkBuilder using GPU %d\n", (int) m_net->GetDeviceId());
-        return m_net;
-    }
-
-    // load an existing file--this is the same code as for NDLNetworkBuilder.h (OK to copy it here because this is temporary code anyway)
-    // TODO: This does not belong into NetworkBuilder, since the code is the same for all. Just create the network and load the darn thing.
-    virtual /*IComputationNetBuilder::*/ ComputationNetwork* LoadNetworkFromFile(const wstring& modelFileName, bool forceLoad = true,
-                                                                                 bool bAllowNoCriterionNode = false, ComputationNetwork* anotherNetwork = nullptr) override
-    {
-        if (!m_net || m_net->GetTotalNumberOfNodes() == 0 || forceLoad) //not built or force load --TODO: why all these options?
-        {
-            auto net = make_shared<ComputationNetwork>(m_deviceId);
-            net->Load<ElemType>(modelFileName, FileOptions::fileOptionsBinary, bAllowNoCriterionNode, anotherNetwork);
-            m_net = net;
-        }
-        m_net->ResetEvalTimeStamps();
-        return m_net.get();
-    }
-};
-
 // TODO: decide where these should go. Also, do we need three variables?
 extern wstring standardFunctions;
 extern wstring commonMacros;
 extern wstring computationNodes;
 
 // helper that returns 'float' or 'double' depending on ElemType
-template <class ElemType>
-static const wchar_t* ElemTypeName();
-template <>
-/*static*/ const wchar_t* ElemTypeName<float>()
-{
-    return L"float";
-}
-template <>
-/*static*/ const wchar_t* ElemTypeName<double>()
-{
-    return L"double";
-}
+template <class ElemType> static const wchar_t* ElemTypeName();
+template <> /*static*/ const wchar_t* ElemTypeName<float>() { return L"float"; }
+template <> /*static*/ const wchar_t* ElemTypeName<double>() { return L"double"; }
 
 function<ComputationNetworkPtr(DEVICEID_TYPE)> GetCreateNetworkFn(const ScriptableObjects::IConfigRecord& config)
 {
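The hunk above drops the interim BrainScriptNetworkBuilder wrapper class (whose own TODO already suggested replacing it with a lambda), apparently in favor of the GetCreateNetworkFn factory visible in the trailing context line. A rough self-contained sketch of that shape, with hypothetical stand-in types:

#include <functional>
#include <memory>

using DeviceId = int;                   // stand-in for DEVICEID_TYPE
struct Network { DeviceId deviceId; };  // stand-in for ComputationNetwork
using NetworkPtr = std::shared_ptr<Network>;

// returns a deferred "create the network on this device" action, mirroring
// the function<ComputationNetworkPtr(DEVICEID_TYPE)> signature above
std::function<NetworkPtr(DeviceId)> GetCreateNetworkFnSketch()
{
    return [](DeviceId deviceId) { return std::make_shared<Network>(Network{deviceId}); };
}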
@@ -75,51 +75,50 @@ wstring computationNodes = // TODO: use actual TypeName() here? would first need
 QuaternaryStandardNode(ClassBasedCrossEntropyWithSoftmax, labelClassDescriptorVectorSequence, mainInputInfo, mainWeight, classLogProbsBeforeSoftmax)
 // BUGBUG: the commented-out ones are not mentioned in the CNTK book, nor are their parameters documented in the source code
 BinaryStandardNode(ColumnElementTimes, aVectorSequence, anotherVectorSequence)
 BinaryStandardNode(CosDistance, aVectorSequence, anotherVectorSequence)
 QuaternaryStandardNode(CosDistanceWithNegativeSamples, aVectorSequence, anotherVectorSequence, numShifts, numNegSamples)
 //BinaryStandardNode(CosDistanceWithNegativeSamplesNode)
 UnaryStandardNode(Cosine, x)
 BinaryStandardNode(CrossEntropy, refProbVectorSequence, outProbVectorSequence)
 BinaryStandardNode(CrossEntropyWithSoftmax, labelVectorSequence, outProbVectorSequence)
 BinaryStandardNode(DiagTimes, diagonalMatrixAsColumnVector, matrix)
 UnaryStandardNode(Dropout, activationVectorSequence)
 //BinaryStandardNode(DummyCriterionNode)
 BinaryStandardNode(ElementTimes, aMatrix, anotherMatrix)
 BinaryStandardNode(ErrorPrediction, labelVectorSequence, outVectorSequence) // CNTKBook: ClassificationError?
 UnaryStandardNode(Exp, x)
 QuaternaryStandardNode(GMMLogLikelihood, unnormalizedPriorVector, meansAsRows, logStdDevAsRows, dataVectorSequence)
 UnaryStandardNode(InvStdDev, dataVectorSequence)
 BinaryStandardNode(KhatriRaoProduct, leftMatrix, rightMatrix)
 //BinaryStandardNode(LSTMNode)
 UnaryStandardNode(Log, x)
 UnaryStandardNode(LogSoftmax, z)
 //BinaryStandardNode(LookupTableNode)
 UnaryStandardNode(MatrixL1Reg, matrix)
 UnaryStandardNode(MatrixL2Reg, matrix)
 // BUGBUG: CNTKBook also mentions L1Norm and L2Norm
 UnaryStandardNode(Mean, dataVectorSequence)
 BinaryStandardNode(Minus, leftMatrix, rightMatrix)
 UnaryStandardNode(Negate, input)
 //BinaryStandardNode(NoiseContrastiveEstimationNode)
-//BinaryStandardNode(PairNetworkNode)
 //BinaryStandardNode(ParallelNode)
 TernaryStandardNode(PerDimMeanVarDeNormalization, dataVectorSequence, meanVector, invStdDevVector) // TODO: correct?
 TernaryStandardNode(PerDimMeanVarNormalization, dataVectorSequence, meanVector, invStdDevVector)
 BinaryStandardNode(Plus, leftMatrix, rightMatrix)
 UnaryStandardNode(RectifiedLinear, z)
 //BinaryStandardNode(RowElementTimesNode)
 BinaryStandardNode(Scale, scalarScalingFactor, matrix)
 //BinaryStandardNode(SequenceDecoderNode)
 UnaryStandardNode(Sigmoid, z)
 UnaryStandardNode(Softmax, z)
 UnaryStandardNode(Hardmax, z)
 BinaryStandardNode(SquareError, aMatrix, anotherMatrix)
 //BinaryStandardNode(StrideTimesNode)
 //BinaryStandardNode(SumColumnElementsNode)
 UnaryStandardNode(SumElements, matrix)
 UnaryStandardNode(Tanh, z)
 UnaryStandardNode(TimeReverse, vectorSequence)
 BinaryStandardNode(Times, leftMatrix, rightMatrix)
 UnaryStandardNode(Transpose, matrix)
 //BinaryStandardNode(TransposeTimesNode)
 ;
@@ -23,10 +23,8 @@
 #include "SGD.h"
 #include "MPIWrapper.h"
 #include "Config.h"
-#include "MultiNetworksSGD.h"
 #include "SimpleEvaluator.h"
 #include "SimpleOutputWriter.h"
-#include "MultiNetworksEvaluator.h"
 #include "BestGpu.h"
 #include "ProgressTracing.h"
 #include "fileutil.h"
@@ -240,10 +238,6 @@ void DoCommands(const ConfigParameters& config)
 {
     DoEval<ElemType>(commandParams);
 }
-else if (action[j] == "testunroll")
-{
-    DoEvalUnroll<ElemType>(commandParams);
-}
 else if (action[j] == "edit")
 {
     DoEdit<ElemType>(commandParams);
@@ -284,22 +278,6 @@ void DoCommands(const ConfigParameters& config)
 {
     DoParameterSVD<ElemType>(commandParams);
 }
-else if (action[j] == "trainEncoderDecoder")
-{
-    DoEncoderDecoder<ElemType>(commandParams);
-}
-else if (action[j] == "testEncoderDecoder")
-{
-    DoEvalEncodingBeamSearchDecoding<ElemType>(commandParams);
-}
-else if (action[j] == "trainBidirectionEncoderDecoder")
-{
-    DoBidirectionEncoderDecoder<ElemType>(commandParams);
-}
-else if (action[j] == "beamSearch")
-{
-    DoBeamSearchDecoding<ElemType>(commandParams);
-}
 else
 {
     RuntimeError("unknown action: %s in command set: %s", action[j].c_str(), command[i].c_str());
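DoCommands() dispatches on the action string with an else-if chain, so retiring an action means deleting its branch here plus its declaration and definition elsewhere. A self-contained sketch of the pattern, using hypothetical stub actions:

#include <stdexcept>
#include <string>

static void DoTrainStub() {}
static void DoEvalStub() {}

void DispatchAction(const std::string& action)
{
    if (action == "train")
        DoTrainStub();
    else if (action == "eval")
        DoEvalStub();
    // the "testunroll", "trainEncoderDecoder", "testEncoderDecoder",
    // "trainBidirectionEncoderDecoder", and "beamSearch" branches were
    // deleted by this commit, together with the Do* functions they called
    else
        throw std::runtime_error("unknown action: " + action);
}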
@@ -280,8 +280,6 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
     ret = true;
 else if (EqualInsensitive(nodeType, OperationNameOf(LSTMNode), L"LSTM"))
     ret = true;
-else if (EqualInsensitive(nodeType, OperationNameOf(PairNetworkNode), L"PairNetwork"))
-    ret = true;
 else if (EqualInsensitive(nodeType, OperationNameOf(StrideTimesNode), L"StrideTimes"))
     ret = true;
 else if (EqualInsensitive(nodeType, OperationNameOf(BatchNormalizationNode)))
@@ -59,9 +59,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescriptio
 case RCRF:
     net = BuildSeqTrnLSTMNetworkFromDescription();
     break;
-case LSTMENCODER:
-    net = BuildLSTMEncoderNetworkFromDescription();
-    break;
 case UNIDIRECTIONALLSTM:
     net = BuildUnidirectionalLSTMNetworksFromDescription();
     break;
@@ -72,35 +69,12 @@
     LogicError("BuildNetworkFromDescription: invalid m_rnnType %d", (int) m_rnnType);
 }
 
-#if 1
 // post-process the network
 net->CompileNetwork();
-#else
-net->ValidateNetwork(false /*allowFragment*/, true /*bAllowNoCriterion*/); // no criterion possible because ...TODO: what's the reason?
-#endif
 
 return net;
 }
 
-// special version for a deprecated implementation of sequence-to-sequence models
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildNetworkFromDescription(ComputationNetwork* encoderNet)
-{
-    ComputationNetworkPtr net;
-    switch (m_rnnType)
-    {
-    case ALIGNMENTSIMILARITYGENERATOR:
-        net = BuildAlignmentDecoderNetworkFromDescription(encoderNet);
-        net->CompileNetwork();
-        return net;
-    case ALIGNMENTSIMILARITYGFORWARDDECODER:
-        net = BuildAlignmentForwardDecoderNetworkFromDescription(encoderNet);
-        net->CompileNetwork();
-        return net;
-    }
-    return BuildNetworkFromDescription();
-}
-
 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildSimpleDNN()
 {
@@ -530,265 +504,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildConditionalLSTMNetwor
 return m_net;
 }
 
-/**
-this builds an alignment based LM generator
-the aligment node takes a variable length input and relates each element to a variable length output
-*/
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet)
-{
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
-    {
-        unsigned long randomSeed = 1;
-
-        size_t numHiddenLayers = m_layerSizes.size() - 2;
-
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-
-        ComputationNodePtr input, encoderOutput, e,
-            b, w, u, pastValue, output, label, alignoutput;
-        ComputationNodePtr clslogpostprob;
-        ComputationNodePtr clsweight;
-        ComputationNodePtr columnStride, rowStride;
-
-        input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
-        m_net->FeatureNodes().push_back(input);
-
-        if (m_lookupTableOrder > 0)
-        {
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, L"LookupTable");
-
-            if (m_addDropoutNodes)
-                input = builder.Dropout(output);
-            else
-                input = output;
-        }
-        else
-        {
-            LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
-        }
-
-        int recur_idx = 0;
-        int offset = m_lookupTableOrder > 0 ? 1 : 0;
-
-        /// the source network side output dimension needs to match the 1st layer dimension in the decoder network
-        std::vector<ComputationNodeBasePtr>& encoderPairNodes = encoderNet->PairNodes();
-        if (encoderPairNodes.size() != 1)
-            LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one pairoutput node as source node for the decoder network: ");
-
-        encoderOutput = builder.PairNetwork(dynamic_pointer_cast<ComputationNode<ElemType>>(encoderPairNodes[0]), L"pairNetwork");
-
-        /// the source network side output dimension needs to match the 1st layer dimension in the decoder network
-        std::vector<ComputationNodeBasePtr>& encoderEvaluationNodes = encoderNet->OutputNodes();
-        if (encoderEvaluationNodes.size() != 1)
-            LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one output node as source node for the decoder network: ");
-
-        if (numHiddenLayers > 0)
-        {
-            int i = 1 + offset;
-            u = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i], m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1));
-            m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
-            w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]);
-            m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
-
-            pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i], 1);
-            // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-            // output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-
-            /// alignment node to get weights from source to target
-            /// this aligment node computes weights of the current hidden state after special encoder ending symbol to all
-            /// states before the special encoder ending symbol. The weights are used to summarize all encoder inputs.
-            /// the weighted sum of inputs are then used as the additional input to the LSTM input in the next layer
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"MatForSimilarity%d", i), m_layerSizes[i], m_layerSizes[i]);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-
-            columnStride = builder.CreateLearnableParameter(L"columnStride", 1, 1);
-            columnStride->Value().SetValue(1);
-            columnStride->SetParameterUpdateRequired(false);
-            rowStride = builder.CreateLearnableParameter(L"rowStride", 1, 1);
-            rowStride->Value().SetValue(0);
-            rowStride->SetParameterUpdateRequired(false);
-            alignoutput = builder.StrideTimes(encoderOutput, builder.Softmax(builder.StrideTimes(builder.Times(builder.Transpose(encoderOutput), e), pastValue, rowStride)), columnStride);
-
-            // alignoutput = builder.Times(encoderOutput, builder.Softmax(builder.Times(builder.Times(builder.Transpose(encoderOutput), e), pastValue)));
-
-            output = ApplyNonlinearFunction(
-                builder.Plus(
-                    builder.Times(u, input), builder.Times(w, alignoutput)),
-                0);
-            pastValue->AttachInputs(output);
-            input = output;
-
-            for (; i < numHiddenLayers; i++)
-            {
-                //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-                output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-
-                if (m_addDropoutNodes)
-                    input = builder.Dropout(output);
-                else
-                    input = output;
-            }
-        }
-
-        /// need to have [input_dim x output_dim] matrix
-        /// e.g., [200 x 10000], where 10000 is the vocabulary size
-        /// this is for speed-up issue as per word matrix can be simply obtained using column slice
-        w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"OW%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
-        m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
-
-        /// the label is a dense matrix. each element is the word index
-        label = builder.CreateInputNode(L"labels", 4);
-
-        clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
-        m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
-        clslogpostprob = builder.Times(clsweight, input, L"ClassPostProb");
-
-        output = builder.Times(builder.Transpose(w), input, L"outputs");
-
-        m_net->PairNodes().push_back(input);
-
-        m_net->OutputNodes().push_back(output);
-
-        //add softmax layer (if prob is needed or KL reg adaptation is needed)
-        output = builder.Softmax(output, L"PosteriorProb");
-    }
-
-    return m_net;
-}
-
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet)
-{
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
-    {
-        unsigned long randomSeed = 1;
-
-        size_t numHiddenLayers = m_layerSizes.size() - 2;
-
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-
-        ComputationNodePtr input, encoderOutput, e,
-            b, w, u, pastValue, output, label, alignoutput;
-        ComputationNodePtr clslogpostprob;
-        ComputationNodePtr clsweight;
-        ComputationNodePtr columnStride, rowStride;
-
-        input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
-        m_net->FeatureNodes().push_back(input);
-
-        if (m_lookupTableOrder > 0)
-        {
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"E%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, L"LookupTable");
-
-            if (m_addDropoutNodes)
-                input = builder.Dropout(output);
-            else
-                input = output;
-        }
-        else
-        {
-            LogicError("BuildCLASSLSTMNetworkFromDescription: LSTMNode cannot take sparse input. Need to project sparse input to continuous vector using LookupTable. Suggest using setups below\n layerSizes=$VOCABSIZE$:100:$HIDDIM$:$VOCABSIZE$ \nto have 100 dimension projection, and lookupTableOrder=1\n to project to a single window. To use larger context window, set lookupTableOrder=3 for example with width-3 context window.\n ");
-        }
-
-        int recur_idx = 0;
-        int offset = m_lookupTableOrder > 0 ? 1 : 0;
-
-        /// the source network side output dimension needs to match the 1st layer dimension in the decoder network
-        std::vector<ComputationNodeBasePtr>& encoderPairNodes = encoderNet->PairNodes();
-        if (encoderPairNodes.size() != 1)
-            LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one pairoutput node as source node for the decoder network: ");
-
-        encoderOutput = builder.PairNetwork(dynamic_pointer_cast<ComputationNode<ElemType>>(encoderPairNodes[0]), L"pairNetwork");
-
-        /// the source network side output dimension needs to match the 1st layer dimension in the decoder network
-        std::vector<ComputationNodeBasePtr>& encoderEvaluationNodes = encoderNet->OutputNodes();
-        if (encoderEvaluationNodes.size() != 1)
-            LogicError("BuildAlignmentDecoderNetworkFromDescription: encoder network should have only one output node as source node for the decoder network: ");
-
-        if (numHiddenLayers > 0)
-        {
-            int i = 1 + offset;
-            u = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"U%d", i), m_layerSizes[i], m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1));
-            m_net->InitLearnableParameters(u, m_uniformInit, randomSeed++, m_initValueScale);
-            w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"W%d", i), m_layerSizes[i], m_layerSizes[i]);
-            m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
-
-            pastValue = builder.PastValue(NULL, m_defaultHiddenActivity, (size_t) m_layerSizes[i], 1);
-            // output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-            // output = (ComputationNodePtr)BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-
-            /// alignment node to get weights from source to target
-            /// this aligment node computes weights of the current hidden state after special encoder ending symbol to all
-            /// states before the special encoder ending symbol. The weights are used to summarize all encoder inputs.
-            /// the weighted sum of inputs are then used as the additional input to the LSTM input in the next layer
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"MatForSimilarity%d", i), m_layerSizes[i], m_layerSizes[i]);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-
-            columnStride = builder.CreateLearnableParameter(L"columnStride", 1, 1);
-            columnStride->Value().SetValue(1);
-            columnStride->SetParameterUpdateRequired(false);
-            rowStride = builder.CreateLearnableParameter(L"rowStride", 1, 1);
-            rowStride->Value().SetValue(0);
-            rowStride->SetParameterUpdateRequired(false);
-            alignoutput = builder.StrideTimes(encoderOutput, builder.Softmax(builder.StrideTimes(builder.Times(builder.Transpose(encoderOutput), e), pastValue, rowStride)), columnStride);
-
-            // alignoutput = builder.Times(encoderOutput, builder.Softmax(builder.Times(builder.Times(builder.Transpose(encoderOutput), e), pastValue)));
-
-            output = ApplyNonlinearFunction(
-                builder.Plus(
-                    builder.Times(u, input), builder.Times(w, alignoutput)),
-                0);
-            pastValue->AttachInputs(output);
-            input = output;
-
-            for (; i < numHiddenLayers; i++)
-            {
-                //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-                output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-
-                if (m_addDropoutNodes)
-                    input = builder.Dropout(output);
-                else
-                    input = output;
-            }
-        }
-
-        /// need to have [input_dim x output_dim] matrix
-        /// e.g., [200 x 10000], where 10000 is the vocabulary size
-        /// this is for speed-up issue as per word matrix can be simply obtained using column slice
-        w = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"OW%d", numHiddenLayers), m_layerSizes[numHiddenLayers], m_layerSizes[numHiddenLayers + 1]);
-        m_net->InitLearnableParameters(w, m_uniformInit, randomSeed++, m_initValueScale);
-
-        /// the label is a dense matrix. each element is the word index
-        label = builder.CreateInputNode(L"labels", 4);
-
-        clsweight = builder.CreateLearnableParameter(L"WeightForClassPostProb", m_nbrCls, m_layerSizes[numHiddenLayers]);
-        m_net->InitLearnableParameters(clsweight, m_uniformInit, randomSeed++, m_initValueScale);
-        clslogpostprob = builder.Times(clsweight, input, L"ClassPostProb");
-
-        output = AddTrainAndEvalCriterionNodes(input, label, w, L"TrainNodeClassBasedCrossEntropy", L"EvalNodeClassBasedCrossEntrpy",
-                                               clslogpostprob);
-
-        output = builder.Times(builder.Transpose(w), input, L"outputs");
-
-        m_net->PairNodes().push_back(input);
-
-        m_net->OutputNodes().push_back(output);
-
-        //add softmax layer (if prob is needed or KL reg adaptation is needed)
-        output = builder.Softmax(output, L"PosteriorProb");
-    }
-
-    return m_net;
-}
-
 template <class ElemType>
 ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLogBilinearNetworkFromDescription()
 {
@@ -1608,95 +1323,6 @@ ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMNetworkFromDescri
 return m_net;
 }
 
-/**
-This is encoder LSTM described in the following papers:
-H. Sutskever, O. Vinyals and Q. V. Le, "Sequence to sequence learning with neural networks", http://arxiv.org/abs/1409.3215
-
-The following code constructs the encoder and, to construct decoder, use BuildLSTMNetworkFromDescription
-
-Developed by Kaisheng Yao
-This is used in the following works:
-K. Yao, G. Zweig, "Sequence-to-sequence neural net models for grapheme-to-phoneme conversion, submitted to Interspeech 2015
-*/
-template <class ElemType>
-ComputationNetworkPtr SimpleNetworkBuilder<ElemType>::BuildLSTMEncoderNetworkFromDescription()
-{
-
-    ComputationNetworkBuilder<ElemType> builder(*m_net);
-    if (m_net->GetTotalNumberOfNodes() < 1) //not built yet
-    {
-        ULONG randomSeed = 1;
-
-        size_t i = 0;
-        size_t numHiddenLayers = m_layerSizes.size() - 1;
-
-        size_t numRecurrentLayers = m_recurrentLayers.size();
-
-        ComputationNodePtr input, w, b, u, e, pastValue, output, label, prior;
-
-        if (m_sparse_input)
-            input = builder.CreateSparseInputNode(L"features", m_layerSizes[0]);
-        else
-            input = builder.CreateInputNode(L"features", m_layerSizes[0]);
-
-        m_net->FeatureNodes().push_back(input);
-
-        if (m_applyMeanVarNorm)
-        {
-            w = builder.Mean(input);
-            b = builder.InvStdDev(input);
-            output = builder.PerDimMeanVarNormalization(input, w, b);
-
-            input = output;
-        }
-
-        if (m_lookupTableOrder > 0)
-        {
-            e = builder.CreateLearnableParameter(msra::strfun::wstrprintf(L"EncoderE%d", 0), m_layerSizes[1], m_layerSizes[0] / m_lookupTableOrder);
-            m_net->InitLearnableParameters(e, m_uniformInit, randomSeed++, m_initValueScale);
-            output = builder.LookupTable(e, input, L"EncoderLookupTable");
-#ifdef DEBUG_DECODER
-            e->Value().SetValue((ElemType) 0.01);
-#endif
-
-            if (m_addDropoutNodes)
-                input = builder.Dropout(output);
-            else
-                input = output;
-            i++;
-        }
-
-        /// direct connect from input node to output node
-
-        int recur_idx = 0;
-        int offset = m_lookupTableOrder > 0 ? 1 : 0;
-        if (numHiddenLayers > 0)
-        {
-            //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-            output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, 0, m_layerSizes[offset] * (offset ? m_lookupTableOrder : 1), m_layerSizes[offset + 1], input);
-            input = output;
-            i++;
-
-            for (; i < numHiddenLayers; i++)
-            {
-                //output = (ComputationNodePtr)BuildLSTMNodeComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-                output = (ComputationNodePtr) BuildLSTMComponent(randomSeed, i, m_layerSizes[i], m_layerSizes[i + 1], input);
-
-                if (m_addDropoutNodes)
-                    input = builder.Dropout(output);
-                else
-                    input = output;
-            }
-        }
-
-        m_net->OutputNodes().push_back(output);
-        m_net->PairNodes().push_back(output); /// need to provide pairnodes so that the next layer of network can connect to this network
-        m_net->EvaluationNodes().push_back(output);
-    }
-
-    return m_net;
-}
-
 /**
 Build unidirectional LSTM p(y_t | y_t-1, x_1^t)
 
@@ -38,16 +38,13 @@ enum RNNTYPE
 DEEPRNN = 4,
 CLASSLM = 8,
 LBLM = 16,
-LSTMENCODER = 18,
 NPLM = 32,
 CLASSLSTM = 64,
 NCELSTM = 128,
 CLSTM = 256,
 RCRF = 512,
 UNIDIRECTIONALLSTM = 19,
-BIDIRECTIONALLSTM = 20,
-ALIGNMENTSIMILARITYGENERATOR = 21,
-ALIGNMENTSIMILARITYGFORWARDDECODER = 22
+BIDIRECTIONALLSTM = 20
 };
 
 enum class TrainingCriterion : int // TODO: camel-case these
@@ -191,18 +188,12 @@ public:
     m_rnnType = CLSTM;
 else if (std::find(strType.begin(), strType.end(), L"CRF") != strType.end())
     m_rnnType = RCRF;
-else if (std::find(strType.begin(), strType.end(), L"LSTMENCODER") != strType.end())
-    m_rnnType = LSTMENCODER;
 else if (std::find(strType.begin(), strType.end(), L"TRANSDUCER") != strType.end() ||
          std::find(strType.begin(), strType.end(), L"UNIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
     m_rnnType = UNIDIRECTIONALLSTM;
 else if (std::find(strType.begin(), strType.end(), L"JOINTCONDITIONALBILSTMSTREAMS") != strType.end() ||
          std::find(strType.begin(), strType.end(), L"BIDIRECTIONALLSTMWITHPASTPREDICTION") != strType.end())
     m_rnnType = BIDIRECTIONALLSTM;
-else if (std::find(strType.begin(), strType.end(), L"ALIGNMENTSIMILARITYGENERATOR") != strType.end())
-    m_rnnType = ALIGNMENTSIMILARITYGENERATOR;
-else if (std::find(strType.begin(), strType.end(), L"ALIGNMENTSIMILARITYGFORWARDDECODER") != strType.end())
-    m_rnnType = ALIGNMENTSIMILARITYGFORWARDDECODER;
 else
     InvalidArgument("InitRecurrentConfig: unknown value for rnnType parameter '%ls'", strType[0].c_str());
 }
@@ -255,7 +246,6 @@ public:
 }
 
 ComputationNetworkPtr BuildNetworkFromDescription();
-ComputationNetworkPtr BuildNetworkFromDescription(ComputationNetwork* encoderNet); // legacy support of deprecated sequence-to-sequence implementation
 
 ComputationNetworkPtr BuildNetworkFromDbnFile(const std::wstring& dbnModelFileName); // legacy support for fseide's Microsoft-internal tool "DBN.exe"
 
@@ -287,8 +277,6 @@ protected:
 
 ComputationNetworkPtr BuildSeqTrnLSTMNetworkFromDescription();
 
-ComputationNetworkPtr BuildLSTMEncoderNetworkFromDescription();
-
 ComputationNetworkPtr BuildUnidirectionalLSTMNetworksFromDescription();
 
 ComputationNetworkPtr BuildBiDirectionalLSTMNetworksFromDescription();
@ -299,10 +287,6 @@ protected:
|
|||
|
||||
ComputationNetworkPtr BuildNCELSTMNetworkFromDescription();
|
||||
|
||||
ComputationNetworkPtr BuildAlignmentForwardDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
|
||||
|
||||
ComputationNetworkPtr BuildAlignmentDecoderNetworkFromDescription(ComputationNetwork* encoderNet);
|
||||
|
||||
//layer is 0 based
|
||||
ComputationNodePtr ApplyNonlinearFunction(ComputationNodePtr input, const size_t layer, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr AddTrainAndEvalCriterionNodes(ComputationNodePtr input, ComputationNodePtr label, ComputationNodePtr matrix = nullptr, const std::wstring trainNodeName = L"", const std::wstring evalNodeName = L"", ComputationNodePtr clspostprob = nullptr, ComputationNodePtr trans = nullptr);
|
||||
|
|
|

@ -183,7 +183,6 @@ static int DetermineLoopDirection(const std::vector<ComputationNodeBasePtr>& nes
// This sets index, lowLink, m_visited, and m_inStack.
void ComputationNetwork::DetermineSCCs(const ComputationNodeBasePtr& rootNode)
{
    // note that this graph includes graphs from parent networks if two or more networks are connected via PairNetworkNode
    list<ComputationNodeBasePtr> sccStack;
    size_t index = 0;
    size_t loopId = 0; // BUGBUG: I think this is currently buggy in an edge case, and not needed (use m_allSEQNodes.size() instead).
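The index/lowLink/m_visited/m_inStack bookkeeping above is Tarjan's strongly-connected-components algorithm. A self-contained sketch of that recursion over a plain adjacency list (the Node struct and graph layout here are illustrative stand-ins, not CNTK's node types):

#include <algorithm>
#include <utility>
#include <vector>

struct Node
{
    std::vector<int> children;     // edges to input nodes
    size_t index = 0, lowLink = 0; // discovery order and smallest reachable index
    bool visited = false, inStack = false;
};

static void StrongConnect(std::vector<Node>& graph, int u, size_t& index,
                          std::vector<int>& sccStack, std::vector<std::vector<int>>& sccs)
{
    Node& n = graph[u];
    n.index = n.lowLink = index++; // discovery order doubles as the initial low-link
    n.visited = true;
    n.inStack = true;
    sccStack.push_back(u);

    for (int v : n.children)
    {
        if (!graph[v].visited) // tree edge: recurse, then propagate the child's low-link
        {
            StrongConnect(graph, v, index, sccStack, sccs);
            n.lowLink = std::min(n.lowLink, graph[v].lowLink);
        }
        else if (graph[v].inStack) // back edge into the SCC currently being built
            n.lowLink = std::min(n.lowLink, graph[v].index);
    }

    if (n.lowLink == n.index) // u is the root of an SCC: pop the whole component
    {
        std::vector<int> scc;
        int v;
        do
        {
            v = sccStack.back();
            sccStack.pop_back();
            graph[v].inStack = false;
            scc.push_back(v);
        } while (v != u);
        sccs.push_back(std::move(scc));
    }
}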

@ -100,8 +100,6 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
        return New<NegateNode<ElemType>>(forward<_Types>(_Args)...);
    else if (nodeType == OperationNameOf(NoiseContrastiveEstimationNode))
        return New<NoiseContrastiveEstimationNode<ElemType>>(forward<_Types>(_Args)...);
    else if (nodeType == OperationNameOf(PairNetworkNode))
        return New<PairNetworkNode<ElemType>>(forward<_Types>(_Args)...);
    else if (nodeType == OperationNameOf(ParallelNode))
        return New<ParallelNode<ElemType>>(forward<_Types>(_Args)...);
    else if (nodeType == OperationNameOf(PastValueNode))
@ -293,12 +291,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat
    return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout));
}

template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreatePairNetworkNode(const std::wstring& inputName, const size_t rows, const size_t cols)
{
    return net.AddNodeToNetWithElemType(New<PairNetworkNode<ElemType>>(net.GetDeviceId(), inputName, rows, cols));
}

template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateConvolutionNode(const std::wstring& nodeName,
                                                                                                 const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,
@ -342,17 +334,6 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat
// The following functions create nodes and link them to the network and their inputs.
// TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?

template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::PairNetwork(const ComputationNodePtr& a, const std::wstring nodeName)
{
    if (net.GetNodeFromName(a->NodeName(), nullptr, false) != nullptr)
    {
        fprintf(stderr, "PairNetwork: asked to pair a node named %ls from another network, but this network already has a node with that name. This case should be avoided.\n", a->NodeName().c_str());
        RuntimeError("PairNetwork: asked to pair a node from another network, but this network already has a node with the same name. This case should be avoided.\n");
    }
    return net.AddNodeToNetAndAttachInputs(New<PairNetworkNode<ElemType>>(net.GetDeviceId(), nodeName), a);
}
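A hypothetical usage of the deleted helper, to show what the name-collision check above guards against; all network, builder, and node names here are invented:

// In the encoder network, publish the node to be shared:
//     encoderNet->PairNodes().push_back(encoderTopLayer);
// In the decoder network, mirror it through PairNetwork and consume it as an
// ordinary input; the check above fires if a node with encoderTopLayer's name
// already exists in the decoder network:
//     auto paired = decoderBuilder.PairNetwork(encoderTopLayer, L"encoderTop");
//     auto proj = decoderBuilder.Times(W, paired);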

template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Convolution(const ComputationNodePtr weight,
                                                                                       const ComputationNodePtr inputValues,
@ -51,7 +51,6 @@ public:
    ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const size_t rows);
    ComputationNodePtr CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
    ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
    ComputationNodePtr CreatePairNetworkNode(const std::wstring& inputName, const size_t rows, const size_t cols);
    ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0);
    ComputationNodePtr CreateMaxPoolingNode(const std::wstring& nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind);
    ComputationNodePtr CreateAveragePoolingNode(const std::wstring& nodeName, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind);

@ -60,7 +59,6 @@ public:
    ComputationNodePtr CreateComputationNode(const std::wstring& nodeType, const std::wstring& nodeName);
    // The following functions create nodes and link them to the network and their inputs.
    // TODO: Do we need both this set and the one above that does not add inputs? Can they share more code?
    ComputationNodePtr PairNetwork(const ComputationNodePtr& a, const std::wstring nodeName = L"");
    ComputationNodePtr Convolution(const ComputationNodePtr weight,
                                   const ComputationNodePtr inputValues,
                                   const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,
@ -1662,98 +1662,6 @@ public:
template class StrideTimesNode<float>;
template class StrideTimesNode<double>;

// -----------------------------------------------------------------------
// PairNetworkNode (input)
// -----------------------------------------------------------------------

/**
Pairs this node to a node in another network.
This node provides an interface out of this network; the next network in the chain can use this interface to know which node to connect to.
*/
template <class ElemType>
class PairNetworkNode : public ComputationNode<ElemType>, public NumInputs<1>
{
    typedef ComputationNode<ElemType> Base;
    UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName()
    {
        return L"PairNetwork";
    }

    void Init(size_t row_size, size_t /*col_size*/)
    {
        CreateMatrixIfNull(m_value);
        SetDims(TensorShape(row_size), HasMBLayout());
        UpdateFunctionValuesSize();
    }

public:
    DeclareConstructorFromConfigWithNumInputs(PairNetworkNode);
    PairNetworkNode(DEVICEID_TYPE deviceId, const wstring& name, size_t row_size = 1, size_t col_size = 1)
        : Base(deviceId, name)
    {
        Init(row_size, col_size);
        CreateMatrixIfNull(m_gradient);
        m_gradient->Resize(row_size, col_size);
        m_gradient->SetValue(0.0f);
    }

    virtual void Load(File& fstream, size_t modelVersion) override
    {
        Init(1, 1); // TODO: this looks wrong; should the dimension not come from the loaded model data?
        Base::Load(fstream, modelVersion);
    }

    /// TODO: needs to change to the new way of resetting state
    void BackpropToMap(const size_t inputIndex)
    {
        if (inputIndex > 0)
            InvalidArgument("PairNetwork operation only takes one input.");

        Matrix<ElemType>::ScaleAndAdd(1.0, Gradient(), Input(inputIndex)->Gradient());
    }

    virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
    {
        if (fr.IsAllFrames())
        {
            BackpropToMap(inputIndex);
            return;
        } // TODO: remove these one by one
        assert(GetSampleMatrixNumRows() == Gradient().GetNumRows()); // original used m_value->GetNumRows() for loop dimension
        assert(m_pMBLayout);

        Matrix<ElemType> mTmp = Input(inputIndex)->GradientFor(fr);
        Matrix<ElemType>::ScaleAndAdd(1.0, GradientFor(fr), mTmp);
    }

    virtual bool OutputUsedInComputingInputNodesGradients() const override
    {
        return false;
    }
    virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override
    {
        return false;
    }

    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange& fr) override
    {
        Matrix<ElemType> mTmp = ValueFor(fr);
        mTmp.SetValue(Input(0)->ValueFor(fr));
    }

    virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override
    {
        Base::Validate(isFinalValidationPass);
        InferMBLayoutFromInputsForStandardCase();

        SetDims(Input(0));
    }
};

template class PairNetworkNode<float>;
template class PairNetworkNode<double>;
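Stripped of the ComputationNode machinery, the deleted node was an identity bridge between two networks: ForwardProp copies the paired input's value, and BackpropTo accumulates this node's gradient into the paired input's gradient. A free-standing sketch of just that arithmetic, with a stand-in matrix type rather than CNTK's Matrix:

#include <cstddef>
#include <vector>

template <class ElemType>
struct Mat // minimal stand-in for Matrix<ElemType>
{
    std::vector<ElemType> data;
    explicit Mat(size_t n = 0) : data(n, ElemType(0)) {}
};

// ForwardProp equivalent: the pair node's value is a plain copy of the value
// computed by the node it is paired with (which lives in another network).
template <class ElemType>
void PairForward(const Mat<ElemType>& pairedValue, Mat<ElemType>& value)
{
    value.data = pairedValue.data;
}

// BackpropTo equivalent of ScaleAndAdd(1.0, Gradient(), Input(0)->Gradient()):
// the gradient arriving at the pair node is *added* into the paired node's
// gradient, so both networks contribute to the shared node's update.
template <class ElemType>
void PairBackprop(const Mat<ElemType>& gradient, Mat<ElemType>& pairedGradient)
{
    for (size_t i = 0; i < gradient.data.size(); i++)
        pairedGradient.data[i] += gradient.data[i];
}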

// -----------------------------------------------------------------------
// ParallelNode (input0, input1)
// TODO: How is this different from RowStack?
@ -2,12 +2,12 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
//helpful macros
// helpful macros
// TODO: the file's name is too general to be included from outside; MathHelpers.h?

//iterators
//
#pragma once
//#pragma once

// iterators
#undef foreach_row
#undef foreach_column
#undef foreach_coord
@ -19,5 +19,5 @@
    for (long _i = 0; _i < (_m).GetNumRows(); _i++)
#define foreach_row_in_submat(_i, _istart, _iend, _m) for (long _i = _istart; _i < min(_iend, (_m).GetNumRows()); _i++)

//this functions returns the index of the first column element in the columnwise array representing matrix with _numRows rows
// this function returns the index of the first column element in the column-wise array representing a matrix with _numRows rows
#define column_s_ind_colwisem(_colNum, _numRows) ((_numRows) * (_colNum))
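A usage note for the macro, with made-up dimensions:

// In a column-major array backing a 4 x 3 matrix, column j starts at
// column_s_ind_colwisem(j, 4) = 4 * j, so element (i, j) lives at flat
// index 4 * j + i:
//   column_s_ind_colwisem(0, 4) == 0    // column 0 starts at index 0
//   column_s_ind_colwisem(2, 4) == 8    // column 2 starts at index 8
//   element (3, 2)              -> 8 + 3 == 11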

@ -10,7 +10,6 @@
#include "CompositeComputationNodes.h" // for PrecomputeNode
#include "SimpleEvaluator.h"
#include "DataReader.h"
#include "IComputationNetBuilder.h"
#include "ScriptableObjects.h"
#include <vector>
#include <string>

@ -8,6 +8,7 @@
#include "DataReader.h"
#include "ComputationNetwork.h"
#include "DataReaderHelpers.h"
#include "Helpers.h"
#include "fileutil.h"
#include <vector>
#include <string>